1 | //===- VLIWMachineScheduler.cpp - VLIW-Focused Scheduling Pass ------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // MachineScheduler schedules machine instructions after phi elimination. It |
10 | // preserves LiveIntervals so it can be invoked before register allocation. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/CodeGen/VLIWMachineScheduler.h" |
15 | #include "llvm/ADT/SmallVector.h" |
16 | #include "llvm/CodeGen/DFAPacketizer.h" |
17 | #include "llvm/CodeGen/MachineBasicBlock.h" |
18 | #include "llvm/CodeGen/MachineFunction.h" |
19 | #include "llvm/CodeGen/MachineInstr.h" |
20 | #include "llvm/CodeGen/MachineLoopInfo.h" |
21 | #include "llvm/CodeGen/RegisterClassInfo.h" |
22 | #include "llvm/CodeGen/RegisterPressure.h" |
23 | #include "llvm/CodeGen/ScheduleDAG.h" |
24 | #include "llvm/CodeGen/ScheduleHazardRecognizer.h" |
25 | #include "llvm/CodeGen/TargetInstrInfo.h" |
26 | #include "llvm/CodeGen/TargetOpcodes.h" |
27 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
28 | #include "llvm/CodeGen/TargetSchedule.h" |
29 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
30 | #include "llvm/Support/CommandLine.h" |
31 | #include "llvm/Support/Debug.h" |
32 | #include "llvm/Support/raw_ostream.h" |
33 | #include <algorithm> |
34 | #include <cassert> |
35 | #include <iomanip> |
36 | #include <limits> |
37 | #include <memory> |
38 | #include <sstream> |
39 | |
40 | using namespace llvm; |
41 | |
42 | #define DEBUG_TYPE "machine-scheduler" |
43 | |
44 | static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure" , cl::Hidden, |
45 | cl::init(Val: false)); |
46 | |
47 | static cl::opt<bool> UseNewerCandidate("use-newer-candidate" , cl::Hidden, |
48 | cl::init(Val: true)); |
49 | |
50 | static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level" , |
51 | cl::Hidden, cl::init(Val: 1)); |
52 | |
53 | // Check if the scheduler should penalize instructions that are available to |
54 | // early due to a zero-latency dependence. |
55 | static cl::opt<bool> CheckEarlyAvail("check-early-avail" , cl::Hidden, |
56 | cl::init(Val: true)); |
57 | |
58 | // This value is used to determine if a register class is a high pressure set. |
59 | // We compute the maximum number of registers needed and divided by the total |
60 | // available. Then, we compare the result to this value. |
61 | static cl::opt<float> RPThreshold("vliw-misched-reg-pressure" , cl::Hidden, |
62 | cl::init(Val: 0.75f), |
63 | cl::desc("High register pressure threhold." )); |
64 | |
65 | VLIWResourceModel::VLIWResourceModel(const TargetSubtargetInfo &STI, |
66 | const TargetSchedModel *SM) |
67 | : TII(STI.getInstrInfo()), SchedModel(SM) { |
68 | ResourcesModel = createPacketizer(STI); |
69 | |
70 | // This hard requirement could be relaxed, |
71 | // but for now do not let it proceed. |
72 | assert(ResourcesModel && "Unimplemented CreateTargetScheduleState." ); |
73 | |
74 | Packet.reserve(N: SchedModel->getIssueWidth()); |
75 | Packet.clear(); |
76 | ResourcesModel->clearResources(); |
77 | } |
78 | |
/// Forget the packet under construction and release all DFA resources so
/// packet formation restarts from an empty state.
void VLIWResourceModel::reset() {
  Packet.clear();
  ResourcesModel->clearResources();
}
83 | |
// The model owns the DFA packetizer allocated in the constructor.
VLIWResourceModel::~VLIWResourceModel() { delete ResourcesModel; }
85 | |
86 | /// Return true if there is a dependence between SUd and SUu. |
87 | bool VLIWResourceModel::hasDependence(const SUnit *SUd, const SUnit *SUu) { |
88 | if (SUd->Succs.size() == 0) |
89 | return false; |
90 | |
91 | for (const auto &S : SUd->Succs) { |
92 | // Since we do not add pseudos to packets, might as well |
93 | // ignore order dependencies. |
94 | if (S.isCtrl()) |
95 | continue; |
96 | |
97 | if (S.getSUnit() == SUu && S.getLatency() > 0) |
98 | return true; |
99 | } |
100 | return false; |
101 | } |
102 | |
103 | /// Check if scheduling of this SU is possible |
104 | /// in the current packet. |
105 | /// It is _not_ precise (statefull), it is more like |
106 | /// another heuristic. Many corner cases are figured |
107 | /// empirically. |
108 | bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) { |
109 | if (!SU || !SU->getInstr()) |
110 | return false; |
111 | |
112 | // First see if the pipeline could receive this instruction |
113 | // in the current cycle. |
114 | switch (SU->getInstr()->getOpcode()) { |
115 | default: |
116 | if (!ResourcesModel->canReserveResources(MI&: *SU->getInstr())) |
117 | return false; |
118 | break; |
119 | case TargetOpcode::EXTRACT_SUBREG: |
120 | case TargetOpcode::INSERT_SUBREG: |
121 | case TargetOpcode::SUBREG_TO_REG: |
122 | case TargetOpcode::REG_SEQUENCE: |
123 | case TargetOpcode::IMPLICIT_DEF: |
124 | case TargetOpcode::COPY: |
125 | case TargetOpcode::INLINEASM: |
126 | case TargetOpcode::INLINEASM_BR: |
127 | break; |
128 | } |
129 | |
130 | // Now see if there are no other dependencies to instructions already |
131 | // in the packet. |
132 | if (IsTop) { |
133 | for (const SUnit *U : Packet) |
134 | if (hasDependence(SUd: U, SUu: SU)) |
135 | return false; |
136 | } else { |
137 | for (const SUnit *U : Packet) |
138 | if (hasDependence(SUd: SU, SUu: U)) |
139 | return false; |
140 | } |
141 | return true; |
142 | } |
143 | |
/// Keep track of available resources.
///
/// Places \p SU into the current packet, first starting a new packet if the
/// instruction does not fit (resource conflict or the packet already holds
/// IssueWidth instructions). Returns true when a new cycle/packet was started.
bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) {
  bool startNewCycle = false;
  // Artificially reset state: the scheduler passes a null SU to force the
  // start of a fresh packet (e.g. when it advances the cycle).
  if (!SU) {
    reset();
    TotalPackets++;
    return false;
  }
  // If this SU does not fit in the packet or the packet is now full
  // start a new one.
  if (!isResourceAvailable(SU, IsTop) ||
      Packet.size() >= SchedModel->getIssueWidth()) {
    reset();
    TotalPackets++;
    startNewCycle = true;
  }

  // Reserve DFA resources for real instructions; pseudos, labels, copies and
  // inline asm consume no functional units.
  switch (SU->getInstr()->getOpcode()) {
  default:
    ResourcesModel->reserveResources(*SU->getInstr());
    break;
  case TargetOpcode::EXTRACT_SUBREG:
  case TargetOpcode::INSERT_SUBREG:
  case TargetOpcode::SUBREG_TO_REG:
  case TargetOpcode::REG_SEQUENCE:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
  case TargetOpcode::CFI_INSTRUCTION:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::COPY:
  case TargetOpcode::INLINEASM:
  case TargetOpcode::INLINEASM_BR:
    break;
  }
  Packet.push_back(SU);

#ifndef NDEBUG
  // Dump the packet being formed (debug builds only).
  LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
  for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
    LLVM_DEBUG(dbgs() << "\t[" << i << "] SU(");
    LLVM_DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
    LLVM_DEBUG(Packet[i]->getInstr()->dump());
  }
#endif

  return startNewCycle;
}
192 | |
193 | DFAPacketizer * |
194 | VLIWResourceModel::createPacketizer(const TargetSubtargetInfo &STI) const { |
195 | return STI.getInstrInfo()->CreateTargetScheduleState(STI); |
196 | } |
197 | |
/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
void VLIWMachineScheduler::schedule() {
  LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW "
                    << printMBBReference(*BB) << " " << BB->getName()
                    << " in_func " << BB->getParent()->getName()
                    << " at loop depth " << MLI->getLoopDepth(BB) << " \n");

  // Build the region's DAG together with register pressure information.
  buildDAGWithRegPressure();

  Topo.InitDAGTopologicalSorting();

  // Postprocess the DAG to add platform-specific artificial dependencies.
  postProcessDAG();

  SmallVector<SUnit *, 8> TopRoots, BotRoots;
  findRootsAndBiasEdges(TopRoots, BotRoots);

  // Initialize the strategy before modifying the DAG.
  SchedImpl->initialize(this);

  // Debug-only: report the maximum height and depth seen in the DAG.
  LLVM_DEBUG({
    unsigned maxH = 0;
    for (const SUnit &SU : SUnits)
      if (SU.getHeight() > maxH)
        maxH = SU.getHeight();
    dbgs() << "Max Height " << maxH << "\n";
  });
  LLVM_DEBUG({
    unsigned maxD = 0;
    for (const SUnit &SU : SUnits)
      if (SU.getDepth() > maxD)
        maxD = SU.getDepth();
    dbgs() << "Max Depth " << maxD << "\n";
  });
  LLVM_DEBUG(dump());
  if (ViewMISchedDAGs)
    viewGraph();

  initQueues(TopRoots, BotRoots);

  // Main loop: repeatedly let the strategy pick a node from either end until
  // the top and bottom zones converge.
  bool IsTopNode = false;
  while (true) {
    LLVM_DEBUG(
        dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
    SUnit *SU = SchedImpl->pickNode(IsTopNode);
    if (!SU)
      break;

    if (!checkSchedLimit())
      break;

    // Move the instruction to its new position in the region.
    scheduleMI(SU, IsTopNode);

    // Notify the scheduling strategy after updating the DAG.
    SchedImpl->schedNode(SU, IsTopNode);

    // Release any nodes made ready by scheduling SU.
    updateQueues(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  // Reinsert debug values that were set aside during DAG construction.
  placeDebugValues();

  LLVM_DEBUG({
    dbgs() << "*** Final schedule for "
           << printMBBReference(*begin()->getParent()) << " ***\n";
    dumpSchedule();
    dbgs() << '\n';
  });
}
269 | |
/// Set up the strategy for a new scheduling region: boundary state, hazard
/// recognizers, resource models, and the high-pressure set mask.
void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
  DAG = static_cast<VLIWMachineScheduler *>(dag);
  SchedModel = DAG->getSchedModel();

  Top.init(DAG, SchedModel);
  Bot.init(DAG, SchedModel);

  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
  // are disabled, then these HazardRecs will be disabled.
  const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
  const TargetSubtargetInfo &STI = DAG->MF.getSubtarget();
  const TargetInstrInfo *TII = STI.getInstrInfo();
  // Free any recognizers/models left over from the previous region before
  // allocating fresh ones (initialize() runs once per region).
  delete Top.HazardRec;
  delete Bot.HazardRec;
  Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
  Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);

  delete Top.ResourceModel;
  delete Bot.ResourceModel;
  Top.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel());
  Bot.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel());

  // Mark each pressure set whose regional maximum exceeds RPThreshold of its
  // limit; SchedulingCost() consults this mask via pressureChange().
  const std::vector<unsigned> &MaxPressure =
      DAG->getRegPressure().MaxSetPressure;
  HighPressureSets.assign(MaxPressure.size(), false);
  for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) {
    unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(i);
    HighPressureSets[i] =
        ((float)MaxPressure[i] > ((float)Limit * RPThreshold));
  }
}
301 | |
/// Factory hook so target-specific subclasses can supply a specialized
/// resource model. Ownership passes to the caller (the boundary deletes it).
VLIWResourceModel *ConvergingVLIWScheduler::createVLIWResourceModel(
    const TargetSubtargetInfo &STI, const TargetSchedModel *SchedModel) const {
  return new VLIWResourceModel(STI, SchedModel);
}
306 | |
307 | void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) { |
308 | for (const SDep &PI : SU->Preds) { |
309 | unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle; |
310 | unsigned MinLatency = PI.getLatency(); |
311 | #ifndef NDEBUG |
312 | Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); |
313 | #endif |
314 | if (SU->TopReadyCycle < PredReadyCycle + MinLatency) |
315 | SU->TopReadyCycle = PredReadyCycle + MinLatency; |
316 | } |
317 | |
318 | if (!SU->isScheduled) |
319 | Top.releaseNode(SU, ReadyCycle: SU->TopReadyCycle); |
320 | } |
321 | |
322 | void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) { |
323 | assert(SU->getInstr() && "Scheduled SUnit must have instr" ); |
324 | |
325 | for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; |
326 | ++I) { |
327 | unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; |
328 | unsigned MinLatency = I->getLatency(); |
329 | #ifndef NDEBUG |
330 | Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency); |
331 | #endif |
332 | if (SU->BotReadyCycle < SuccReadyCycle + MinLatency) |
333 | SU->BotReadyCycle = SuccReadyCycle + MinLatency; |
334 | } |
335 | |
336 | if (!SU->isScheduled) |
337 | Bot.releaseNode(SU, ReadyCycle: SU->BotReadyCycle); |
338 | } |
339 | |
// Each boundary owns its resource model and hazard recognizer; both are
// (re)allocated in ConvergingVLIWScheduler::initialize().
ConvergingVLIWScheduler::VLIWSchedBoundary::~VLIWSchedBoundary() {
  delete ResourceModel;
  delete HazardRec;
}
344 | |
345 | /// Does this SU have a hazard within the current instruction group. |
346 | /// |
347 | /// The scheduler supports two modes of hazard recognition. The first is the |
348 | /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that |
349 | /// supports highly complicated in-order reservation tables |
350 | /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic. |
351 | /// |
352 | /// The second is a streamlined mechanism that checks for hazards based on |
353 | /// simple counters that the scheduler itself maintains. It explicitly checks |
354 | /// for instruction dispatch limitations, including the number of micro-ops that |
355 | /// can dispatch per cycle. |
356 | /// |
357 | /// TODO: Also check whether the SU must start a new group. |
358 | bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) { |
359 | if (HazardRec->isEnabled()) |
360 | return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; |
361 | |
362 | unsigned uops = SchedModel->getNumMicroOps(MI: SU->getInstr()); |
363 | if (IssueCount + uops > SchedModel->getIssueWidth()) |
364 | return true; |
365 | |
366 | return false; |
367 | } |
368 | |
369 | void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode( |
370 | SUnit *SU, unsigned ReadyCycle) { |
371 | if (ReadyCycle < MinReadyCycle) |
372 | MinReadyCycle = ReadyCycle; |
373 | |
374 | // Check for interlocks first. For the purpose of other heuristics, an |
375 | // instruction that cannot issue appears as if it's not in the ReadyQueue. |
376 | if (ReadyCycle > CurrCycle || checkHazard(SU)) |
377 | |
378 | Pending.push(SU); |
379 | else |
380 | Available.push(SU); |
381 | } |
382 | |
383 | /// Move the boundary of scheduled code by one cycle. |
384 | void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() { |
385 | unsigned Width = SchedModel->getIssueWidth(); |
386 | IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width; |
387 | |
388 | assert(MinReadyCycle < std::numeric_limits<unsigned>::max() && |
389 | "MinReadyCycle uninitialized" ); |
390 | unsigned NextCycle = std::max(a: CurrCycle + 1, b: MinReadyCycle); |
391 | |
392 | if (!HazardRec->isEnabled()) { |
393 | // Bypass HazardRec virtual calls. |
394 | CurrCycle = NextCycle; |
395 | } else { |
396 | // Bypass getHazardType calls in case of long latency. |
397 | for (; CurrCycle != NextCycle; ++CurrCycle) { |
398 | if (isTop()) |
399 | HazardRec->AdvanceCycle(); |
400 | else |
401 | HazardRec->RecedeCycle(); |
402 | } |
403 | } |
404 | CheckPending = true; |
405 | |
406 | LLVM_DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle " |
407 | << CurrCycle << '\n'); |
408 | } |
409 | |
/// Move the boundary of scheduled code by one SUnit.
///
/// Updates the hazard recognizer, the DFA resource model, and the micro-op
/// count for the current group; advances the cycle when SU opened a new
/// packet.
void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) {
  bool startNewCycle = false;

  // Update the reservation table.
  if (HazardRec->isEnabled()) {
    if (!isTop() && SU->isCall) {
      // Calls are scheduled with their preceding instructions. For bottom-up
      // scheduling, clear the pipeline state before emitting.
      HazardRec->Reset();
    }
    HazardRec->EmitInstruction(SU);
  }

  // Update DFA model. reserveResources() returns true when SU did not fit
  // into the current packet and a new one was started.
  startNewCycle = ResourceModel->reserveResources(SU, isTop());

  // Check the instruction group dispatch limit.
  // TODO: Check if this SU must end a dispatch group.
  IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
  if (startNewCycle) {
    LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
    bumpCycle();
  } else
    LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount << " at cycle "
                      << CurrCycle << '\n');
}
437 | |
/// Release pending ready nodes in to the available queue. This makes them
/// visible to heuristics.
void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() {
  // If the available queue is empty, it is safe to reset MinReadyCycle.
  if (Available.empty())
    MinReadyCycle = std::numeric_limits<unsigned>::max();

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
    SUnit *SU = *(Pending.begin() + i);
    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;

    // Track the earliest ready cycle among nodes that stay pending.
    if (ReadyCycle < MinReadyCycle)
      MinReadyCycle = ReadyCycle;

    // Not ready at the current cycle yet.
    if (ReadyCycle > CurrCycle)
      continue;

    // Ready, but would conflict with the current instruction group.
    if (checkHazard(SU))
      continue;

    Available.push(SU);
    Pending.remove(Pending.begin() + i);
    // remove() shifts later elements down; re-examine the same index and
    // shrink the loop bound accordingly.
    --i;
    --e;
  }
  CheckPending = false;
}
467 | |
468 | /// Remove SU from the ready set for this boundary. |
469 | void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) { |
470 | if (Available.isInQueue(SU)) |
471 | Available.remove(I: Available.find(SU)); |
472 | else { |
473 | assert(Pending.isInQueue(SU) && "bad ready count" ); |
474 | Pending.remove(I: Pending.find(SU)); |
475 | } |
476 | } |
477 | |
478 | /// If this queue only has one ready candidate, return it. As a side effect, |
479 | /// advance the cycle until at least one node is ready. If multiple instructions |
480 | /// are ready, return NULL. |
/// If this queue only has one ready candidate, return it. As a side effect,
/// advance the cycle until at least one node is ready. If multiple instructions
/// are ready, return NULL.
SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
  if (CheckPending)
    releasePending();

  // Burn a cycle when nothing is available, or when the sole available node
  // cannot be issued yet (no DFA resources, or weak edges to unscheduled
  // nodes) while others are still pending.
  auto AdvanceCycle = [this]() {
    if (Available.empty())
      return true;
    if (Available.size() == 1 && Pending.size() > 0)
      return !ResourceModel->isResourceAvailable(*Available.begin(), isTop()) ||
             getWeakLeft(*Available.begin(), isTop()) != 0;
    return false;
  };
  for (unsigned i = 0; AdvanceCycle(); ++i) {
    // A bounded number of cycle advances must release some node; otherwise
    // the hazard could never clear.
    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
           "permanent hazard");
    (void)i;
    // Keep the resource model in lockstep: force it to start a new packet.
    ResourceModel->reserveResources(nullptr, isTop());
    bumpCycle();
    releasePending();
  }
  if (Available.size() == 1)
    return *Available.begin();
  return nullptr;
}
505 | |
506 | #ifndef NDEBUG |
/// Print one scheduling candidate with its cost and, when valid, the pressure
/// change it would cause. Debug builds only.
void ConvergingVLIWScheduler::traceCandidate(const char *Label,
                                             const ReadyQueue &Q, SUnit *SU,
                                             int Cost, PressureChange P) {
  dbgs() << Label << " " << Q.getName() << " ";
  if (P.isValid())
    dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":"
           << P.getUnitInc() << " ";
  else
    dbgs() << " ";
  dbgs() << "cost(" << Cost << ")\t";
  DAG->dumpNode(*SU);
}
519 | |
// Very detailed queue dump, to be used with higher verbosity levels.
// For each queued node, prints the node number, the verbose per-heuristic
// cost breakdown (via SchedulingCost with verbose=true), and the instruction.
void ConvergingVLIWScheduler::readyQueueVerboseDump(
    const RegPressureTracker &RPTracker, SchedCandidate &Candidate,
    ReadyQueue &Q) {
  // getMaxPressureDelta() temporarily modifies the tracker, hence the cast.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);

  dbgs() << ">>> " << Q.getName() << "\n";
  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
    RegPressureDelta RPDelta;
    TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
                                    DAG->getRegionCriticalPSets(),
                                    DAG->getRegPressure().MaxSetPressure);
    std::stringstream dbgstr;
    dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")";
    dbgs() << dbgstr.str();
    SchedulingCost(Q, *I, Candidate, RPDelta, true);
    dbgs() << "\t";
    (*I)->getInstr()->dump();
  }
  dbgs() << "\n";
}
541 | #endif |
542 | |
543 | /// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor |
544 | /// of SU, return true (we may have duplicates) |
545 | static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) { |
546 | if (SU->NumPredsLeft == 0) |
547 | return false; |
548 | |
549 | for (auto &Pred : SU->Preds) { |
550 | // We found an available, but not scheduled, predecessor. |
551 | if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2)) |
552 | return false; |
553 | } |
554 | |
555 | return true; |
556 | } |
557 | |
558 | /// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor |
559 | /// of SU, return true (we may have duplicates) |
560 | static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) { |
561 | if (SU->NumSuccsLeft == 0) |
562 | return false; |
563 | |
564 | for (auto &Succ : SU->Succs) { |
565 | // We found an available, but not scheduled, successor. |
566 | if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2)) |
567 | return false; |
568 | } |
569 | return true; |
570 | } |
571 | |
572 | /// Check if the instruction changes the register pressure of a register in the |
573 | /// high pressure set. The function returns a negative value if the pressure |
574 | /// decreases and a positive value is the pressure increases. If the instruction |
575 | /// doesn't use a high pressure register or doesn't change the register |
576 | /// pressure, then return 0. |
577 | int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) { |
578 | PressureDiff &PD = DAG->getPressureDiff(SU); |
579 | for (const auto &P : PD) { |
580 | if (!P.isValid()) |
581 | continue; |
582 | // The pressure differences are computed bottom-up, so the comparison for |
583 | // an increase is positive in the bottom direction, but negative in the |
584 | // top-down direction. |
585 | if (HighPressureSets[P.getPSet()]) |
586 | return (isBotUp ? P.getUnitInc() : -P.getUnitInc()); |
587 | } |
588 | return 0; |
589 | } |
590 | |
/// Single point to compute overall scheduling cost.
/// TODO: More heuristics will be used soon.
///
/// Despite the name, larger is better: the value acts as a priority. \p
/// verbose only affects debug printing (used by readyQueueVerboseDump).
int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
                                            SchedCandidate &Candidate,
                                            RegPressureDelta &Delta,
                                            bool verbose) {
  // Initial trivial priority.
  int ResCount = 1;

  // Do not waste time on a node that is already scheduled.
  if (!SU || SU->isScheduled)
    return ResCount;

  LLVM_DEBUG(if (verbose) dbgs()
             << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
  // Forced priority is high.
  if (SU->isScheduleHigh) {
    ResCount += PriorityOne;
    LLVM_DEBUG(dbgs() << "H|");
  }

  unsigned IsAvailableAmt = 0;
  // Critical path first.
  if (Q.getID() == TopQID) {
    // Top-down: height approximates the remaining critical path below SU.
    if (Top.isLatencyBound(SU)) {
      LLVM_DEBUG(if (verbose) dbgs() << "LB|");
      ResCount += (SU->getHeight() * ScaleTwo);
    }

    LLVM_DEBUG(if (verbose) {
      std::stringstream dbgstr;
      dbgstr << "h" << std::setw(3) << SU->getHeight() << "|";
      dbgs() << dbgstr.str();
    });

    // If resources are available for it, multiply the
    // chance of scheduling.
    if (Top.ResourceModel->isResourceAvailable(SU, true)) {
      IsAvailableAmt = (PriorityTwo + PriorityThree);
      ResCount += IsAvailableAmt;
      LLVM_DEBUG(if (verbose) dbgs() << "A|");
    } else
      LLVM_DEBUG(if (verbose) dbgs() << " |");
  } else {
    // Bottom-up: depth plays the symmetric role to height.
    if (Bot.isLatencyBound(SU)) {
      LLVM_DEBUG(if (verbose) dbgs() << "LB|");
      ResCount += (SU->getDepth() * ScaleTwo);
    }

    LLVM_DEBUG(if (verbose) {
      std::stringstream dbgstr;
      dbgstr << "d" << std::setw(3) << SU->getDepth() << "|";
      dbgs() << dbgstr.str();
    });

    // If resources are available for it, multiply the
    // chance of scheduling.
    if (Bot.ResourceModel->isResourceAvailable(SU, false)) {
      IsAvailableAmt = (PriorityTwo + PriorityThree);
      ResCount += IsAvailableAmt;
      LLVM_DEBUG(if (verbose) dbgs() << "A|");
    } else
      LLVM_DEBUG(if (verbose) dbgs() << " |");
  }

  unsigned NumNodesBlocking = 0;
  if (Q.getID() == TopQID) {
    // How many SUs does it block from scheduling?
    // Look at all of the successors of this node.
    // Count the number of nodes that
    // this node is the sole unscheduled node for.
    if (Top.isLatencyBound(SU))
      for (const SDep &SI : SU->Succs)
        if (isSingleUnscheduledPred(SI.getSUnit(), SU))
          ++NumNodesBlocking;
  } else {
    // How many unscheduled predecessors block this node?
    if (Bot.isLatencyBound(SU))
      for (const SDep &PI : SU->Preds)
        if (isSingleUnscheduledSucc(PI.getSUnit(), SU))
          ++NumNodesBlocking;
  }
  ResCount += (NumNodesBlocking * ScaleTwo);

  LLVM_DEBUG(if (verbose) {
    std::stringstream dbgstr;
    dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|";
    dbgs() << dbgstr.str();
  });

  // Factor in reg pressure as a heuristic.
  if (!IgnoreBBRegPressure) {
    // Decrease priority by the amount that register pressure exceeds the limit.
    ResCount -= (Delta.Excess.getUnitInc() * PriorityOne);
    // Decrease priority if register pressure exceeds the limit.
    ResCount -= (Delta.CriticalMax.getUnitInc() * PriorityOne);
    // Decrease priority slightly if register pressure would increase over the
    // current maximum.
    ResCount -= (Delta.CurrentMax.getUnitInc() * PriorityTwo);
    // If there are register pressure issues, then we remove the value added for
    // the instruction being available. The rationale is that we really don't
    // want to schedule an instruction that causes a spill.
    if (IsAvailableAmt && pressureChange(SU, Q.getID() != TopQID) > 0 &&
        (Delta.Excess.getUnitInc() || Delta.CriticalMax.getUnitInc() ||
         Delta.CurrentMax.getUnitInc()))
      ResCount -= IsAvailableAmt;
    LLVM_DEBUG(if (verbose) {
      dbgs() << "RP " << Delta.Excess.getUnitInc() << "/"
             << Delta.CriticalMax.getUnitInc() << "/"
             << Delta.CurrentMax.getUnitInc() << ")|";
    });
  }

  // Give preference to a zero latency instruction if the dependent
  // instruction is in the current packet.
  if (Q.getID() == TopQID && getWeakLeft(SU, true) == 0) {
    for (const SDep &PI : SU->Preds) {
      if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() &&
          PI.getLatency() == 0 &&
          Top.ResourceModel->isInPacket(PI.getSUnit())) {
        ResCount += PriorityThree;
        LLVM_DEBUG(if (verbose) dbgs() << "Z|");
      }
    }
  } else if (Q.getID() == BotQID && getWeakLeft(SU, false) == 0) {
    for (const SDep &SI : SU->Succs) {
      if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() &&
          SI.getLatency() == 0 &&
          Bot.ResourceModel->isInPacket(SI.getSUnit())) {
        ResCount += PriorityThree;
        LLVM_DEBUG(if (verbose) dbgs() << "Z|");
      }
    }
  }

  // If the instruction has a non-zero latency dependence with an instruction in
  // the current packet, then it should not be scheduled yet. The case occurs
  // when the dependent instruction is scheduled in a new packet, so the
  // scheduler updates the current cycle and pending instructions become
  // available.
  if (CheckEarlyAvail) {
    if (Q.getID() == TopQID) {
      for (const auto &PI : SU->Preds) {
        if (PI.getLatency() > 0 &&
            Top.ResourceModel->isInPacket(PI.getSUnit())) {
          ResCount -= PriorityOne;
          LLVM_DEBUG(if (verbose) dbgs() << "D|");
        }
      }
    } else {
      for (const auto &SI : SU->Succs) {
        if (SI.getLatency() > 0 &&
            Bot.ResourceModel->isInPacket(SI.getSUnit())) {
          ResCount -= PriorityOne;
          LLVM_DEBUG(if (verbose) dbgs() << "D|");
        }
      }
    }
  }

  LLVM_DEBUG(if (verbose) {
    std::stringstream dbgstr;
    dbgstr << "Total " << std::setw(4) << ResCount << ")";
    dbgs() << dbgstr.str();
  });

  return ResCount;
}
759 | |
760 | /// Pick the best candidate from the top queue. |
761 | /// |
762 | /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during |
763 | /// DAG building. To adjust for the current scheduling location we need to |
764 | /// maintain the number of vreg uses remaining to be top-scheduled. |
765 | ConvergingVLIWScheduler::CandResult |
766 | ConvergingVLIWScheduler::pickNodeFromQueue(VLIWSchedBoundary &Zone, |
767 | const RegPressureTracker &RPTracker, |
768 | SchedCandidate &Candidate) { |
769 | ReadyQueue &Q = Zone.Available; |
770 | LLVM_DEBUG(if (SchedDebugVerboseLevel > 1) |
771 | readyQueueVerboseDump(RPTracker, Candidate, Q); |
772 | else Q.dump();); |
773 | |
774 | // getMaxPressureDelta temporarily modifies the tracker. |
775 | RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker); |
776 | |
777 | // BestSU remains NULL if no top candidates beat the best existing candidate. |
778 | CandResult FoundCandidate = NoCand; |
779 | for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { |
780 | RegPressureDelta RPDelta; |
781 | TempTracker.getMaxPressureDelta(MI: (*I)->getInstr(), Delta&: RPDelta, |
782 | CriticalPSets: DAG->getRegionCriticalPSets(), |
783 | MaxPressureLimit: DAG->getRegPressure().MaxSetPressure); |
784 | |
785 | int CurrentCost = SchedulingCost(Q, SU: *I, Candidate, Delta&: RPDelta, verbose: false); |
786 | |
787 | // Initialize the candidate if needed. |
788 | if (!Candidate.SU) { |
789 | LLVM_DEBUG(traceCandidate("DCAND" , Q, *I, CurrentCost)); |
790 | Candidate.SU = *I; |
791 | Candidate.RPDelta = RPDelta; |
792 | Candidate.SCost = CurrentCost; |
793 | FoundCandidate = NodeOrder; |
794 | continue; |
795 | } |
796 | |
797 | // Choose node order for negative cost candidates. There is no good |
798 | // candidate in this case. |
799 | if (CurrentCost < 0 && Candidate.SCost < 0) { |
800 | if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) || |
801 | (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) { |
802 | LLVM_DEBUG(traceCandidate("NCAND" , Q, *I, CurrentCost)); |
803 | Candidate.SU = *I; |
804 | Candidate.RPDelta = RPDelta; |
805 | Candidate.SCost = CurrentCost; |
806 | FoundCandidate = NodeOrder; |
807 | } |
808 | continue; |
809 | } |
810 | |
811 | // Best cost. |
812 | if (CurrentCost > Candidate.SCost) { |
813 | LLVM_DEBUG(traceCandidate("CCAND" , Q, *I, CurrentCost)); |
814 | Candidate.SU = *I; |
815 | Candidate.RPDelta = RPDelta; |
816 | Candidate.SCost = CurrentCost; |
817 | FoundCandidate = BestCost; |
818 | continue; |
819 | } |
820 | |
821 | // Choose an instruction that does not depend on an artificial edge. |
822 | unsigned CurrWeak = getWeakLeft(SU: *I, isTop: (Q.getID() == TopQID)); |
823 | unsigned CandWeak = getWeakLeft(SU: Candidate.SU, isTop: (Q.getID() == TopQID)); |
824 | if (CurrWeak != CandWeak) { |
825 | if (CurrWeak < CandWeak) { |
826 | LLVM_DEBUG(traceCandidate("WCAND" , Q, *I, CurrentCost)); |
827 | Candidate.SU = *I; |
828 | Candidate.RPDelta = RPDelta; |
829 | Candidate.SCost = CurrentCost; |
830 | FoundCandidate = Weak; |
831 | } |
832 | continue; |
833 | } |
834 | |
835 | if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(SU: *I)) { |
836 | unsigned CurrSize, CandSize; |
837 | if (Q.getID() == TopQID) { |
838 | CurrSize = (*I)->Succs.size(); |
839 | CandSize = Candidate.SU->Succs.size(); |
840 | } else { |
841 | CurrSize = (*I)->Preds.size(); |
842 | CandSize = Candidate.SU->Preds.size(); |
843 | } |
844 | if (CurrSize > CandSize) { |
845 | LLVM_DEBUG(traceCandidate("SPCAND" , Q, *I, CurrentCost)); |
846 | Candidate.SU = *I; |
847 | Candidate.RPDelta = RPDelta; |
848 | Candidate.SCost = CurrentCost; |
849 | FoundCandidate = BestCost; |
850 | } |
851 | // Keep the old candidate if it's a better candidate. That is, don't use |
852 | // the subsequent tie breaker. |
853 | if (CurrSize != CandSize) |
854 | continue; |
855 | } |
856 | |
857 | // Tie breaker. |
858 | // To avoid scheduling indeterminism, we need a tie breaker |
859 | // for the case when cost is identical for two nodes. |
860 | if (UseNewerCandidate && CurrentCost == Candidate.SCost) { |
861 | if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) || |
862 | (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) { |
863 | LLVM_DEBUG(traceCandidate("TCAND" , Q, *I, CurrentCost)); |
864 | Candidate.SU = *I; |
865 | Candidate.RPDelta = RPDelta; |
866 | Candidate.SCost = CurrentCost; |
867 | FoundCandidate = NodeOrder; |
868 | continue; |
869 | } |
870 | } |
871 | |
872 | // Fall through to original instruction order. |
873 | // Only consider node order if Candidate was chosen from this Q. |
874 | if (FoundCandidate == NoCand) |
875 | continue; |
876 | } |
877 | return FoundCandidate; |
878 | } |
879 | |
880 | /// Pick the best candidate node from either the top or bottom queue. |
881 | SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) { |
882 | // Schedule as far as possible in the direction of no choice. This is most |
883 | // efficient, but also provides the best heuristics for CriticalPSets. |
884 | if (SUnit *SU = Bot.pickOnlyChoice()) { |
885 | LLVM_DEBUG(dbgs() << "Picked only Bottom\n" ); |
886 | IsTopNode = false; |
887 | return SU; |
888 | } |
889 | if (SUnit *SU = Top.pickOnlyChoice()) { |
890 | LLVM_DEBUG(dbgs() << "Picked only Top\n" ); |
891 | IsTopNode = true; |
892 | return SU; |
893 | } |
894 | SchedCandidate BotCand; |
895 | // Prefer bottom scheduling when heuristics are silent. |
896 | CandResult BotResult = |
897 | pickNodeFromQueue(Zone&: Bot, RPTracker: DAG->getBotRPTracker(), Candidate&: BotCand); |
898 | assert(BotResult != NoCand && "failed to find the first candidate" ); |
899 | |
900 | // If either Q has a single candidate that provides the least increase in |
901 | // Excess pressure, we can immediately schedule from that Q. |
902 | // |
903 | // RegionCriticalPSets summarizes the pressure within the scheduled region and |
904 | // affects picking from either Q. If scheduling in one direction must |
905 | // increase pressure for one of the excess PSets, then schedule in that |
906 | // direction first to provide more freedom in the other direction. |
907 | if (BotResult == SingleExcess || BotResult == SingleCritical) { |
908 | LLVM_DEBUG(dbgs() << "Prefered Bottom Node\n" ); |
909 | IsTopNode = false; |
910 | return BotCand.SU; |
911 | } |
912 | // Check if the top Q has a better candidate. |
913 | SchedCandidate TopCand; |
914 | CandResult TopResult = |
915 | pickNodeFromQueue(Zone&: Top, RPTracker: DAG->getTopRPTracker(), Candidate&: TopCand); |
916 | assert(TopResult != NoCand && "failed to find the first candidate" ); |
917 | |
918 | if (TopResult == SingleExcess || TopResult == SingleCritical) { |
919 | LLVM_DEBUG(dbgs() << "Prefered Top Node\n" ); |
920 | IsTopNode = true; |
921 | return TopCand.SU; |
922 | } |
923 | // If either Q has a single candidate that minimizes pressure above the |
924 | // original region's pressure pick it. |
925 | if (BotResult == SingleMax) { |
926 | LLVM_DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n" ); |
927 | IsTopNode = false; |
928 | return BotCand.SU; |
929 | } |
930 | if (TopResult == SingleMax) { |
931 | LLVM_DEBUG(dbgs() << "Prefered Top Node SingleMax\n" ); |
932 | IsTopNode = true; |
933 | return TopCand.SU; |
934 | } |
935 | if (TopCand.SCost > BotCand.SCost) { |
936 | LLVM_DEBUG(dbgs() << "Prefered Top Node Cost\n" ); |
937 | IsTopNode = true; |
938 | return TopCand.SU; |
939 | } |
940 | // Otherwise prefer the bottom candidate in node order. |
941 | LLVM_DEBUG(dbgs() << "Prefered Bottom in Node order\n" ); |
942 | IsTopNode = false; |
943 | return BotCand.SU; |
944 | } |
945 | |
946 | /// Pick the best node to balance the schedule. Implements MachineSchedStrategy. |
947 | SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) { |
948 | if (DAG->top() == DAG->bottom()) { |
949 | assert(Top.Available.empty() && Top.Pending.empty() && |
950 | Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage" ); |
951 | return nullptr; |
952 | } |
953 | SUnit *SU; |
954 | if (PreRADirection == MISched::TopDown) { |
955 | SU = Top.pickOnlyChoice(); |
956 | if (!SU) { |
957 | SchedCandidate TopCand; |
958 | CandResult TopResult = |
959 | pickNodeFromQueue(Zone&: Top, RPTracker: DAG->getTopRPTracker(), Candidate&: TopCand); |
960 | assert(TopResult != NoCand && "failed to find the first candidate" ); |
961 | (void)TopResult; |
962 | SU = TopCand.SU; |
963 | } |
964 | IsTopNode = true; |
965 | } else if (PreRADirection == MISched::BottomUp) { |
966 | SU = Bot.pickOnlyChoice(); |
967 | if (!SU) { |
968 | SchedCandidate BotCand; |
969 | CandResult BotResult = |
970 | pickNodeFromQueue(Zone&: Bot, RPTracker: DAG->getBotRPTracker(), Candidate&: BotCand); |
971 | assert(BotResult != NoCand && "failed to find the first candidate" ); |
972 | (void)BotResult; |
973 | SU = BotCand.SU; |
974 | } |
975 | IsTopNode = false; |
976 | } else { |
977 | SU = pickNodeBidrectional(IsTopNode); |
978 | } |
979 | if (SU->isTopReady()) |
980 | Top.removeReady(SU); |
981 | if (SU->isBottomReady()) |
982 | Bot.removeReady(SU); |
983 | |
984 | LLVM_DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom" ) |
985 | << " Scheduling instruction in cycle " |
986 | << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " (" |
987 | << reportPackets() << ")\n" ; |
988 | DAG->dumpNode(*SU)); |
989 | return SU; |
990 | } |
991 | |
992 | /// Update the scheduler's state after scheduling a node. This is the same node |
993 | /// that was just returned by pickNode(). However, VLIWMachineScheduler needs |
994 | /// to update it's state based on the current cycle before MachineSchedStrategy |
995 | /// does. |
996 | void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) { |
997 | if (IsTopNode) { |
998 | Top.bumpNode(SU); |
999 | SU->TopReadyCycle = Top.CurrCycle; |
1000 | } else { |
1001 | Bot.bumpNode(SU); |
1002 | SU->BotReadyCycle = Bot.CurrCycle; |
1003 | } |
1004 | } |
1005 | |