1//===- VLIWMachineScheduler.cpp - VLIW-Focused Scheduling Pass ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// MachineScheduler schedules machine instructions after phi elimination. It
10// preserves LiveIntervals so it can be invoked before register allocation.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/CodeGen/VLIWMachineScheduler.h"
15#include "llvm/ADT/SmallVector.h"
16#include "llvm/CodeGen/DFAPacketizer.h"
17#include "llvm/CodeGen/MachineBasicBlock.h"
18#include "llvm/CodeGen/MachineFunction.h"
19#include "llvm/CodeGen/MachineInstr.h"
20#include "llvm/CodeGen/MachineLoopInfo.h"
21#include "llvm/CodeGen/RegisterClassInfo.h"
22#include "llvm/CodeGen/RegisterPressure.h"
23#include "llvm/CodeGen/ScheduleDAG.h"
24#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
25#include "llvm/CodeGen/TargetInstrInfo.h"
26#include "llvm/CodeGen/TargetOpcodes.h"
27#include "llvm/CodeGen/TargetRegisterInfo.h"
28#include "llvm/CodeGen/TargetSchedule.h"
29#include "llvm/CodeGen/TargetSubtargetInfo.h"
30#include "llvm/Support/CommandLine.h"
31#include "llvm/Support/Debug.h"
32#include "llvm/Support/raw_ostream.h"
33#include <algorithm>
34#include <cassert>
35#include <iomanip>
36#include <limits>
37#include <sstream>
38
39using namespace llvm;
40
41#define DEBUG_TYPE "machine-scheduler"
42
43static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure", cl::Hidden,
44 cl::init(Val: false));
45
46static cl::opt<bool> UseNewerCandidate("use-newer-candidate", cl::Hidden,
47 cl::init(Val: true));
48
49static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
50 cl::Hidden, cl::init(Val: 1));
51
52// Check if the scheduler should penalize instructions that are available to
53// early due to a zero-latency dependence.
54static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
55 cl::init(Val: true));
56
57// This value is used to determine if a register class is a high pressure set.
58// We compute the maximum number of registers needed and divided by the total
59// available. Then, we compare the result to this value.
60static cl::opt<float> RPThreshold("vliw-misched-reg-pressure", cl::Hidden,
61 cl::init(Val: 0.75f),
62 cl::desc("High register pressure threhold."));
63
/// Construct a resource model bound to the given subtarget and schedule
/// model. Creates (and owns) the target's DFA packetizer; the packet buffer
/// is pre-sized to the machine's issue width.
VLIWResourceModel::VLIWResourceModel(const TargetSubtargetInfo &STI,
                                     const TargetSchedModel *SM)
    : TII(STI.getInstrInfo()), SchedModel(SM) {
  ResourcesModel = createPacketizer(STI);

  // This hard requirement could be relaxed,
  // but for now do not let it proceed.
  assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");

  // Start from an empty packet and a cleared DFA state.
  Packet.reserve(N: SchedModel->getIssueWidth());
  Packet.clear();
  ResourcesModel->clearResources();
}
77
78void VLIWResourceModel::reset() {
79 Packet.clear();
80 ResourcesModel->clearResources();
81}
82
83VLIWResourceModel::~VLIWResourceModel() { delete ResourcesModel; }
84
85/// Return true if there is a dependence between SUd and SUu.
86bool VLIWResourceModel::hasDependence(const SUnit *SUd, const SUnit *SUu) {
87 if (SUd->Succs.size() == 0)
88 return false;
89
90 for (const auto &S : SUd->Succs) {
91 // Since we do not add pseudos to packets, might as well
92 // ignore order dependencies.
93 if (S.isCtrl())
94 continue;
95
96 if (S.getSUnit() == SUu && S.getLatency() > 0)
97 return true;
98 }
99 return false;
100}
101
102/// Check if scheduling of this SU is possible
103/// in the current packet.
104/// It is _not_ precise (statefull), it is more like
105/// another heuristic. Many corner cases are figured
106/// empirically.
107bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) {
108 if (!SU || !SU->getInstr())
109 return false;
110
111 // First see if the pipeline could receive this instruction
112 // in the current cycle.
113 switch (SU->getInstr()->getOpcode()) {
114 default:
115 if (!ResourcesModel->canReserveResources(MI&: *SU->getInstr()))
116 return false;
117 break;
118 case TargetOpcode::EXTRACT_SUBREG:
119 case TargetOpcode::INSERT_SUBREG:
120 case TargetOpcode::SUBREG_TO_REG:
121 case TargetOpcode::REG_SEQUENCE:
122 case TargetOpcode::IMPLICIT_DEF:
123 case TargetOpcode::COPY:
124 case TargetOpcode::INLINEASM:
125 case TargetOpcode::INLINEASM_BR:
126 break;
127 }
128
129 // Now see if there are no other dependencies to instructions already
130 // in the packet.
131 if (IsTop) {
132 for (const SUnit *U : Packet)
133 if (hasDependence(SUd: U, SUu: SU))
134 return false;
135 } else {
136 for (const SUnit *U : Packet)
137 if (hasDependence(SUd: SU, SUu: U))
138 return false;
139 }
140 return true;
141}
142
/// Keep track of available resources.
///
/// Reserve resources for SU in the current packet and append it. If SU does
/// not fit (resource conflict, dependence on a packet member, or a full
/// packet), the packet is flushed and SU starts a new one. Passing a null SU
/// artificially ends the current packet.
/// \return true when a new cycle (packet) had to be started.
bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) {
  bool startNewCycle = false;
  // Artificially reset state.
  if (!SU) {
    reset();
    TotalPackets++;
    return false;
  }
  // If this SU does not fit in the packet or the packet is now full
  // start a new one.
  if (!isResourceAvailable(SU, IsTop) ||
      Packet.size() >= SchedModel->getIssueWidth()) {
    reset();
    TotalPackets++;
    startNewCycle = true;
  }

  // Pseudos (the explicit cases below) consume no DFA resources, so only the
  // default case updates the reservation tables.
  switch (SU->getInstr()->getOpcode()) {
  default:
    ResourcesModel->reserveResources(MI&: *SU->getInstr());
    break;
  case TargetOpcode::EXTRACT_SUBREG:
  case TargetOpcode::INSERT_SUBREG:
  case TargetOpcode::SUBREG_TO_REG:
  case TargetOpcode::REG_SEQUENCE:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
  case TargetOpcode::CFI_INSTRUCTION:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::COPY:
  case TargetOpcode::INLINEASM:
  case TargetOpcode::INLINEASM_BR:
    break;
  }
  Packet.push_back(Elt: SU);

  // Debug-only dump of the packet contents after the insertion.
#ifndef NDEBUG
  LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
  for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
    LLVM_DEBUG(dbgs() << "\t[" << i << "] SU(");
    LLVM_DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
    LLVM_DEBUG(Packet[i]->getInstr()->dump());
  }
#endif

  return startNewCycle;
}
191
192DFAPacketizer *
193VLIWResourceModel::createPacketizer(const TargetSubtargetInfo &STI) const {
194 return STI.getInstrInfo()->CreateTargetScheduleState(STI);
195}
196
/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
void VLIWMachineScheduler::schedule() {
  LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW "
                    << printMBBReference(*BB) << " " << BB->getName()
                    << " in_func " << BB->getParent()->getName()
                    << " at loop depth " << MLI->getLoopDepth(BB) << " \n");

  // Build the DAG for the region, tracking register pressure as we go.
  buildDAGWithRegPressure();

  Topo.InitDAGTopologicalSorting();

  // Postprocess the DAG to add platform-specific artificial dependencies.
  postProcessDAG();

  SmallVector<SUnit *, 8> TopRoots, BotRoots;
  findRootsAndBiasEdges(TopRoots, BotRoots);

  // Initialize the strategy before modifying the DAG.
  SchedImpl->initialize(DAG: this);

  // Debug-only: report the maximum height/depth seen across all SUnits.
  LLVM_DEBUG({
    unsigned maxH = 0;
    for (const SUnit &SU : SUnits)
      if (SU.getHeight() > maxH)
        maxH = SU.getHeight();
    dbgs() << "Max Height " << maxH << "\n";
  });
  LLVM_DEBUG({
    unsigned maxD = 0;
    for (const SUnit &SU : SUnits)
      if (SU.getDepth() > maxD)
        maxD = SU.getDepth();
    dbgs() << "Max Depth " << maxD << "\n";
  });
  LLVM_DEBUG(dump());
  if (ViewMISchedDAGs)
    viewGraph();

  initQueues(TopRoots, BotRoots);

  // Main loop: let the strategy pick nodes until none remain (or the debug
  // scheduling limit is hit).
  bool IsTopNode = false;
  while (true) {
    LLVM_DEBUG(
        dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
    SUnit *SU = SchedImpl->pickNode(IsTopNode);
    if (!SU)
      break;

    if (!checkSchedLimit())
      break;

    scheduleMI(SU, IsTopNode);

    // Notify the scheduling strategy after updating the DAG.
    SchedImpl->schedNode(SU, IsTopNode);

    updateQueues(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  LLVM_DEBUG({
    dbgs() << "*** Final schedule for "
           << printMBBReference(*begin()->getParent()) << " ***\n";
    dumpSchedule();
    dbgs() << '\n';
  });
}
268
269void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
270 DAG = static_cast<VLIWMachineScheduler *>(dag);
271 SchedModel = DAG->getSchedModel();
272
273 Top.init(dag: DAG, smodel: SchedModel);
274 Bot.init(dag: DAG, smodel: SchedModel);
275
276 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
277 // are disabled, then these HazardRecs will be disabled.
278 const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
279 const TargetSubtargetInfo &STI = DAG->MF.getSubtarget();
280 const TargetInstrInfo *TII = STI.getInstrInfo();
281 delete Top.HazardRec;
282 delete Bot.HazardRec;
283 Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
284 Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
285
286 delete Top.ResourceModel;
287 delete Bot.ResourceModel;
288 Top.ResourceModel = createVLIWResourceModel(STI, SchedModel: DAG->getSchedModel());
289 Bot.ResourceModel = createVLIWResourceModel(STI, SchedModel: DAG->getSchedModel());
290
291 const std::vector<unsigned> &MaxPressure =
292 DAG->getRegPressure().MaxSetPressure;
293 HighPressureSets.assign(NumElts: MaxPressure.size(), Elt: false);
294 for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) {
295 unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(Idx: i);
296 HighPressureSets[i] =
297 ((float)MaxPressure[i] > ((float)Limit * RPThreshold));
298 }
299}
300
/// Factory hook for the per-zone resource model; subclasses may override it
/// to supply a target-specific model. The caller owns the result.
VLIWResourceModel *ConvergingVLIWScheduler::createVLIWResourceModel(
    const TargetSubtargetInfo &STI, const TargetSchedModel *SchedModel) const {
  return new VLIWResourceModel(STI, SchedModel);
}
305
306void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
307 for (const SDep &PI : SU->Preds) {
308 unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle;
309 unsigned MinLatency = PI.getLatency();
310#ifndef NDEBUG
311 Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
312#endif
313 if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
314 SU->TopReadyCycle = PredReadyCycle + MinLatency;
315 }
316
317 if (!SU->isScheduled)
318 Top.releaseNode(SU, ReadyCycle: SU->TopReadyCycle);
319}
320
321void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
322 assert(SU->getInstr() && "Scheduled SUnit must have instr");
323
324 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E;
325 ++I) {
326 unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
327 unsigned MinLatency = I->getLatency();
328#ifndef NDEBUG
329 Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
330#endif
331 if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
332 SU->BotReadyCycle = SuccReadyCycle + MinLatency;
333 }
334
335 if (!SU->isScheduled)
336 Bot.releaseNode(SU, ReadyCycle: SU->BotReadyCycle);
337}
338
/// The zone owns the resource model and hazard recognizer installed by
/// ConvergingVLIWScheduler::initialize; release them here.
ConvergingVLIWScheduler::VLIWSchedBoundary::~VLIWSchedBoundary() {
  delete ResourceModel;
  delete HazardRec;
}
343
344/// Does this SU have a hazard within the current instruction group.
345///
346/// The scheduler supports two modes of hazard recognition. The first is the
347/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
348/// supports highly complicated in-order reservation tables
349/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
350///
351/// The second is a streamlined mechanism that checks for hazards based on
352/// simple counters that the scheduler itself maintains. It explicitly checks
353/// for instruction dispatch limitations, including the number of micro-ops that
354/// can dispatch per cycle.
355///
356/// TODO: Also check whether the SU must start a new group.
357bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) {
358 if (HazardRec->isEnabled())
359 return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
360
361 unsigned uops = SchedModel->getNumMicroOps(MI: SU->getInstr());
362 if (IssueCount + uops > SchedModel->getIssueWidth())
363 return true;
364
365 return false;
366}
367
368void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(
369 SUnit *SU, unsigned ReadyCycle) {
370 if (ReadyCycle < MinReadyCycle)
371 MinReadyCycle = ReadyCycle;
372
373 // Check for interlocks first. For the purpose of other heuristics, an
374 // instruction that cannot issue appears as if it's not in the ReadyQueue.
375 if (ReadyCycle > CurrCycle || checkHazard(SU))
376
377 Pending.push(SU);
378 else
379 Available.push(SU);
380}
381
382/// Move the boundary of scheduled code by one cycle.
383void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() {
384 unsigned Width = SchedModel->getIssueWidth();
385 IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
386
387 assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
388 "MinReadyCycle uninitialized");
389 unsigned NextCycle = std::max(a: CurrCycle + 1, b: MinReadyCycle);
390
391 if (!HazardRec->isEnabled()) {
392 // Bypass HazardRec virtual calls.
393 CurrCycle = NextCycle;
394 } else {
395 // Bypass getHazardType calls in case of long latency.
396 for (; CurrCycle != NextCycle; ++CurrCycle) {
397 if (isTop())
398 HazardRec->AdvanceCycle();
399 else
400 HazardRec->RecedeCycle();
401 }
402 }
403 CheckPending = true;
404
405 LLVM_DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle "
406 << CurrCycle << '\n');
407}
408
/// Move the boundary of scheduled code by one SUnit.
///
/// Emits SU into the hazard recognizer (if enabled) and the DFA resource
/// model, updates the micro-op issue count, and advances the cycle when the
/// resource model reports that SU had to start a new packet.
void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) {
  bool startNewCycle = false;

  // Update the reservation table.
  if (HazardRec->isEnabled()) {
    if (!isTop() && SU->isCall) {
      // Calls are scheduled with their preceding instructions. For bottom-up
      // scheduling, clear the pipeline state before emitting.
      HazardRec->Reset();
    }
    HazardRec->EmitInstruction(SU);
  }

  // Update DFA model.
  startNewCycle = ResourceModel->reserveResources(SU, IsTop: isTop());

  // Check the instruction group dispatch limit.
  // TODO: Check if this SU must end a dispatch group.
  IssueCount += SchedModel->getNumMicroOps(MI: SU->getInstr());
  if (startNewCycle) {
    LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
    bumpCycle();
  } else
    LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount << " at cycle "
                      << CurrCycle << '\n');
}
436
/// Release pending ready nodes in to the available queue. This makes them
/// visible to heuristics.
void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() {
  // If the available queue is empty, it is safe to reset MinReadyCycle.
  if (Available.empty())
    MinReadyCycle = std::numeric_limits<unsigned>::max();

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
    SUnit *SU = *(Pending.begin() + i);
    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;

    // Track the earliest ready cycle seen among pending nodes.
    if (ReadyCycle < MinReadyCycle)
      MinReadyCycle = ReadyCycle;

    if (ReadyCycle > CurrCycle)
      continue;

    if (checkHazard(SU))
      continue;

    Available.push(SU);
    // ReadyQueue::remove swaps the last element into slot i, so step the
    // index and bound back to revisit the swapped-in node.
    Pending.remove(I: Pending.begin() + i);
    --i;
    --e;
  }
  CheckPending = false;
}
466
467/// Remove SU from the ready set for this boundary.
468void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) {
469 if (Available.isInQueue(SU))
470 Available.remove(I: Available.find(SU));
471 else {
472 assert(Pending.isInQueue(SU) && "bad ready count");
473 Pending.remove(I: Pending.find(SU));
474 }
475}
476
/// If this queue only has one ready candidate, return it. As a side effect,
/// advance the cycle until at least one node is ready. If multiple instructions
/// are ready, return NULL.
SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
  if (CheckPending)
    releasePending();

  // Keep bumping cycles while the available queue is empty, or while its
  // single entry cannot actually issue (no DFA resources, or weak edges
  // still outstanding) and more nodes are pending.
  auto AdvanceCycle = [this]() {
    if (Available.empty())
      return true;
    if (Available.size() == 1 && Pending.size() > 0)
      return !ResourceModel->isResourceAvailable(SU: *Available.begin(), IsTop: isTop()) ||
             getWeakLeft(SU: *Available.begin(), isTop: isTop()) != 0;
    return false;
  };
  for (unsigned i = 0; AdvanceCycle(); ++i) {
    // If this loops past the recognizer's lookahead plus the largest edge
    // latency, no amount of waiting can make progress.
    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
           "permanent hazard");
    (void)i;
    // Null SU flushes the current packet before advancing the cycle.
    ResourceModel->reserveResources(SU: nullptr, IsTop: isTop());
    bumpCycle();
    releasePending();
  }
  if (Available.size() == 1)
    return *Available.begin();
  return nullptr;
}
504
505#ifndef NDEBUG
/// Debug helper (NDEBUG-only): print one candidate line with its queue,
/// optional pressure-set delta, cost, and the node itself.
void ConvergingVLIWScheduler::traceCandidate(const char *Label,
                                             const ReadyQueue &Q, SUnit *SU,
                                             int Cost, PressureChange P) {
  dbgs() << Label << " " << Q.getName() << " ";
  if (P.isValid())
    dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":"
           << P.getUnitInc() << " ";
  else
    dbgs() << "      ";
  dbgs() << "cost(" << Cost << ")\t";
  DAG->dumpNode(*SU);
}
518
// Very detailed queue dump, to be used with higher verbosity levels.
// For every node in Q, prints its pressure delta, cost breakdown (via
// SchedulingCost in verbose mode), and the instruction.
void ConvergingVLIWScheduler::readyQueueVerboseDump(
    const RegPressureTracker &RPTracker, SchedCandidate &Candidate,
    ReadyQueue &Q) {
  // getMaxPressureDelta temporarily modifies the tracker, hence const_cast.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);

  dbgs() << ">>> " << Q.getName() << "\n";
  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
    RegPressureDelta RPDelta;
    TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
                                    DAG->getRegionCriticalPSets(),
                                    DAG->getRegPressure().MaxSetPressure);
    // std::stringstream only for setw-style field alignment.
    std::stringstream dbgstr;
    dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")";
    dbgs() << dbgstr.str();
    SchedulingCost(Q, *I, Candidate, RPDelta, true);
    dbgs() << "\t";
    (*I)->getInstr()->dump();
  }
  dbgs() << "\n";
}
540#endif
541
542/// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor
543/// of SU, return true (we may have duplicates)
544static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) {
545 if (SU->NumPredsLeft == 0)
546 return false;
547
548 for (auto &Pred : SU->Preds) {
549 // We found an available, but not scheduled, predecessor.
550 if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2))
551 return false;
552 }
553
554 return true;
555}
556
557/// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor
558/// of SU, return true (we may have duplicates)
559static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) {
560 if (SU->NumSuccsLeft == 0)
561 return false;
562
563 for (auto &Succ : SU->Succs) {
564 // We found an available, but not scheduled, successor.
565 if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2))
566 return false;
567 }
568 return true;
569}
570
571/// Check if the instruction changes the register pressure of a register in the
572/// high pressure set. The function returns a negative value if the pressure
573/// decreases and a positive value is the pressure increases. If the instruction
574/// doesn't use a high pressure register or doesn't change the register
575/// pressure, then return 0.
576int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) {
577 PressureDiff &PD = DAG->getPressureDiff(SU);
578 for (const auto &P : PD) {
579 if (!P.isValid())
580 continue;
581 // The pressure differences are computed bottom-up, so the comparison for
582 // an increase is positive in the bottom direction, but negative in the
583 // top-down direction.
584 if (HighPressureSets[P.getPSet()])
585 return (isBotUp ? P.getUnitInc() : -P.getUnitInc());
586 }
587 return 0;
588}
589
/// Single point to compute overall scheduling cost.
/// TODO: More heuristics will be used soon.
///
/// Combines several additive heuristics: forced priority, critical path
/// (height/depth), resource availability, number of nodes unblocked,
/// register pressure, zero-latency chaining into the current packet, and a
/// penalty for nodes made available "too early" by a zero-latency edge.
/// \p verbose only affects debug output.
int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
                                            SchedCandidate &Candidate,
                                            RegPressureDelta &Delta,
                                            bool verbose) {
  // Initial trivial priority.
  int ResCount = 1;

  // Do not waste time on a node that is already scheduled.
  if (!SU || SU->isScheduled)
    return ResCount;

  LLVM_DEBUG(if (verbose) dbgs()
             << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
  // Forced priority is high.
  if (SU->isScheduleHigh) {
    ResCount += PriorityOne;
    LLVM_DEBUG(dbgs() << "H|");
  }

  unsigned IsAvailableAmt = 0;
  // Critical path first. Top-down uses height, bottom-up uses depth.
  if (Q.getID() == TopQID) {
    if (Top.isLatencyBound(SU)) {
      LLVM_DEBUG(if (verbose) dbgs() << "LB|");
      ResCount += (SU->getHeight() * ScaleTwo);
    }

    LLVM_DEBUG(if (verbose) {
      std::stringstream dbgstr;
      dbgstr << "h" << std::setw(3) << SU->getHeight() << "|";
      dbgs() << dbgstr.str();
    });

    // If resources are available for it, multiply the
    // chance of scheduling.
    if (Top.ResourceModel->isResourceAvailable(SU, IsTop: true)) {
      IsAvailableAmt = (PriorityTwo + PriorityThree);
      ResCount += IsAvailableAmt;
      LLVM_DEBUG(if (verbose) dbgs() << "A|");
    } else
      LLVM_DEBUG(if (verbose) dbgs() << " |");
  } else {
    if (Bot.isLatencyBound(SU)) {
      LLVM_DEBUG(if (verbose) dbgs() << "LB|");
      ResCount += (SU->getDepth() * ScaleTwo);
    }

    LLVM_DEBUG(if (verbose) {
      std::stringstream dbgstr;
      dbgstr << "d" << std::setw(3) << SU->getDepth() << "|";
      dbgs() << dbgstr.str();
    });

    // If resources are available for it, multiply the
    // chance of scheduling.
    if (Bot.ResourceModel->isResourceAvailable(SU, IsTop: false)) {
      IsAvailableAmt = (PriorityTwo + PriorityThree);
      ResCount += IsAvailableAmt;
      LLVM_DEBUG(if (verbose) dbgs() << "A|");
    } else
      LLVM_DEBUG(if (verbose) dbgs() << " |");
  }

  unsigned NumNodesBlocking = 0;
  if (Q.getID() == TopQID) {
    // How many SUs does it block from scheduling?
    // Look at all of the successors of this node.
    // Count the number of nodes that
    // this node is the sole unscheduled node for.
    if (Top.isLatencyBound(SU))
      for (const SDep &SI : SU->Succs)
        if (isSingleUnscheduledPred(SU: SI.getSUnit(), SU2: SU))
          ++NumNodesBlocking;
  } else {
    // How many unscheduled predecessors block this node?
    if (Bot.isLatencyBound(SU))
      for (const SDep &PI : SU->Preds)
        if (isSingleUnscheduledSucc(SU: PI.getSUnit(), SU2: SU))
          ++NumNodesBlocking;
  }
  ResCount += (NumNodesBlocking * ScaleTwo);

  LLVM_DEBUG(if (verbose) {
    std::stringstream dbgstr;
    dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|";
    dbgs() << dbgstr.str();
  });

  // Factor in reg pressure as a heuristic.
  if (!IgnoreBBRegPressure) {
    // Decrease priority by the amount that register pressure exceeds the limit.
    ResCount -= (Delta.Excess.getUnitInc() * PriorityOne);
    // Decrease priority if register pressure exceeds the limit.
    ResCount -= (Delta.CriticalMax.getUnitInc() * PriorityOne);
    // Decrease priority slightly if register pressure would increase over the
    // current maximum.
    ResCount -= (Delta.CurrentMax.getUnitInc() * PriorityTwo);
    // If there are register pressure issues, then we remove the value added for
    // the instruction being available. The rationale is that we really don't
    // want to schedule an instruction that causes a spill.
    if (IsAvailableAmt && pressureChange(SU, isBotUp: Q.getID() != TopQID) > 0 &&
        (Delta.Excess.getUnitInc() || Delta.CriticalMax.getUnitInc() ||
         Delta.CurrentMax.getUnitInc()))
      ResCount -= IsAvailableAmt;
    LLVM_DEBUG(if (verbose) {
      dbgs() << "RP " << Delta.Excess.getUnitInc() << "/"
             << Delta.CriticalMax.getUnitInc() << "/"
             << Delta.CurrentMax.getUnitInc() << ")|";
    });
  }

  // Give preference to a zero latency instruction if the dependent
  // instruction is in the current packet.
  if (Q.getID() == TopQID && getWeakLeft(SU, isTop: true) == 0) {
    for (const SDep &PI : SU->Preds) {
      if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() &&
          PI.getLatency() == 0 &&
          Top.ResourceModel->isInPacket(SU: PI.getSUnit())) {
        ResCount += PriorityThree;
        LLVM_DEBUG(if (verbose) dbgs() << "Z|");
      }
    }
  } else if (Q.getID() == BotQID && getWeakLeft(SU, isTop: false) == 0) {
    for (const SDep &SI : SU->Succs) {
      if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() &&
          SI.getLatency() == 0 &&
          Bot.ResourceModel->isInPacket(SU: SI.getSUnit())) {
        ResCount += PriorityThree;
        LLVM_DEBUG(if (verbose) dbgs() << "Z|");
      }
    }
  }

  // If the instruction has a non-zero latency dependence with an instruction in
  // the current packet, then it should not be scheduled yet. The case occurs
  // when the dependent instruction is scheduled in a new packet, so the
  // scheduler updates the current cycle and pending instructions become
  // available.
  if (CheckEarlyAvail) {
    if (Q.getID() == TopQID) {
      for (const auto &PI : SU->Preds) {
        if (PI.getLatency() > 0 &&
            Top.ResourceModel->isInPacket(SU: PI.getSUnit())) {
          ResCount -= PriorityOne;
          LLVM_DEBUG(if (verbose) dbgs() << "D|");
        }
      }
    } else {
      for (const auto &SI : SU->Succs) {
        if (SI.getLatency() > 0 &&
            Bot.ResourceModel->isInPacket(SU: SI.getSUnit())) {
          ResCount -= PriorityOne;
          LLVM_DEBUG(if (verbose) dbgs() << "D|");
        }
      }
    }
  }

  LLVM_DEBUG(if (verbose) {
    std::stringstream dbgstr;
    dbgstr << "Total " << std::setw(4) << ResCount << ")";
    dbgs() << dbgstr.str();
  });

  return ResCount;
}
758
/// Pick the best candidate from the top queue.
///
/// Scans every node in Zone's Available queue, scoring each via
/// SchedulingCost and applying a fixed sequence of tie-breakers (node order
/// for negative costs, best cost, weak-edge count, succ/pred count when
/// latency bound, then original order). Returns how the winner was chosen;
/// the winner itself is written into \p Candidate.
///
/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
/// DAG building. To adjust for the current scheduling location we need to
/// maintain the number of vreg uses remaining to be top-scheduled.
ConvergingVLIWScheduler::CandResult
ConvergingVLIWScheduler::pickNodeFromQueue(VLIWSchedBoundary &Zone,
                                           const RegPressureTracker &RPTracker,
                                           SchedCandidate &Candidate) {
  ReadyQueue &Q = Zone.Available;
  LLVM_DEBUG(if (SchedDebugVerboseLevel > 1)
                 readyQueueVerboseDump(RPTracker, Candidate, Q);
             else Q.dump(););

  // getMaxPressureDelta temporarily modifies the tracker.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);

  // BestSU remains NULL if no top candidates beat the best existing candidate.
  CandResult FoundCandidate = NoCand;
  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
    RegPressureDelta RPDelta;
    TempTracker.getMaxPressureDelta(MI: (*I)->getInstr(), Delta&: RPDelta,
                                    CriticalPSets: DAG->getRegionCriticalPSets(),
                                    MaxPressureLimit: DAG->getRegPressure().MaxSetPressure);

    int CurrentCost = SchedulingCost(Q, SU: *I, Candidate, Delta&: RPDelta, verbose: false);

    // Initialize the candidate if needed.
    if (!Candidate.SU) {
      LLVM_DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost));
      Candidate.SU = *I;
      Candidate.RPDelta = RPDelta;
      Candidate.SCost = CurrentCost;
      FoundCandidate = NodeOrder;
      continue;
    }

    // Choose node order for negative cost candidates. There is no good
    // candidate in this case.
    if (CurrentCost < 0 && Candidate.SCost < 0) {
      if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||
          (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
        LLVM_DEBUG(traceCandidate("NCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = NodeOrder;
      }
      continue;
    }

    // Best cost.
    if (CurrentCost > Candidate.SCost) {
      LLVM_DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost));
      Candidate.SU = *I;
      Candidate.RPDelta = RPDelta;
      Candidate.SCost = CurrentCost;
      FoundCandidate = BestCost;
      continue;
    }

    // Choose an instruction that does not depend on an artificial edge.
    unsigned CurrWeak = getWeakLeft(SU: *I, isTop: (Q.getID() == TopQID));
    unsigned CandWeak = getWeakLeft(SU: Candidate.SU, isTop: (Q.getID() == TopQID));
    if (CurrWeak != CandWeak) {
      if (CurrWeak < CandWeak) {
        LLVM_DEBUG(traceCandidate("WCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = Weak;
      }
      continue;
    }

    // On a cost tie while latency bound, prefer the node with more edges in
    // the scheduling direction (it unblocks more of the critical path).
    if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(SU: *I)) {
      unsigned CurrSize, CandSize;
      if (Q.getID() == TopQID) {
        CurrSize = (*I)->Succs.size();
        CandSize = Candidate.SU->Succs.size();
      } else {
        CurrSize = (*I)->Preds.size();
        CandSize = Candidate.SU->Preds.size();
      }
      if (CurrSize > CandSize) {
        LLVM_DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = BestCost;
      }
      // Keep the old candidate if it's a better candidate. That is, don't use
      // the subsequent tie breaker.
      if (CurrSize != CandSize)
        continue;
    }

    // Tie breaker.
    // To avoid scheduling indeterminism, we need a tie breaker
    // for the case when cost is identical for two nodes.
    if (UseNewerCandidate && CurrentCost == Candidate.SCost) {
      if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||
          (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
        LLVM_DEBUG(traceCandidate("TCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = NodeOrder;
        continue;
      }
    }

    // Fall through to original instruction order.
    // Only consider node order if Candidate was chosen from this Q.
    if (FoundCandidate == NoCand)
      continue;
  }
  return FoundCandidate;
}
878
/// Pick the best candidate node from either the top or bottom queue.
///
/// Forced single choices win outright; otherwise both queues are scored and
/// the result kinds (SingleExcess/SingleCritical/SingleMax/cost) decide the
/// direction, defaulting to bottom-up.
SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    LLVM_DEBUG(dbgs() << "Picked only Bottom\n");
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    LLVM_DEBUG(dbgs() << "Picked only Top\n");
    IsTopNode = true;
    return SU;
  }
  SchedCandidate BotCand;
  // Prefer bottom scheduling when heuristics are silent.
  CandResult BotResult =
      pickNodeFromQueue(Zone&: Bot, RPTracker: DAG->getBotRPTracker(), Candidate&: BotCand);
  assert(BotResult != NoCand && "failed to find the first candidate");

  // If either Q has a single candidate that provides the least increase in
  // Excess pressure, we can immediately schedule from that Q.
  //
  // RegionCriticalPSets summarizes the pressure within the scheduled region and
  // affects picking from either Q. If scheduling in one direction must
  // increase pressure for one of the excess PSets, then schedule in that
  // direction first to provide more freedom in the other direction.
  if (BotResult == SingleExcess || BotResult == SingleCritical) {
    LLVM_DEBUG(dbgs() << "Prefered Bottom Node\n");
    IsTopNode = false;
    return BotCand.SU;
  }
  // Check if the top Q has a better candidate.
  SchedCandidate TopCand;
  CandResult TopResult =
      pickNodeFromQueue(Zone&: Top, RPTracker: DAG->getTopRPTracker(), Candidate&: TopCand);
  assert(TopResult != NoCand && "failed to find the first candidate");

  if (TopResult == SingleExcess || TopResult == SingleCritical) {
    LLVM_DEBUG(dbgs() << "Prefered Top Node\n");
    IsTopNode = true;
    return TopCand.SU;
  }
  // If either Q has a single candidate that minimizes pressure above the
  // original region's pressure pick it.
  if (BotResult == SingleMax) {
    LLVM_DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n");
    IsTopNode = false;
    return BotCand.SU;
  }
  if (TopResult == SingleMax) {
    LLVM_DEBUG(dbgs() << "Prefered Top Node SingleMax\n");
    IsTopNode = true;
    return TopCand.SU;
  }
  // Strictly better cost wins for the top; ties go to the bottom.
  if (TopCand.SCost > BotCand.SCost) {
    LLVM_DEBUG(dbgs() << "Prefered Top Node Cost\n");
    IsTopNode = true;
    return TopCand.SU;
  }
  // Otherwise prefer the bottom candidate in node order.
  LLVM_DEBUG(dbgs() << "Prefered Bottom in Node order\n");
  IsTopNode = false;
  return BotCand.SU;
}
944
/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
///
/// Honors a forced pre-RA direction (top-down or bottom-up) if one is set;
/// otherwise picks bidirectionally. The chosen node is removed from both
/// zones' ready sets before being returned.
SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    // Region fully scheduled; all ready queues must be drained.
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  if (PreRADirection == MISched::TopDown) {
    SU = Top.pickOnlyChoice();
    if (!SU) {
      SchedCandidate TopCand;
      CandResult TopResult =
          pickNodeFromQueue(Zone&: Top, RPTracker: DAG->getTopRPTracker(), Candidate&: TopCand);
      assert(TopResult != NoCand && "failed to find the first candidate");
      (void)TopResult;
      SU = TopCand.SU;
    }
    IsTopNode = true;
  } else if (PreRADirection == MISched::BottomUp) {
    SU = Bot.pickOnlyChoice();
    if (!SU) {
      SchedCandidate BotCand;
      CandResult BotResult =
          pickNodeFromQueue(Zone&: Bot, RPTracker: DAG->getBotRPTracker(), Candidate&: BotCand);
      assert(BotResult != NoCand && "failed to find the first candidate");
      (void)BotResult;
      SU = BotCand.SU;
    }
    IsTopNode = false;
  } else {
    SU = pickNodeBidrectional(IsTopNode);
  }
  // A node can be ready in both zones; take it out of each set it is in.
  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  LLVM_DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
                    << " Scheduling instruction in cycle "
                    << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " ("
                    << reportPackets() << ")\n";
             DAG->dumpNode(*SU));
  return SU;
}
990
991/// Update the scheduler's state after scheduling a node. This is the same node
992/// that was just returned by pickNode(). However, VLIWMachineScheduler needs
993/// to update it's state based on the current cycle before MachineSchedStrategy
994/// does.
995void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
996 if (IsTopNode) {
997 Top.bumpNode(SU);
998 SU->TopReadyCycle = Top.CurrCycle;
999 } else {
1000 Bot.bumpNode(SU);
1001 SU->BotReadyCycle = Bot.CurrCycle;
1002 }
1003}
1004