1//===- VLIWMachineScheduler.cpp - VLIW-Focused Scheduling Pass ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// MachineScheduler schedules machine instructions after phi elimination. It
10// preserves LiveIntervals so it can be invoked before register allocation.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/CodeGen/VLIWMachineScheduler.h"
15#include "llvm/ADT/SmallVector.h"
16#include "llvm/CodeGen/DFAPacketizer.h"
17#include "llvm/CodeGen/MachineBasicBlock.h"
18#include "llvm/CodeGen/MachineFunction.h"
19#include "llvm/CodeGen/MachineInstr.h"
20#include "llvm/CodeGen/MachineLoopInfo.h"
21#include "llvm/CodeGen/RegisterClassInfo.h"
22#include "llvm/CodeGen/RegisterPressure.h"
23#include "llvm/CodeGen/ScheduleDAG.h"
24#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
25#include "llvm/CodeGen/TargetInstrInfo.h"
26#include "llvm/CodeGen/TargetOpcodes.h"
27#include "llvm/CodeGen/TargetRegisterInfo.h"
28#include "llvm/CodeGen/TargetSchedule.h"
29#include "llvm/CodeGen/TargetSubtargetInfo.h"
30#include "llvm/Support/CommandLine.h"
31#include "llvm/Support/Debug.h"
32#include "llvm/Support/raw_ostream.h"
33#include <algorithm>
34#include <cassert>
35#include <iomanip>
36#include <limits>
37#include <memory>
38#include <sstream>
39
40using namespace llvm;
41
42#define DEBUG_TYPE "machine-scheduler"
43
44static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure", cl::Hidden,
45 cl::init(Val: false));
46
47static cl::opt<bool> UseNewerCandidate("use-newer-candidate", cl::Hidden,
48 cl::init(Val: true));
49
50static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
51 cl::Hidden, cl::init(Val: 1));
52
53// Check if the scheduler should penalize instructions that are available to
54// early due to a zero-latency dependence.
55static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
56 cl::init(Val: true));
57
58// This value is used to determine if a register class is a high pressure set.
59// We compute the maximum number of registers needed and divided by the total
60// available. Then, we compare the result to this value.
61static cl::opt<float> RPThreshold("vliw-misched-reg-pressure", cl::Hidden,
62 cl::init(Val: 0.75f),
63 cl::desc("High register pressure threhold."));
64
65VLIWResourceModel::VLIWResourceModel(const TargetSubtargetInfo &STI,
66 const TargetSchedModel *SM)
67 : TII(STI.getInstrInfo()), SchedModel(SM) {
68 ResourcesModel = createPacketizer(STI);
69
70 // This hard requirement could be relaxed,
71 // but for now do not let it proceed.
72 assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
73
74 Packet.reserve(N: SchedModel->getIssueWidth());
75 Packet.clear();
76 ResourcesModel->clearResources();
77}
78
79void VLIWResourceModel::reset() {
80 Packet.clear();
81 ResourcesModel->clearResources();
82}
83
// Release the DFA packetizer created in the constructor.
VLIWResourceModel::~VLIWResourceModel() { delete ResourcesModel; }
85
86/// Return true if there is a dependence between SUd and SUu.
87bool VLIWResourceModel::hasDependence(const SUnit *SUd, const SUnit *SUu) {
88 if (SUd->Succs.size() == 0)
89 return false;
90
91 for (const auto &S : SUd->Succs) {
92 // Since we do not add pseudos to packets, might as well
93 // ignore order dependencies.
94 if (S.isCtrl())
95 continue;
96
97 if (S.getSUnit() == SUu && S.getLatency() > 0)
98 return true;
99 }
100 return false;
101}
102
/// Check if scheduling of this SU is possible
/// in the current packet.
/// It is _not_ precise (stateful), it is more like
/// another heuristic. Many corner cases are figured
/// empirically.
108bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) {
109 if (!SU || !SU->getInstr())
110 return false;
111
112 // First see if the pipeline could receive this instruction
113 // in the current cycle.
114 switch (SU->getInstr()->getOpcode()) {
115 default:
116 if (!ResourcesModel->canReserveResources(MI&: *SU->getInstr()))
117 return false;
118 break;
119 case TargetOpcode::EXTRACT_SUBREG:
120 case TargetOpcode::INSERT_SUBREG:
121 case TargetOpcode::SUBREG_TO_REG:
122 case TargetOpcode::REG_SEQUENCE:
123 case TargetOpcode::IMPLICIT_DEF:
124 case TargetOpcode::COPY:
125 case TargetOpcode::INLINEASM:
126 case TargetOpcode::INLINEASM_BR:
127 break;
128 }
129
130 // Now see if there are no other dependencies to instructions already
131 // in the packet.
132 if (IsTop) {
133 for (const SUnit *U : Packet)
134 if (hasDependence(SUd: U, SUu: SU))
135 return false;
136 } else {
137 for (const SUnit *U : Packet)
138 if (hasDependence(SUd: SU, SUu: U))
139 return false;
140 }
141 return true;
142}
143
144/// Keep track of available resources.
145bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) {
146 bool startNewCycle = false;
147 // Artificially reset state.
148 if (!SU) {
149 reset();
150 TotalPackets++;
151 return false;
152 }
153 // If this SU does not fit in the packet or the packet is now full
154 // start a new one.
155 if (!isResourceAvailable(SU, IsTop) ||
156 Packet.size() >= SchedModel->getIssueWidth()) {
157 reset();
158 TotalPackets++;
159 startNewCycle = true;
160 }
161
162 switch (SU->getInstr()->getOpcode()) {
163 default:
164 ResourcesModel->reserveResources(MI&: *SU->getInstr());
165 break;
166 case TargetOpcode::EXTRACT_SUBREG:
167 case TargetOpcode::INSERT_SUBREG:
168 case TargetOpcode::SUBREG_TO_REG:
169 case TargetOpcode::REG_SEQUENCE:
170 case TargetOpcode::IMPLICIT_DEF:
171 case TargetOpcode::KILL:
172 case TargetOpcode::CFI_INSTRUCTION:
173 case TargetOpcode::EH_LABEL:
174 case TargetOpcode::COPY:
175 case TargetOpcode::INLINEASM:
176 case TargetOpcode::INLINEASM_BR:
177 break;
178 }
179 Packet.push_back(Elt: SU);
180
181#ifndef NDEBUG
182 LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
183 for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
184 LLVM_DEBUG(dbgs() << "\t[" << i << "] SU(");
185 LLVM_DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
186 LLVM_DEBUG(Packet[i]->getInstr()->dump());
187 }
188#endif
189
190 return startNewCycle;
191}
192
193DFAPacketizer *
194VLIWResourceModel::createPacketizer(const TargetSubtargetInfo &STI) const {
195 return STI.getInstrInfo()->CreateTargetScheduleState(STI);
196}
197
198/// schedule - Called back from MachineScheduler::runOnMachineFunction
199/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
200/// only includes instructions that have DAG nodes, not scheduling boundaries.
201void VLIWMachineScheduler::schedule() {
202 LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW "
203 << printMBBReference(*BB) << " " << BB->getName()
204 << " in_func " << BB->getParent()->getName()
205 << " at loop depth " << MLI->getLoopDepth(BB) << " \n");
206
207 buildDAGWithRegPressure();
208
209 Topo.InitDAGTopologicalSorting();
210
211 // Postprocess the DAG to add platform-specific artificial dependencies.
212 postProcessDAG();
213
214 SmallVector<SUnit *, 8> TopRoots, BotRoots;
215 findRootsAndBiasEdges(TopRoots, BotRoots);
216
217 // Initialize the strategy before modifying the DAG.
218 SchedImpl->initialize(DAG: this);
219
220 LLVM_DEBUG({
221 unsigned maxH = 0;
222 for (const SUnit &SU : SUnits)
223 if (SU.getHeight() > maxH)
224 maxH = SU.getHeight();
225 dbgs() << "Max Height " << maxH << "\n";
226 });
227 LLVM_DEBUG({
228 unsigned maxD = 0;
229 for (const SUnit &SU : SUnits)
230 if (SU.getDepth() > maxD)
231 maxD = SU.getDepth();
232 dbgs() << "Max Depth " << maxD << "\n";
233 });
234 LLVM_DEBUG(dump());
235 if (ViewMISchedDAGs)
236 viewGraph();
237
238 initQueues(TopRoots, BotRoots);
239
240 bool IsTopNode = false;
241 while (true) {
242 LLVM_DEBUG(
243 dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
244 SUnit *SU = SchedImpl->pickNode(IsTopNode);
245 if (!SU)
246 break;
247
248 if (!checkSchedLimit())
249 break;
250
251 scheduleMI(SU, IsTopNode);
252
253 // Notify the scheduling strategy after updating the DAG.
254 SchedImpl->schedNode(SU, IsTopNode);
255
256 updateQueues(SU, IsTopNode);
257 }
258 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
259
260 placeDebugValues();
261
262 LLVM_DEBUG({
263 dbgs() << "*** Final schedule for "
264 << printMBBReference(*begin()->getParent()) << " ***\n";
265 dumpSchedule();
266 dbgs() << '\n';
267 });
268}
269
270void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
271 DAG = static_cast<VLIWMachineScheduler *>(dag);
272 SchedModel = DAG->getSchedModel();
273
274 Top.init(dag: DAG, smodel: SchedModel);
275 Bot.init(dag: DAG, smodel: SchedModel);
276
277 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
278 // are disabled, then these HazardRecs will be disabled.
279 const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
280 const TargetSubtargetInfo &STI = DAG->MF.getSubtarget();
281 const TargetInstrInfo *TII = STI.getInstrInfo();
282 delete Top.HazardRec;
283 delete Bot.HazardRec;
284 Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
285 Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
286
287 delete Top.ResourceModel;
288 delete Bot.ResourceModel;
289 Top.ResourceModel = createVLIWResourceModel(STI, SchedModel: DAG->getSchedModel());
290 Bot.ResourceModel = createVLIWResourceModel(STI, SchedModel: DAG->getSchedModel());
291
292 const std::vector<unsigned> &MaxPressure =
293 DAG->getRegPressure().MaxSetPressure;
294 HighPressureSets.assign(NumElts: MaxPressure.size(), Elt: false);
295 for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) {
296 unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(Idx: i);
297 HighPressureSets[i] =
298 ((float)MaxPressure[i] > ((float)Limit * RPThreshold));
299 }
300
301 assert((!ForceTopDown || !ForceBottomUp) &&
302 "-misched-topdown incompatible with -misched-bottomup");
303}
304
305VLIWResourceModel *ConvergingVLIWScheduler::createVLIWResourceModel(
306 const TargetSubtargetInfo &STI, const TargetSchedModel *SchedModel) const {
307 return new VLIWResourceModel(STI, SchedModel);
308}
309
310void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
311 for (const SDep &PI : SU->Preds) {
312 unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle;
313 unsigned MinLatency = PI.getLatency();
314#ifndef NDEBUG
315 Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
316#endif
317 if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
318 SU->TopReadyCycle = PredReadyCycle + MinLatency;
319 }
320
321 if (!SU->isScheduled)
322 Top.releaseNode(SU, ReadyCycle: SU->TopReadyCycle);
323}
324
325void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
326 assert(SU->getInstr() && "Scheduled SUnit must have instr");
327
328 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E;
329 ++I) {
330 unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
331 unsigned MinLatency = I->getLatency();
332#ifndef NDEBUG
333 Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
334#endif
335 if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
336 SU->BotReadyCycle = SuccReadyCycle + MinLatency;
337 }
338
339 if (!SU->isScheduled)
340 Bot.releaseNode(SU, ReadyCycle: SU->BotReadyCycle);
341}
342
343ConvergingVLIWScheduler::VLIWSchedBoundary::~VLIWSchedBoundary() {
344 delete ResourceModel;
345 delete HazardRec;
346}
347
348/// Does this SU have a hazard within the current instruction group.
349///
350/// The scheduler supports two modes of hazard recognition. The first is the
351/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
352/// supports highly complicated in-order reservation tables
353/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
354///
355/// The second is a streamlined mechanism that checks for hazards based on
356/// simple counters that the scheduler itself maintains. It explicitly checks
357/// for instruction dispatch limitations, including the number of micro-ops that
358/// can dispatch per cycle.
359///
360/// TODO: Also check whether the SU must start a new group.
361bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) {
362 if (HazardRec->isEnabled())
363 return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
364
365 unsigned uops = SchedModel->getNumMicroOps(MI: SU->getInstr());
366 if (IssueCount + uops > SchedModel->getIssueWidth())
367 return true;
368
369 return false;
370}
371
372void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(
373 SUnit *SU, unsigned ReadyCycle) {
374 if (ReadyCycle < MinReadyCycle)
375 MinReadyCycle = ReadyCycle;
376
377 // Check for interlocks first. For the purpose of other heuristics, an
378 // instruction that cannot issue appears as if it's not in the ReadyQueue.
379 if (ReadyCycle > CurrCycle || checkHazard(SU))
380
381 Pending.push(SU);
382 else
383 Available.push(SU);
384}
385
386/// Move the boundary of scheduled code by one cycle.
387void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() {
388 unsigned Width = SchedModel->getIssueWidth();
389 IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
390
391 assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
392 "MinReadyCycle uninitialized");
393 unsigned NextCycle = std::max(a: CurrCycle + 1, b: MinReadyCycle);
394
395 if (!HazardRec->isEnabled()) {
396 // Bypass HazardRec virtual calls.
397 CurrCycle = NextCycle;
398 } else {
399 // Bypass getHazardType calls in case of long latency.
400 for (; CurrCycle != NextCycle; ++CurrCycle) {
401 if (isTop())
402 HazardRec->AdvanceCycle();
403 else
404 HazardRec->RecedeCycle();
405 }
406 }
407 CheckPending = true;
408
409 LLVM_DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle "
410 << CurrCycle << '\n');
411}
412
413/// Move the boundary of scheduled code by one SUnit.
414void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) {
415 bool startNewCycle = false;
416
417 // Update the reservation table.
418 if (HazardRec->isEnabled()) {
419 if (!isTop() && SU->isCall) {
420 // Calls are scheduled with their preceding instructions. For bottom-up
421 // scheduling, clear the pipeline state before emitting.
422 HazardRec->Reset();
423 }
424 HazardRec->EmitInstruction(SU);
425 }
426
427 // Update DFA model.
428 startNewCycle = ResourceModel->reserveResources(SU, IsTop: isTop());
429
430 // Check the instruction group dispatch limit.
431 // TODO: Check if this SU must end a dispatch group.
432 IssueCount += SchedModel->getNumMicroOps(MI: SU->getInstr());
433 if (startNewCycle) {
434 LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
435 bumpCycle();
436 } else
437 LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount << " at cycle "
438 << CurrCycle << '\n');
439}
440
441/// Release pending ready nodes in to the available queue. This makes them
442/// visible to heuristics.
443void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() {
444 // If the available queue is empty, it is safe to reset MinReadyCycle.
445 if (Available.empty())
446 MinReadyCycle = std::numeric_limits<unsigned>::max();
447
448 // Check to see if any of the pending instructions are ready to issue. If
449 // so, add them to the available queue.
450 for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
451 SUnit *SU = *(Pending.begin() + i);
452 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
453
454 if (ReadyCycle < MinReadyCycle)
455 MinReadyCycle = ReadyCycle;
456
457 if (ReadyCycle > CurrCycle)
458 continue;
459
460 if (checkHazard(SU))
461 continue;
462
463 Available.push(SU);
464 Pending.remove(I: Pending.begin() + i);
465 --i;
466 --e;
467 }
468 CheckPending = false;
469}
470
471/// Remove SU from the ready set for this boundary.
472void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) {
473 if (Available.isInQueue(SU))
474 Available.remove(I: Available.find(SU));
475 else {
476 assert(Pending.isInQueue(SU) && "bad ready count");
477 Pending.remove(I: Pending.find(SU));
478 }
479}
480
481/// If this queue only has one ready candidate, return it. As a side effect,
482/// advance the cycle until at least one node is ready. If multiple instructions
483/// are ready, return NULL.
484SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
485 if (CheckPending)
486 releasePending();
487
488 auto AdvanceCycle = [this]() {
489 if (Available.empty())
490 return true;
491 if (Available.size() == 1 && Pending.size() > 0)
492 return !ResourceModel->isResourceAvailable(SU: *Available.begin(), IsTop: isTop()) ||
493 getWeakLeft(SU: *Available.begin(), isTop: isTop()) != 0;
494 return false;
495 };
496 for (unsigned i = 0; AdvanceCycle(); ++i) {
497 assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
498 "permanent hazard");
499 (void)i;
500 ResourceModel->reserveResources(SU: nullptr, IsTop: isTop());
501 bumpCycle();
502 releasePending();
503 }
504 if (Available.size() == 1)
505 return *Available.begin();
506 return nullptr;
507}
508
509#ifndef NDEBUG
510void ConvergingVLIWScheduler::traceCandidate(const char *Label,
511 const ReadyQueue &Q, SUnit *SU,
512 int Cost, PressureChange P) {
513 dbgs() << Label << " " << Q.getName() << " ";
514 if (P.isValid())
515 dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":"
516 << P.getUnitInc() << " ";
517 else
518 dbgs() << " ";
519 dbgs() << "cost(" << Cost << ")\t";
520 DAG->dumpNode(*SU);
521}
522
523// Very detailed queue dump, to be used with higher verbosity levels.
524void ConvergingVLIWScheduler::readyQueueVerboseDump(
525 const RegPressureTracker &RPTracker, SchedCandidate &Candidate,
526 ReadyQueue &Q) {
527 RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
528
529 dbgs() << ">>> " << Q.getName() << "\n";
530 for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
531 RegPressureDelta RPDelta;
532 TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
533 DAG->getRegionCriticalPSets(),
534 DAG->getRegPressure().MaxSetPressure);
535 std::stringstream dbgstr;
536 dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")";
537 dbgs() << dbgstr.str();
538 SchedulingCost(Q, *I, Candidate, RPDelta, true);
539 dbgs() << "\t";
540 (*I)->getInstr()->dump();
541 }
542 dbgs() << "\n";
543}
544#endif
545
546/// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor
547/// of SU, return true (we may have duplicates)
548static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) {
549 if (SU->NumPredsLeft == 0)
550 return false;
551
552 for (auto &Pred : SU->Preds) {
553 // We found an available, but not scheduled, predecessor.
554 if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2))
555 return false;
556 }
557
558 return true;
559}
560
561/// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor
562/// of SU, return true (we may have duplicates)
563static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) {
564 if (SU->NumSuccsLeft == 0)
565 return false;
566
567 for (auto &Succ : SU->Succs) {
568 // We found an available, but not scheduled, successor.
569 if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2))
570 return false;
571 }
572 return true;
573}
574
575/// Check if the instruction changes the register pressure of a register in the
576/// high pressure set. The function returns a negative value if the pressure
577/// decreases and a positive value is the pressure increases. If the instruction
578/// doesn't use a high pressure register or doesn't change the register
579/// pressure, then return 0.
580int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) {
581 PressureDiff &PD = DAG->getPressureDiff(SU);
582 for (const auto &P : PD) {
583 if (!P.isValid())
584 continue;
585 // The pressure differences are computed bottom-up, so the comparison for
586 // an increase is positive in the bottom direction, but negative in the
587 // top-down direction.
588 if (HighPressureSets[P.getPSet()])
589 return (isBotUp ? P.getUnitInc() : -P.getUnitInc());
590 }
591 return 0;
592}
593
594/// Single point to compute overall scheduling cost.
595/// TODO: More heuristics will be used soon.
596int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
597 SchedCandidate &Candidate,
598 RegPressureDelta &Delta,
599 bool verbose) {
600 // Initial trivial priority.
601 int ResCount = 1;
602
603 // Do not waste time on a node that is already scheduled.
604 if (!SU || SU->isScheduled)
605 return ResCount;
606
607 LLVM_DEBUG(if (verbose) dbgs()
608 << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
609 // Forced priority is high.
610 if (SU->isScheduleHigh) {
611 ResCount += PriorityOne;
612 LLVM_DEBUG(dbgs() << "H|");
613 }
614
615 unsigned IsAvailableAmt = 0;
616 // Critical path first.
617 if (Q.getID() == TopQID) {
618 if (Top.isLatencyBound(SU)) {
619 LLVM_DEBUG(if (verbose) dbgs() << "LB|");
620 ResCount += (SU->getHeight() * ScaleTwo);
621 }
622
623 LLVM_DEBUG(if (verbose) {
624 std::stringstream dbgstr;
625 dbgstr << "h" << std::setw(3) << SU->getHeight() << "|";
626 dbgs() << dbgstr.str();
627 });
628
629 // If resources are available for it, multiply the
630 // chance of scheduling.
631 if (Top.ResourceModel->isResourceAvailable(SU, IsTop: true)) {
632 IsAvailableAmt = (PriorityTwo + PriorityThree);
633 ResCount += IsAvailableAmt;
634 LLVM_DEBUG(if (verbose) dbgs() << "A|");
635 } else
636 LLVM_DEBUG(if (verbose) dbgs() << " |");
637 } else {
638 if (Bot.isLatencyBound(SU)) {
639 LLVM_DEBUG(if (verbose) dbgs() << "LB|");
640 ResCount += (SU->getDepth() * ScaleTwo);
641 }
642
643 LLVM_DEBUG(if (verbose) {
644 std::stringstream dbgstr;
645 dbgstr << "d" << std::setw(3) << SU->getDepth() << "|";
646 dbgs() << dbgstr.str();
647 });
648
649 // If resources are available for it, multiply the
650 // chance of scheduling.
651 if (Bot.ResourceModel->isResourceAvailable(SU, IsTop: false)) {
652 IsAvailableAmt = (PriorityTwo + PriorityThree);
653 ResCount += IsAvailableAmt;
654 LLVM_DEBUG(if (verbose) dbgs() << "A|");
655 } else
656 LLVM_DEBUG(if (verbose) dbgs() << " |");
657 }
658
659 unsigned NumNodesBlocking = 0;
660 if (Q.getID() == TopQID) {
661 // How many SUs does it block from scheduling?
662 // Look at all of the successors of this node.
663 // Count the number of nodes that
664 // this node is the sole unscheduled node for.
665 if (Top.isLatencyBound(SU))
666 for (const SDep &SI : SU->Succs)
667 if (isSingleUnscheduledPred(SU: SI.getSUnit(), SU2: SU))
668 ++NumNodesBlocking;
669 } else {
670 // How many unscheduled predecessors block this node?
671 if (Bot.isLatencyBound(SU))
672 for (const SDep &PI : SU->Preds)
673 if (isSingleUnscheduledSucc(SU: PI.getSUnit(), SU2: SU))
674 ++NumNodesBlocking;
675 }
676 ResCount += (NumNodesBlocking * ScaleTwo);
677
678 LLVM_DEBUG(if (verbose) {
679 std::stringstream dbgstr;
680 dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|";
681 dbgs() << dbgstr.str();
682 });
683
684 // Factor in reg pressure as a heuristic.
685 if (!IgnoreBBRegPressure) {
686 // Decrease priority by the amount that register pressure exceeds the limit.
687 ResCount -= (Delta.Excess.getUnitInc() * PriorityOne);
688 // Decrease priority if register pressure exceeds the limit.
689 ResCount -= (Delta.CriticalMax.getUnitInc() * PriorityOne);
690 // Decrease priority slightly if register pressure would increase over the
691 // current maximum.
692 ResCount -= (Delta.CurrentMax.getUnitInc() * PriorityTwo);
693 // If there are register pressure issues, then we remove the value added for
694 // the instruction being available. The rationale is that we really don't
695 // want to schedule an instruction that causes a spill.
696 if (IsAvailableAmt && pressureChange(SU, isBotUp: Q.getID() != TopQID) > 0 &&
697 (Delta.Excess.getUnitInc() || Delta.CriticalMax.getUnitInc() ||
698 Delta.CurrentMax.getUnitInc()))
699 ResCount -= IsAvailableAmt;
700 LLVM_DEBUG(if (verbose) {
701 dbgs() << "RP " << Delta.Excess.getUnitInc() << "/"
702 << Delta.CriticalMax.getUnitInc() << "/"
703 << Delta.CurrentMax.getUnitInc() << ")|";
704 });
705 }
706
707 // Give preference to a zero latency instruction if the dependent
708 // instruction is in the current packet.
709 if (Q.getID() == TopQID && getWeakLeft(SU, isTop: true) == 0) {
710 for (const SDep &PI : SU->Preds) {
711 if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() &&
712 PI.getLatency() == 0 &&
713 Top.ResourceModel->isInPacket(SU: PI.getSUnit())) {
714 ResCount += PriorityThree;
715 LLVM_DEBUG(if (verbose) dbgs() << "Z|");
716 }
717 }
718 } else if (Q.getID() == BotQID && getWeakLeft(SU, isTop: false) == 0) {
719 for (const SDep &SI : SU->Succs) {
720 if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() &&
721 SI.getLatency() == 0 &&
722 Bot.ResourceModel->isInPacket(SU: SI.getSUnit())) {
723 ResCount += PriorityThree;
724 LLVM_DEBUG(if (verbose) dbgs() << "Z|");
725 }
726 }
727 }
728
729 // If the instruction has a non-zero latency dependence with an instruction in
730 // the current packet, then it should not be scheduled yet. The case occurs
731 // when the dependent instruction is scheduled in a new packet, so the
732 // scheduler updates the current cycle and pending instructions become
733 // available.
734 if (CheckEarlyAvail) {
735 if (Q.getID() == TopQID) {
736 for (const auto &PI : SU->Preds) {
737 if (PI.getLatency() > 0 &&
738 Top.ResourceModel->isInPacket(SU: PI.getSUnit())) {
739 ResCount -= PriorityOne;
740 LLVM_DEBUG(if (verbose) dbgs() << "D|");
741 }
742 }
743 } else {
744 for (const auto &SI : SU->Succs) {
745 if (SI.getLatency() > 0 &&
746 Bot.ResourceModel->isInPacket(SU: SI.getSUnit())) {
747 ResCount -= PriorityOne;
748 LLVM_DEBUG(if (verbose) dbgs() << "D|");
749 }
750 }
751 }
752 }
753
754 LLVM_DEBUG(if (verbose) {
755 std::stringstream dbgstr;
756 dbgstr << "Total " << std::setw(4) << ResCount << ")";
757 dbgs() << dbgstr.str();
758 });
759
760 return ResCount;
761}
762
763/// Pick the best candidate from the top queue.
764///
765/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
766/// DAG building. To adjust for the current scheduling location we need to
767/// maintain the number of vreg uses remaining to be top-scheduled.
768ConvergingVLIWScheduler::CandResult
769ConvergingVLIWScheduler::pickNodeFromQueue(VLIWSchedBoundary &Zone,
770 const RegPressureTracker &RPTracker,
771 SchedCandidate &Candidate) {
772 ReadyQueue &Q = Zone.Available;
773 LLVM_DEBUG(if (SchedDebugVerboseLevel > 1)
774 readyQueueVerboseDump(RPTracker, Candidate, Q);
775 else Q.dump(););
776
777 // getMaxPressureDelta temporarily modifies the tracker.
778 RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);
779
780 // BestSU remains NULL if no top candidates beat the best existing candidate.
781 CandResult FoundCandidate = NoCand;
782 for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
783 RegPressureDelta RPDelta;
784 TempTracker.getMaxPressureDelta(MI: (*I)->getInstr(), Delta&: RPDelta,
785 CriticalPSets: DAG->getRegionCriticalPSets(),
786 MaxPressureLimit: DAG->getRegPressure().MaxSetPressure);
787
788 int CurrentCost = SchedulingCost(Q, SU: *I, Candidate, Delta&: RPDelta, verbose: false);
789
790 // Initialize the candidate if needed.
791 if (!Candidate.SU) {
792 LLVM_DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost));
793 Candidate.SU = *I;
794 Candidate.RPDelta = RPDelta;
795 Candidate.SCost = CurrentCost;
796 FoundCandidate = NodeOrder;
797 continue;
798 }
799
800 // Choose node order for negative cost candidates. There is no good
801 // candidate in this case.
802 if (CurrentCost < 0 && Candidate.SCost < 0) {
803 if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||
804 (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
805 LLVM_DEBUG(traceCandidate("NCAND", Q, *I, CurrentCost));
806 Candidate.SU = *I;
807 Candidate.RPDelta = RPDelta;
808 Candidate.SCost = CurrentCost;
809 FoundCandidate = NodeOrder;
810 }
811 continue;
812 }
813
814 // Best cost.
815 if (CurrentCost > Candidate.SCost) {
816 LLVM_DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost));
817 Candidate.SU = *I;
818 Candidate.RPDelta = RPDelta;
819 Candidate.SCost = CurrentCost;
820 FoundCandidate = BestCost;
821 continue;
822 }
823
824 // Choose an instruction that does not depend on an artificial edge.
825 unsigned CurrWeak = getWeakLeft(SU: *I, isTop: (Q.getID() == TopQID));
826 unsigned CandWeak = getWeakLeft(SU: Candidate.SU, isTop: (Q.getID() == TopQID));
827 if (CurrWeak != CandWeak) {
828 if (CurrWeak < CandWeak) {
829 LLVM_DEBUG(traceCandidate("WCAND", Q, *I, CurrentCost));
830 Candidate.SU = *I;
831 Candidate.RPDelta = RPDelta;
832 Candidate.SCost = CurrentCost;
833 FoundCandidate = Weak;
834 }
835 continue;
836 }
837
838 if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(SU: *I)) {
839 unsigned CurrSize, CandSize;
840 if (Q.getID() == TopQID) {
841 CurrSize = (*I)->Succs.size();
842 CandSize = Candidate.SU->Succs.size();
843 } else {
844 CurrSize = (*I)->Preds.size();
845 CandSize = Candidate.SU->Preds.size();
846 }
847 if (CurrSize > CandSize) {
848 LLVM_DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost));
849 Candidate.SU = *I;
850 Candidate.RPDelta = RPDelta;
851 Candidate.SCost = CurrentCost;
852 FoundCandidate = BestCost;
853 }
854 // Keep the old candidate if it's a better candidate. That is, don't use
855 // the subsequent tie breaker.
856 if (CurrSize != CandSize)
857 continue;
858 }
859
860 // Tie breaker.
861 // To avoid scheduling indeterminism, we need a tie breaker
862 // for the case when cost is identical for two nodes.
863 if (UseNewerCandidate && CurrentCost == Candidate.SCost) {
864 if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||
865 (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
866 LLVM_DEBUG(traceCandidate("TCAND", Q, *I, CurrentCost));
867 Candidate.SU = *I;
868 Candidate.RPDelta = RPDelta;
869 Candidate.SCost = CurrentCost;
870 FoundCandidate = NodeOrder;
871 continue;
872 }
873 }
874
875 // Fall through to original instruction order.
876 // Only consider node order if Candidate was chosen from this Q.
877 if (FoundCandidate == NoCand)
878 continue;
879 }
880 return FoundCandidate;
881}
882
883/// Pick the best candidate node from either the top or bottom queue.
884SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
885 // Schedule as far as possible in the direction of no choice. This is most
886 // efficient, but also provides the best heuristics for CriticalPSets.
887 if (SUnit *SU = Bot.pickOnlyChoice()) {
888 LLVM_DEBUG(dbgs() << "Picked only Bottom\n");
889 IsTopNode = false;
890 return SU;
891 }
892 if (SUnit *SU = Top.pickOnlyChoice()) {
893 LLVM_DEBUG(dbgs() << "Picked only Top\n");
894 IsTopNode = true;
895 return SU;
896 }
897 SchedCandidate BotCand;
898 // Prefer bottom scheduling when heuristics are silent.
899 CandResult BotResult =
900 pickNodeFromQueue(Zone&: Bot, RPTracker: DAG->getBotRPTracker(), Candidate&: BotCand);
901 assert(BotResult != NoCand && "failed to find the first candidate");
902
903 // If either Q has a single candidate that provides the least increase in
904 // Excess pressure, we can immediately schedule from that Q.
905 //
906 // RegionCriticalPSets summarizes the pressure within the scheduled region and
907 // affects picking from either Q. If scheduling in one direction must
908 // increase pressure for one of the excess PSets, then schedule in that
909 // direction first to provide more freedom in the other direction.
910 if (BotResult == SingleExcess || BotResult == SingleCritical) {
911 LLVM_DEBUG(dbgs() << "Prefered Bottom Node\n");
912 IsTopNode = false;
913 return BotCand.SU;
914 }
915 // Check if the top Q has a better candidate.
916 SchedCandidate TopCand;
917 CandResult TopResult =
918 pickNodeFromQueue(Zone&: Top, RPTracker: DAG->getTopRPTracker(), Candidate&: TopCand);
919 assert(TopResult != NoCand && "failed to find the first candidate");
920
921 if (TopResult == SingleExcess || TopResult == SingleCritical) {
922 LLVM_DEBUG(dbgs() << "Prefered Top Node\n");
923 IsTopNode = true;
924 return TopCand.SU;
925 }
926 // If either Q has a single candidate that minimizes pressure above the
927 // original region's pressure pick it.
928 if (BotResult == SingleMax) {
929 LLVM_DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n");
930 IsTopNode = false;
931 return BotCand.SU;
932 }
933 if (TopResult == SingleMax) {
934 LLVM_DEBUG(dbgs() << "Prefered Top Node SingleMax\n");
935 IsTopNode = true;
936 return TopCand.SU;
937 }
938 if (TopCand.SCost > BotCand.SCost) {
939 LLVM_DEBUG(dbgs() << "Prefered Top Node Cost\n");
940 IsTopNode = true;
941 return TopCand.SU;
942 }
943 // Otherwise prefer the bottom candidate in node order.
944 LLVM_DEBUG(dbgs() << "Prefered Bottom in Node order\n");
945 IsTopNode = false;
946 return BotCand.SU;
947}
948
949/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
950SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
951 if (DAG->top() == DAG->bottom()) {
952 assert(Top.Available.empty() && Top.Pending.empty() &&
953 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
954 return nullptr;
955 }
956 SUnit *SU;
957 if (ForceTopDown) {
958 SU = Top.pickOnlyChoice();
959 if (!SU) {
960 SchedCandidate TopCand;
961 CandResult TopResult =
962 pickNodeFromQueue(Zone&: Top, RPTracker: DAG->getTopRPTracker(), Candidate&: TopCand);
963 assert(TopResult != NoCand && "failed to find the first candidate");
964 (void)TopResult;
965 SU = TopCand.SU;
966 }
967 IsTopNode = true;
968 } else if (ForceBottomUp) {
969 SU = Bot.pickOnlyChoice();
970 if (!SU) {
971 SchedCandidate BotCand;
972 CandResult BotResult =
973 pickNodeFromQueue(Zone&: Bot, RPTracker: DAG->getBotRPTracker(), Candidate&: BotCand);
974 assert(BotResult != NoCand && "failed to find the first candidate");
975 (void)BotResult;
976 SU = BotCand.SU;
977 }
978 IsTopNode = false;
979 } else {
980 SU = pickNodeBidrectional(IsTopNode);
981 }
982 if (SU->isTopReady())
983 Top.removeReady(SU);
984 if (SU->isBottomReady())
985 Bot.removeReady(SU);
986
987 LLVM_DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
988 << " Scheduling instruction in cycle "
989 << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " ("
990 << reportPackets() << ")\n";
991 DAG->dumpNode(*SU));
992 return SU;
993}
994
995/// Update the scheduler's state after scheduling a node. This is the same node
996/// that was just returned by pickNode(). However, VLIWMachineScheduler needs
997/// to update it's state based on the current cycle before MachineSchedStrategy
998/// does.
999void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
1000 if (IsTopNode) {
1001 Top.bumpNode(SU);
1002 SU->TopReadyCycle = Top.CurrCycle;
1003 } else {
1004 Bot.bumpNode(SU);
1005 SU->BotReadyCycle = Bot.CurrCycle;
1006 }
1007}
1008