//===---------------------------- GCNILPSched.cpp - -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
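/// This file implements a stand-alone, bottom-up ILP list scheduler that
/// reorders the SUnits of an already built ScheduleDAG; the entry point is
/// makeGCNILPScheduler() at the end of this file.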
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/ScheduleDAG.h"

using namespace llvm;

#define DEBUG_TYPE "machine-scheduler"

namespace {

class GCNILPScheduler {
  struct Candidate : ilist_node<Candidate> {
    SUnit *SU;

    Candidate(SUnit *SU_)
      : SU(SU_) {}
  };

  SpecificBumpPtrAllocator<Candidate> Alloc;
  using Queue = simple_ilist<Candidate>;
  Queue PendingQueue;
  Queue AvailQueue;
  unsigned CurQueueId = 0;

  std::vector<unsigned> SUNumbers;

  /// CurCycle - The current scheduler state corresponds to this cycle.
  unsigned CurCycle = 0;

  unsigned getNodePriority(const SUnit *SU) const;

  const SUnit *pickBest(const SUnit *left, const SUnit *right);
  Candidate* pickCandidate();

  void releasePending();
  void advanceToCycle(unsigned NextCycle);
  void releasePredecessors(const SUnit* SU);

public:
  std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> BotRoots,
                                     const ScheduleDAG &DAG);
};
} // namespace

/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
/// Smaller number is the higher priority.
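///
/// For example, a node with no data predecessors gets 1, a node whose two
/// operands both have number 1 gets 2 (the maximum plus one tie), and a node
/// whose operands have distinct numbers simply inherits the maximum.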
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Extra = 0;
  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl()) continue; // ignore chain preds
    SUnit *PredSU = Pred.getSUnit();
    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
    if (PredSethiUllman > SethiUllmanNumber) {
      SethiUllmanNumber = PredSethiUllman;
      Extra = 0;
    }
    else if (PredSethiUllman == SethiUllmanNumber)
      ++Extra;
  }

  SethiUllmanNumber += Extra;

  if (SethiUllmanNumber == 0)
    SethiUllmanNumber = 1;

  return SethiUllmanNumber;
}

// Lower priority means schedule further down. For bottom-up scheduling, lower
// priority SUs are scheduled before higher priority SUs.
unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const {
  assert(SU->NodeNum < SUNumbers.size());
  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
    // If SU does not have a register use, i.e. it doesn't produce a value
    // that would be consumed (e.g. store), then it terminates a chain of
    // computation. Give it a large Sethi-Ullman number so it is scheduled
    // right before its predecessors and does not lengthen their live ranges.
    return 0xffff;

  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
    // If SU does not have a register def, schedule it close to its uses
    // because it does not lengthen any live ranges.
    return 0;

  return SUNumbers[SU->NodeNum];
}

/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
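/// In the bottom-up schedule the successors are already placed, so this is
/// simply the maximum height among the data successors.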
static unsigned closestSucc(const SUnit *SU) {
  unsigned MaxHeight = 0;
  for (const SDep &Succ : SU->Succs) {
    if (Succ.isCtrl()) continue; // ignore chain succs
    unsigned Height = Succ.getSUnit()->getHeight();
    // If there are a bunch of CopyToRegs stacked up, they should be considered
    // to be at the same position.
    if (Height > MaxHeight)
      MaxHeight = Height;
  }
  return MaxHeight;
}

/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
/// for scratch registers, i.e. the number of data dependencies.
static unsigned calcMaxScratches(const SUnit *SU) {
  unsigned Scratches = 0;
  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl()) continue; // ignore chain preds
    Scratches++;
  }
  return Scratches;
}

// Return -1 if left has higher priority, 1 if right has higher priority.
// Return 0 if latency-based priority is equivalent.
static int BUCompareLatency(const SUnit *left, const SUnit *right) {
  // Scheduling an instruction that uses a VReg whose postincrement has not yet
  // been scheduled will induce a copy. Model this as an extra cycle of latency.
  int LHeight = (int)left->getHeight();
  int RHeight = (int)right->getHeight();

  // If either node is scheduling for latency, sort them by height/depth
  // and latency.

  // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
  // is enabled, grouping instructions by cycle, then its height is already
  // covered so only its depth matters. We also reach this point if both stall
  // but have the same height.
  if (LHeight != RHeight)
    return LHeight > RHeight ? 1 : -1;

  int LDepth = left->getDepth();
  int RDepth = right->getDepth();
  if (LDepth != RDepth) {
    LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
                      << ") depth " << LDepth << " vs SU (" << right->NodeNum
                      << ") depth " << RDepth << "\n");
    return LDepth < RDepth ? 1 : -1;
  }
  if (left->Latency != right->Latency)
    return left->Latency > right->Latency ? 1 : -1;

  return 0;
}

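/// Chooses the better of two ready SUnits for the bottom-up schedule. The
/// comparisons are applied in order: critical-path depth (only outside a
/// small reorder window), height, Sethi-Ullman priority, distance to the
/// closest successor, the scratch-register estimate, latency, and finally
/// queue order.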
const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right)
{
  // TODO: add register pressure lowering checks

  bool const DisableSchedCriticalPath = false;
  int MaxReorderWindow = 6;
  if (!DisableSchedCriticalPath) {
    int spread = (int)left->getDepth() - (int)right->getDepth();
    if (std::abs(spread) > MaxReorderWindow) {
      LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
                        << left->getDepth() << " != SU(" << right->NodeNum
                        << "): " << right->getDepth() << "\n");
      return left->getDepth() < right->getDepth() ? right : left;
    }
  }

  bool const DisableSchedHeight = false;
  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
    int spread = (int)left->getHeight() - (int)right->getHeight();
    if (std::abs(spread) > MaxReorderWindow)
      return left->getHeight() > right->getHeight() ? right : left;
  }

  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
  unsigned LPriority = getNodePriority(left);
  unsigned RPriority = getNodePriority(right);

  if (LPriority != RPriority)
    return LPriority > RPriority ? right : left;

  // Try schedule def + use closer when Sethi-Ullman numbers are the same.
  // e.g.
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // and the following instructions are both ready.
  // t2 = op c3
  // t4 = op c4
  //
  // Then schedule t2 = op first.
  // i.e.
  // t4 = op c4
  // t2 = op c3
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // This creates more short live intervals.
  unsigned LDist = closestSucc(left);
  unsigned RDist = closestSucc(right);
  if (LDist != RDist)
    return LDist < RDist ? right : left;

  // How many registers become live when the node is scheduled.
  unsigned LScratch = calcMaxScratches(left);
  unsigned RScratch = calcMaxScratches(right);
  if (LScratch != RScratch)
    return LScratch > RScratch ? right : left;

  bool const DisableSchedCycles = false;
  if (!DisableSchedCycles) {
    int result = BUCompareLatency(left, right);
    if (result != 0)
      return result > 0 ? right : left;
  }
  else {
    if (left->getHeight() != right->getHeight())
      return (left->getHeight() > right->getHeight()) ? right : left;

    if (left->getDepth() != right->getDepth())
      return (left->getDepth() < right->getDepth()) ? right : left;
  }

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId) ? right : left;
}

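/// Linearly scans the available queue, keeping the running winner of the
/// pickBest() comparison, and returns it; returns nullptr if the queue is
/// empty.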
GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() {
  if (AvailQueue.empty())
    return nullptr;
  auto Best = AvailQueue.begin();
  for (auto I = std::next(AvailQueue.begin()), E = AvailQueue.end();
       I != E; ++I) {
    auto NewBestSU = pickBest(Best->SU, I->SU);
    if (NewBestSU != Best->SU) {
      assert(NewBestSU == I->SU);
      Best = I;
    }
  }
  return &*Best;
}

void GCNILPScheduler::releasePending() {
  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) {
    auto &C = *I++;
    if (C.SU->getHeight() <= CurCycle) {
      PendingQueue.remove(C);
      AvailQueue.push_back(C);
      C.SU->NodeQueueId = CurQueueId++;
    }
  }
}

/// Move the scheduler state forward by the specified number of Cycles.
void GCNILPScheduler::advanceToCycle(unsigned NextCycle) {
  if (NextCycle <= CurCycle)
    return;
  CurCycle = NextCycle;
  releasePending();
}

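/// Releases the predecessors of SU: raises each predecessor's height to at
/// least SU's height plus the edge latency and, once a predecessor has no
/// unscheduled successors left, moves it onto the pending queue. Weak edges
/// are skipped.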
void GCNILPScheduler::releasePredecessors(const SUnit* SU) {
  for (const auto &PredEdge : SU->Preds) {
    auto PredSU = PredEdge.getSUnit();
    if (PredEdge.isWeak())
      continue;
    assert(PredSU->isBoundaryNode() || PredSU->NumSuccsLeft > 0);

    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency());

    if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0)
      PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU));
  }
}

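/// Schedules the DAG bottom-up: the available queue is seeded with the bottom
/// roots, the best ready candidate is picked at each step and its
/// predecessors are released; when nothing is ready, the cycle is advanced to
/// the earliest pending unit. The bottom-up order is reversed before it is
/// returned, and the SUnits are saved and restored around the run because
/// scheduling mutates their heights and bookkeeping fields.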
std::vector<const SUnit*>
GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots,
                          const ScheduleDAG &DAG) {
  auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits;

  std::vector<SUnit> SUSavedCopy;
  SUSavedCopy.resize(SUnits.size());

  // We cannot save only the fields we touch: some of them are private, so
  // save the units verbatim. This assumes SUnit has value semantics.
  for (const SUnit &SU : SUnits)
    SUSavedCopy[SU.NodeNum] = SU;

  SUNumbers.assign(SUnits.size(), 0);
  for (const SUnit &SU : SUnits)
    CalcNodeSethiUllmanNumber(&SU, SUNumbers);

  for (const auto *SU : BotRoots) {
    AvailQueue.push_back(
      *new (Alloc.Allocate()) Candidate(const_cast<SUnit*>(SU)));
  }
  releasePredecessors(&DAG.ExitSU);

  std::vector<const SUnit*> Schedule;
  Schedule.reserve(SUnits.size());
  while (true) {
    if (AvailQueue.empty() && !PendingQueue.empty()) {
      auto EarliestSU =
          llvm::min_element(PendingQueue, [=](const Candidate &C1,
                                              const Candidate &C2) {
            return C1.SU->getHeight() < C2.SU->getHeight();
          })->SU;
      advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight()));
    }
    if (AvailQueue.empty())
      break;

    LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n"
                         "Ready queue:";
               for (auto &C : AvailQueue)
                 dbgs() << ' ' << C.SU->NodeNum;
               dbgs() << '\n';);

    auto C = pickCandidate();
    assert(C);
    AvailQueue.remove(*C);
    auto SU = C->SU;
    LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));

    advanceToCycle(SU->getHeight());

    releasePredecessors(SU);
    Schedule.push_back(SU);
    SU->isScheduled = true;
  }
  assert(SUnits.size() == Schedule.size());

  std::reverse(Schedule.begin(), Schedule.end());

  // restore units
  for (auto &SU : SUnits)
    SU = SUSavedCopy[SU.NodeNum];

  return Schedule;
}

namespace llvm {
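// Entry point for obtaining an ILP-oriented ordering of an already built DAG.
// A minimal usage sketch (how the caller collects BotRoots is an assumption,
// not something this file defines):
//
//   std::vector<const SUnit*> Order = makeGCNILPScheduler(BotRoots, DAG);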
std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots,
                                              const ScheduleDAG &DAG) {
  GCNILPScheduler S;
  return S.schedule(BotRoots, DAG);
}
} // namespace llvm