//===---------------------------- GCNILPSched.cpp - -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
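/// This file implements a stand-alone, bottom-up ILP list scheduler that
/// reorders the SUnits of an already built ScheduleDAG; the entry point is
/// makeGCNILPScheduler() at the end of this file.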
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/ScheduleDAG.h"

using namespace llvm;

#define DEBUG_TYPE "machine-scheduler"

namespace {

class GCNILPScheduler {
  struct Candidate : ilist_node<Candidate> {
    SUnit *SU;

    Candidate(SUnit *SU_)
      : SU(SU_) {}
  };

  SpecificBumpPtrAllocator<Candidate> Alloc;
  using Queue = simple_ilist<Candidate>;
  Queue PendingQueue;
  Queue AvailQueue;
  unsigned CurQueueId = 0;

  std::vector<unsigned> SUNumbers;

  /// CurCycle - The current scheduler state corresponds to this cycle.
  unsigned CurCycle = 0;

  unsigned getNodePriority(const SUnit *SU) const;

  const SUnit *pickBest(const SUnit *left, const SUnit *right);
  Candidate* pickCandidate();

  void releasePending();
  void advanceToCycle(unsigned NextCycle);
  void releasePredecessors(const SUnit* SU);

public:
  std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> BotRoots,
                                     const ScheduleDAG &DAG);
};
} // namespace

/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
/// Smaller number is the higher priority.
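///
/// For example, a node with no data predecessors gets 1, a node whose two
/// operands both have number 1 gets 2 (the maximum plus one tie), and a node
/// whose operands have distinct numbers simply inherits the maximum.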
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Extra = 0;
  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl()) continue; // ignore chain preds
    SUnit *PredSU = Pred.getSUnit();
    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
    if (PredSethiUllman > SethiUllmanNumber) {
      SethiUllmanNumber = PredSethiUllman;
      Extra = 0;
    }
    else if (PredSethiUllman == SethiUllmanNumber)
      ++Extra;
  }

  SethiUllmanNumber += Extra;

  if (SethiUllmanNumber == 0)
    SethiUllmanNumber = 1;

  return SethiUllmanNumber;
}

// Lower priority means schedule further down. For bottom-up scheduling, lower
// priority SUs are scheduled before higher priority SUs.
unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const {
  assert(SU->NodeNum < SUNumbers.size());
  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
    // If SU does not have a register use, i.e. it doesn't produce a value
    // that would be consumed (e.g. store), then it terminates a chain of
    // computation. Give it a large Sethi-Ullman number so it is scheduled
    // right before its predecessors and does not lengthen their live ranges.
    return 0xffff;

  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
    // If SU does not have a register def, schedule it close to its uses
    // because it does not lengthen any live ranges.
    return 0;

  return SUNumbers[SU->NodeNum];
}

/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
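/// In the bottom-up schedule the successors are already placed, so this is
/// simply the maximum height among the data successors.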
static unsigned closestSucc(const SUnit *SU) {
  unsigned MaxHeight = 0;
  for (const SDep &Succ : SU->Succs) {
    if (Succ.isCtrl()) continue; // ignore chain succs
    unsigned Height = Succ.getSUnit()->getHeight();
    // If there are a bunch of CopyToRegs stacked up, they should be considered
    // to be at the same position.
    if (Height > MaxHeight)
      MaxHeight = Height;
  }
  return MaxHeight;
}

/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
/// for scratch registers, i.e. the number of data dependencies.
static unsigned calcMaxScratches(const SUnit *SU) {
  unsigned Scratches = 0;
  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl()) continue; // ignore chain preds
    Scratches++;
  }
  return Scratches;
}

// Return -1 if left has higher priority, 1 if right has higher priority.
// Return 0 if latency-based priority is equivalent.
static int BUCompareLatency(const SUnit *left, const SUnit *right) {
  // Scheduling an instruction that uses a VReg whose postincrement has not yet
  // been scheduled will induce a copy. Model this as an extra cycle of latency.
  int LHeight = (int)left->getHeight();
  int RHeight = (int)right->getHeight();

  // If either node is scheduling for latency, sort them by height/depth
  // and latency.

  // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
  // is enabled, grouping instructions by cycle, then its height is already
  // covered so only its depth matters. We also reach this point if both stall
  // but have the same height.
  if (LHeight != RHeight)
    return LHeight > RHeight ? 1 : -1;

  int LDepth = left->getDepth();
  int RDepth = right->getDepth();
  if (LDepth != RDepth) {
    LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
                      << ") depth " << LDepth << " vs SU (" << right->NodeNum
                      << ") depth " << RDepth << "\n");
    return LDepth < RDepth ? 1 : -1;
  }
  if (left->Latency != right->Latency)
    return left->Latency > right->Latency ? 1 : -1;

  return 0;
}

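/// Chooses the better of two ready SUnits for the bottom-up schedule. The
/// comparisons are applied in order: critical-path depth (only outside a
/// small reorder window), height, Sethi-Ullman priority, distance to the
/// closest successor, the scratch-register estimate, latency, and finally
/// queue order.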
const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right)
{
  // TODO: add register pressure lowering checks

  bool const DisableSchedCriticalPath = false;
  int MaxReorderWindow = 6;
  if (!DisableSchedCriticalPath) {
    int spread = (int)left->getDepth() - (int)right->getDepth();
    if (std::abs(spread) > MaxReorderWindow) {
      LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
                        << left->getDepth() << " != SU(" << right->NodeNum
                        << "): " << right->getDepth() << "\n");
      return left->getDepth() < right->getDepth() ? right : left;
    }
  }

  bool const DisableSchedHeight = false;
  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
    int spread = (int)left->getHeight() - (int)right->getHeight();
    if (std::abs(spread) > MaxReorderWindow)
      return left->getHeight() > right->getHeight() ? right : left;
  }

  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
  unsigned LPriority = getNodePriority(left);
  unsigned RPriority = getNodePriority(right);

  if (LPriority != RPriority)
    return LPriority > RPriority ? right : left;

  // Try schedule def + use closer when Sethi-Ullman numbers are the same.
  // e.g.
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // and the following instructions are both ready.
  // t2 = op c3
  // t4 = op c4
  //
  // Then schedule t2 = op first.
  // i.e.
  // t4 = op c4
  // t2 = op c3
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // This creates more short live intervals.
  unsigned LDist = closestSucc(left);
  unsigned RDist = closestSucc(right);
  if (LDist != RDist)
    return LDist < RDist ? right : left;

  // How many registers become live when the node is scheduled.
  unsigned LScratch = calcMaxScratches(left);
  unsigned RScratch = calcMaxScratches(right);
  if (LScratch != RScratch)
    return LScratch > RScratch ? right : left;

  bool const DisableSchedCycles = false;
  if (!DisableSchedCycles) {
    int result = BUCompareLatency(left, right);
    if (result != 0)
      return result > 0 ? right : left;
  }
  else {
    if (left->getHeight() != right->getHeight())
      return (left->getHeight() > right->getHeight()) ? right : left;

    if (left->getDepth() != right->getDepth())
      return (left->getDepth() < right->getDepth()) ? right : left;
  }

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId) ? right : left;
}

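/// Linearly scans the available queue, keeping the running winner of the
/// pickBest() comparison, and returns it; returns nullptr if the queue is
/// empty.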
GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() {
  if (AvailQueue.empty())
    return nullptr;
  auto Best = AvailQueue.begin();
  for (auto I = std::next(AvailQueue.begin()), E = AvailQueue.end();
       I != E; ++I) {
    auto NewBestSU = pickBest(Best->SU, I->SU);
    if (NewBestSU != Best->SU) {
      assert(NewBestSU == I->SU);
      Best = I;
    }
  }
  return &*Best;
}

void GCNILPScheduler::releasePending() {
  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) {
    auto &C = *I++;
    if (C.SU->getHeight() <= CurCycle) {
      PendingQueue.remove(C);
      AvailQueue.push_back(C);
      C.SU->NodeQueueId = CurQueueId++;
    }
  }
}

/// Move the scheduler state forward by the specified number of Cycles.
void GCNILPScheduler::advanceToCycle(unsigned NextCycle) {
  if (NextCycle <= CurCycle)
    return;
  CurCycle = NextCycle;
  releasePending();
}

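/// Releases the predecessors of SU: raises each predecessor's height to at
/// least SU's height plus the edge latency and, once a predecessor has no
/// unscheduled successors left, moves it onto the pending queue. Weak edges
/// are skipped.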
void GCNILPScheduler::releasePredecessors(const SUnit* SU) {
  for (const auto &PredEdge : SU->Preds) {
    auto PredSU = PredEdge.getSUnit();
    if (PredEdge.isWeak())
      continue;
    assert(PredSU->isBoundaryNode() || PredSU->NumSuccsLeft > 0);

    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency());

    if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0)
      PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU));
  }
}

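/// Schedules the DAG bottom-up: the available queue is seeded with the bottom
/// roots, the best ready candidate is picked at each step and its
/// predecessors are released; when nothing is ready, the cycle is advanced to
/// the earliest pending unit. The bottom-up order is reversed before it is
/// returned, and the SUnits are saved and restored around the run because
/// scheduling mutates their heights and bookkeeping fields.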
std::vector<const SUnit*>
GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots,
                          const ScheduleDAG &DAG) {
  auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits;

  std::vector<SUnit> SUSavedCopy;
  SUSavedCopy.resize(SUnits.size());

  // We cannot save only the fields we touch: some of them are private, so
  // save the units verbatim. This assumes SUnit has value semantics.
  for (const SUnit &SU : SUnits)
    SUSavedCopy[SU.NodeNum] = SU;

  SUNumbers.assign(SUnits.size(), 0);
  for (const SUnit &SU : SUnits)
    CalcNodeSethiUllmanNumber(&SU, SUNumbers);

  for (const auto *SU : BotRoots) {
    AvailQueue.push_back(
      *new (Alloc.Allocate()) Candidate(const_cast<SUnit*>(SU)));
  }
  releasePredecessors(&DAG.ExitSU);

  std::vector<const SUnit*> Schedule;
  Schedule.reserve(SUnits.size());
  while (true) {
    if (AvailQueue.empty() && !PendingQueue.empty()) {
      auto EarliestSU =
          llvm::min_element(PendingQueue, [=](const Candidate &C1,
                                              const Candidate &C2) {
            return C1.SU->getHeight() < C2.SU->getHeight();
          })->SU;
      advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight()));
    }
    if (AvailQueue.empty())
      break;

    LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n"
                         "Ready queue:";
               for (auto &C : AvailQueue)
                 dbgs() << ' ' << C.SU->NodeNum;
               dbgs() << '\n';);

    auto C = pickCandidate();
    assert(C);
    AvailQueue.remove(*C);
    auto SU = C->SU;
    LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));

    advanceToCycle(SU->getHeight());

    releasePredecessors(SU);
    Schedule.push_back(SU);
    SU->isScheduled = true;
  }
  assert(SUnits.size() == Schedule.size());

  std::reverse(Schedule.begin(), Schedule.end());

  // restore units
  for (auto &SU : SUnits)
    SU = SUSavedCopy[SU.NodeNum];

  return Schedule;
}

namespace llvm {
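// Entry point for obtaining an ILP-oriented ordering of an already built DAG.
// A minimal usage sketch (how the caller collects BotRoots is an assumption,
// not something this file defines):
//
//   std::vector<const SUnit*> Order = makeGCNILPScheduler(BotRoots, DAG);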
std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots,
                                              const ScheduleDAG &DAG) {
  GCNILPScheduler S;
  return S.schedule(BotRoots, DAG);
}
} // namespace llvm