1 | //===- GCNIterativeScheduler.cpp ------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// This file implements the class GCNIterativeScheduler. |
11 | /// |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "GCNIterativeScheduler.h" |
15 | #include "GCNSchedStrategy.h" |
16 | #include "SIMachineFunctionInfo.h" |
17 | |
18 | using namespace llvm; |
19 | |
20 | #define DEBUG_TYPE "machine-scheduler" |
21 | |
22 | namespace llvm { |
23 | |
24 | std::vector<const SUnit *> makeMinRegSchedule(ArrayRef<const SUnit *> TopRoots, |
25 | const ScheduleDAG &DAG); |
26 | |
27 | std::vector<const SUnit *> makeGCNILPScheduler(ArrayRef<const SUnit *> BotRoots, |
28 | const ScheduleDAG &DAG); |
29 | } // namespace llvm |
30 | |
31 | // shim accessors for different order containers |
32 | static inline MachineInstr *getMachineInstr(MachineInstr *MI) { |
33 | return MI; |
34 | } |
35 | static inline MachineInstr *getMachineInstr(const SUnit *SU) { |
36 | return SU->getInstr(); |
37 | } |
38 | static inline MachineInstr *getMachineInstr(const SUnit &SU) { |
39 | return SU.getInstr(); |
40 | } |
41 | |
42 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
43 | LLVM_DUMP_METHOD |
44 | static void printRegion(raw_ostream &OS, |
45 | MachineBasicBlock::iterator Begin, |
46 | MachineBasicBlock::iterator End, |
47 | const LiveIntervals *LIS, |
48 | unsigned MaxInstNum = |
49 | std::numeric_limits<unsigned>::max()) { |
50 | auto BB = Begin->getParent(); |
51 | OS << BB->getParent()->getName() << ":" << printMBBReference(*BB) << ' ' |
52 | << BB->getName() << ":\n" ; |
53 | auto I = Begin; |
54 | MaxInstNum = std::max(MaxInstNum, 1u); |
55 | for (; I != End && MaxInstNum; ++I, --MaxInstNum) { |
56 | if (!I->isDebugInstr() && LIS) |
57 | OS << LIS->getInstructionIndex(*I); |
58 | OS << '\t' << *I; |
59 | } |
60 | if (I != End) { |
61 | OS << "\t...\n" ; |
62 | I = std::prev(End); |
63 | if (!I->isDebugInstr() && LIS) |
64 | OS << LIS->getInstructionIndex(*I); |
65 | OS << '\t' << *I; |
66 | } |
67 | if (End != BB->end()) { // print boundary inst if present |
68 | OS << "----\n" ; |
69 | if (LIS) OS << LIS->getInstructionIndex(*End) << '\t'; |
70 | OS << *End; |
71 | } |
72 | } |
73 | |
74 | LLVM_DUMP_METHOD |
75 | static void printLivenessInfo(raw_ostream &OS, |
76 | MachineBasicBlock::iterator Begin, |
77 | MachineBasicBlock::iterator End, |
78 | const LiveIntervals *LIS) { |
79 | const auto BB = Begin->getParent(); |
80 | const auto &MRI = BB->getParent()->getRegInfo(); |
81 | |
82 | const auto LiveIns = getLiveRegsBefore(*Begin, *LIS); |
83 | OS << "LIn RP: " << print(getRegPressure(MRI, LiveIns)); |
84 | |
85 | const auto BottomMI = End == BB->end() ? std::prev(End) : End; |
86 | const auto LiveOuts = getLiveRegsAfter(*BottomMI, *LIS); |
87 | OS << "LOt RP: " << print(getRegPressure(MRI, LiveOuts)); |
88 | } |
89 | |
90 | LLVM_DUMP_METHOD |
91 | void GCNIterativeScheduler::printRegions(raw_ostream &OS) const { |
92 | const auto &ST = MF.getSubtarget<GCNSubtarget>(); |
93 | for (const auto R : Regions) { |
94 | OS << "Region to schedule " ; |
95 | printRegion(OS, R->Begin, R->End, LIS, 1); |
96 | printLivenessInfo(OS, R->Begin, R->End, LIS); |
97 | OS << "Max RP: " << print(R->MaxPressure, &ST); |
98 | } |
99 | } |
100 | |
101 | LLVM_DUMP_METHOD |
102 | void GCNIterativeScheduler::printSchedResult(raw_ostream &OS, |
103 | const Region *R, |
104 | const GCNRegPressure &RP) const { |
105 | OS << "\nAfter scheduling " ; |
106 | printRegion(OS, R->Begin, R->End, LIS); |
107 | printSchedRP(OS, R->MaxPressure, RP); |
108 | OS << '\n'; |
109 | } |
110 | |
111 | LLVM_DUMP_METHOD |
112 | void GCNIterativeScheduler::printSchedRP(raw_ostream &OS, |
113 | const GCNRegPressure &Before, |
114 | const GCNRegPressure &After) const { |
115 | const auto &ST = MF.getSubtarget<GCNSubtarget>(); |
116 | OS << "RP before: " << print(Before, &ST) |
117 | << "RP after: " << print(After, &ST); |
118 | } |
119 | #endif |
120 | |
121 | // DAG builder helper |
122 | class GCNIterativeScheduler::BuildDAG { |
123 | GCNIterativeScheduler &Sch; |
124 | SmallVector<SUnit *, 8> TopRoots; |
125 | |
126 | SmallVector<SUnit*, 8> BotRoots; |
127 | public: |
128 | BuildDAG(const Region &R, GCNIterativeScheduler &_Sch) |
129 | : Sch(_Sch) { |
130 | auto BB = R.Begin->getParent(); |
131 | Sch.BaseClass::startBlock(bb: BB); |
132 | Sch.BaseClass::enterRegion(bb: BB, begin: R.Begin, end: R.End, regioninstrs: R.NumRegionInstrs); |
133 | |
134 | Sch.buildSchedGraph(AA: Sch.AA, RPTracker: nullptr, PDiffs: nullptr, LIS: nullptr, |
135 | /*TrackLaneMask*/TrackLaneMasks: true); |
136 | Sch.Topo.InitDAGTopologicalSorting(); |
137 | Sch.findRootsAndBiasEdges(TopRoots, BotRoots); |
138 | } |
139 | |
140 | ~BuildDAG() { |
141 | Sch.BaseClass::exitRegion(); |
142 | Sch.BaseClass::finishBlock(); |
143 | } |
144 | |
145 | ArrayRef<const SUnit *> getTopRoots() const { |
146 | return TopRoots; |
147 | } |
148 | ArrayRef<SUnit*> getBottomRoots() const { |
149 | return BotRoots; |
150 | } |
151 | }; |
152 | |
153 | class GCNIterativeScheduler::OverrideLegacyStrategy { |
154 | GCNIterativeScheduler &Sch; |
155 | Region &Rgn; |
156 | std::unique_ptr<MachineSchedStrategy> SaveSchedImpl; |
157 | GCNRegPressure SaveMaxRP; |
158 | |
159 | public: |
160 | OverrideLegacyStrategy(Region &R, |
161 | MachineSchedStrategy &OverrideStrategy, |
162 | GCNIterativeScheduler &_Sch) |
163 | : Sch(_Sch) |
164 | , Rgn(R) |
165 | , SaveSchedImpl(std::move(_Sch.SchedImpl)) |
166 | , SaveMaxRP(R.MaxPressure) { |
167 | Sch.SchedImpl.reset(p: &OverrideStrategy); |
168 | auto BB = R.Begin->getParent(); |
169 | Sch.BaseClass::startBlock(bb: BB); |
170 | Sch.BaseClass::enterRegion(bb: BB, begin: R.Begin, end: R.End, regioninstrs: R.NumRegionInstrs); |
171 | } |
172 | |
173 | ~OverrideLegacyStrategy() { |
174 | Sch.BaseClass::exitRegion(); |
175 | Sch.BaseClass::finishBlock(); |
176 | Sch.SchedImpl.release(); |
177 | Sch.SchedImpl = std::move(SaveSchedImpl); |
178 | } |
179 | |
180 | void schedule() { |
181 | assert(Sch.RegionBegin == Rgn.Begin && Sch.RegionEnd == Rgn.End); |
182 | LLVM_DEBUG(dbgs() << "\nScheduling " ; |
183 | printRegion(dbgs(), Rgn.Begin, Rgn.End, Sch.LIS, 2)); |
184 | Sch.BaseClass::schedule(); |
185 | |
186 | // Unfortunately placeDebugValues incorrectly modifies RegionEnd, restore |
187 | Sch.RegionEnd = Rgn.End; |
188 | //assert(Rgn.End == Sch.RegionEnd); |
189 | Rgn.Begin = Sch.RegionBegin; |
190 | Rgn.MaxPressure.clear(); |
191 | } |
192 | |
193 | void restoreOrder() { |
194 | assert(Sch.RegionBegin == Rgn.Begin && Sch.RegionEnd == Rgn.End); |
195 | // DAG SUnits are stored using original region's order |
196 | // so just use SUnits as the restoring schedule |
197 | Sch.scheduleRegion(R&: Rgn, Schedule&: Sch.SUnits, MaxRP: SaveMaxRP); |
198 | } |
199 | }; |
200 | |
201 | namespace { |
202 | |
203 | // just a stub to make base class happy |
204 | class SchedStrategyStub : public MachineSchedStrategy { |
205 | public: |
206 | bool shouldTrackPressure() const override { return false; } |
207 | bool shouldTrackLaneMasks() const override { return false; } |
208 | void initialize(ScheduleDAGMI *DAG) override {} |
209 | SUnit *pickNode(bool &IsTopNode) override { return nullptr; } |
210 | void schedNode(SUnit *SU, bool IsTopNode) override {} |
211 | void releaseTopNode(SUnit *SU) override {} |
212 | void releaseBottomNode(SUnit *SU) override {} |
213 | }; |
214 | |
215 | } // end anonymous namespace |
216 | |
217 | GCNIterativeScheduler::GCNIterativeScheduler(MachineSchedContext *C, |
218 | StrategyKind S) |
219 | : BaseClass(C, std::make_unique<SchedStrategyStub>()) |
220 | , Context(C) |
221 | , Strategy(S) |
222 | , UPTracker(*LIS) { |
223 | } |
224 | |
225 | // returns max pressure for a region |
226 | GCNRegPressure |
227 | GCNIterativeScheduler::getRegionPressure(MachineBasicBlock::iterator Begin, |
228 | MachineBasicBlock::iterator End) |
229 | const { |
230 | // For the purpose of pressure tracking bottom inst of the region should |
231 | // be also processed. End is either BB end, BB terminator inst or sched |
232 | // boundary inst. |
233 | auto const BBEnd = Begin->getParent()->end(); |
234 | auto const BottomMI = End == BBEnd ? std::prev(x: End) : End; |
235 | |
236 | // scheduleRegions walks bottom to top, so its likely we just get next |
237 | // instruction to track |
238 | auto AfterBottomMI = std::next(x: BottomMI); |
239 | if (AfterBottomMI == BBEnd || |
240 | &*AfterBottomMI != UPTracker.getLastTrackedMI()) { |
241 | UPTracker.reset(MI: *BottomMI); |
242 | } else { |
243 | assert(UPTracker.isValid()); |
244 | } |
245 | |
246 | for (auto I = BottomMI; I != Begin; --I) |
247 | UPTracker.recede(MI: *I); |
248 | |
249 | UPTracker.recede(MI: *Begin); |
250 | |
251 | assert(UPTracker.isValid() || |
252 | (dbgs() << "Tracked region " , |
253 | printRegion(dbgs(), Begin, End, LIS), false)); |
254 | return UPTracker.getMaxPressureAndReset(); |
255 | } |
256 | |
257 | // returns max pressure for a tentative schedule |
258 | template <typename Range> GCNRegPressure |
259 | GCNIterativeScheduler::getSchedulePressure(const Region &R, |
260 | Range &&Schedule) const { |
261 | auto const BBEnd = R.Begin->getParent()->end(); |
262 | GCNUpwardRPTracker RPTracker(*LIS); |
263 | if (R.End != BBEnd) { |
264 | // R.End points to the boundary instruction but the |
265 | // schedule doesn't include it |
266 | RPTracker.reset(MI: *R.End); |
267 | RPTracker.recede(MI: *R.End); |
268 | } else { |
269 | // R.End doesn't point to the boundary instruction |
270 | RPTracker.reset(MI: *std::prev(x: BBEnd)); |
271 | } |
272 | for (auto I = Schedule.end(), B = Schedule.begin(); I != B;) { |
273 | RPTracker.recede(MI: *getMachineInstr(*--I)); |
274 | } |
275 | return RPTracker.getMaxPressureAndReset(); |
276 | } |
277 | |
278 | void GCNIterativeScheduler::enterRegion(MachineBasicBlock *BB, // overridden |
279 | MachineBasicBlock::iterator Begin, |
280 | MachineBasicBlock::iterator End, |
281 | unsigned NumRegionInstrs) { |
282 | BaseClass::enterRegion(bb: BB, begin: Begin, end: End, regioninstrs: NumRegionInstrs); |
283 | if (NumRegionInstrs > 2) { |
284 | Regions.push_back( |
285 | x: new (Alloc.Allocate()) |
286 | Region { .Begin: Begin, .End: End, .NumRegionInstrs: NumRegionInstrs, |
287 | .MaxPressure: getRegionPressure(Begin, End), .BestSchedule: nullptr }); |
288 | } |
289 | } |
290 | |
291 | void GCNIterativeScheduler::schedule() { // overridden |
292 | // do nothing |
293 | LLVM_DEBUG(printLivenessInfo(dbgs(), RegionBegin, RegionEnd, LIS); |
294 | if (!Regions.empty() && Regions.back()->Begin == RegionBegin) { |
295 | dbgs() << "Max RP: " |
296 | << print(Regions.back()->MaxPressure, |
297 | &MF.getSubtarget<GCNSubtarget>()); |
298 | } dbgs() |
299 | << '\n';); |
300 | } |
301 | |
302 | void GCNIterativeScheduler::finalizeSchedule() { // overridden |
303 | if (Regions.empty()) |
304 | return; |
305 | switch (Strategy) { |
306 | case SCHEDULE_MINREGONLY: scheduleMinReg(); break; |
307 | case SCHEDULE_MINREGFORCED: scheduleMinReg(force: true); break; |
308 | case SCHEDULE_LEGACYMAXOCCUPANCY: scheduleLegacyMaxOccupancy(); break; |
309 | case SCHEDULE_ILP: scheduleILP(TryMaximizeOccupancy: false); break; |
310 | } |
311 | } |
312 | |
313 | // Detach schedule from SUnits and interleave it with debug values. |
314 | // Returned schedule becomes independent of DAG state. |
315 | std::vector<MachineInstr*> |
316 | GCNIterativeScheduler::detachSchedule(ScheduleRef Schedule) const { |
317 | std::vector<MachineInstr*> Res; |
318 | Res.reserve(n: Schedule.size() * 2); |
319 | |
320 | if (FirstDbgValue) |
321 | Res.push_back(x: FirstDbgValue); |
322 | |
323 | const auto DbgB = DbgValues.begin(), DbgE = DbgValues.end(); |
324 | for (const auto *SU : Schedule) { |
325 | Res.push_back(x: SU->getInstr()); |
326 | const auto &D = std::find_if(first: DbgB, last: DbgE, pred: [SU](decltype(*DbgB) &P) { |
327 | return P.second == SU->getInstr(); |
328 | }); |
329 | if (D != DbgE) |
330 | Res.push_back(x: D->first); |
331 | } |
332 | return Res; |
333 | } |
334 | |
335 | void GCNIterativeScheduler::setBestSchedule(Region &R, |
336 | ScheduleRef Schedule, |
337 | const GCNRegPressure &MaxRP) { |
338 | R.BestSchedule.reset( |
339 | p: new TentativeSchedule{ .Schedule: detachSchedule(Schedule), .MaxPressure: MaxRP }); |
340 | } |
341 | |
342 | void GCNIterativeScheduler::scheduleBest(Region &R) { |
343 | assert(R.BestSchedule.get() && "No schedule specified" ); |
344 | scheduleRegion(R, Schedule&: R.BestSchedule->Schedule, MaxRP: R.BestSchedule->MaxPressure); |
345 | R.BestSchedule.reset(); |
346 | } |
347 | |
348 | // minimal required region scheduler, works for ranges of SUnits*, |
349 | // SUnits or MachineIntrs* |
350 | template <typename Range> |
351 | void GCNIterativeScheduler::scheduleRegion(Region &R, Range &&Schedule, |
352 | const GCNRegPressure &MaxRP) { |
353 | assert(RegionBegin == R.Begin && RegionEnd == R.End); |
354 | assert(LIS != nullptr); |
355 | #ifndef NDEBUG |
356 | const auto SchedMaxRP = getSchedulePressure(R, Schedule); |
357 | #endif |
358 | auto BB = R.Begin->getParent(); |
359 | auto Top = R.Begin; |
360 | for (const auto &I : Schedule) { |
361 | auto MI = getMachineInstr(I); |
362 | if (MI != &*Top) { |
363 | BB->remove(I: MI); |
364 | BB->insert(Top, MI); |
365 | if (!MI->isDebugInstr()) |
366 | LIS->handleMove(MI&: *MI, UpdateFlags: true); |
367 | } |
368 | if (!MI->isDebugInstr()) { |
369 | // Reset read - undef flags and update them later. |
370 | for (auto &Op : MI->all_defs()) |
371 | Op.setIsUndef(false); |
372 | |
373 | RegisterOperands RegOpers; |
374 | RegOpers.collect(MI: *MI, TRI: *TRI, MRI, /*ShouldTrackLaneMasks*/TrackLaneMasks: true, |
375 | /*IgnoreDead*/false); |
376 | // Adjust liveness and add missing dead+read-undef flags. |
377 | auto SlotIdx = LIS->getInstructionIndex(Instr: *MI).getRegSlot(); |
378 | RegOpers.adjustLaneLiveness(LIS: *LIS, MRI, Pos: SlotIdx, AddFlagsMI: MI); |
379 | } |
380 | Top = std::next(MI->getIterator()); |
381 | } |
382 | RegionBegin = getMachineInstr(Schedule.front()); |
383 | |
384 | // Schedule consisting of MachineInstr* is considered 'detached' |
385 | // and already interleaved with debug values |
386 | if (!std::is_same_v<decltype(*Schedule.begin()), MachineInstr*>) { |
387 | placeDebugValues(); |
388 | // Unfortunately placeDebugValues incorrectly modifies RegionEnd, restore |
389 | // assert(R.End == RegionEnd); |
390 | RegionEnd = R.End; |
391 | } |
392 | |
393 | R.Begin = RegionBegin; |
394 | R.MaxPressure = MaxRP; |
395 | |
396 | #ifndef NDEBUG |
397 | const auto RegionMaxRP = getRegionPressure(R); |
398 | const auto &ST = MF.getSubtarget<GCNSubtarget>(); |
399 | #endif |
400 | assert( |
401 | (SchedMaxRP == RegionMaxRP && (MaxRP.empty() || SchedMaxRP == MaxRP)) || |
402 | (dbgs() << "Max RP mismatch!!!\n" |
403 | "RP for schedule (calculated): " |
404 | << print(SchedMaxRP, &ST) |
405 | << "RP for schedule (reported): " << print(MaxRP, &ST) |
406 | << "RP after scheduling: " << print(RegionMaxRP, &ST), |
407 | false)); |
408 | } |
409 | |
410 | // Sort recorded regions by pressure - highest at the front |
411 | void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) { |
412 | llvm::sort(C&: Regions, Comp: [this, TargetOcc](const Region *R1, const Region *R2) { |
413 | return R2->MaxPressure.less(MF, O: R1->MaxPressure, MaxOccupancy: TargetOcc); |
414 | }); |
415 | } |
416 | |
417 | /////////////////////////////////////////////////////////////////////////////// |
418 | // Legacy MaxOccupancy Strategy |
419 | |
420 | // Tries to increase occupancy applying minreg scheduler for a sequence of |
421 | // most demanding regions. Obtained schedules are saved as BestSchedule for a |
422 | // region. |
423 | // TargetOcc is the best achievable occupancy for a kernel. |
424 | // Returns better occupancy on success or current occupancy on fail. |
425 | // BestSchedules aren't deleted on fail. |
426 | unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) { |
427 | // TODO: assert Regions are sorted descending by pressure |
428 | const auto &ST = MF.getSubtarget<GCNSubtarget>(); |
429 | const auto Occ = Regions.front()->MaxPressure.getOccupancy(ST); |
430 | LLVM_DEBUG(dbgs() << "Trying to improve occupancy, target = " << TargetOcc |
431 | << ", current = " << Occ << '\n'); |
432 | |
433 | auto NewOcc = TargetOcc; |
434 | for (auto *R : Regions) { |
435 | if (R->MaxPressure.getOccupancy(ST) >= NewOcc) |
436 | break; |
437 | |
438 | LLVM_DEBUG(printRegion(dbgs(), R->Begin, R->End, LIS, 3); |
439 | printLivenessInfo(dbgs(), R->Begin, R->End, LIS)); |
440 | |
441 | BuildDAG DAG(*R, *this); |
442 | const auto MinSchedule = makeMinRegSchedule(TopRoots: DAG.getTopRoots(), DAG: *this); |
443 | const auto MaxRP = getSchedulePressure(R: *R, Schedule: MinSchedule); |
444 | LLVM_DEBUG(dbgs() << "Occupancy improvement attempt:\n" ; |
445 | printSchedRP(dbgs(), R->MaxPressure, MaxRP)); |
446 | |
447 | NewOcc = std::min(a: NewOcc, b: MaxRP.getOccupancy(ST)); |
448 | if (NewOcc <= Occ) |
449 | break; |
450 | |
451 | setBestSchedule(R&: *R, Schedule: MinSchedule, MaxRP); |
452 | } |
453 | LLVM_DEBUG(dbgs() << "New occupancy = " << NewOcc |
454 | << ", prev occupancy = " << Occ << '\n'); |
455 | if (NewOcc > Occ) { |
456 | SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
457 | MFI->increaseOccupancy(MF, Limit: NewOcc); |
458 | } |
459 | |
460 | return std::max(a: NewOcc, b: Occ); |
461 | } |
462 | |
463 | void GCNIterativeScheduler::scheduleLegacyMaxOccupancy( |
464 | bool TryMaximizeOccupancy) { |
465 | const auto &ST = MF.getSubtarget<GCNSubtarget>(); |
466 | SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
467 | auto TgtOcc = MFI->getMinAllowedOccupancy(); |
468 | |
469 | sortRegionsByPressure(TargetOcc: TgtOcc); |
470 | auto Occ = Regions.front()->MaxPressure.getOccupancy(ST); |
471 | |
472 | if (TryMaximizeOccupancy && Occ < TgtOcc) |
473 | Occ = tryMaximizeOccupancy(TargetOcc: TgtOcc); |
474 | |
475 | // This is really weird but for some magic scheduling regions twice |
476 | // gives performance improvement |
477 | const int NumPasses = Occ < TgtOcc ? 2 : 1; |
478 | |
479 | TgtOcc = std::min(a: Occ, b: TgtOcc); |
480 | LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, " |
481 | "target occupancy = " |
482 | << TgtOcc << '\n'); |
483 | GCNMaxOccupancySchedStrategy LStrgy(Context); |
484 | unsigned FinalOccupancy = std::min(a: Occ, b: MFI->getOccupancy()); |
485 | |
486 | for (int I = 0; I < NumPasses; ++I) { |
487 | // running first pass with TargetOccupancy = 0 mimics previous scheduling |
488 | // approach and is a performance magic |
489 | LStrgy.setTargetOccupancy(I == 0 ? 0 : TgtOcc); |
490 | for (auto *R : Regions) { |
491 | OverrideLegacyStrategy Ovr(*R, LStrgy, *this); |
492 | |
493 | Ovr.schedule(); |
494 | const auto RP = getRegionPressure(R: *R); |
495 | LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP)); |
496 | |
497 | if (RP.getOccupancy(ST) < TgtOcc) { |
498 | LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc); |
499 | if (R->BestSchedule.get() && |
500 | R->BestSchedule->MaxPressure.getOccupancy(ST) >= TgtOcc) { |
501 | LLVM_DEBUG(dbgs() << ", scheduling minimal register\n" ); |
502 | scheduleBest(R&: *R); |
503 | } else { |
504 | LLVM_DEBUG(dbgs() << ", restoring\n" ); |
505 | Ovr.restoreOrder(); |
506 | assert(R->MaxPressure.getOccupancy(ST) >= TgtOcc); |
507 | } |
508 | } |
509 | FinalOccupancy = std::min(a: FinalOccupancy, b: RP.getOccupancy(ST)); |
510 | } |
511 | } |
512 | MFI->limitOccupancy(Limit: FinalOccupancy); |
513 | } |
514 | |
515 | /////////////////////////////////////////////////////////////////////////////// |
516 | // Minimal Register Strategy |
517 | |
518 | void GCNIterativeScheduler::scheduleMinReg(bool force) { |
519 | const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
520 | const auto TgtOcc = MFI->getOccupancy(); |
521 | sortRegionsByPressure(TargetOcc: TgtOcc); |
522 | |
523 | auto MaxPressure = Regions.front()->MaxPressure; |
524 | for (auto *R : Regions) { |
525 | if (!force && R->MaxPressure.less(MF, O: MaxPressure, MaxOccupancy: TgtOcc)) |
526 | break; |
527 | |
528 | BuildDAG DAG(*R, *this); |
529 | const auto MinSchedule = makeMinRegSchedule(TopRoots: DAG.getTopRoots(), DAG: *this); |
530 | |
531 | const auto RP = getSchedulePressure(R: *R, Schedule: MinSchedule); |
532 | LLVM_DEBUG(if (R->MaxPressure.less(MF, RP, TgtOcc)) { |
533 | dbgs() << "\nWarning: Pressure becomes worse after minreg!" ; |
534 | printSchedRP(dbgs(), R->MaxPressure, RP); |
535 | }); |
536 | |
537 | if (!force && MaxPressure.less(MF, O: RP, MaxOccupancy: TgtOcc)) |
538 | break; |
539 | |
540 | scheduleRegion(R&: *R, Schedule: MinSchedule, MaxRP: RP); |
541 | LLVM_DEBUG(printSchedResult(dbgs(), R, RP)); |
542 | |
543 | MaxPressure = RP; |
544 | } |
545 | } |
546 | |
547 | /////////////////////////////////////////////////////////////////////////////// |
548 | // ILP scheduler port |
549 | |
550 | void GCNIterativeScheduler::scheduleILP( |
551 | bool TryMaximizeOccupancy) { |
552 | const auto &ST = MF.getSubtarget<GCNSubtarget>(); |
553 | SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
554 | auto TgtOcc = MFI->getMinAllowedOccupancy(); |
555 | |
556 | sortRegionsByPressure(TargetOcc: TgtOcc); |
557 | auto Occ = Regions.front()->MaxPressure.getOccupancy(ST); |
558 | |
559 | if (TryMaximizeOccupancy && Occ < TgtOcc) |
560 | Occ = tryMaximizeOccupancy(TargetOcc: TgtOcc); |
561 | |
562 | TgtOcc = std::min(a: Occ, b: TgtOcc); |
563 | LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, " |
564 | "target occupancy = " |
565 | << TgtOcc << '\n'); |
566 | |
567 | unsigned FinalOccupancy = std::min(a: Occ, b: MFI->getOccupancy()); |
568 | for (auto *R : Regions) { |
569 | BuildDAG DAG(*R, *this); |
570 | const auto ILPSchedule = makeGCNILPScheduler(BotRoots: DAG.getBottomRoots(), DAG: *this); |
571 | |
572 | const auto RP = getSchedulePressure(R: *R, Schedule: ILPSchedule); |
573 | LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP)); |
574 | |
575 | if (RP.getOccupancy(ST) < TgtOcc) { |
576 | LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc); |
577 | if (R->BestSchedule.get() && |
578 | R->BestSchedule->MaxPressure.getOccupancy(ST) >= TgtOcc) { |
579 | LLVM_DEBUG(dbgs() << ", scheduling minimal register\n" ); |
580 | scheduleBest(R&: *R); |
581 | } |
582 | } else { |
583 | scheduleRegion(R&: *R, Schedule: ILPSchedule, MaxRP: RP); |
584 | LLVM_DEBUG(printSchedResult(dbgs(), R, RP)); |
585 | FinalOccupancy = std::min(a: FinalOccupancy, b: RP.getOccupancy(ST)); |
586 | } |
587 | } |
588 | MFI->limitOccupancy(Limit: FinalOccupancy); |
589 | } |
590 | |