//===- GCNIterativeScheduler.cpp ------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the class GCNIterativeScheduler.
///
//===----------------------------------------------------------------------===//

#include "GCNIterativeScheduler.h"
#include "AMDGPUIGroupLP.h"
#include "GCNSchedStrategy.h"
#include "SIMachineFunctionInfo.h"
18
19using namespace llvm;
20
21#define DEBUG_TYPE "machine-scheduler"
22
23namespace llvm {
24
25std::vector<const SUnit *> makeMinRegSchedule(ArrayRef<const SUnit *> TopRoots,
26 const ScheduleDAG &DAG);
27
28std::vector<const SUnit *> makeGCNILPScheduler(ArrayRef<const SUnit *> BotRoots,
29 const ScheduleDAG &DAG);
30} // namespace llvm
31
32// shim accessors for different order containers
33static inline MachineInstr *getMachineInstr(MachineInstr *MI) {
34 return MI;
35}
36static inline MachineInstr *getMachineInstr(const SUnit *SU) {
37 return SU->getInstr();
38}
39static inline MachineInstr *getMachineInstr(const SUnit &SU) {
40 return SU.getInstr();
41}
42
43#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
44LLVM_DUMP_METHOD
45static void printRegion(raw_ostream &OS,
46 MachineBasicBlock::iterator Begin,
47 MachineBasicBlock::iterator End,
48 const LiveIntervals *LIS,
49 unsigned MaxInstNum =
50 std::numeric_limits<unsigned>::max()) {
51 auto *BB = Begin->getParent();
52 OS << BB->getParent()->getName() << ":" << printMBBReference(*BB) << ' '
53 << BB->getName() << ":\n";
54 auto I = Begin;
55 MaxInstNum = std::max(MaxInstNum, 1u);
56 for (; I != End && MaxInstNum; ++I, --MaxInstNum) {
57 if (!I->isDebugInstr() && LIS)
58 OS << LIS->getInstructionIndex(*I);
59 OS << '\t' << *I;
60 }
61 if (I != End) {
62 OS << "\t...\n";
63 I = std::prev(End);
64 if (!I->isDebugInstr() && LIS)
65 OS << LIS->getInstructionIndex(*I);
66 OS << '\t' << *I;
67 }
68 if (End != BB->end()) { // print boundary inst if present
69 OS << "----\n";
70 if (LIS) OS << LIS->getInstructionIndex(*End) << '\t';
71 OS << *End;
72 }
73}
74
75LLVM_DUMP_METHOD
76static void printLivenessInfo(raw_ostream &OS,
77 MachineBasicBlock::iterator Begin,
78 MachineBasicBlock::iterator End,
79 const LiveIntervals *LIS) {
80 auto *const BB = Begin->getParent();
81 const auto &MRI = BB->getParent()->getRegInfo();
82
83 const auto LiveIns = getLiveRegsBefore(*Begin, *LIS);
84 OS << "LIn RP: " << print(getRegPressure(MRI, LiveIns));
85
86 const auto BottomMI = End == BB->end() ? std::prev(End) : End;
87 const auto LiveOuts = getLiveRegsAfter(*BottomMI, *LIS);
88 OS << "LOt RP: " << print(getRegPressure(MRI, LiveOuts));
89}
90
91LLVM_DUMP_METHOD
92void GCNIterativeScheduler::printRegions(raw_ostream &OS) const {
93 const auto &ST = MF.getSubtarget<GCNSubtarget>();
94 for (auto *const R : Regions) {
95 OS << "Region to schedule ";
96 printRegion(OS, R->Begin, R->End, LIS, 1);
97 printLivenessInfo(OS, R->Begin, R->End, LIS);
98 OS << "Max RP: " << print(R->MaxPressure, &ST);
99 }
100}
101
102LLVM_DUMP_METHOD
103void GCNIterativeScheduler::printSchedResult(raw_ostream &OS,
104 const Region *R,
105 const GCNRegPressure &RP) const {
106 OS << "\nAfter scheduling ";
107 printRegion(OS, R->Begin, R->End, LIS);
108 printSchedRP(OS, R->MaxPressure, RP);
109 OS << '\n';
110}
111
112LLVM_DUMP_METHOD
113void GCNIterativeScheduler::printSchedRP(raw_ostream &OS,
114 const GCNRegPressure &Before,
115 const GCNRegPressure &After) const {
116 const auto &ST = MF.getSubtarget<GCNSubtarget>();
117 OS << "RP before: " << print(Before, &ST)
118 << "RP after: " << print(After, &ST);
119}
120#endif
121
122void GCNIterativeScheduler::swapIGLPMutations(const Region &R, bool IsReentry) {
123 bool HasIGLPInstrs = false;
124 const SIInstrInfo *SII = static_cast<const SIInstrInfo *>(TII);
125 for (MachineBasicBlock::iterator I = R.Begin; I != R.End; I++) {
126 if (SII->isIGLPMutationOnly(Opcode: I->getOpcode())) {
127 HasIGLPInstrs = true;
128 break;
129 }
130 }
131
132 if (HasIGLPInstrs) {
133 SavedMutations.clear();
134 SavedMutations.swap(x&: Mutations);
135 auto SchedPhase = IsReentry ? AMDGPU::SchedulingPhase::PreRAReentry
136 : AMDGPU::SchedulingPhase::Initial;
137
138 addMutation(Mutation: createIGroupLPDAGMutation(Phase: SchedPhase));
139 }
140}
141
142// DAG builder helper
143class GCNIterativeScheduler::BuildDAG {
144 GCNIterativeScheduler &Sch;
145 SmallVector<SUnit *, 8> TopRoots;
146
147 SmallVector<SUnit*, 8> BotRoots;
148public:
149 BuildDAG(const Region &R, GCNIterativeScheduler &_Sch, bool IsReentry = false)
150 : Sch(_Sch) {
151 auto *BB = R.Begin->getParent();
152 Sch.BaseClass::startBlock(bb: BB);
153 Sch.BaseClass::enterRegion(bb: BB, begin: R.Begin, end: R.End, regioninstrs: R.NumRegionInstrs);
154 Sch.swapIGLPMutations(R, IsReentry);
155 Sch.buildSchedGraph(AA: Sch.AA, RPTracker: nullptr, PDiffs: nullptr, LIS: nullptr,
156 /*TrackLaneMask*/TrackLaneMasks: true);
157 Sch.postProcessDAG();
158 Sch.Topo.InitDAGTopologicalSorting();
159 Sch.findRootsAndBiasEdges(TopRoots, BotRoots);
160 }
161
162 ~BuildDAG() {
163 Sch.BaseClass::exitRegion();
164 Sch.BaseClass::finishBlock();
165 }
166
167 ArrayRef<const SUnit *> getTopRoots() const {
168 return TopRoots;
169 }
170 ArrayRef<SUnit*> getBottomRoots() const {
171 return BotRoots;
172 }
173};
174
175class GCNIterativeScheduler::OverrideLegacyStrategy {
176 GCNIterativeScheduler &Sch;
177 Region &Rgn;
178 std::unique_ptr<MachineSchedStrategy> SaveSchedImpl;
179 GCNRegPressure SaveMaxRP;
180
181public:
182 OverrideLegacyStrategy(Region &R,
183 MachineSchedStrategy &OverrideStrategy,
184 GCNIterativeScheduler &_Sch)
185 : Sch(_Sch)
186 , Rgn(R)
187 , SaveSchedImpl(std::move(_Sch.SchedImpl))
188 , SaveMaxRP(R.MaxPressure) {
189 Sch.SchedImpl.reset(p: &OverrideStrategy);
190 auto *BB = R.Begin->getParent();
191 Sch.BaseClass::startBlock(bb: BB);
192 Sch.BaseClass::enterRegion(bb: BB, begin: R.Begin, end: R.End, regioninstrs: R.NumRegionInstrs);
193 }
194
195 ~OverrideLegacyStrategy() {
196 Sch.BaseClass::exitRegion();
197 Sch.BaseClass::finishBlock();
198 Sch.SchedImpl.release();
199 Sch.SchedImpl = std::move(SaveSchedImpl);
200 }
201
202 void schedule() {
203 assert(Sch.RegionBegin == Rgn.Begin && Sch.RegionEnd == Rgn.End);
204 LLVM_DEBUG(dbgs() << "\nScheduling ";
205 printRegion(dbgs(), Rgn.Begin, Rgn.End, Sch.LIS, 2));
206 Sch.BaseClass::schedule();
207
208 // Unfortunately placeDebugValues incorrectly modifies RegionEnd, restore
209 Sch.RegionEnd = Rgn.End;
210 //assert(Rgn.End == Sch.RegionEnd);
211 Rgn.Begin = Sch.RegionBegin;
212 Rgn.MaxPressure.clear();
213 }
214
215 void restoreOrder() {
216 assert(Sch.RegionBegin == Rgn.Begin && Sch.RegionEnd == Rgn.End);
217 // DAG SUnits are stored using original region's order
218 // so just use SUnits as the restoring schedule
219 Sch.scheduleRegion(R&: Rgn, Schedule&: Sch.SUnits, MaxRP: SaveMaxRP);
220 }
221};
222
223namespace {
224
225// just a stub to make base class happy
226class SchedStrategyStub : public MachineSchedStrategy {
227public:
228 bool shouldTrackPressure() const override { return false; }
229 bool shouldTrackLaneMasks() const override { return false; }
230 void initialize(ScheduleDAGMI *DAG) override {}
231 SUnit *pickNode(bool &IsTopNode) override { return nullptr; }
232 void schedNode(SUnit *SU, bool IsTopNode) override {}
233 void releaseTopNode(SUnit *SU) override {}
234 void releaseBottomNode(SUnit *SU) override {}
235};
236
237} // end anonymous namespace
238
239GCNIterativeScheduler::GCNIterativeScheduler(MachineSchedContext *C,
240 StrategyKind S)
241 : BaseClass(C, std::make_unique<SchedStrategyStub>())
242 , Context(C)
243 , Strategy(S)
244 , UPTracker(*LIS) {
245}
246
247// returns max pressure for a region
248GCNRegPressure
249GCNIterativeScheduler::getRegionPressure(MachineBasicBlock::iterator Begin,
250 MachineBasicBlock::iterator End)
251 const {
252 // For the purpose of pressure tracking bottom inst of the region should
253 // be also processed. End is either BB end, BB terminator inst or sched
254 // boundary inst.
255 auto const BBEnd = Begin->getParent()->end();
256 auto const BottomMI = End == BBEnd ? std::prev(x: End) : End;
257
258 // scheduleRegions walks bottom to top, so its likely we just get next
259 // instruction to track
260 auto AfterBottomMI = std::next(x: BottomMI);
261 if (AfterBottomMI == BBEnd ||
262 &*AfterBottomMI != UPTracker.getLastTrackedMI()) {
263 UPTracker.reset(MI: *BottomMI);
264 } else {
265 assert(UPTracker.isValid());
266 }
267
268 for (auto I = BottomMI; I != Begin; --I)
269 UPTracker.recede(MI: *I);
270
271 UPTracker.recede(MI: *Begin);
272
273 assert(UPTracker.isValid() ||
274 (dbgs() << "Tracked region ",
275 printRegion(dbgs(), Begin, End, LIS), false));
276 return UPTracker.getMaxPressureAndReset();
277}
278
279// returns max pressure for a tentative schedule
280template <typename Range> GCNRegPressure
281GCNIterativeScheduler::getSchedulePressure(const Region &R,
282 Range &&Schedule) const {
283 auto const BBEnd = R.Begin->getParent()->end();
284 GCNUpwardRPTracker RPTracker(*LIS);
285 if (R.End != BBEnd) {
286 // R.End points to the boundary instruction but the
287 // schedule doesn't include it
288 RPTracker.reset(MI: *R.End);
289 RPTracker.recede(MI: *R.End);
290 } else {
291 // R.End doesn't point to the boundary instruction
292 RPTracker.reset(MI: *std::prev(x: BBEnd));
293 }
294 for (auto I = Schedule.end(), B = Schedule.begin(); I != B;) {
295 RPTracker.recede(MI: *getMachineInstr(*--I));
296 }
297 return RPTracker.getMaxPressureAndReset();
298}
299
300void GCNIterativeScheduler::enterRegion(MachineBasicBlock *BB, // overridden
301 MachineBasicBlock::iterator Begin,
302 MachineBasicBlock::iterator End,
303 unsigned NumRegionInstrs) {
304 BaseClass::enterRegion(bb: BB, begin: Begin, end: End, regioninstrs: NumRegionInstrs);
305 if (NumRegionInstrs > 2) {
306 Regions.push_back(
307 x: new (Alloc.Allocate())
308 Region { .Begin: Begin, .End: End, .NumRegionInstrs: NumRegionInstrs,
309 .MaxPressure: getRegionPressure(Begin, End), .BestSchedule: nullptr });
310 }
311}
312
313void GCNIterativeScheduler::schedule() { // overridden
314 // do nothing
315 LLVM_DEBUG(printLivenessInfo(dbgs(), RegionBegin, RegionEnd, LIS);
316 if (!Regions.empty() && Regions.back()->Begin == RegionBegin) {
317 dbgs() << "Max RP: "
318 << print(Regions.back()->MaxPressure,
319 &MF.getSubtarget<GCNSubtarget>());
320 } dbgs()
321 << '\n';);
322}
323
324void GCNIterativeScheduler::finalizeSchedule() { // overridden
325 if (Regions.empty())
326 return;
327 switch (Strategy) {
328 case SCHEDULE_MINREGONLY: scheduleMinReg(); break;
329 case SCHEDULE_MINREGFORCED: scheduleMinReg(force: true); break;
330 case SCHEDULE_LEGACYMAXOCCUPANCY: scheduleLegacyMaxOccupancy(); break;
331 case SCHEDULE_ILP: scheduleILP(TryMaximizeOccupancy: false); break;
332 }
333}
334
335// Detach schedule from SUnits and interleave it with debug values.
336// Returned schedule becomes independent of DAG state.
337std::vector<MachineInstr*>
338GCNIterativeScheduler::detachSchedule(ScheduleRef Schedule) const {
339 std::vector<MachineInstr*> Res;
340 Res.reserve(n: Schedule.size() * 2);
341
342 if (FirstDbgValue)
343 Res.push_back(x: FirstDbgValue);
344
345 const auto DbgB = DbgValues.begin(), DbgE = DbgValues.end();
346 for (const auto *SU : Schedule) {
347 Res.push_back(x: SU->getInstr());
348 const auto &D = std::find_if(first: DbgB, last: DbgE, pred: [SU](decltype(*DbgB) &P) {
349 return P.second == SU->getInstr();
350 });
351 if (D != DbgE)
352 Res.push_back(x: D->first);
353 }
354 return Res;
355}
356
357void GCNIterativeScheduler::setBestSchedule(Region &R,
358 ScheduleRef Schedule,
359 const GCNRegPressure &MaxRP) {
360 R.BestSchedule.reset(
361 p: new TentativeSchedule{ .Schedule: detachSchedule(Schedule), .MaxPressure: MaxRP });
362}
363
364void GCNIterativeScheduler::scheduleBest(Region &R) {
365 assert(R.BestSchedule.get() && "No schedule specified");
366 scheduleRegion(R, Schedule&: R.BestSchedule->Schedule, MaxRP: R.BestSchedule->MaxPressure);
367 R.BestSchedule.reset();
368}
369
370// minimal required region scheduler, works for ranges of SUnits*,
371// SUnits or MachineIntrs*
372template <typename Range>
373void GCNIterativeScheduler::scheduleRegion(Region &R, Range &&Schedule,
374 const GCNRegPressure &MaxRP) {
375 assert(RegionBegin == R.Begin && RegionEnd == R.End);
376 assert(LIS != nullptr);
377#ifndef NDEBUG
378 const auto SchedMaxRP = getSchedulePressure(R, Schedule);
379#endif
380 auto *BB = R.Begin->getParent();
381 auto Top = R.Begin;
382 for (const auto &I : Schedule) {
383 auto MI = getMachineInstr(I);
384
385 MachineBasicBlock::iterator MII = MI->getIterator();
386 if (MII != Top) {
387 bool NonDebugReordered =
388 !MI->isDebugInstr() && skipDebugInstructionsForward(It: Top, End: MII) != MII;
389 BB->remove(I: MI);
390 BB->insert(Top, MI);
391 if (NonDebugReordered)
392 LIS->handleMove(MI&: *MI, UpdateFlags: true);
393 }
394 if (!MI->isDebugInstr()) {
395 // Reset read - undef flags and update them later.
396 for (auto &Op : MI->all_defs())
397 Op.setIsUndef(false);
398
399 RegisterOperands RegOpers;
400 RegOpers.collect(MI: *MI, TRI: *TRI, MRI, /*ShouldTrackLaneMasks*/TrackLaneMasks: true,
401 /*IgnoreDead*/false);
402 // Adjust liveness and add missing dead+read-undef flags.
403 auto SlotIdx = LIS->getInstructionIndex(Instr: *MI).getRegSlot();
404 RegOpers.adjustLaneLiveness(LIS: *LIS, MRI, Pos: SlotIdx, AddFlagsMI: MI);
405 }
406 Top = std::next(MI->getIterator());
407 }
408 RegionBegin = getMachineInstr(Schedule.front());
409
410 // Schedule consisting of MachineInstr* is considered 'detached'
411 // and already interleaved with debug values
412 if (!std::is_same_v<decltype(*Schedule.begin()), MachineInstr*>) {
413 placeDebugValues();
414 // Unfortunately placeDebugValues incorrectly modifies RegionEnd, restore
415 // assert(R.End == RegionEnd);
416 RegionEnd = R.End;
417 }
418
419 R.Begin = RegionBegin;
420 R.MaxPressure = MaxRP;
421
422#ifndef NDEBUG
423 const auto RegionMaxRP = getRegionPressure(R);
424 const auto &ST = MF.getSubtarget<GCNSubtarget>();
425#endif
426 assert(
427 (SchedMaxRP == RegionMaxRP && (MaxRP.empty() || SchedMaxRP == MaxRP)) ||
428 (dbgs() << "Max RP mismatch!!!\n"
429 "RP for schedule (calculated): "
430 << print(SchedMaxRP, &ST)
431 << "RP for schedule (reported): " << print(MaxRP, &ST)
432 << "RP after scheduling: " << print(RegionMaxRP, &ST),
433 false));
434}
435
436// Sort recorded regions by pressure - highest at the front
437void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) {
438 llvm::sort(C&: Regions, Comp: [this, TargetOcc](const Region *R1, const Region *R2) {
439 return R2->MaxPressure.less(MF, O: R1->MaxPressure, MaxOccupancy: TargetOcc);
440 });
441}
442
443///////////////////////////////////////////////////////////////////////////////
444// Legacy MaxOccupancy Strategy
445
446// Tries to increase occupancy applying minreg scheduler for a sequence of
447// most demanding regions. Obtained schedules are saved as BestSchedule for a
448// region.
449// TargetOcc is the best achievable occupancy for a kernel.
450// Returns better occupancy on success or current occupancy on fail.
451// BestSchedules aren't deleted on fail.
452unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
453 // TODO: assert Regions are sorted descending by pressure
454 const auto &ST = MF.getSubtarget<GCNSubtarget>();
455 const unsigned DynamicVGPRBlockSize =
456 MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
457 const auto Occ =
458 Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize);
459 LLVM_DEBUG(dbgs() << "Trying to improve occupancy, target = " << TargetOcc
460 << ", current = " << Occ << '\n');
461
462 auto NewOcc = TargetOcc;
463 for (auto *R : Regions) {
464 // Always build the DAG to add mutations
465 BuildDAG DAG(*R, *this);
466
467 if (R->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize) >= NewOcc)
468 continue;
469
470 LLVM_DEBUG(printRegion(dbgs(), R->Begin, R->End, LIS, 3);
471 printLivenessInfo(dbgs(), R->Begin, R->End, LIS));
472
473 const auto MinSchedule = makeMinRegSchedule(TopRoots: DAG.getTopRoots(), DAG: *this);
474 const auto MaxRP = getSchedulePressure(R: *R, Schedule: MinSchedule);
475 LLVM_DEBUG(dbgs() << "Occupancy improvement attempt:\n";
476 printSchedRP(dbgs(), R->MaxPressure, MaxRP));
477
478 NewOcc = std::min(a: NewOcc, b: MaxRP.getOccupancy(ST, DynamicVGPRBlockSize));
479 if (NewOcc <= Occ)
480 break;
481
482 setBestSchedule(R&: *R, Schedule: MinSchedule, MaxRP);
483 }
484 LLVM_DEBUG(dbgs() << "New occupancy = " << NewOcc
485 << ", prev occupancy = " << Occ << '\n');
486 if (NewOcc > Occ) {
487 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
488 MFI->increaseOccupancy(MF, Limit: NewOcc);
489 }
490
491 return std::max(a: NewOcc, b: Occ);
492}
493
494void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
495 bool TryMaximizeOccupancy) {
496 const auto &ST = MF.getSubtarget<GCNSubtarget>();
497 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
498 auto TgtOcc = MFI->getMinAllowedOccupancy();
499 unsigned DynamicVGPRBlockSize = MFI->getDynamicVGPRBlockSize();
500
501 sortRegionsByPressure(TargetOcc: TgtOcc);
502 auto Occ =
503 Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize);
504
505 bool IsReentry = false;
506 if (TryMaximizeOccupancy && Occ < TgtOcc) {
507 Occ = tryMaximizeOccupancy(TargetOcc: TgtOcc);
508 IsReentry = true;
509 }
510
511 // This is really weird but for some magic scheduling regions twice
512 // gives performance improvement
513 const int NumPasses = Occ < TgtOcc ? 2 : 1;
514
515 TgtOcc = std::min(a: Occ, b: TgtOcc);
516 LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, "
517 "target occupancy = "
518 << TgtOcc << '\n');
519 GCNMaxOccupancySchedStrategy LStrgy(Context, /*IsLegacyScheduler=*/true);
520 unsigned FinalOccupancy = std::min(a: Occ, b: MFI->getOccupancy());
521
522 for (int I = 0; I < NumPasses; ++I) {
523 // running first pass with TargetOccupancy = 0 mimics previous scheduling
524 // approach and is a performance magic
525 LStrgy.setTargetOccupancy(I == 0 ? 0 : TgtOcc);
526 for (auto *R : Regions) {
527 OverrideLegacyStrategy Ovr(*R, LStrgy, *this);
528 IsReentry |= I > 0;
529 swapIGLPMutations(R: *R, IsReentry);
530 Ovr.schedule();
531 const auto RP = getRegionPressure(R: *R);
532 LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
533
534 if (RP.getOccupancy(ST, DynamicVGPRBlockSize) < TgtOcc) {
535 LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
536 if (R->BestSchedule.get() && R->BestSchedule->MaxPressure.getOccupancy(
537 ST, DynamicVGPRBlockSize) >= TgtOcc) {
538 LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
539 scheduleBest(R&: *R);
540 } else {
541 LLVM_DEBUG(dbgs() << ", restoring\n");
542 Ovr.restoreOrder();
543 assert(R->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize) >=
544 TgtOcc);
545 }
546 }
547 FinalOccupancy =
548 std::min(a: FinalOccupancy, b: RP.getOccupancy(ST, DynamicVGPRBlockSize));
549 }
550 }
551 MFI->limitOccupancy(Limit: FinalOccupancy);
552}
553
554///////////////////////////////////////////////////////////////////////////////
555// Minimal Register Strategy
556
557void GCNIterativeScheduler::scheduleMinReg(bool force) {
558 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
559 const auto TgtOcc = MFI->getOccupancy();
560 sortRegionsByPressure(TargetOcc: TgtOcc);
561
562 auto MaxPressure = Regions.front()->MaxPressure;
563 for (auto *R : Regions) {
564 if (!force && R->MaxPressure.less(MF, O: MaxPressure, MaxOccupancy: TgtOcc))
565 break;
566
567 BuildDAG DAG(*R, *this);
568 const auto MinSchedule = makeMinRegSchedule(TopRoots: DAG.getTopRoots(), DAG: *this);
569
570 const auto RP = getSchedulePressure(R: *R, Schedule: MinSchedule);
571 LLVM_DEBUG(if (R->MaxPressure.less(MF, RP, TgtOcc)) {
572 dbgs() << "\nWarning: Pressure becomes worse after minreg!";
573 printSchedRP(dbgs(), R->MaxPressure, RP);
574 });
575
576 if (!force && MaxPressure.less(MF, O: RP, MaxOccupancy: TgtOcc))
577 break;
578
579 scheduleRegion(R&: *R, Schedule: MinSchedule, MaxRP: RP);
580 LLVM_DEBUG(printSchedResult(dbgs(), R, RP));
581
582 MaxPressure = RP;
583 }
584}
585
586///////////////////////////////////////////////////////////////////////////////
587// ILP scheduler port
588
589void GCNIterativeScheduler::scheduleILP(
590 bool TryMaximizeOccupancy) {
591 const auto &ST = MF.getSubtarget<GCNSubtarget>();
592 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
593 auto TgtOcc = MFI->getMinAllowedOccupancy();
594 unsigned DynamicVGPRBlockSize = MFI->getDynamicVGPRBlockSize();
595
596 sortRegionsByPressure(TargetOcc: TgtOcc);
597 auto Occ =
598 Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize);
599
600 bool IsReentry = false;
601 if (TryMaximizeOccupancy && Occ < TgtOcc) {
602 Occ = tryMaximizeOccupancy(TargetOcc: TgtOcc);
603 IsReentry = true;
604 }
605
606 TgtOcc = std::min(a: Occ, b: TgtOcc);
607 LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, "
608 "target occupancy = "
609 << TgtOcc << '\n');
610
611 unsigned FinalOccupancy = std::min(a: Occ, b: MFI->getOccupancy());
612 for (auto *R : Regions) {
613 BuildDAG DAG(*R, *this, IsReentry);
614 const auto ILPSchedule = makeGCNILPScheduler(BotRoots: DAG.getBottomRoots(), DAG: *this);
615
616 const auto RP = getSchedulePressure(R: *R, Schedule: ILPSchedule);
617 LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
618
619 if (RP.getOccupancy(ST, DynamicVGPRBlockSize) < TgtOcc) {
620 LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
621 if (R->BestSchedule.get() && R->BestSchedule->MaxPressure.getOccupancy(
622 ST, DynamicVGPRBlockSize) >= TgtOcc) {
623 LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
624 scheduleBest(R&: *R);
625 }
626 } else {
627 scheduleRegion(R&: *R, Schedule: ILPSchedule, MaxRP: RP);
628 LLVM_DEBUG(printSchedResult(dbgs(), R, RP));
629 FinalOccupancy =
630 std::min(a: FinalOccupancy, b: RP.getOccupancy(ST, DynamicVGPRBlockSize));
631 }
632 }
633 MFI->limitOccupancy(Limit: FinalOccupancy);
634}
635