1 | //===- GCNIterativeScheduler.cpp ------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// This file implements the class GCNIterativeScheduler. |
11 | /// |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "GCNIterativeScheduler.h" |
15 | #include "AMDGPUIGroupLP.h" |
16 | #include "GCNSchedStrategy.h" |
17 | #include "SIMachineFunctionInfo.h" |
18 | |
19 | using namespace llvm; |
20 | |
21 | #define DEBUG_TYPE "machine-scheduler" |
22 | |
23 | namespace llvm { |
24 | |
25 | std::vector<const SUnit *> makeMinRegSchedule(ArrayRef<const SUnit *> TopRoots, |
26 | const ScheduleDAG &DAG); |
27 | |
28 | std::vector<const SUnit *> makeGCNILPScheduler(ArrayRef<const SUnit *> BotRoots, |
29 | const ScheduleDAG &DAG); |
30 | } // namespace llvm |
31 | |
32 | // shim accessors for different order containers |
33 | static inline MachineInstr *getMachineInstr(MachineInstr *MI) { |
34 | return MI; |
35 | } |
36 | static inline MachineInstr *getMachineInstr(const SUnit *SU) { |
37 | return SU->getInstr(); |
38 | } |
39 | static inline MachineInstr *getMachineInstr(const SUnit &SU) { |
40 | return SU.getInstr(); |
41 | } |
42 | |
43 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
44 | LLVM_DUMP_METHOD |
45 | static void printRegion(raw_ostream &OS, |
46 | MachineBasicBlock::iterator Begin, |
47 | MachineBasicBlock::iterator End, |
48 | const LiveIntervals *LIS, |
49 | unsigned MaxInstNum = |
50 | std::numeric_limits<unsigned>::max()) { |
51 | auto *BB = Begin->getParent(); |
52 | OS << BB->getParent()->getName() << ":" << printMBBReference(*BB) << ' ' |
53 | << BB->getName() << ":\n" ; |
54 | auto I = Begin; |
55 | MaxInstNum = std::max(MaxInstNum, 1u); |
56 | for (; I != End && MaxInstNum; ++I, --MaxInstNum) { |
57 | if (!I->isDebugInstr() && LIS) |
58 | OS << LIS->getInstructionIndex(*I); |
59 | OS << '\t' << *I; |
60 | } |
61 | if (I != End) { |
62 | OS << "\t...\n" ; |
63 | I = std::prev(End); |
64 | if (!I->isDebugInstr() && LIS) |
65 | OS << LIS->getInstructionIndex(*I); |
66 | OS << '\t' << *I; |
67 | } |
68 | if (End != BB->end()) { // print boundary inst if present |
69 | OS << "----\n" ; |
70 | if (LIS) OS << LIS->getInstructionIndex(*End) << '\t'; |
71 | OS << *End; |
72 | } |
73 | } |
74 | |
75 | LLVM_DUMP_METHOD |
76 | static void printLivenessInfo(raw_ostream &OS, |
77 | MachineBasicBlock::iterator Begin, |
78 | MachineBasicBlock::iterator End, |
79 | const LiveIntervals *LIS) { |
80 | auto *const BB = Begin->getParent(); |
81 | const auto &MRI = BB->getParent()->getRegInfo(); |
82 | |
83 | const auto LiveIns = getLiveRegsBefore(*Begin, *LIS); |
84 | OS << "LIn RP: " << print(getRegPressure(MRI, LiveIns)); |
85 | |
86 | const auto BottomMI = End == BB->end() ? std::prev(End) : End; |
87 | const auto LiveOuts = getLiveRegsAfter(*BottomMI, *LIS); |
88 | OS << "LOt RP: " << print(getRegPressure(MRI, LiveOuts)); |
89 | } |
90 | |
91 | LLVM_DUMP_METHOD |
92 | void GCNIterativeScheduler::printRegions(raw_ostream &OS) const { |
93 | const auto &ST = MF.getSubtarget<GCNSubtarget>(); |
94 | for (auto *const R : Regions) { |
95 | OS << "Region to schedule " ; |
96 | printRegion(OS, R->Begin, R->End, LIS, 1); |
97 | printLivenessInfo(OS, R->Begin, R->End, LIS); |
98 | OS << "Max RP: " << print(R->MaxPressure, &ST); |
99 | } |
100 | } |
101 | |
102 | LLVM_DUMP_METHOD |
103 | void GCNIterativeScheduler::printSchedResult(raw_ostream &OS, |
104 | const Region *R, |
105 | const GCNRegPressure &RP) const { |
106 | OS << "\nAfter scheduling " ; |
107 | printRegion(OS, R->Begin, R->End, LIS); |
108 | printSchedRP(OS, R->MaxPressure, RP); |
109 | OS << '\n'; |
110 | } |
111 | |
112 | LLVM_DUMP_METHOD |
113 | void GCNIterativeScheduler::printSchedRP(raw_ostream &OS, |
114 | const GCNRegPressure &Before, |
115 | const GCNRegPressure &After) const { |
116 | const auto &ST = MF.getSubtarget<GCNSubtarget>(); |
117 | OS << "RP before: " << print(Before, &ST) |
118 | << "RP after: " << print(After, &ST); |
119 | } |
120 | #endif |
121 | |
122 | void GCNIterativeScheduler::swapIGLPMutations(const Region &R, bool IsReentry) { |
123 | bool HasIGLPInstrs = false; |
124 | const SIInstrInfo *SII = static_cast<const SIInstrInfo *>(TII); |
125 | for (MachineBasicBlock::iterator I = R.Begin; I != R.End; I++) { |
126 | if (SII->isIGLPMutationOnly(Opcode: I->getOpcode())) { |
127 | HasIGLPInstrs = true; |
128 | break; |
129 | } |
130 | } |
131 | |
132 | if (HasIGLPInstrs) { |
133 | SavedMutations.clear(); |
134 | SavedMutations.swap(x&: Mutations); |
135 | auto SchedPhase = IsReentry ? AMDGPU::SchedulingPhase::PreRAReentry |
136 | : AMDGPU::SchedulingPhase::Initial; |
137 | |
138 | addMutation(Mutation: createIGroupLPDAGMutation(Phase: SchedPhase)); |
139 | } |
140 | } |
141 | |
142 | // DAG builder helper |
143 | class GCNIterativeScheduler::BuildDAG { |
144 | GCNIterativeScheduler &Sch; |
145 | SmallVector<SUnit *, 8> TopRoots; |
146 | |
147 | SmallVector<SUnit*, 8> BotRoots; |
148 | public: |
149 | BuildDAG(const Region &R, GCNIterativeScheduler &_Sch, bool IsReentry = false) |
150 | : Sch(_Sch) { |
151 | auto *BB = R.Begin->getParent(); |
152 | Sch.BaseClass::startBlock(bb: BB); |
153 | Sch.BaseClass::enterRegion(bb: BB, begin: R.Begin, end: R.End, regioninstrs: R.NumRegionInstrs); |
154 | Sch.swapIGLPMutations(R, IsReentry); |
155 | Sch.buildSchedGraph(AA: Sch.AA, RPTracker: nullptr, PDiffs: nullptr, LIS: nullptr, |
156 | /*TrackLaneMask*/TrackLaneMasks: true); |
157 | Sch.postProcessDAG(); |
158 | Sch.Topo.InitDAGTopologicalSorting(); |
159 | Sch.findRootsAndBiasEdges(TopRoots, BotRoots); |
160 | } |
161 | |
162 | ~BuildDAG() { |
163 | Sch.BaseClass::exitRegion(); |
164 | Sch.BaseClass::finishBlock(); |
165 | } |
166 | |
167 | ArrayRef<const SUnit *> getTopRoots() const { |
168 | return TopRoots; |
169 | } |
170 | ArrayRef<SUnit*> getBottomRoots() const { |
171 | return BotRoots; |
172 | } |
173 | }; |
174 | |
175 | class GCNIterativeScheduler::OverrideLegacyStrategy { |
176 | GCNIterativeScheduler &Sch; |
177 | Region &Rgn; |
178 | std::unique_ptr<MachineSchedStrategy> SaveSchedImpl; |
179 | GCNRegPressure SaveMaxRP; |
180 | |
181 | public: |
182 | OverrideLegacyStrategy(Region &R, |
183 | MachineSchedStrategy &OverrideStrategy, |
184 | GCNIterativeScheduler &_Sch) |
185 | : Sch(_Sch) |
186 | , Rgn(R) |
187 | , SaveSchedImpl(std::move(_Sch.SchedImpl)) |
188 | , SaveMaxRP(R.MaxPressure) { |
189 | Sch.SchedImpl.reset(p: &OverrideStrategy); |
190 | auto *BB = R.Begin->getParent(); |
191 | Sch.BaseClass::startBlock(bb: BB); |
192 | Sch.BaseClass::enterRegion(bb: BB, begin: R.Begin, end: R.End, regioninstrs: R.NumRegionInstrs); |
193 | } |
194 | |
195 | ~OverrideLegacyStrategy() { |
196 | Sch.BaseClass::exitRegion(); |
197 | Sch.BaseClass::finishBlock(); |
198 | Sch.SchedImpl.release(); |
199 | Sch.SchedImpl = std::move(SaveSchedImpl); |
200 | } |
201 | |
202 | void schedule() { |
203 | assert(Sch.RegionBegin == Rgn.Begin && Sch.RegionEnd == Rgn.End); |
204 | LLVM_DEBUG(dbgs() << "\nScheduling " ; |
205 | printRegion(dbgs(), Rgn.Begin, Rgn.End, Sch.LIS, 2)); |
206 | Sch.BaseClass::schedule(); |
207 | |
208 | // Unfortunately placeDebugValues incorrectly modifies RegionEnd, restore |
209 | Sch.RegionEnd = Rgn.End; |
210 | //assert(Rgn.End == Sch.RegionEnd); |
211 | Rgn.Begin = Sch.RegionBegin; |
212 | Rgn.MaxPressure.clear(); |
213 | } |
214 | |
215 | void restoreOrder() { |
216 | assert(Sch.RegionBegin == Rgn.Begin && Sch.RegionEnd == Rgn.End); |
217 | // DAG SUnits are stored using original region's order |
218 | // so just use SUnits as the restoring schedule |
219 | Sch.scheduleRegion(R&: Rgn, Schedule&: Sch.SUnits, MaxRP: SaveMaxRP); |
220 | } |
221 | }; |
222 | |
223 | namespace { |
224 | |
225 | // just a stub to make base class happy |
226 | class SchedStrategyStub : public MachineSchedStrategy { |
227 | public: |
228 | bool shouldTrackPressure() const override { return false; } |
229 | bool shouldTrackLaneMasks() const override { return false; } |
230 | void initialize(ScheduleDAGMI *DAG) override {} |
231 | SUnit *pickNode(bool &IsTopNode) override { return nullptr; } |
232 | void schedNode(SUnit *SU, bool IsTopNode) override {} |
233 | void releaseTopNode(SUnit *SU) override {} |
234 | void releaseBottomNode(SUnit *SU) override {} |
235 | }; |
236 | |
237 | } // end anonymous namespace |
238 | |
239 | GCNIterativeScheduler::GCNIterativeScheduler(MachineSchedContext *C, |
240 | StrategyKind S) |
241 | : BaseClass(C, std::make_unique<SchedStrategyStub>()) |
242 | , Context(C) |
243 | , Strategy(S) |
244 | , UPTracker(*LIS) { |
245 | } |
246 | |
247 | // returns max pressure for a region |
248 | GCNRegPressure |
249 | GCNIterativeScheduler::getRegionPressure(MachineBasicBlock::iterator Begin, |
250 | MachineBasicBlock::iterator End) |
251 | const { |
252 | // For the purpose of pressure tracking bottom inst of the region should |
253 | // be also processed. End is either BB end, BB terminator inst or sched |
254 | // boundary inst. |
255 | auto const BBEnd = Begin->getParent()->end(); |
256 | auto const BottomMI = End == BBEnd ? std::prev(x: End) : End; |
257 | |
258 | // scheduleRegions walks bottom to top, so its likely we just get next |
259 | // instruction to track |
260 | auto AfterBottomMI = std::next(x: BottomMI); |
261 | if (AfterBottomMI == BBEnd || |
262 | &*AfterBottomMI != UPTracker.getLastTrackedMI()) { |
263 | UPTracker.reset(MI: *BottomMI); |
264 | } else { |
265 | assert(UPTracker.isValid()); |
266 | } |
267 | |
268 | for (auto I = BottomMI; I != Begin; --I) |
269 | UPTracker.recede(MI: *I); |
270 | |
271 | UPTracker.recede(MI: *Begin); |
272 | |
273 | assert(UPTracker.isValid() || |
274 | (dbgs() << "Tracked region " , |
275 | printRegion(dbgs(), Begin, End, LIS), false)); |
276 | return UPTracker.getMaxPressureAndReset(); |
277 | } |
278 | |
279 | // returns max pressure for a tentative schedule |
280 | template <typename Range> GCNRegPressure |
281 | GCNIterativeScheduler::getSchedulePressure(const Region &R, |
282 | Range &&Schedule) const { |
283 | auto const BBEnd = R.Begin->getParent()->end(); |
284 | GCNUpwardRPTracker RPTracker(*LIS); |
285 | if (R.End != BBEnd) { |
286 | // R.End points to the boundary instruction but the |
287 | // schedule doesn't include it |
288 | RPTracker.reset(MI: *R.End); |
289 | RPTracker.recede(MI: *R.End); |
290 | } else { |
291 | // R.End doesn't point to the boundary instruction |
292 | RPTracker.reset(MI: *std::prev(x: BBEnd)); |
293 | } |
294 | for (auto I = Schedule.end(), B = Schedule.begin(); I != B;) { |
295 | RPTracker.recede(MI: *getMachineInstr(*--I)); |
296 | } |
297 | return RPTracker.getMaxPressureAndReset(); |
298 | } |
299 | |
300 | void GCNIterativeScheduler::enterRegion(MachineBasicBlock *BB, // overridden |
301 | MachineBasicBlock::iterator Begin, |
302 | MachineBasicBlock::iterator End, |
303 | unsigned NumRegionInstrs) { |
304 | BaseClass::enterRegion(bb: BB, begin: Begin, end: End, regioninstrs: NumRegionInstrs); |
305 | if (NumRegionInstrs > 2) { |
306 | Regions.push_back( |
307 | x: new (Alloc.Allocate()) |
308 | Region { .Begin: Begin, .End: End, .NumRegionInstrs: NumRegionInstrs, |
309 | .MaxPressure: getRegionPressure(Begin, End), .BestSchedule: nullptr }); |
310 | } |
311 | } |
312 | |
// Per-region hook (overridden): deliberately a no-op — the actual
// rescheduling of all recorded regions happens later in finalizeSchedule().
// This only emits liveness/pressure debug output for the current region.
void GCNIterativeScheduler::schedule() { // overridden
  // do nothing
  LLVM_DEBUG(printLivenessInfo(dbgs(), RegionBegin, RegionEnd, LIS);
             if (!Regions.empty() && Regions.back()->Begin == RegionBegin) {
               dbgs() << "Max RP: "
                      << print(Regions.back()->MaxPressure,
                               &MF.getSubtarget<GCNSubtarget>());
             } dbgs()
             << '\n';);
}
323 | |
324 | void GCNIterativeScheduler::finalizeSchedule() { // overridden |
325 | if (Regions.empty()) |
326 | return; |
327 | switch (Strategy) { |
328 | case SCHEDULE_MINREGONLY: scheduleMinReg(); break; |
329 | case SCHEDULE_MINREGFORCED: scheduleMinReg(force: true); break; |
330 | case SCHEDULE_LEGACYMAXOCCUPANCY: scheduleLegacyMaxOccupancy(); break; |
331 | case SCHEDULE_ILP: scheduleILP(TryMaximizeOccupancy: false); break; |
332 | } |
333 | } |
334 | |
335 | // Detach schedule from SUnits and interleave it with debug values. |
336 | // Returned schedule becomes independent of DAG state. |
337 | std::vector<MachineInstr*> |
338 | GCNIterativeScheduler::detachSchedule(ScheduleRef Schedule) const { |
339 | std::vector<MachineInstr*> Res; |
340 | Res.reserve(n: Schedule.size() * 2); |
341 | |
342 | if (FirstDbgValue) |
343 | Res.push_back(x: FirstDbgValue); |
344 | |
345 | const auto DbgB = DbgValues.begin(), DbgE = DbgValues.end(); |
346 | for (const auto *SU : Schedule) { |
347 | Res.push_back(x: SU->getInstr()); |
348 | const auto &D = std::find_if(first: DbgB, last: DbgE, pred: [SU](decltype(*DbgB) &P) { |
349 | return P.second == SU->getInstr(); |
350 | }); |
351 | if (D != DbgE) |
352 | Res.push_back(x: D->first); |
353 | } |
354 | return Res; |
355 | } |
356 | |
357 | void GCNIterativeScheduler::setBestSchedule(Region &R, |
358 | ScheduleRef Schedule, |
359 | const GCNRegPressure &MaxRP) { |
360 | R.BestSchedule.reset( |
361 | p: new TentativeSchedule{ .Schedule: detachSchedule(Schedule), .MaxPressure: MaxRP }); |
362 | } |
363 | |
364 | void GCNIterativeScheduler::scheduleBest(Region &R) { |
365 | assert(R.BestSchedule.get() && "No schedule specified" ); |
366 | scheduleRegion(R, Schedule&: R.BestSchedule->Schedule, MaxRP: R.BestSchedule->MaxPressure); |
367 | R.BestSchedule.reset(); |
368 | } |
369 | |
370 | // minimal required region scheduler, works for ranges of SUnits*, |
371 | // SUnits or MachineIntrs* |
372 | template <typename Range> |
373 | void GCNIterativeScheduler::scheduleRegion(Region &R, Range &&Schedule, |
374 | const GCNRegPressure &MaxRP) { |
375 | assert(RegionBegin == R.Begin && RegionEnd == R.End); |
376 | assert(LIS != nullptr); |
377 | #ifndef NDEBUG |
378 | const auto SchedMaxRP = getSchedulePressure(R, Schedule); |
379 | #endif |
380 | auto *BB = R.Begin->getParent(); |
381 | auto Top = R.Begin; |
382 | for (const auto &I : Schedule) { |
383 | auto MI = getMachineInstr(I); |
384 | if (MI != &*Top) { |
385 | BB->remove(I: MI); |
386 | BB->insert(Top, MI); |
387 | if (!MI->isDebugInstr()) |
388 | LIS->handleMove(MI&: *MI, UpdateFlags: true); |
389 | } |
390 | if (!MI->isDebugInstr()) { |
391 | // Reset read - undef flags and update them later. |
392 | for (auto &Op : MI->all_defs()) |
393 | Op.setIsUndef(false); |
394 | |
395 | RegisterOperands RegOpers; |
396 | RegOpers.collect(MI: *MI, TRI: *TRI, MRI, /*ShouldTrackLaneMasks*/TrackLaneMasks: true, |
397 | /*IgnoreDead*/false); |
398 | // Adjust liveness and add missing dead+read-undef flags. |
399 | auto SlotIdx = LIS->getInstructionIndex(Instr: *MI).getRegSlot(); |
400 | RegOpers.adjustLaneLiveness(LIS: *LIS, MRI, Pos: SlotIdx, AddFlagsMI: MI); |
401 | } |
402 | Top = std::next(MI->getIterator()); |
403 | } |
404 | RegionBegin = getMachineInstr(Schedule.front()); |
405 | |
406 | // Schedule consisting of MachineInstr* is considered 'detached' |
407 | // and already interleaved with debug values |
408 | if (!std::is_same_v<decltype(*Schedule.begin()), MachineInstr*>) { |
409 | placeDebugValues(); |
410 | // Unfortunately placeDebugValues incorrectly modifies RegionEnd, restore |
411 | // assert(R.End == RegionEnd); |
412 | RegionEnd = R.End; |
413 | } |
414 | |
415 | R.Begin = RegionBegin; |
416 | R.MaxPressure = MaxRP; |
417 | |
418 | #ifndef NDEBUG |
419 | const auto RegionMaxRP = getRegionPressure(R); |
420 | const auto &ST = MF.getSubtarget<GCNSubtarget>(); |
421 | #endif |
422 | assert( |
423 | (SchedMaxRP == RegionMaxRP && (MaxRP.empty() || SchedMaxRP == MaxRP)) || |
424 | (dbgs() << "Max RP mismatch!!!\n" |
425 | "RP for schedule (calculated): " |
426 | << print(SchedMaxRP, &ST) |
427 | << "RP for schedule (reported): " << print(MaxRP, &ST) |
428 | << "RP after scheduling: " << print(RegionMaxRP, &ST), |
429 | false)); |
430 | } |
431 | |
432 | // Sort recorded regions by pressure - highest at the front |
433 | void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) { |
434 | llvm::sort(C&: Regions, Comp: [this, TargetOcc](const Region *R1, const Region *R2) { |
435 | return R2->MaxPressure.less(MF, O: R1->MaxPressure, MaxOccupancy: TargetOcc); |
436 | }); |
437 | } |
438 | |
439 | /////////////////////////////////////////////////////////////////////////////// |
440 | // Legacy MaxOccupancy Strategy |
441 | |
442 | // Tries to increase occupancy applying minreg scheduler for a sequence of |
443 | // most demanding regions. Obtained schedules are saved as BestSchedule for a |
444 | // region. |
445 | // TargetOcc is the best achievable occupancy for a kernel. |
446 | // Returns better occupancy on success or current occupancy on fail. |
447 | // BestSchedules aren't deleted on fail. |
448 | unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) { |
449 | // TODO: assert Regions are sorted descending by pressure |
450 | const auto &ST = MF.getSubtarget<GCNSubtarget>(); |
451 | const unsigned DynamicVGPRBlockSize = |
452 | MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize(); |
453 | const auto Occ = |
454 | Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize); |
455 | LLVM_DEBUG(dbgs() << "Trying to improve occupancy, target = " << TargetOcc |
456 | << ", current = " << Occ << '\n'); |
457 | |
458 | auto NewOcc = TargetOcc; |
459 | for (auto *R : Regions) { |
460 | // Always build the DAG to add mutations |
461 | BuildDAG DAG(*R, *this); |
462 | |
463 | if (R->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize) >= NewOcc) |
464 | continue; |
465 | |
466 | LLVM_DEBUG(printRegion(dbgs(), R->Begin, R->End, LIS, 3); |
467 | printLivenessInfo(dbgs(), R->Begin, R->End, LIS)); |
468 | |
469 | const auto MinSchedule = makeMinRegSchedule(TopRoots: DAG.getTopRoots(), DAG: *this); |
470 | const auto MaxRP = getSchedulePressure(R: *R, Schedule: MinSchedule); |
471 | LLVM_DEBUG(dbgs() << "Occupancy improvement attempt:\n" ; |
472 | printSchedRP(dbgs(), R->MaxPressure, MaxRP)); |
473 | |
474 | NewOcc = std::min(a: NewOcc, b: MaxRP.getOccupancy(ST, DynamicVGPRBlockSize)); |
475 | if (NewOcc <= Occ) |
476 | break; |
477 | |
478 | setBestSchedule(R&: *R, Schedule: MinSchedule, MaxRP); |
479 | } |
480 | LLVM_DEBUG(dbgs() << "New occupancy = " << NewOcc |
481 | << ", prev occupancy = " << Occ << '\n'); |
482 | if (NewOcc > Occ) { |
483 | SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
484 | MFI->increaseOccupancy(MF, Limit: NewOcc); |
485 | } |
486 | |
487 | return std::max(a: NewOcc, b: Occ); |
488 | } |
489 | |
490 | void GCNIterativeScheduler::scheduleLegacyMaxOccupancy( |
491 | bool TryMaximizeOccupancy) { |
492 | const auto &ST = MF.getSubtarget<GCNSubtarget>(); |
493 | SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
494 | auto TgtOcc = MFI->getMinAllowedOccupancy(); |
495 | unsigned DynamicVGPRBlockSize = MFI->getDynamicVGPRBlockSize(); |
496 | |
497 | sortRegionsByPressure(TargetOcc: TgtOcc); |
498 | auto Occ = |
499 | Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize); |
500 | |
501 | bool IsReentry = false; |
502 | if (TryMaximizeOccupancy && Occ < TgtOcc) { |
503 | Occ = tryMaximizeOccupancy(TargetOcc: TgtOcc); |
504 | IsReentry = true; |
505 | } |
506 | |
507 | // This is really weird but for some magic scheduling regions twice |
508 | // gives performance improvement |
509 | const int NumPasses = Occ < TgtOcc ? 2 : 1; |
510 | |
511 | TgtOcc = std::min(a: Occ, b: TgtOcc); |
512 | LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, " |
513 | "target occupancy = " |
514 | << TgtOcc << '\n'); |
515 | GCNMaxOccupancySchedStrategy LStrgy(Context, /*IsLegacyScheduler=*/true); |
516 | unsigned FinalOccupancy = std::min(a: Occ, b: MFI->getOccupancy()); |
517 | |
518 | for (int I = 0; I < NumPasses; ++I) { |
519 | // running first pass with TargetOccupancy = 0 mimics previous scheduling |
520 | // approach and is a performance magic |
521 | LStrgy.setTargetOccupancy(I == 0 ? 0 : TgtOcc); |
522 | for (auto *R : Regions) { |
523 | OverrideLegacyStrategy Ovr(*R, LStrgy, *this); |
524 | IsReentry |= I > 0; |
525 | swapIGLPMutations(R: *R, IsReentry); |
526 | Ovr.schedule(); |
527 | const auto RP = getRegionPressure(R: *R); |
528 | LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP)); |
529 | |
530 | if (RP.getOccupancy(ST, DynamicVGPRBlockSize) < TgtOcc) { |
531 | LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc); |
532 | if (R->BestSchedule.get() && R->BestSchedule->MaxPressure.getOccupancy( |
533 | ST, DynamicVGPRBlockSize) >= TgtOcc) { |
534 | LLVM_DEBUG(dbgs() << ", scheduling minimal register\n" ); |
535 | scheduleBest(R&: *R); |
536 | } else { |
537 | LLVM_DEBUG(dbgs() << ", restoring\n" ); |
538 | Ovr.restoreOrder(); |
539 | assert(R->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize) >= |
540 | TgtOcc); |
541 | } |
542 | } |
543 | FinalOccupancy = |
544 | std::min(a: FinalOccupancy, b: RP.getOccupancy(ST, DynamicVGPRBlockSize)); |
545 | } |
546 | } |
547 | MFI->limitOccupancy(Limit: FinalOccupancy); |
548 | } |
549 | |
550 | /////////////////////////////////////////////////////////////////////////////// |
551 | // Minimal Register Strategy |
552 | |
553 | void GCNIterativeScheduler::scheduleMinReg(bool force) { |
554 | const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
555 | const auto TgtOcc = MFI->getOccupancy(); |
556 | sortRegionsByPressure(TargetOcc: TgtOcc); |
557 | |
558 | auto MaxPressure = Regions.front()->MaxPressure; |
559 | for (auto *R : Regions) { |
560 | if (!force && R->MaxPressure.less(MF, O: MaxPressure, MaxOccupancy: TgtOcc)) |
561 | break; |
562 | |
563 | BuildDAG DAG(*R, *this); |
564 | const auto MinSchedule = makeMinRegSchedule(TopRoots: DAG.getTopRoots(), DAG: *this); |
565 | |
566 | const auto RP = getSchedulePressure(R: *R, Schedule: MinSchedule); |
567 | LLVM_DEBUG(if (R->MaxPressure.less(MF, RP, TgtOcc)) { |
568 | dbgs() << "\nWarning: Pressure becomes worse after minreg!" ; |
569 | printSchedRP(dbgs(), R->MaxPressure, RP); |
570 | }); |
571 | |
572 | if (!force && MaxPressure.less(MF, O: RP, MaxOccupancy: TgtOcc)) |
573 | break; |
574 | |
575 | scheduleRegion(R&: *R, Schedule: MinSchedule, MaxRP: RP); |
576 | LLVM_DEBUG(printSchedResult(dbgs(), R, RP)); |
577 | |
578 | MaxPressure = RP; |
579 | } |
580 | } |
581 | |
582 | /////////////////////////////////////////////////////////////////////////////// |
583 | // ILP scheduler port |
584 | |
585 | void GCNIterativeScheduler::scheduleILP( |
586 | bool TryMaximizeOccupancy) { |
587 | const auto &ST = MF.getSubtarget<GCNSubtarget>(); |
588 | SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
589 | auto TgtOcc = MFI->getMinAllowedOccupancy(); |
590 | unsigned DynamicVGPRBlockSize = MFI->getDynamicVGPRBlockSize(); |
591 | |
592 | sortRegionsByPressure(TargetOcc: TgtOcc); |
593 | auto Occ = |
594 | Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize); |
595 | |
596 | bool IsReentry = false; |
597 | if (TryMaximizeOccupancy && Occ < TgtOcc) { |
598 | Occ = tryMaximizeOccupancy(TargetOcc: TgtOcc); |
599 | IsReentry = true; |
600 | } |
601 | |
602 | TgtOcc = std::min(a: Occ, b: TgtOcc); |
603 | LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, " |
604 | "target occupancy = " |
605 | << TgtOcc << '\n'); |
606 | |
607 | unsigned FinalOccupancy = std::min(a: Occ, b: MFI->getOccupancy()); |
608 | for (auto *R : Regions) { |
609 | BuildDAG DAG(*R, *this, IsReentry); |
610 | const auto ILPSchedule = makeGCNILPScheduler(BotRoots: DAG.getBottomRoots(), DAG: *this); |
611 | |
612 | const auto RP = getSchedulePressure(R: *R, Schedule: ILPSchedule); |
613 | LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP)); |
614 | |
615 | if (RP.getOccupancy(ST, DynamicVGPRBlockSize) < TgtOcc) { |
616 | LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc); |
617 | if (R->BestSchedule.get() && R->BestSchedule->MaxPressure.getOccupancy( |
618 | ST, DynamicVGPRBlockSize) >= TgtOcc) { |
619 | LLVM_DEBUG(dbgs() << ", scheduling minimal register\n" ); |
620 | scheduleBest(R&: *R); |
621 | } |
622 | } else { |
623 | scheduleRegion(R&: *R, Schedule: ILPSchedule, MaxRP: RP); |
624 | LLVM_DEBUG(printSchedResult(dbgs(), R, RP)); |
625 | FinalOccupancy = |
626 | std::min(a: FinalOccupancy, b: RP.getOccupancy(ST, DynamicVGPRBlockSize)); |
627 | } |
628 | } |
629 | MFI->limitOccupancy(Limit: FinalOccupancy); |
630 | } |
631 | |