//===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
#define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H

#include "GCNRegPressure.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineScheduler.h"

namespace llvm {

class SIMachineFunctionInfo;
class SIRegisterInfo;
class GCNSubtarget;
class GCNSchedStage;

enum class GCNSchedStageID : unsigned {
  OccInitialSchedule = 0,
  UnclusteredHighRPReschedule = 1,
  ClusteredLowOccupancyReschedule = 2,
  PreRARematerialize = 3,
  ILPInitialSchedule = 4,
  MemoryClauseInitialSchedule = 5
};
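
// Illustrative sketch, not part of this header's API: each GCNSchedStrategy
// subclass populates its stage pipeline from these IDs. For example, an
// occupancy-oriented strategy might run, in order:
//
//   SchedStages.push_back(GCNSchedStageID::OccInitialSchedule);
//   SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule);
//   SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule);
//   SchedStages.push_back(GCNSchedStageID::PreRARematerialize);
//
// whereas an ILP-oriented strategy might run ILPInitialSchedule alone.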

#ifndef NDEBUG
raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
#endif

/// This is a minimal scheduler strategy. The main difference between this
/// and the GenericScheduler is that GCNSchedStrategy uses different
/// heuristics to determine excess/critical pressure sets.
class GCNSchedStrategy : public GenericScheduler {
protected:
  SUnit *pickNodeBidirectional(bool &IsTopNode);

  void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
                         const RegPressureTracker &RPTracker,
                         SchedCandidate &Cand, bool IsBottomUp);

  void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
                     const RegPressureTracker &RPTracker,
                     const SIRegisterInfo *SRI, unsigned SGPRPressure,
                     unsigned VGPRPressure, bool IsBottomUp);

  std::vector<unsigned> Pressure;

  std::vector<unsigned> MaxPressure;

  unsigned SGPRExcessLimit;

  unsigned VGPRExcessLimit;

  unsigned TargetOccupancy;

  MachineFunction *MF;

  // Scheduling stages for this strategy.
  SmallVector<GCNSchedStageID, 4> SchedStages;

  // Pointer to the current SchedStageID.
  SmallVectorImpl<GCNSchedStageID>::iterator CurrentStage = nullptr;
  // GCN RP tracker for top-down scheduling.
  mutable GCNDownwardRPTracker DownwardTracker;

  // GCN RP tracker for bottom-up scheduling.
  mutable GCNUpwardRPTracker UpwardTracker;

public:
  // Set if schedule() has seen register pressure above the critical limits and
  // had to track register pressure for the actual scheduling heuristics.
  bool HasHighPressure;

  // Schedule known to have excess register pressure. Be more conservative in
  // increasing ILP and preserving VGPRs.
  bool KnownExcessRP = false;

  // An error margin is necessary because of the inaccuracy of the generic RP
  // tracker; it can be increased when tuning heuristics to reduce register
  // pressure more aggressively.
  unsigned ErrorMargin = 3;

  // Bias for SGPR limits under high register pressure.
  const unsigned HighRPSGPRBias = 7;

  // Bias for VGPR limits under high register pressure.
  const unsigned HighRPVGPRBias = 7;
  unsigned SGPRCriticalLimit;

  unsigned VGPRCriticalLimit;

  unsigned SGPRLimitBias = 0;

  unsigned VGPRLimitBias = 0;

  GCNSchedStrategy(const MachineSchedContext *C);

  SUnit *pickNode(bool &IsTopNode) override;

  void schedNode(SUnit *SU, bool IsTopNode) override;

  void initialize(ScheduleDAGMI *DAG) override;

  unsigned getTargetOccupancy() { return TargetOccupancy; }

  void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }

  GCNSchedStageID getCurrentStage();

  // Advances stage. Returns true if there are remaining stages.
  bool advanceStage();

  bool hasNextStage() const;

  GCNSchedStageID getNextStage() const;

  GCNDownwardRPTracker *getDownwardTracker() { return &DownwardTracker; }

  GCNUpwardRPTracker *getUpwardTracker() { return &UpwardTracker; }
};
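
// A minimal usage sketch of the stage-pipeline API above (illustrative only;
// the real driver is GCNScheduleDAGMILive::runSchedStages, and `S` is a
// placeholder). This assumes the first advanceStage() call positions the
// strategy at its first stage:
//
//   GCNSchedStrategy &S = ...;
//   while (S.advanceStage()) {
//     GCNSchedStageID StageID = S.getCurrentStage();
//     // ... build and run the GCNSchedStage corresponding to StageID ...
//   }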

/// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
/// maximum number of waves per SIMD).
class GCNMaxOccupancySchedStrategy final : public GCNSchedStrategy {
public:
  GCNMaxOccupancySchedStrategy(const MachineSchedContext *C,
                               bool IsLegacyScheduler = false);
};

/// The goal of this scheduling strategy is to maximize ILP for a single wave
/// (i.e. latency hiding).
class GCNMaxILPSchedStrategy final : public GCNSchedStrategy {
protected:
  bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                    SchedBoundary *Zone) const override;

public:
  GCNMaxILPSchedStrategy(const MachineSchedContext *C);
};

/// The goal of this scheduling strategy is to maximize memory clauses for a
/// single wave.
class GCNMaxMemoryClauseSchedStrategy final : public GCNSchedStrategy {
protected:
  bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                    SchedBoundary *Zone) const override;

public:
  GCNMaxMemoryClauseSchedStrategy(const MachineSchedContext *C);
};

class ScheduleMetrics {
  unsigned ScheduleLength;
  unsigned BubbleCycles;

public:
  ScheduleMetrics() {}
  ScheduleMetrics(unsigned L, unsigned BC)
      : ScheduleLength(L), BubbleCycles(BC) {}
  unsigned getLength() const { return ScheduleLength; }
  unsigned getBubbles() const { return BubbleCycles; }
  unsigned getMetric() const {
    unsigned Metric = (BubbleCycles * ScaleFactor) / ScheduleLength;
    // The metric is zero when bubbles account for less than 1% of the
    // schedule, which is too coarse to compare. So, return 1 instead.
    return Metric ? Metric : 1;
  }
  static const unsigned ScaleFactor;
};
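
// Worked example, assuming ScaleFactor == 100 as the "1%" comment above
// implies: a 250-cycle schedule with 20 bubble cycles yields
// (20 * 100) / 250 == 8, i.e. bubbles are ~8% of the schedule. With only 2
// bubble cycles the integer division gives 0, so getMetric() returns 1.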

inline raw_ostream &operator<<(raw_ostream &OS, const ScheduleMetrics &Sm) {
  OS << "\n Schedule Metric (scaled by " << ScheduleMetrics::ScaleFactor
     << ") is: " << Sm.getMetric() << " [ " << Sm.getBubbles() << "/"
     << Sm.getLength() << " ]\n";
  return OS;
}

class GCNScheduleDAGMILive;
class RegionPressureMap {
  GCNScheduleDAGMILive *DAG;
  // The live in/out pressure as indexed by the first or last MI in the region
  // before scheduling.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> RegionLiveRegMap;
  // The mapping of RegionIdx to key instruction.
  DenseMap<unsigned, MachineInstr *> IdxToInstruction;
  // Whether we are calculating LiveOuts or LiveIns.
  bool IsLiveOut;

public:
  RegionPressureMap() {}
  RegionPressureMap(GCNScheduleDAGMILive *GCNDAG, bool LiveOut)
      : DAG(GCNDAG), IsLiveOut(LiveOut) {}
  // Build the Instr->LiveReg and RegionIdx->Instr maps.
  void buildLiveRegMap();

  // Retrieve the LiveReg for a given RegionIdx.
  GCNRPTracker::LiveRegSet &getLiveRegsForRegionIdx(unsigned RegionIdx) {
    assert(IdxToInstruction.contains(RegionIdx));
    MachineInstr *Key = IdxToInstruction[RegionIdx];
    return RegionLiveRegMap[Key];
  }
};
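
// Usage sketch (illustrative; `GCNDAG` and `Idx` are placeholders): the maps
// must be built before any lookup.
//
//   RegionPressureMap LiveOuts(GCNDAG, /*LiveOut=*/true);
//   LiveOuts.buildLiveRegMap();
//   GCNRPTracker::LiveRegSet &Regs = LiveOuts.getLiveRegsForRegionIdx(Idx);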

/// A region's boundaries, i.e. a pair of instruction bundle iterators. The
/// lower boundary is inclusive, the upper boundary is exclusive.
using RegionBoundaries =
    std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>;
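
// Since RegionBoundaries is a half-open range [first, second), walking a
// region is a plain iterator loop (illustrative; `Bounds` is a placeholder):
//
//   for (MachineBasicBlock::iterator I = Bounds.first; I != Bounds.second;
//        ++I) {
//     MachineInstr &MI = *I;
//     // ... inspect MI ...
//   }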

class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
  friend class GCNSchedStage;
  friend class OccInitialScheduleStage;
  friend class UnclusteredHighRPStage;
  friend class ClusteredLowOccStage;
  friend class PreRARematStage;
  friend class ILPInitialScheduleStage;
  friend class RegionPressureMap;

  const GCNSubtarget &ST;

  SIMachineFunctionInfo &MFI;

  // Occupancy target at the beginning of the function scheduling cycle.
  unsigned StartingOccupancy;

  // Minimal real occupancy recorded for the function.
  unsigned MinOccupancy;

  // Vector of regions recorded for later rescheduling.
  SmallVector<RegionBoundaries, 32> Regions;

  // Record regions with high register pressure.
  BitVector RegionsWithHighRP;

  // Record regions with excess register pressure over the physical register
  // limit. Register pressure in these regions usually will result in spilling.
  BitVector RegionsWithExcessRP;

  // Regions that have the same occupancy as the latest MinOccupancy.
  BitVector RegionsWithMinOcc;

  // Regions that have IGLP instructions (SCHED_GROUP_BARRIER or IGLP_OPT).
  BitVector RegionsWithIGLPInstrs;

  // Region live-in cache.
  SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns;

  // Region pressure cache.
  SmallVector<GCNRegPressure, 32> Pressure;

  // Temporary basic block live-in cache.
  DenseMap<const MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns;

  // The map of the initial first region instruction to region live-in
  // registers.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;

  // Calculate the map of the initial first region instruction to region
  // live-in registers.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getRegionLiveInMap() const;

  // Calculate the map of the initial last region instruction to region
  // live-out registers.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
  getRegionLiveOutMap() const;

  // The live-out registers per region. These are internally stored as a map of
  // the initial last region instruction to region live-out registers, but can
  // be retrieved by RegionIdx with calls to getLiveRegsForRegionIdx.
  RegionPressureMap RegionLiveOuts;

  // Return current region pressure.
  GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;

  // Compute and cache live-ins and pressure for all regions in the block.
  void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);

  /// If necessary, updates a region's boundaries following insertion (\p NewMI
  /// != nullptr) or removal (\p NewMI == nullptr) of \p MI in the region. For
  /// an MI removal, this must be called before the MI is actually erased from
  /// its parent MBB.
  void updateRegionBoundaries(RegionBoundaries &RegionBounds,
                              MachineBasicBlock::iterator MI,
                              MachineInstr *NewMI);

  void runSchedStages();

  std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);

public:
  GCNScheduleDAGMILive(MachineSchedContext *C,
                       std::unique_ptr<MachineSchedStrategy> S);

  void schedule() override;

  void finalizeSchedule() override;
};

// GCNSchedStrategy applies multiple scheduling stages to a function.
class GCNSchedStage {
protected:
  GCNScheduleDAGMILive &DAG;

  GCNSchedStrategy &S;

  MachineFunction &MF;

  SIMachineFunctionInfo &MFI;

  const GCNSubtarget &ST;

  const GCNSchedStageID StageID;

  // The current block being scheduled.
  MachineBasicBlock *CurrentMBB = nullptr;

  // Current region index.
  unsigned RegionIdx = 0;

  // Record the original order of instructions before scheduling.
  std::vector<MachineInstr *> Unsched;

  // RP before scheduling the current region.
  GCNRegPressure PressureBefore;

  // RP after scheduling the current region.
  GCNRegPressure PressureAfter;

  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG);

public:
  // Initialize state for a scheduling stage. Returns false if the current
  // stage should be skipped.
  virtual bool initGCNSchedStage();

  // Finalize state after finishing a scheduling pass on the function.
  virtual void finalizeGCNSchedStage();

  // Setup for scheduling a region. Returns false if the current region should
  // be skipped.
  virtual bool initGCNRegion();

  // Track whether a new region is also a new MBB.
  void setupNewBlock();

  // Finalize state after scheduling a region.
  void finalizeGCNRegion();

  // Check the result of scheduling.
  void checkScheduling();

  // Computes the given schedule's virtual execution time in clock cycles.
  ScheduleMetrics getScheduleMetrics(const std::vector<SUnit> &InputSchedule);
  ScheduleMetrics getScheduleMetrics(const GCNScheduleDAGMILive &DAG);
  unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
                                  DenseMap<unsigned, unsigned> &ReadyCycles,
                                  const TargetSchedModel &SM);

  // Returns true if scheduling should be reverted.
  virtual bool shouldRevertScheduling(unsigned WavesAfter);

  // Returns true if the current region has known excess pressure.
  bool isRegionWithExcessRP() const {
    return DAG.RegionsWithExcessRP[RegionIdx];
  }

  // The region number this stage is currently working on.
  unsigned getRegionIdx() { return RegionIdx; }

  // Returns true if the new schedule may result in more spilling.
  bool mayCauseSpilling(unsigned WavesAfter);

  // Attempt to revert scheduling for this region.
  void revertScheduling();

  void advanceRegion() { RegionIdx++; }

  virtual ~GCNSchedStage() = default;
};
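
// A minimal sketch of a stage's lifecycle, pieced together from the comments
// above (illustrative; the real driver is GCNScheduleDAGMILive::runSchedStages
// and `Stage` is a placeholder):
//
//   if (Stage.initGCNSchedStage()) {
//     for (/* each region in the function */) {
//       if (!Stage.initGCNRegion()) {
//         Stage.advanceRegion();
//         continue;
//       }
//       // ... schedule the region ...
//       Stage.finalizeGCNRegion(); // may revert the region's schedule
//     }
//     Stage.finalizeGCNSchedStage();
//   }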

class OccInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  OccInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class UnclusteredHighRPStage : public GCNSchedStage {
private:
  // Save the initial occupancy before starting this stage.
  unsigned InitialOccupancy;

public:
  bool initGCNSchedStage() override;

  void finalizeGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  UnclusteredHighRPStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

// Retries function scheduling if the resulting occupancy turns out to be
// lower than the one assumed by previous scheduling passes. This gives more
// freedom to schedule low register pressure blocks.
class ClusteredLowOccStage : public GCNSchedStage {
public:
  bool initGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

/// Attempts to reduce function spilling or, if there is no spilling, to
/// increase function occupancy by one with respect to ArchVGPR usage by
/// sinking trivially rematerializable instructions to their uses. When the
/// stage estimates that reducing spilling or increasing occupancy is possible,
/// as few instructions as possible are rematerialized to limit potential
/// negative effects on function latency.
class PreRARematStage : public GCNSchedStage {
private:
  /// Useful information about a rematerializable instruction.
  struct RematInstruction {
    /// Single use of the rematerializable instruction's defined register,
    /// located in a different block.
    MachineInstr *UseMI;
    /// Rematerialized version of \p DefMI, set in
    /// PreRARematStage::rematerialize. Used for reverting rematerializations.
    MachineInstr *RematMI;
    /// Set of regions in which the rematerializable instruction's defined
    /// register is a live-in.
    SmallDenseSet<unsigned, 4> LiveInRegions;

    RematInstruction(MachineInstr *UseMI) : UseMI(UseMI) {}
  };

  /// Maps all MIs to their parent region. MI terminators are considered to be
  /// outside the region they delimit, and as such are not stored in the map.
  DenseMap<MachineInstr *, unsigned> MIRegion;
  /// Parent MBB of each region, in region order.
  SmallVector<MachineBasicBlock *> RegionBB;
  /// Collects instructions to rematerialize.
  MapVector<MachineInstr *, RematInstruction> Rematerializations;
  /// Collects regions whose live-ins or register pressure will change due to
  /// rematerializations.
  DenseMap<unsigned, GCNRegPressure> ImpactedRegions;
  /// In case we need to roll back rematerializations, save lane masks for all
  /// rematerialized registers in all regions in which they are live-ins.
  DenseMap<std::pair<unsigned, Register>, LaneBitmask> RegMasks;
  /// After successful stage initialization, indicates which regions should be
  /// rescheduled.
  BitVector RescheduleRegions;
  /// Target occupancy the stage estimates is reachable through
  /// rematerialization. Greater than or equal to the pre-stage min occupancy.
  unsigned TargetOcc;
  /// Occupancy achieved *only* through rematerializations (pre-rescheduling).
  /// Smaller than or equal to the target occupancy.
  unsigned AchievedOcc;
  /// Whether the stage is attempting to increase occupancy in the absence of
  /// spilling.
  bool IncreaseOccupancy;

  /// Returns whether rematerialization can reduce spilling or increase
  /// function occupancy by 1. If either is possible, collects instructions in
  /// PreRARematStage::Rematerializations and sets the target occupancy in
  /// PreRARematStage::TargetOcc.
  bool canIncreaseOccupancyOrReduceSpill();

  /// Returns whether \p MI is trivially rematerializable and has no virtual
  /// register uses.
  bool isTriviallyReMaterializable(const MachineInstr &MI);

  /// Rematerializes all instructions in PreRARematStage::Rematerializations
  /// and stores the achieved occupancy after remat in
  /// PreRARematStage::AchievedOcc.
  void rematerialize();

  /// If remat alone did not increase occupancy to the target one, rolls back
  /// all rematerializations and resets live-ins/RP in all regions impacted by
  /// the stage to their pre-stage values.
  void finalizeGCNSchedStage() override;

  /// Returns true if all the uses in \p InstToRemat defined at \p OriginalIdx
  /// are live at \p RematIdx. This only checks liveness of virtual reg uses.
  bool allUsesAvailableAt(const MachineInstr *InstToRemat,
                          SlotIndex OriginalIdx, SlotIndex RematIdx) const;

public:
  bool initGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG), RescheduleRegions(DAG.Regions.size()) {}
};
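
// Sketch of the stage's flow, pieced together from the doc comments above
// (illustrative, not the actual implementation):
//
//   1. canIncreaseOccupancyOrReduceSpill() decides whether remat can help
//      and, if so, fills Rematerializations and sets TargetOcc.
//   2. rematerialize() performs the remats and records AchievedOcc.
//   3. Regions flagged in RescheduleRegions are rescheduled.
//   4. finalizeGCNSchedStage() rolls the remats back if occupancy was the
//      goal but AchievedOcc still falls short of TargetOcc.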

class ILPInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  ILPInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class MemoryClauseInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  MemoryClauseInitialScheduleStage(GCNSchedStageID StageID,
                                   GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class GCNPostScheduleDAGMILive final : public ScheduleDAGMI {
private:
  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  bool HasIGLPInstrs = false;

public:
  void schedule() override;

  void finalizeSchedule() override;

  GCNPostScheduleDAGMILive(MachineSchedContext *C,
                           std::unique_ptr<MachineSchedStrategy> S,
                           bool RemoveKillFlags);
};

} // End namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H