1//===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
14#define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
15
16#include "GCNRegPressure.h"
17#include "llvm/ADT/MapVector.h"
18#include "llvm/CodeGen/MachineScheduler.h"
19
20namespace llvm {
21
22class SIMachineFunctionInfo;
23class SIRegisterInfo;
24class GCNSubtarget;
25class GCNSchedStage;
26
27enum class GCNSchedStageID : unsigned {
28 OccInitialSchedule = 0,
29 UnclusteredHighRPReschedule = 1,
30 ClusteredLowOccupancyReschedule = 2,
31 PreRARematerialize = 3,
32 ILPInitialSchedule = 4
33};
34
35#ifndef NDEBUG
36raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
37#endif
38
39/// This is a minimal scheduler strategy. The main difference between this
40/// and the GenericScheduler is that GCNSchedStrategy uses different
41/// heuristics to determine excess/critical pressure sets.
42class GCNSchedStrategy : public GenericScheduler {
43protected:
44 SUnit *pickNodeBidirectional(bool &IsTopNode);
45
46 void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
47 const RegPressureTracker &RPTracker,
48 SchedCandidate &Cand, bool IsBottomUp);
49
50 void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
51 const RegPressureTracker &RPTracker,
52 const SIRegisterInfo *SRI, unsigned SGPRPressure,
53 unsigned VGPRPressure, bool IsBottomUp);
54
55 std::vector<unsigned> Pressure;
56
57 std::vector<unsigned> MaxPressure;
58
59 unsigned SGPRExcessLimit;
60
61 unsigned VGPRExcessLimit;
62
63 unsigned TargetOccupancy;
64
65 MachineFunction *MF;
66
67 // Scheduling stages for this strategy.
68 SmallVector<GCNSchedStageID, 4> SchedStages;
69
70 // Pointer to the current SchedStageID.
71 SmallVectorImpl<GCNSchedStageID>::iterator CurrentStage = nullptr;
72
73public:
74 // schedule() have seen register pressure over the critical limits and had to
75 // track register pressure for actual scheduling heuristics.
76 bool HasHighPressure;
77
78 // Schedule known to have excess register pressure. Be more conservative in
79 // increasing ILP and preserving VGPRs.
80 bool KnownExcessRP = false;
81
82 // An error margin is necessary because of poor performance of the generic RP
83 // tracker and can be adjusted up for tuning heuristics to try and more
84 // aggressively reduce register pressure.
85 unsigned ErrorMargin = 3;
86
87 // Bias for SGPR limits under a high register pressure.
88 const unsigned HighRPSGPRBias = 7;
89
90 // Bias for VGPR limits under a high register pressure.
91 const unsigned HighRPVGPRBias = 7;
92
93 unsigned SGPRCriticalLimit;
94
95 unsigned VGPRCriticalLimit;
96
97 unsigned SGPRLimitBias = 0;
98
99 unsigned VGPRLimitBias = 0;
100
101 GCNSchedStrategy(const MachineSchedContext *C);
102
103 SUnit *pickNode(bool &IsTopNode) override;
104
105 void initialize(ScheduleDAGMI *DAG) override;
106
107 unsigned getTargetOccupancy() { return TargetOccupancy; }
108
109 void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }
110
111 GCNSchedStageID getCurrentStage();
112
113 // Advances stage. Returns true if there are remaining stages.
114 bool advanceStage();
115
116 bool hasNextStage() const;
117
118 GCNSchedStageID getNextStage() const;
119};
120
121/// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
122/// maximum number of waves per simd).
123class GCNMaxOccupancySchedStrategy final : public GCNSchedStrategy {
124public:
125 GCNMaxOccupancySchedStrategy(const MachineSchedContext *C);
126};
127
128/// The goal of this scheduling strategy is to maximize ILP for a single wave
129/// (i.e. latency hiding).
130class GCNMaxILPSchedStrategy final : public GCNSchedStrategy {
131protected:
132 bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
133 SchedBoundary *Zone) const override;
134
135public:
136 GCNMaxILPSchedStrategy(const MachineSchedContext *C);
137};
138
139class ScheduleMetrics {
140 unsigned ScheduleLength;
141 unsigned BubbleCycles;
142
143public:
144 ScheduleMetrics() {}
145 ScheduleMetrics(unsigned L, unsigned BC)
146 : ScheduleLength(L), BubbleCycles(BC) {}
147 unsigned getLength() const { return ScheduleLength; }
148 unsigned getBubbles() const { return BubbleCycles; }
149 unsigned getMetric() const {
150 unsigned Metric = (BubbleCycles * ScaleFactor) / ScheduleLength;
151 // Metric is zero if the amount of bubbles is less than 1% which is too
152 // small. So, return 1.
153 return Metric ? Metric : 1;
154 }
155 static const unsigned ScaleFactor;
156};
157
158inline raw_ostream &operator<<(raw_ostream &OS, const ScheduleMetrics &Sm) {
159 dbgs() << "\n Schedule Metric (scaled by "
160 << ScheduleMetrics::ScaleFactor
161 << " ) is: " << Sm.getMetric() << " [ " << Sm.getBubbles() << "/"
162 << Sm.getLength() << " ]\n";
163 return OS;
164}
165
166class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
167 friend class GCNSchedStage;
168 friend class OccInitialScheduleStage;
169 friend class UnclusteredHighRPStage;
170 friend class ClusteredLowOccStage;
171 friend class PreRARematStage;
172 friend class ILPInitialScheduleStage;
173
174 const GCNSubtarget &ST;
175
176 SIMachineFunctionInfo &MFI;
177
178 // Occupancy target at the beginning of function scheduling cycle.
179 unsigned StartingOccupancy;
180
181 // Minimal real occupancy recorder for the function.
182 unsigned MinOccupancy;
183
184 // Vector of regions recorder for later rescheduling
185 SmallVector<std::pair<MachineBasicBlock::iterator,
186 MachineBasicBlock::iterator>, 32> Regions;
187
188 // Records if a region is not yet scheduled, or schedule has been reverted,
189 // or we generally desire to reschedule it.
190 BitVector RescheduleRegions;
191
192 // Record regions with high register pressure.
193 BitVector RegionsWithHighRP;
194
195 // Record regions with excess register pressure over the physical register
196 // limit. Register pressure in these regions usually will result in spilling.
197 BitVector RegionsWithExcessRP;
198
199 // Regions that has the same occupancy as the latest MinOccupancy
200 BitVector RegionsWithMinOcc;
201
202 // Regions that have IGLP instructions (SCHED_GROUP_BARRIER or IGLP_OPT).
203 BitVector RegionsWithIGLPInstrs;
204
205 // Region live-in cache.
206 SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns;
207
208 // Region pressure cache.
209 SmallVector<GCNRegPressure, 32> Pressure;
210
211 // Temporary basic block live-in cache.
212 DenseMap<const MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns;
213
214 DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;
215
216 DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getBBLiveInMap() const;
217
218 // Return current region pressure.
219 GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;
220
221 // Compute and cache live-ins and pressure for all regions in block.
222 void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);
223
224 // Update region boundaries when removing MI or inserting NewMI before MI.
225 void updateRegionBoundaries(
226 SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
227 MachineBasicBlock::iterator>> &RegionBoundaries,
228 MachineBasicBlock::iterator MI, MachineInstr *NewMI,
229 bool Removing = false);
230
231 void runSchedStages();
232
233 std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);
234
235public:
236 GCNScheduleDAGMILive(MachineSchedContext *C,
237 std::unique_ptr<MachineSchedStrategy> S);
238
239 void schedule() override;
240
241 void finalizeSchedule() override;
242};
243
244// GCNSchedStrategy applies multiple scheduling stages to a function.
245class GCNSchedStage {
246protected:
247 GCNScheduleDAGMILive &DAG;
248
249 GCNSchedStrategy &S;
250
251 MachineFunction &MF;
252
253 SIMachineFunctionInfo &MFI;
254
255 const GCNSubtarget &ST;
256
257 const GCNSchedStageID StageID;
258
259 // The current block being scheduled.
260 MachineBasicBlock *CurrentMBB = nullptr;
261
262 // Current region index.
263 unsigned RegionIdx = 0;
264
265 // Record the original order of instructions before scheduling.
266 std::vector<MachineInstr *> Unsched;
267
268 // RP before scheduling the current region.
269 GCNRegPressure PressureBefore;
270
271 // RP after scheduling the current region.
272 GCNRegPressure PressureAfter;
273
274 std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
275
276 GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG);
277
278public:
279 // Initialize state for a scheduling stage. Returns false if the current stage
280 // should be skipped.
281 virtual bool initGCNSchedStage();
282
283 // Finalize state after finishing a scheduling pass on the function.
284 virtual void finalizeGCNSchedStage();
285
286 // Setup for scheduling a region. Returns false if the current region should
287 // be skipped.
288 virtual bool initGCNRegion();
289
290 // Track whether a new region is also a new MBB.
291 void setupNewBlock();
292
293 // Finalize state after scheudling a region.
294 void finalizeGCNRegion();
295
296 // Check result of scheduling.
297 void checkScheduling();
298
299 // computes the given schedule virtual execution time in clocks
300 ScheduleMetrics getScheduleMetrics(const std::vector<SUnit> &InputSchedule);
301 ScheduleMetrics getScheduleMetrics(const GCNScheduleDAGMILive &DAG);
302 unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
303 DenseMap<unsigned, unsigned> &ReadyCycles,
304 const TargetSchedModel &SM);
305
306 // Returns true if scheduling should be reverted.
307 virtual bool shouldRevertScheduling(unsigned WavesAfter);
308
309 // Returns true if current region has known excess pressure.
310 bool isRegionWithExcessRP() const {
311 return DAG.RegionsWithExcessRP[RegionIdx];
312 }
313
314 // Returns true if the new schedule may result in more spilling.
315 bool mayCauseSpilling(unsigned WavesAfter);
316
317 // Attempt to revert scheduling for this region.
318 void revertScheduling();
319
320 void advanceRegion() { RegionIdx++; }
321
322 virtual ~GCNSchedStage() = default;
323};
324
325class OccInitialScheduleStage : public GCNSchedStage {
326public:
327 bool shouldRevertScheduling(unsigned WavesAfter) override;
328
329 OccInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
330 : GCNSchedStage(StageID, DAG) {}
331};
332
333class UnclusteredHighRPStage : public GCNSchedStage {
334private:
335 // Save the initial occupancy before starting this stage.
336 unsigned InitialOccupancy;
337
338public:
339 bool initGCNSchedStage() override;
340
341 void finalizeGCNSchedStage() override;
342
343 bool initGCNRegion() override;
344
345 bool shouldRevertScheduling(unsigned WavesAfter) override;
346
347 UnclusteredHighRPStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
348 : GCNSchedStage(StageID, DAG) {}
349};
350
351// Retry function scheduling if we found resulting occupancy and it is
352// lower than used for other scheduling passes. This will give more freedom
353// to schedule low register pressure blocks.
354class ClusteredLowOccStage : public GCNSchedStage {
355public:
356 bool initGCNSchedStage() override;
357
358 bool initGCNRegion() override;
359
360 bool shouldRevertScheduling(unsigned WavesAfter) override;
361
362 ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
363 : GCNSchedStage(StageID, DAG) {}
364};
365
366class PreRARematStage : public GCNSchedStage {
367private:
368 // Each region at MinOccupancy will have their own list of trivially
369 // rematerializable instructions we can remat to reduce RP. The list maps an
370 // instruction to the position we should remat before, usually the MI using
371 // the rematerializable instruction.
372 MapVector<unsigned, MapVector<MachineInstr *, MachineInstr *>>
373 RematerializableInsts;
374
375 // Map a trivially rematerializable def to a list of regions at MinOccupancy
376 // that has the defined reg as a live-in.
377 DenseMap<MachineInstr *, SmallVector<unsigned, 4>> RematDefToLiveInRegions;
378
379 // Collect all trivially rematerializable VGPR instructions with a single def
380 // and single use outside the defining block into RematerializableInsts.
381 void collectRematerializableInstructions();
382
383 bool isTriviallyReMaterializable(const MachineInstr &MI);
384
385 // TODO: Should also attempt to reduce RP of SGPRs and AGPRs
386 // Attempt to reduce RP of VGPR by sinking trivially rematerializable
387 // instructions. Returns true if we were able to sink instruction(s).
388 bool sinkTriviallyRematInsts(const GCNSubtarget &ST,
389 const TargetInstrInfo *TII);
390
391public:
392 bool initGCNSchedStage() override;
393
394 bool initGCNRegion() override;
395
396 bool shouldRevertScheduling(unsigned WavesAfter) override;
397
398 PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
399 : GCNSchedStage(StageID, DAG) {}
400};
401
402class ILPInitialScheduleStage : public GCNSchedStage {
403public:
404 bool shouldRevertScheduling(unsigned WavesAfter) override;
405
406 ILPInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
407 : GCNSchedStage(StageID, DAG) {}
408};
409
410class GCNPostScheduleDAGMILive final : public ScheduleDAGMI {
411private:
412 std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
413
414 bool HasIGLPInstrs = false;
415
416public:
417 void schedule() override;
418
419 void finalizeSchedule() override;
420
421 GCNPostScheduleDAGMILive(MachineSchedContext *C,
422 std::unique_ptr<MachineSchedStrategy> S,
423 bool RemoveKillFlags);
424};
425
426} // End namespace llvm
427
428#endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
429