1 | //===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H |
14 | #define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H |
15 | |
16 | #include "GCNRegPressure.h" |
17 | #include "llvm/ADT/MapVector.h" |
18 | #include "llvm/CodeGen/MachineScheduler.h" |
19 | |
20 | namespace llvm { |
21 | |
22 | class SIMachineFunctionInfo; |
23 | class SIRegisterInfo; |
24 | class GCNSubtarget; |
25 | class GCNSchedStage; |
26 | |
// Identifies a scheduling stage applied to a function. Stages are run in
// increasing numeric order by GCNScheduleDAGMILive.
enum class GCNSchedStageID : unsigned {
  OccInitialSchedule = 0,
  UnclusteredHighRPReschedule = 1,
  ClusteredLowOccupancyReschedule = 2,
  PreRARematerialize = 3,
  ILPInitialSchedule = 4
};
34 | |
35 | #ifndef NDEBUG |
36 | raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID); |
37 | #endif |
38 | |
/// This is a minimal scheduler strategy. The main difference between this
/// and the GenericScheduler is that GCNSchedStrategy uses different
/// heuristics to determine excess/critical pressure sets.
class GCNSchedStrategy : public GenericScheduler {
protected:
  /// Picks the best node from either the top or bottom scheduling boundary.
  SUnit *pickNodeBidirectional(bool &IsTopNode);

  /// Picks the best candidate from \p Zone's available queue into \p Cand.
  void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
                         const RegPressureTracker &RPTracker,
                         SchedCandidate &Cand, bool IsBottomUp);

  /// Initializes \p Cand for \p SU, evaluating its register-pressure impact
  /// against the current SGPR/VGPR pressure and the excess/critical limits.
  void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
                     const RegPressureTracker &RPTracker,
                     const SIRegisterInfo *SRI, unsigned SGPRPressure,
                     unsigned VGPRPressure, bool IsBottomUp);

  // Scratch pressure vector, reused across candidate evaluations.
  std::vector<unsigned> Pressure;

  // Scratch max-pressure vector, reused across candidate evaluations.
  std::vector<unsigned> MaxPressure;

  // SGPR pressure above this limit is considered excess (likely to spill).
  unsigned SGPRExcessLimit;

  // VGPR pressure above this limit is considered excess (likely to spill).
  unsigned VGPRExcessLimit;

  // Occupancy (waves per SIMD) this strategy is currently scheduling for.
  unsigned TargetOccupancy;

  MachineFunction *MF;

  // Scheduling stages for this strategy.
  SmallVector<GCNSchedStageID, 4> SchedStages;

  // Pointer to the current SchedStageID.
  SmallVectorImpl<GCNSchedStageID>::iterator CurrentStage = nullptr;

public:
  // schedule() has seen register pressure over the critical limits and had to
  // track register pressure for actual scheduling heuristics.
  bool HasHighPressure;

  // Schedule known to have excess register pressure. Be more conservative in
  // increasing ILP and preserving VGPRs.
  bool KnownExcessRP = false;

  // An error margin is necessary because of poor performance of the generic RP
  // tracker and can be adjusted up for tuning heuristics to try and more
  // aggressively reduce register pressure.
  unsigned ErrorMargin = 3;

  // Bias for SGPR limits under a high register pressure.
  const unsigned HighRPSGPRBias = 7;

  // Bias for VGPR limits under a high register pressure.
  const unsigned HighRPVGPRBias = 7;

  // SGPR pressure above this limit risks lowering occupancy.
  unsigned SGPRCriticalLimit;

  // VGPR pressure above this limit risks lowering occupancy.
  unsigned VGPRCriticalLimit;

  // Additional bias applied to SGPRCriticalLimit by some stages.
  unsigned SGPRLimitBias = 0;

  // Additional bias applied to VGPRCriticalLimit by some stages.
  unsigned VGPRLimitBias = 0;

  GCNSchedStrategy(const MachineSchedContext *C);

  SUnit *pickNode(bool &IsTopNode) override;

  void initialize(ScheduleDAGMI *DAG) override;

  unsigned getTargetOccupancy() { return TargetOccupancy; }

  void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }

  // Returns the stage currently being run.
  GCNSchedStageID getCurrentStage();

  // Advances stage. Returns true if there are remaining stages.
  bool advanceStage();

  bool hasNextStage() const;

  GCNSchedStageID getNextStage() const;
};
120 | |
/// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
/// maximum number of waves per simd). Behaves like GCNSchedStrategy with the
/// occupancy-oriented stage pipeline installed by its constructor.
class GCNMaxOccupancySchedStrategy final : public GCNSchedStrategy {
public:
  GCNMaxOccupancySchedStrategy(const MachineSchedContext *C);
};
127 | |
/// The goal of this scheduling strategy is to maximize ILP for a single wave
/// (i.e. latency hiding).
class GCNMaxILPSchedStrategy final : public GCNSchedStrategy {
protected:
  /// Overrides the generic candidate comparison to prioritize latency hiding
  /// over register pressure.
  bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                    SchedBoundary *Zone) const override;

public:
  GCNMaxILPSchedStrategy(const MachineSchedContext *C);
};
138 | |
/// Simple cost model for a schedule: its length in cycles and the number of
/// stall (bubble) cycles it contains.
class ScheduleMetrics {
  // Total schedule length, in cycles. Zero-initialized so a
  // default-constructed object has well-defined accessors.
  unsigned ScheduleLength = 0;
  // Number of stall (bubble) cycles in the schedule.
  unsigned BubbleCycles = 0;

public:
  ScheduleMetrics() = default;
  ScheduleMetrics(unsigned L, unsigned BC)
      : ScheduleLength(L), BubbleCycles(BC) {}
  unsigned getLength() const { return ScheduleLength; }
  unsigned getBubbles() const { return BubbleCycles; }
  /// Returns the bubble-to-length ratio scaled by ScaleFactor. Never returns
  /// 0, so the result is safe to use as a comparison baseline.
  unsigned getMetric() const {
    // Guard against division by zero for an empty (zero-length) schedule.
    if (ScheduleLength == 0)
      return 1;
    unsigned Metric = (BubbleCycles * ScaleFactor) / ScheduleLength;
    // Metric is zero if the amount of bubbles is less than 1% which is too
    // small. So, return 1.
    return Metric ? Metric : 1;
  }
  static const unsigned ScaleFactor;
};
157 | |
158 | inline raw_ostream &operator<<(raw_ostream &OS, const ScheduleMetrics &Sm) { |
159 | dbgs() << "\n Schedule Metric (scaled by " |
160 | << ScheduleMetrics::ScaleFactor |
161 | << " ) is: " << Sm.getMetric() << " [ " << Sm.getBubbles() << "/" |
162 | << Sm.getLength() << " ]\n" ; |
163 | return OS; |
164 | } |
165 | |
// Live-register-tracking scheduling DAG that drives the multi-stage GCN
// scheduling pipeline over all regions of a function.
class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
  friend class GCNSchedStage;
  friend class OccInitialScheduleStage;
  friend class UnclusteredHighRPStage;
  friend class ClusteredLowOccStage;
  friend class PreRARematStage;
  friend class ILPInitialScheduleStage;

  const GCNSubtarget &ST;

  SIMachineFunctionInfo &MFI;

  // Occupancy target at the beginning of function scheduling cycle.
  unsigned StartingOccupancy;

  // Minimal real occupancy recorded for the function.
  unsigned MinOccupancy;

  // Vector of regions recorded for later rescheduling.
  SmallVector<std::pair<MachineBasicBlock::iterator,
                        MachineBasicBlock::iterator>, 32> Regions;

  // Records if a region is not yet scheduled, or schedule has been reverted,
  // or we generally desire to reschedule it.
  BitVector RescheduleRegions;

  // Record regions with high register pressure.
  BitVector RegionsWithHighRP;

  // Record regions with excess register pressure over the physical register
  // limit. Register pressure in these regions usually will result in spilling.
  BitVector RegionsWithExcessRP;

  // Regions that have the same occupancy as the latest MinOccupancy.
  BitVector RegionsWithMinOcc;

  // Regions that have IGLP instructions (SCHED_GROUP_BARRIER or IGLP_OPT).
  BitVector RegionsWithIGLPInstrs;

  // Region live-in cache, indexed by region.
  SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns;

  // Region pressure cache, indexed by region.
  SmallVector<GCNRegPressure, 32> Pressure;

  // Temporary basic block live-in cache.
  DenseMap<const MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns;

  // Live-in sets keyed by the first instruction of each region.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;

  // Builds the live-in map for all regions of the function.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getBBLiveInMap() const;

  // Return current region pressure.
  GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;

  // Compute and cache live-ins and pressure for all regions in block.
  void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);

  // Update region boundaries when removing MI or inserting NewMI before MI.
  void updateRegionBoundaries(
      SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
                                MachineBasicBlock::iterator>> &RegionBoundaries,
      MachineBasicBlock::iterator MI, MachineInstr *NewMI,
      bool Removing = false);

  // Runs each configured scheduling stage over every region.
  void runSchedStages();

  // Factory for the GCNSchedStage subclass matching SchedStageID.
  std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);

public:
  GCNScheduleDAGMILive(MachineSchedContext *C,
                       std::unique_ptr<MachineSchedStrategy> S);

  void schedule() override;

  void finalizeSchedule() override;
};
243 | |
// Base class for one scheduling stage of the multi-stage pipeline that
// GCNScheduleDAGMILive applies to a function.
class GCNSchedStage {
protected:
  GCNScheduleDAGMILive &DAG;

  GCNSchedStrategy &S;

  MachineFunction &MF;

  SIMachineFunctionInfo &MFI;

  const GCNSubtarget &ST;

  const GCNSchedStageID StageID;

  // The current block being scheduled.
  MachineBasicBlock *CurrentMBB = nullptr;

  // Current region index.
  unsigned RegionIdx = 0;

  // Record the original order of instructions before scheduling.
  std::vector<MachineInstr *> Unsched;

  // RP before scheduling the current region.
  GCNRegPressure PressureBefore;

  // RP after scheduling the current region.
  GCNRegPressure PressureAfter;

  // DAG mutations temporarily swapped out for the duration of a stage.
  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG);

public:
  // Initialize state for a scheduling stage. Returns false if the current stage
  // should be skipped.
  virtual bool initGCNSchedStage();

  // Finalize state after finishing a scheduling pass on the function.
  virtual void finalizeGCNSchedStage();

  // Setup for scheduling a region. Returns false if the current region should
  // be skipped.
  virtual bool initGCNRegion();

  // Track whether a new region is also a new MBB.
  void setupNewBlock();

  // Finalize state after scheduling a region.
  void finalizeGCNRegion();

  // Check result of scheduling.
  void checkScheduling();

  // Computes the given schedule's virtual execution time in clocks.
  ScheduleMetrics getScheduleMetrics(const std::vector<SUnit> &InputSchedule);
  ScheduleMetrics getScheduleMetrics(const GCNScheduleDAGMILive &DAG);
  // Returns the cycle at which SU becomes ready, given the ready cycles of
  // its predecessors.
  unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
                                  DenseMap<unsigned, unsigned> &ReadyCycles,
                                  const TargetSchedModel &SM);

  // Returns true if scheduling should be reverted.
  virtual bool shouldRevertScheduling(unsigned WavesAfter);

  // Returns true if current region has known excess pressure.
  bool isRegionWithExcessRP() const {
    return DAG.RegionsWithExcessRP[RegionIdx];
  }

  // Returns true if the new schedule may result in more spilling.
  bool mayCauseSpilling(unsigned WavesAfter);

  // Attempt to revert scheduling for this region.
  void revertScheduling();

  // Move on to the next region of the function.
  void advanceRegion() { RegionIdx++; }

  virtual ~GCNSchedStage() = default;
};
324 | |
// Initial scheduling pass aimed at maximizing occupancy.
class OccInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  OccInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};
332 | |
// Rescheduling pass for high-register-pressure regions that relaxes
// clustering to give the scheduler more freedom to reduce pressure.
class UnclusteredHighRPStage : public GCNSchedStage {
private:
  // Save the initial occupancy before starting this stage.
  unsigned InitialOccupancy;

public:
  bool initGCNSchedStage() override;

  void finalizeGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  UnclusteredHighRPStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};
350 | |
// Retry function scheduling if we found resulting occupancy and it is
// lower than used for other scheduling passes. This will give more freedom
// to schedule low register pressure blocks.
class ClusteredLowOccStage : public GCNSchedStage {
public:
  bool initGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};
365 | |
// Pre-RA stage that tries to raise occupancy by rematerializing (sinking)
// trivially rematerializable instructions into the regions that use them,
// reducing live-through register pressure.
class PreRARematStage : public GCNSchedStage {
private:
  // Each region at MinOccupancy will have their own list of trivially
  // rematerializable instructions we can remat to reduce RP. The list maps an
  // instruction to the position we should remat before, usually the MI using
  // the rematerializable instruction.
  MapVector<unsigned, MapVector<MachineInstr *, MachineInstr *>>
      RematerializableInsts;

  // Map a trivially rematerializable def to a list of regions at MinOccupancy
  // that has the defined reg as a live-in.
  DenseMap<MachineInstr *, SmallVector<unsigned, 4>> RematDefToLiveInRegions;

  // Collect all trivially rematerializable VGPR instructions with a single def
  // and single use outside the defining block into RematerializableInsts.
  void collectRematerializableInstructions();

  // Returns true if MI can be rematerialized without side effects or extra
  // dependencies.
  bool isTriviallyReMaterializable(const MachineInstr &MI);

  // TODO: Should also attempt to reduce RP of SGPRs and AGPRs
  // Attempt to reduce RP of VGPR by sinking trivially rematerializable
  // instructions. Returns true if we were able to sink instruction(s).
  bool sinkTriviallyRematInsts(const GCNSubtarget &ST,
                               const TargetInstrInfo *TII);

public:
  bool initGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};
401 | |
// Initial scheduling pass aimed at maximizing ILP (used with
// GCNMaxILPSchedStrategy).
class ILPInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  ILPInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};
409 | |
// Post-RA scheduling DAG; temporarily swaps out DAG mutations when the
// scheduled region contains IGLP instructions.
class GCNPostScheduleDAGMILive final : public ScheduleDAGMI {
private:
  // DAG mutations saved while scheduling an IGLP region.
  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  // Whether the current region contains IGLP instructions.
  bool HasIGLPInstrs = false;

public:
  void schedule() override;

  void finalizeSchedule() override;

  GCNPostScheduleDAGMILive(MachineSchedContext *C,
                           std::unique_ptr<MachineSchedStrategy> S,
                           bool RemoveKillFlags);
};
425 | |
426 | } // End namespace llvm |
427 | |
428 | #endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H |
429 | |