1//===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
14#define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
15
16#include "GCNRegPressure.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/CodeGen/MachineBasicBlock.h"
19#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
20#include "llvm/CodeGen/MachineInstr.h"
21#include "llvm/CodeGen/MachineScheduler.h"
22#include "llvm/CodeGen/Rematerializer.h"
23
24namespace llvm {
25
26class SIMachineFunctionInfo;
27class SIRegisterInfo;
28class GCNSubtarget;
29class GCNSchedStage;
30
31enum class GCNSchedStageID : unsigned {
32 OccInitialSchedule = 0,
33 RewriteMFMAForm = 1,
34 UnclusteredHighRPReschedule = 2,
35 ClusteredLowOccupancyReschedule = 3,
36 PreRARematerialize = 4,
37 ILPInitialSchedule = 5,
38 MemoryClauseInitialSchedule = 6
39};
40
41#ifndef NDEBUG
42raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
43#endif
44
45/// This is a minimal scheduler strategy. The main difference between this
46/// and the GenericScheduler is that GCNSchedStrategy uses different
47/// heuristics to determine excess/critical pressure sets.
48class GCNSchedStrategy : public GenericScheduler {
49protected:
50 SUnit *pickNodeBidirectional(bool &IsTopNode, bool &PickedPending);
51
52 void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
53 const RegPressureTracker &RPTracker,
54 SchedCandidate &Cand, bool &IsPending,
55 bool IsBottomUp);
56
57 void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
58 const RegPressureTracker &RPTracker,
59 const SIRegisterInfo *SRI, unsigned SGPRPressure,
60 unsigned VGPRPressure, bool IsBottomUp);
61
62 /// Estimate how many cycles \p SU must wait due to structural hazards at the
63 /// current boundary cycle. Returns zero when no stall is required.
64 unsigned getStructuralStallCycles(SchedBoundary &Zone, SUnit *SU) const;
65
66 /// Evaluates instructions in the pending queue using a subset of scheduling
67 /// heuristics.
68 ///
69 /// Instructions that cannot be issued due to hardware constraints are placed
70 /// in the pending queue rather than the available queue, making them normally
71 /// invisible to scheduling heuristics. However, in certain scenarios (such as
72 /// avoiding register spilling), it may be beneficial to consider scheduling
73 /// these not-yet-ready instructions.
74 bool tryPendingCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
75 SchedBoundary *Zone) const;
76
77 void printCandidateDecision(const SchedCandidate &Current,
78 const SchedCandidate &Preferred);
79
80 void getRegisterPressures(bool AtTop, const RegPressureTracker &RPTracker,
81 SUnit *SU, std::vector<unsigned> &Pressure,
82 std::vector<unsigned> &MaxPressure,
83 GCNDownwardRPTracker &DownwardTracker,
84 GCNUpwardRPTracker &UpwardTracker,
85 ScheduleDAGMI *DAG, const SIRegisterInfo *SRI);
86
87 std::vector<unsigned> Pressure;
88
89 std::vector<unsigned> MaxPressure;
90
91 unsigned SGPRExcessLimit;
92
93 unsigned VGPRExcessLimit;
94
95 unsigned TargetOccupancy;
96
97 MachineFunction *MF;
98
99 // Scheduling stages for this strategy.
100 SmallVector<GCNSchedStageID, 4> SchedStages;
101
102 // Pointer to the current SchedStageID.
103 SmallVectorImpl<GCNSchedStageID>::iterator CurrentStage = nullptr;
104
105 // GCN RP Tracker for top-down scheduling
106 mutable GCNDownwardRPTracker DownwardTracker;
107
108 // GCN RP Tracker for botttom-up scheduling
109 mutable GCNUpwardRPTracker UpwardTracker;
110
111 bool UseGCNTrackers = false;
112
113 std::optional<bool> GCNTrackersOverride;
114
115public:
116 // schedule() have seen register pressure over the critical limits and had to
117 // track register pressure for actual scheduling heuristics.
118 bool HasHighPressure;
119
120 // Schedule known to have excess register pressure. Be more conservative in
121 // increasing ILP and preserving VGPRs.
122 bool KnownExcessRP = false;
123
124 // An error margin is necessary because of poor performance of the generic RP
125 // tracker and can be adjusted up for tuning heuristics to try and more
126 // aggressively reduce register pressure.
127 unsigned ErrorMargin = 3;
128
129 // Bias for SGPR limits under a high register pressure.
130 const unsigned HighRPSGPRBias = 7;
131
132 // Bias for VGPR limits under a high register pressure.
133 const unsigned HighRPVGPRBias = 7;
134
135 unsigned SGPRCriticalLimit;
136
137 unsigned VGPRCriticalLimit;
138
139 unsigned SGPRLimitBias = 0;
140
141 unsigned VGPRLimitBias = 0;
142
143 GCNSchedStrategy(const MachineSchedContext *C);
144
145 SUnit *pickNode(bool &IsTopNode) override;
146
147 void schedNode(SUnit *SU, bool IsTopNode) override;
148
149 void initialize(ScheduleDAGMI *DAG) override;
150
151 unsigned getTargetOccupancy() { return TargetOccupancy; }
152
153 void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }
154
155 GCNSchedStageID getCurrentStage();
156
157 // Advances stage. Returns true if there are remaining stages.
158 bool advanceStage();
159
160 bool hasNextStage() const;
161
162 bool useGCNTrackers() const {
163 return GCNTrackersOverride.value_or(u: UseGCNTrackers);
164 }
165
166 GCNSchedStageID getNextStage() const;
167
168 GCNDownwardRPTracker *getDownwardTracker() { return &DownwardTracker; }
169
170 GCNUpwardRPTracker *getUpwardTracker() { return &UpwardTracker; }
171};
172
173/// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
174/// maximum number of waves per simd).
175class GCNMaxOccupancySchedStrategy final : public GCNSchedStrategy {
176public:
177 GCNMaxOccupancySchedStrategy(const MachineSchedContext *C,
178 bool IsLegacyScheduler = false);
179};
180
181/// The goal of this scheduling strategy is to maximize ILP for a single wave
182/// (i.e. latency hiding).
183class GCNMaxILPSchedStrategy final : public GCNSchedStrategy {
184protected:
185 bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
186 SchedBoundary *Zone) const override;
187
188public:
189 GCNMaxILPSchedStrategy(const MachineSchedContext *C);
190};
191
192/// The goal of this scheduling strategy is to maximize memory clause for a
193/// single wave.
194class GCNMaxMemoryClauseSchedStrategy final : public GCNSchedStrategy {
195protected:
196 bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
197 SchedBoundary *Zone) const override;
198
199public:
200 GCNMaxMemoryClauseSchedStrategy(const MachineSchedContext *C);
201};
202
203class ScheduleMetrics {
204 unsigned ScheduleLength;
205 unsigned BubbleCycles;
206
207public:
208 ScheduleMetrics() = default;
209 ScheduleMetrics(unsigned L, unsigned BC)
210 : ScheduleLength(L), BubbleCycles(BC) {}
211 unsigned getLength() const { return ScheduleLength; }
212 unsigned getBubbles() const { return BubbleCycles; }
213 unsigned getMetric() const {
214 unsigned Metric = (BubbleCycles * ScaleFactor) / ScheduleLength;
215 // Metric is zero if the amount of bubbles is less than 1% which is too
216 // small. So, return 1.
217 return Metric ? Metric : 1;
218 }
219 static const unsigned ScaleFactor;
220};
221
222inline raw_ostream &operator<<(raw_ostream &OS, const ScheduleMetrics &Sm) {
223 dbgs() << "\n Schedule Metric (scaled by " << ScheduleMetrics::ScaleFactor
224 << " ) is: " << Sm.getMetric() << " [ " << Sm.getBubbles() << "/"
225 << Sm.getLength() << " ]\n";
226 return OS;
227}
228
229class GCNScheduleDAGMILive;
230class RegionPressureMap {
231 GCNScheduleDAGMILive *DAG;
232 // The live in/out pressure as indexed by the first or last MI in the region
233 // before scheduling.
234 DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> RegionLiveRegMap;
235 // The mapping of RegionIDx to key instruction
236 DenseMap<unsigned, MachineInstr *> IdxToInstruction;
237 // Whether we are calculating LiveOuts or LiveIns
238 bool IsLiveOut;
239
240public:
241 RegionPressureMap() = default;
242 RegionPressureMap(GCNScheduleDAGMILive *GCNDAG, bool LiveOut)
243 : DAG(GCNDAG), IsLiveOut(LiveOut) {}
244 // Build the Instr->LiveReg and RegionIdx->Instr maps
245 void buildLiveRegMap();
246
247 // Retrieve the LiveReg for a given RegionIdx
248 GCNRPTracker::LiveRegSet &getLiveRegsForRegionIdx(unsigned RegionIdx) {
249 assert(IdxToInstruction.contains(RegionIdx));
250 MachineInstr *Key = IdxToInstruction[RegionIdx];
251 return RegionLiveRegMap[Key];
252 }
253};
254
255/// A region's boundaries i.e. a pair of instruction bundle iterators. The lower
256/// boundary is inclusive, the upper boundary is exclusive.
257using RegionBoundaries =
258 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>;
259
260class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
261 friend class GCNSchedStage;
262 friend class OccInitialScheduleStage;
263 friend class RewriteMFMAFormStage;
264 friend class UnclusteredHighRPStage;
265 friend class ClusteredLowOccStage;
266 friend class PreRARematStage;
267 friend class ILPInitialScheduleStage;
268 friend class RegionPressureMap;
269
270 const GCNSubtarget &ST;
271
272 SIMachineFunctionInfo &MFI;
273
274 // Occupancy target at the beginning of function scheduling cycle.
275 unsigned StartingOccupancy;
276
277 // Minimal real occupancy recorder for the function.
278 unsigned MinOccupancy;
279
280 // Vector of regions recorder for later rescheduling
281 SmallVector<RegionBoundaries, 32> Regions;
282
283 // Record regions with high register pressure.
284 BitVector RegionsWithHighRP;
285
286 // Record regions with excess register pressure over the physical register
287 // limit. Register pressure in these regions usually will result in spilling.
288 BitVector RegionsWithExcessRP;
289
290 // Regions that have IGLP instructions (SCHED_GROUP_BARRIER or IGLP_OPT).
291 BitVector RegionsWithIGLPInstrs;
292
293 // Region live-in cache.
294 SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns;
295
296 // Region pressure cache.
297 SmallVector<GCNRegPressure, 32> Pressure;
298
299 // Temporary basic block live-in cache.
300 DenseMap<const MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns;
301
302 // The map of the initial first region instruction to region live in registers
303 DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;
304
305 // Calculate the map of the initial first region instruction to region live in
306 // registers
307 DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getRegionLiveInMap() const;
308
309 // Calculate the map of the initial last region instruction to region live out
310 // registers
311 DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
312 getRegionLiveOutMap() const;
313
314 // The live out registers per region. These are internally stored as a map of
315 // the initial last region instruction to region live out registers, but can
316 // be retreived with the regionIdx by calls to getLiveRegsForRegionIdx.
317 RegionPressureMap RegionLiveOuts;
318
319 // Return current region pressure.
320 GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;
321
322 // Compute and cache live-ins and pressure for all regions in block.
323 void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);
324
325 /// Makes the scheduler try to achieve an occupancy of \p TargetOccupancy.
326 void setTargetOccupancy(unsigned TargetOccupancy);
327
328 void runSchedStages();
329
330 std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);
331
332public:
333 GCNScheduleDAGMILive(MachineSchedContext *C,
334 std::unique_ptr<MachineSchedStrategy> S);
335
336 void schedule() override;
337
338 void finalizeSchedule() override;
339};
340
341// GCNSchedStrategy applies multiple scheduling stages to a function.
342class GCNSchedStage {
343protected:
344 GCNScheduleDAGMILive &DAG;
345
346 GCNSchedStrategy &S;
347
348 MachineFunction &MF;
349
350 SIMachineFunctionInfo &MFI;
351
352 const GCNSubtarget &ST;
353
354 const GCNSchedStageID StageID;
355
356 // The current block being scheduled.
357 MachineBasicBlock *CurrentMBB = nullptr;
358
359 // Current region index.
360 unsigned RegionIdx = 0;
361
362 // Record the original order of instructions before scheduling.
363 std::vector<MachineInstr *> Unsched;
364
365 // RP before scheduling the current region.
366 GCNRegPressure PressureBefore;
367
368 // RP after scheduling the current region.
369 GCNRegPressure PressureAfter;
370
371 std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
372
373 GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG);
374
375public:
376 // Initialize state for a scheduling stage. Returns false if the current stage
377 // should be skipped.
378 virtual bool initGCNSchedStage();
379
380 // Finalize state after finishing a scheduling pass on the function.
381 virtual void finalizeGCNSchedStage();
382
383 // Setup for scheduling a region. Returns false if the current region should
384 // be skipped.
385 virtual bool initGCNRegion();
386
387 // Finalize state after scheduling a region.
388 virtual void finalizeGCNRegion();
389
390 // Track whether a new region is also a new MBB.
391 void setupNewBlock();
392
393 // Check result of scheduling.
394 void checkScheduling();
395
396 // computes the given schedule virtual execution time in clocks
397 ScheduleMetrics getScheduleMetrics(const std::vector<SUnit> &InputSchedule);
398 ScheduleMetrics getScheduleMetrics(const GCNScheduleDAGMILive &DAG);
399 unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
400 DenseMap<unsigned, unsigned> &ReadyCycles,
401 const TargetSchedModel &SM);
402
403 // Returns true if scheduling should be reverted.
404 virtual bool shouldRevertScheduling(unsigned WavesAfter);
405
406 // Returns true if current region has known excess pressure.
407 bool isRegionWithExcessRP() const {
408 return DAG.RegionsWithExcessRP[RegionIdx];
409 }
410
411 // The region number this stage is currently working on
412 unsigned getRegionIdx() { return RegionIdx; }
413
414 // Returns true if the new schedule may result in more spilling.
415 bool mayCauseSpilling(unsigned WavesAfter);
416
417 /// Sets the schedule of region \p RegionIdx to \p MIOrder. The MIs in \p
418 /// MIOrder must be exactly the same as the ones currently existing inside the
419 /// region, only in a different order that honors def-use chains.
420 void modifyRegionSchedule(unsigned RegionIdx,
421 ArrayRef<MachineInstr *> MIOrder);
422
423 void advanceRegion() { RegionIdx++; }
424
425 virtual ~GCNSchedStage() = default;
426};
427
428class OccInitialScheduleStage : public GCNSchedStage {
429public:
430 bool shouldRevertScheduling(unsigned WavesAfter) override;
431
432 OccInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
433 : GCNSchedStage(StageID, DAG) {}
434};
435
436class RewriteMFMAFormStage : public GCNSchedStage {
437private:
438 // Record regions with excess archvgpr register pressure over the physical
439 // register limit. Register pressure in these regions usually will result in
440 // spilling.
441 BitVector RegionsWithExcessArchVGPR;
442
443 const SIInstrInfo *TII;
444 const SIRegisterInfo *SRI;
445
446 /// Per-candidate cache of the src2 "needs VGPR" decision, computed once
447 /// and reused on-demand.
448 DenseMap<const MachineInstr *, bool> Src2NeedsVGPRCache;
449
450 /// Do a speculative rewrite and collect copy locations. The speculative
451 /// rewrite allows us to calculate the RP of the code after the rewrite, and
452 /// the copy locations allow us to calculate the total cost of copies required
453 /// for the rewrite. Stores the rewritten instructions in \p RewriteCands ,
454 /// the copy locations for uses (of the MFMA result) in \p CopyForUse and the
455 /// copy locations for defs (of the MFMA operands) in \p CopyForDef
456 bool
457 initHeuristics(std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
458 DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
459 SmallPtrSetImpl<MachineInstr *> &CopyForDef);
460
461 /// Calculate the rewrite cost and undo the state change (e.g. rewriting) done
462 /// in initHeuristics. Uses \p CopyForUse and \p CopyForDef to calculate copy
463 /// costs, and \p RewriteCands to undo rewriting.
464 int64_t getRewriteCost(
465 ArrayRef<std::pair<MachineInstr *, unsigned>> RewriteCands,
466 const DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
467 const SmallPtrSetImpl<MachineInstr *> &CopyForDef);
468
469 /// Do the final rewrite on \p RewriteCands and insert any needed copies.
470 bool rewrite(ArrayRef<std::pair<MachineInstr *, unsigned>> RewriteCands);
471
472 /// \returns true if this MI is a rewrite candidate.
473 bool isRewriteCandidate(MachineInstr *MI) const;
474
475 /// Resets all candidates in \p RewriteCands back to VGPR form.
476 void resetRewriteCandsToVGPR(
477 ArrayRef<std::pair<MachineInstr *, unsigned>> RewriteCands);
478
479 /// Finds all the reaching defs of \p UseMO and stores the SlotIndexes into \p
480 /// DefIdxs
481 void findReachingDefs(MachineOperand &UseMO, LiveIntervals *LIS,
482 SmallVectorImpl<SlotIndex> &DefIdxs);
483
484 /// Finds all the reaching uses of \p DefMI and stores the use operands in \p
485 /// ReachingUses
486 void findReachingUses(const MachineInstr *DefMI, LiveIntervals *LIS,
487 SmallVectorImpl<MachineOperand *> &ReachingUses);
488
489 /// Returns true if the src2 register with reaching defs \p Src2ReachingDefs
490 /// has a use other than a group MFMA (in \p RewriteSet) or a copy, which
491 /// would keep it in VGPR form rather than let it be reclassified to AGPR.
492 bool hasUseRequiringVGPR(ArrayRef<SlotIndex> Src2ReachingDefs,
493 const SmallPtrSetImpl<MachineInstr *> &RewriteSet);
494
495public:
496 bool initGCNSchedStage() override;
497
498 RewriteMFMAFormStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
499 : GCNSchedStage(StageID, DAG) {}
500};
501
502class UnclusteredHighRPStage : public GCNSchedStage {
503private:
504 // Save the initial occupancy before starting this stage.
505 unsigned InitialOccupancy;
506 // Save the temporary target occupancy before starting this stage.
507 unsigned TempTargetOccupancy;
508 // Track whether any region was scheduled by this stage.
509 bool IsAnyRegionScheduled;
510
511public:
512 bool initGCNSchedStage() override;
513
514 void finalizeGCNSchedStage() override;
515
516 bool initGCNRegion() override;
517
518 bool shouldRevertScheduling(unsigned WavesAfter) override;
519
520 UnclusteredHighRPStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
521 : GCNSchedStage(StageID, DAG) {}
522};
523
524// Retry function scheduling if we found resulting occupancy and it is
525// lower than used for other scheduling passes. This will give more freedom
526// to schedule low register pressure blocks.
527class ClusteredLowOccStage : public GCNSchedStage {
528public:
529 bool initGCNSchedStage() override;
530
531 bool initGCNRegion() override;
532
533 bool shouldRevertScheduling(unsigned WavesAfter) override;
534
535 ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
536 : GCNSchedStage(StageID, DAG) {}
537};
538
539/// Attempts to reduce function spilling or, if there is no spilling, to
540/// increase function occupancy by one with respect to register usage by sinking
541/// rematerializable instructions to their use. When the stage estimates that
542/// reducing spilling or increasing occupancy is possible, it tries to
543/// rematerialize as few registers as possible to reduce potential negative
544/// effects on function latency.
545///
546/// The stage only supports rematerializing registers that meet all of the
547/// following constraints.
548/// 1. The register is virtual and has a single defining instruction.
549/// 2. The single defining instruction is either deemed rematerializable by the
550/// target-independent logic, or if not, has no non-constant and
551/// non-ignorable physical register use.
552/// 3 The register has no virtual register use whose live range would be
553/// extended by the rematerialization.
554/// 4. The register has a single non-debug user in a different region from its
555/// defining region.
556/// 5. The register is not used by or using another register that is going to be
557/// rematerialized.
558class PreRARematStage : public GCNSchedStage {
559private:
560 using RegisterIdx = Rematerializer::RegisterIdx;
561
562 /// A scored rematerialization candidate. Higher scores indicate more
563 /// beneficial rematerializations. A null score indicate the rematerialization
564 /// is not helpful to reduce RP in target regions.
565 struct ScoredRemat {
566 /// The register index handle in the rematerializer.
567 RegisterIdx RegIdx;
568 /// Regions in which the register is live-in/live-out/live anywhere.
569 BitVector LiveIn, LiveOut, Live;
570 /// Subset of \ref Live regions in which the rematerialization is not
571 /// guaranteed to reduce RP (i.e., regions in which the register is not
572 /// live-through and unused).
573 BitVector UnpredictableRPSave;
574 /// Expected register pressure decrease induced by rematerializing this
575 /// candidate.
576 GCNRegPressure RPSave;
577
578 /// Execution frequency information required by scoring heuristics.
579 /// Frequencies are scaled down if they are high to avoid overflow/underflow
580 /// when combining them.
581 struct FreqInfo {
582 /// Per-region execution frequencies. 0 when unknown.
583 SmallVector<uint64_t> Regions;
584 /// Minimum and maximum observed frequencies.
585 uint64_t MinFreq, MaxFreq;
586
587 FreqInfo(MachineFunction &MF, const GCNScheduleDAGMILive &DAG);
588
589 private:
590 static const uint64_t ScaleFactor = 1024;
591 };
592
593 /// Initializes the candidate with state-independent characteristics for
594 /// rematerializable register with index handle \p RegIdx. This doesn't
595 /// update the actual score (call \ref update for this).
596 void init(RegisterIdx RegIdx, const FreqInfo &Freq,
597 const Rematerializer &Remater, GCNScheduleDAGMILive &DAG);
598
599 /// Rematerializes the candidate using the \p Remater.
600 void rematerialize(Rematerializer &Remater) const;
601
602 /// Determines whether this rematerialization may be beneficial in at least
603 /// one target region.
604 bool maybeBeneficial(const BitVector &TargetRegions,
605 ArrayRef<GCNRPTarget> RPTargets) const;
606
607 /// Rematerializes the candidate and returns the new MI. This removes the
608 /// rematerialized register from live-in/out lists in the \p DAG and updates
609 /// \p RPTargets in all affected regions. Regions in which RP savings are
610 /// not guaranteed are set in \p RecomputeRP.
611 MachineInstr *rematerialize(BitVector &RecomputeRP,
612 SmallVectorImpl<GCNRPTarget> &RPTargets,
613 GCNScheduleDAGMILive &DAG) const;
614
615 /// Updates the rematerialization's score w.r.t. the current \p RPTargets.
616 /// \p RegionFreq indicates the frequency of each region.
617 void update(const BitVector &TargetRegions, ArrayRef<GCNRPTarget> RPTargets,
618 const FreqInfo &Freq, bool ReduceSpill);
619
620 /// Returns whether the current score is null, indicating the
621 /// rematerialization is useless.
622 bool hasNullScore() const { return !RegionImpact; }
623
624 /// Compare score components of non-null scores pair-wise. Scores shouldn't
625 /// be null (as defined by \ref hasNullScore).
626 bool operator<(const ScoredRemat &O) const {
627 assert(!hasNullScore() && "this has null score");
628 assert(!O.hasNullScore() && "other has null score");
629 if (MaxFreq != O.MaxFreq)
630 return MaxFreq < O.MaxFreq;
631 if (FreqDiff != O.FreqDiff)
632 return FreqDiff < O.FreqDiff;
633 if (RegionImpact != O.RegionImpact)
634 return RegionImpact < O.RegionImpact;
635 // Break ties using register index handles. If the two registers are
636 // connected in some dependency DAG of rematerializable registers, this
637 // will tend to give a higher score to the register further from the
638 // dependency DAG's root. If the two registers are disconnected, this will
639 // give a higher score to the register with lower virtual register index.
640 // In general, within a region, this should prefer registers defined
641 // earlier that have longer live ranges in their defining region (since
642 // the registers we consider are always live-out in their defining
643 // region).
644 return RegIdx > O.RegIdx;
645 }
646
647#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
648 Printable print() const;
649#endif
650
651 private:
652 // The three members below are the scoring components, top to bottom from
653 // most important to least important when comparing candidates.
654
655 /// Frequency of impacted target region with highest known frequency. This
656 /// only matters when the stage is trying to reduce spilling, so it is
657 /// always 0 when it is not.
658 uint64_t MaxFreq;
659 /// Frequency difference between defining and using regions. Negative values
660 /// indicate we are rematerializing to higher frequency regions; positive
661 /// values indicate the contrary.
662 int64_t FreqDiff;
663 /// Expected number of target regions impacted by the rematerialization,
664 /// scaled by the size of the register being rematerialized.
665 unsigned RegionImpact;
666 };
667
668 /// Register pressure targets for all regions.
669 SmallVector<GCNRPTarget> RPTargets;
670 /// Regions which are above the stage's RP target.
671 BitVector TargetRegions;
672 /// The target occupancy the set is trying to achieve. Empty when the
673 /// objective is spilling reduction.
674 std::optional<unsigned> TargetOcc;
675 /// Achieved occupancy *only* through rematerializations (pre-rescheduling).
676 unsigned AchievedOcc;
677 /// After successful stage initialization, indicates which regions should be
678 /// rescheduled.
679 BitVector RescheduleRegions;
680
681 /// Underlying utilities to identify and perform rematerializations.
682 Rematerializer Remater;
683
684 struct RollbackSupport {
685 struct LiveMapUpdate {
686 /// The register index handle in the rematerializer.
687 RegisterIdx RegIdx;
688 /// Regions in which the original register was live-in or live-out.
689 BitVector LiveIn, LiveOut;
690
691 LiveMapUpdate(RegisterIdx RegIdx, const BitVector &LiveIn,
692 const BitVector &LiveOut)
693 : RegIdx(RegIdx), LiveIn(LiveIn), LiveOut(LiveOut) {}
694 };
695
696 /// Rollback listener.
697 Rollbacker Listener;
698 /// Registers removed from live-maps along with bitvectors indicationg the
699 /// regions in which they were live-ins and live-outs.
700 SmallVector<LiveMapUpdate> LiveMapUpdates;
701
702 /// Attaches the rollback listener to the rematerializer.
703 RollbackSupport(Rematerializer &Remater) { Remater.addListener(Listen: &Listener); }
704 };
705
706 /// Rollback support. Maintained through a unique pointer because it is
707 /// optional and needs to persist between stage initialization and
708 /// finalization.
709 std::unique_ptr<RollbackSupport> Rollback;
710
711 /// State of a region pre-re-scheduling but post-rematerializations that we
712 /// must keep to be able to revert re-scheduling effects.
713 struct RegionSchedRevert {
714 /// Region number;
715 unsigned RegionIdx;
716 /// Original instruction order (both debug and non-debug MIs).
717 std::vector<MachineInstr *> OrigMIOrder;
718 /// Maximum pressure recorded in the region.
719 GCNRegPressure MaxPressure;
720
721 RegionSchedRevert(unsigned RegionIdx, ArrayRef<MachineInstr *> OrigMIOrder,
722 const GCNRegPressure &MaxPressure)
723 : RegionIdx(RegionIdx), OrigMIOrder(OrigMIOrder),
724 MaxPressure(MaxPressure) {}
725 };
726 /// After re-scheduling, contains pre-re-scheduling data for all re-scheduled
727 /// regions.
728 SmallVector<RegionSchedRevert> RegionReverts;
729 /// Whether we should revert all re-scheduled regions.
730 bool RevertAllRegions = false;
731
732 /// Returns the occupancy the stage is trying to achieve.
733 unsigned getStageTargetOccupancy() const;
734
735 /// Determines the stage's objective (increasing occupancy or reducing
736 /// spilling, set in \ref TargetOcc). Defines \ref RPTargets in all regions to
737 /// achieve that objective and mark those that don't achieve it in \ref
738 /// TargetRegions. Returns whether there is any target region.
739 bool setObjective();
740
741 /// In all regions set in \p Regions, saves pressure \p RPSave and clear it as
742 /// a target if its RP target has been reached.
743 void updateRPTargets(const BitVector &Regions, const GCNRegPressure &RPSave);
744
745 /// Fully recomputes RP from the DAG in \p Regions. Among those regions, sets
746 /// again all \ref TargetRegions that were optimistically marked as satisfied
747 /// but are actually not, and returns whether there were any such regions.
748 bool updateAndVerifyRPTargets(const BitVector &Regions);
749
750 /// Removes register \p Reg from the live-ins of regions set in \p LiveIn and
751 /// the live-outs of regions set in \p LiveOut.
752 void removeFromLiveMaps(Register Reg, const BitVector &LiveIn,
753 const BitVector &LiveOut);
754
755 /// Adds register \p Reg with mask \p Mask to the live-ins of regions set in
756 /// \p LiveIn and the live-outs of regions set in \p LiveOut.
757 void addToLiveMaps(Register Reg, LaneBitmask Mask, const BitVector &LiveIn,
758 const BitVector &LiveOut);
759
760 /// If remat alone did not increase occupancy to the target one, rollbacks all
761 /// rematerializations and resets live-ins/RP in all regions impacted by the
762 /// stage to their pre-stage values.
763 void finalizeGCNSchedStage() override;
764
765public:
766 bool initGCNSchedStage() override;
767
768 bool initGCNRegion() override;
769
770 void finalizeGCNRegion() override;
771
772 bool shouldRevertScheduling(unsigned WavesAfter) override;
773
774 PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
775 : GCNSchedStage(StageID, DAG), TargetRegions(DAG.Regions.size()),
776 RescheduleRegions(DAG.Regions.size()),
777 Remater(MF, DAG.Regions, *DAG.LIS) {
778 const unsigned NumRegions = DAG.Regions.size();
779 RPTargets.reserve(N: NumRegions);
780 }
781};
782
783class ILPInitialScheduleStage : public GCNSchedStage {
784public:
785 bool shouldRevertScheduling(unsigned WavesAfter) override;
786
787 ILPInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
788 : GCNSchedStage(StageID, DAG) {}
789};
790
791class MemoryClauseInitialScheduleStage : public GCNSchedStage {
792public:
793 bool shouldRevertScheduling(unsigned WavesAfter) override;
794
795 MemoryClauseInitialScheduleStage(GCNSchedStageID StageID,
796 GCNScheduleDAGMILive &DAG)
797 : GCNSchedStage(StageID, DAG) {}
798};
799
800class GCNPostScheduleDAGMILive final : public ScheduleDAGMI {
801private:
802 std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
803
804 bool HasIGLPInstrs = false;
805
806public:
807 void schedule() override;
808
809 void finalizeSchedule() override;
810
811 GCNPostScheduleDAGMILive(MachineSchedContext *C,
812 std::unique_ptr<MachineSchedStrategy> S,
813 bool RemoveKillFlags);
814};
815
816} // End namespace llvm
817
818#endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
819