1//===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
/// \file
/// Defines the GCN machine scheduler strategies and the multi-stage scheduling
/// pipeline (occupancy, high-RP rescheduling, rematerialization, ILP, etc.)
/// used by the AMDGPU backend.
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
14#define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
15
16#include "GCNRegPressure.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/MapVector.h"
19#include "llvm/CodeGen/MachineBasicBlock.h"
20#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
21#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
22#include "llvm/CodeGen/MachineInstr.h"
23#include "llvm/CodeGen/MachineScheduler.h"
24
25namespace llvm {
26
27class SIMachineFunctionInfo;
28class SIRegisterInfo;
29class GCNSubtarget;
30class GCNSchedStage;
31
/// Identifies a scheduling stage run by GCNScheduleDAGMILive. The numeric
/// values reflect the canonical order in which the stages are applied to a
/// function.
enum class GCNSchedStageID : unsigned {
  OccInitialSchedule = 0,
  RewriteMFMAForm = 1,
  UnclusteredHighRPReschedule = 2,
  ClusteredLowOccupancyReschedule = 3,
  PreRARematerialize = 4,
  ILPInitialSchedule = 5,
  MemoryClauseInitialSchedule = 6
};
41
#ifndef NDEBUG
/// Debug-only pretty-printer for scheduling stage IDs.
raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
#endif
45
46/// This is a minimal scheduler strategy. The main difference between this
47/// and the GenericScheduler is that GCNSchedStrategy uses different
48/// heuristics to determine excess/critical pressure sets.
49class GCNSchedStrategy : public GenericScheduler {
50protected:
51 SUnit *pickNodeBidirectional(bool &IsTopNode, bool &PickedPending);
52
53 void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
54 const RegPressureTracker &RPTracker,
55 SchedCandidate &Cand, bool &IsPending,
56 bool IsBottomUp);
57
58 void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
59 const RegPressureTracker &RPTracker,
60 const SIRegisterInfo *SRI, unsigned SGPRPressure,
61 unsigned VGPRPressure, bool IsBottomUp);
62
63 /// Estimate how many cycles \p SU must wait due to structural hazards at the
64 /// current boundary cycle. Returns zero when no stall is required.
65 unsigned getStructuralStallCycles(SchedBoundary &Zone, SUnit *SU) const;
66
67 /// Evaluates instructions in the pending queue using a subset of scheduling
68 /// heuristics.
69 ///
70 /// Instructions that cannot be issued due to hardware constraints are placed
71 /// in the pending queue rather than the available queue, making them normally
72 /// invisible to scheduling heuristics. However, in certain scenarios (such as
73 /// avoiding register spilling), it may be beneficial to consider scheduling
74 /// these not-yet-ready instructions.
75 bool tryPendingCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
76 SchedBoundary *Zone) const;
77
78 void printCandidateDecision(const SchedCandidate &Current,
79 const SchedCandidate &Preferred);
80
81 void getRegisterPressures(bool AtTop, const RegPressureTracker &RPTracker,
82 SUnit *SU, std::vector<unsigned> &Pressure,
83 std::vector<unsigned> &MaxPressure,
84 GCNDownwardRPTracker &DownwardTracker,
85 GCNUpwardRPTracker &UpwardTracker,
86 ScheduleDAGMI *DAG, const SIRegisterInfo *SRI);
87
88 std::vector<unsigned> Pressure;
89
90 std::vector<unsigned> MaxPressure;
91
92 unsigned SGPRExcessLimit;
93
94 unsigned VGPRExcessLimit;
95
96 unsigned TargetOccupancy;
97
98 MachineFunction *MF;
99
100 // Scheduling stages for this strategy.
101 SmallVector<GCNSchedStageID, 4> SchedStages;
102
103 // Pointer to the current SchedStageID.
104 SmallVectorImpl<GCNSchedStageID>::iterator CurrentStage = nullptr;
105
106 // GCN RP Tracker for top-down scheduling
107 mutable GCNDownwardRPTracker DownwardTracker;
108
109 // GCN RP Tracker for botttom-up scheduling
110 mutable GCNUpwardRPTracker UpwardTracker;
111
112 bool UseGCNTrackers = false;
113
114 std::optional<bool> GCNTrackersOverride;
115
116public:
117 // schedule() have seen register pressure over the critical limits and had to
118 // track register pressure for actual scheduling heuristics.
119 bool HasHighPressure;
120
121 // Schedule known to have excess register pressure. Be more conservative in
122 // increasing ILP and preserving VGPRs.
123 bool KnownExcessRP = false;
124
125 // An error margin is necessary because of poor performance of the generic RP
126 // tracker and can be adjusted up for tuning heuristics to try and more
127 // aggressively reduce register pressure.
128 unsigned ErrorMargin = 3;
129
130 // Bias for SGPR limits under a high register pressure.
131 const unsigned HighRPSGPRBias = 7;
132
133 // Bias for VGPR limits under a high register pressure.
134 const unsigned HighRPVGPRBias = 7;
135
136 unsigned SGPRCriticalLimit;
137
138 unsigned VGPRCriticalLimit;
139
140 unsigned SGPRLimitBias = 0;
141
142 unsigned VGPRLimitBias = 0;
143
144 GCNSchedStrategy(const MachineSchedContext *C);
145
146 SUnit *pickNode(bool &IsTopNode) override;
147
148 void schedNode(SUnit *SU, bool IsTopNode) override;
149
150 void initialize(ScheduleDAGMI *DAG) override;
151
152 unsigned getTargetOccupancy() { return TargetOccupancy; }
153
154 void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }
155
156 GCNSchedStageID getCurrentStage();
157
158 // Advances stage. Returns true if there are remaining stages.
159 bool advanceStage();
160
161 bool hasNextStage() const;
162
163 bool useGCNTrackers() const {
164 return GCNTrackersOverride.value_or(u: UseGCNTrackers);
165 }
166
167 GCNSchedStageID getNextStage() const;
168
169 GCNDownwardRPTracker *getDownwardTracker() { return &DownwardTracker; }
170
171 GCNUpwardRPTracker *getUpwardTracker() { return &UpwardTracker; }
172};
173
/// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
/// maximum number of waves per simd).
class GCNMaxOccupancySchedStrategy final : public GCNSchedStrategy {
public:
  /// \p IsLegacyScheduler selects behavior compatible with the legacy
  /// max-occupancy scheduler; exact semantics live in the implementation.
  GCNMaxOccupancySchedStrategy(const MachineSchedContext *C,
                               bool IsLegacyScheduler = false);
};
181
/// The goal of this scheduling strategy is to maximize ILP for a single wave
/// (i.e. latency hiding).
class GCNMaxILPSchedStrategy final : public GCNSchedStrategy {
protected:
  /// Overrides candidate comparison to apply ILP-oriented heuristics; the
  /// ordering of heuristics is defined in the implementation.
  bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                    SchedBoundary *Zone) const override;

public:
  GCNMaxILPSchedStrategy(const MachineSchedContext *C);
};
192
/// The goal of this scheduling strategy is to maximize memory clause for a
/// single wave.
class GCNMaxMemoryClauseSchedStrategy final : public GCNSchedStrategy {
protected:
  /// Overrides candidate comparison to favor forming memory clauses; the
  /// ordering of heuristics is defined in the implementation.
  bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                    SchedBoundary *Zone) const override;

public:
  GCNMaxMemoryClauseSchedStrategy(const MachineSchedContext *C);
};
203
/// Summarizes a schedule as its total length and its bubble (stall) cycles,
/// and condenses both into a single comparable metric.
class ScheduleMetrics {
  // Zero-initialized so a default-constructed object has well-defined state
  // (previously reads from it were indeterminate).
  unsigned ScheduleLength = 0;
  unsigned BubbleCycles = 0;

public:
  ScheduleMetrics() = default;
  ScheduleMetrics(unsigned L, unsigned BC)
      : ScheduleLength(L), BubbleCycles(BC) {}
  unsigned getLength() const { return ScheduleLength; }
  unsigned getBubbles() const { return BubbleCycles; }
  /// Returns the bubble ratio scaled by ScaleFactor, never less than 1.
  unsigned getMetric() const {
    // Guard against division by zero for an empty/default schedule; the
    // minimum metric value is 1 by convention.
    if (ScheduleLength == 0)
      return 1;
    unsigned Metric = (BubbleCycles * ScaleFactor) / ScheduleLength;
    // Metric is zero if the amount of bubbles is less than 1% which is too
    // small. So, return 1.
    return Metric ? Metric : 1;
  }
  static const unsigned ScaleFactor;
};
222
223inline raw_ostream &operator<<(raw_ostream &OS, const ScheduleMetrics &Sm) {
224 dbgs() << "\n Schedule Metric (scaled by " << ScheduleMetrics::ScaleFactor
225 << " ) is: " << Sm.getMetric() << " [ " << Sm.getBubbles() << "/"
226 << Sm.getLength() << " ]\n";
227 return OS;
228}
229
class GCNScheduleDAGMILive;
/// Caches per-region live-register sets (either live-ins or live-outs,
/// selected at construction) keyed by a region's boundary instruction, and
/// makes them retrievable by region index.
class RegionPressureMap {
  GCNScheduleDAGMILive *DAG;
  // The live in/out pressure as indexed by the first or last MI in the region
  // before scheduling.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> RegionLiveRegMap;
  // The mapping of RegionIdx to key instruction
  DenseMap<unsigned, MachineInstr *> IdxToInstruction;
  // Whether we are calculating LiveOuts or LiveIns
  bool IsLiveOut;

public:
  RegionPressureMap() = default;
  RegionPressureMap(GCNScheduleDAGMILive *GCNDAG, bool LiveOut)
      : DAG(GCNDAG), IsLiveOut(LiveOut) {}
  // Build the Instr->LiveReg and RegionIdx->Instr maps
  void buildLiveRegMap();

  // Retrieve the LiveReg for a given RegionIdx. Asserts that the index was
  // registered by buildLiveRegMap().
  GCNRPTracker::LiveRegSet &getLiveRegsForRegionIdx(unsigned RegionIdx) {
    assert(IdxToInstruction.contains(RegionIdx));
    MachineInstr *Key = IdxToInstruction[RegionIdx];
    return RegionLiveRegMap[Key];
  }
};
255
/// A region's boundaries, i.e. a pair of instruction bundle iterators. The
/// lower boundary is inclusive, the upper boundary is exclusive.
using RegionBoundaries =
    std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>;
260
/// Live-interval-aware scheduling DAG that drives the multi-stage GCN
/// scheduling pipeline over all regions of a function.
class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
  friend class GCNSchedStage;
  friend class OccInitialScheduleStage;
  friend class RewriteMFMAFormStage;
  friend class UnclusteredHighRPStage;
  friend class ClusteredLowOccStage;
  friend class PreRARematStage;
  friend class ILPInitialScheduleStage;
  friend class RegionPressureMap;

  const GCNSubtarget &ST;

  SIMachineFunctionInfo &MFI;

  // Occupancy target at the beginning of function scheduling cycle.
  unsigned StartingOccupancy;

  // Minimal real occupancy recorded for the function.
  unsigned MinOccupancy;

  // Vector of regions recorded for later rescheduling
  SmallVector<RegionBoundaries, 32> Regions;

  // Record regions with high register pressure.
  BitVector RegionsWithHighRP;

  // Record regions with excess register pressure over the physical register
  // limit. Register pressure in these regions usually will result in spilling.
  BitVector RegionsWithExcessRP;

  // Regions that have IGLP instructions (SCHED_GROUP_BARRIER or IGLP_OPT).
  BitVector RegionsWithIGLPInstrs;

  // Region live-in cache.
  SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns;

  // Region pressure cache.
  SmallVector<GCNRegPressure, 32> Pressure;

  // Temporary basic block live-in cache.
  DenseMap<const MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns;

  // The map of the initial first region instruction to region live in registers
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;

  // Calculate the map of the initial first region instruction to region live in
  // registers
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getRegionLiveInMap() const;

  // Calculate the map of the initial last region instruction to region live out
  // registers
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
  getRegionLiveOutMap() const;

  // The live out registers per region. These are internally stored as a map of
  // the initial last region instruction to region live out registers, but can
  // be retrieved with the regionIdx by calls to getLiveRegsForRegionIdx.
  RegionPressureMap RegionLiveOuts;

  // Return current region pressure.
  GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;

  // Compute and cache live-ins and pressure for all regions in block.
  void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);

  /// Makes the scheduler try to achieve an occupancy of \p TargetOccupancy.
  void setTargetOccupancy(unsigned TargetOccupancy);

  // Run every stage returned by createSchedStage over the function.
  void runSchedStages();

  std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);

  // Remove \p MI from region \p RegionIdx and the underlying MIR.
  void deleteMI(unsigned RegionIdx, MachineInstr *MI);

public:
  GCNScheduleDAGMILive(MachineSchedContext *C,
                       std::unique_ptr<MachineSchedStrategy> S);

  void schedule() override;

  void finalizeSchedule() override;
};
343
// Base class for one stage of the multi-stage scheduling process that
// GCNSchedStrategy applies to a function. Subclasses customize stage/region
// initialization and the decision to revert a region's schedule.
class GCNSchedStage {
protected:
  GCNScheduleDAGMILive &DAG;

  GCNSchedStrategy &S;

  MachineFunction &MF;

  SIMachineFunctionInfo &MFI;

  const GCNSubtarget &ST;

  const GCNSchedStageID StageID;

  // The current block being scheduled.
  MachineBasicBlock *CurrentMBB = nullptr;

  // Current region index.
  unsigned RegionIdx = 0;

  // Record the original order of instructions before scheduling.
  std::vector<MachineInstr *> Unsched;

  // RP before scheduling the current region.
  GCNRegPressure PressureBefore;

  // RP after scheduling the current region.
  GCNRegPressure PressureAfter;

  // Saved scheduler DAG mutations (managed by stage implementations).
  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG);

public:
  // Initialize state for a scheduling stage. Returns false if the current stage
  // should be skipped.
  virtual bool initGCNSchedStage();

  // Finalize state after finishing a scheduling pass on the function.
  virtual void finalizeGCNSchedStage();

  // Setup for scheduling a region. Returns false if the current region should
  // be skipped.
  virtual bool initGCNRegion();

  // Finalize state after scheduling a region.
  virtual void finalizeGCNRegion();

  // Track whether a new region is also a new MBB.
  void setupNewBlock();

  // Check result of scheduling.
  void checkScheduling();

  // Computes the given schedule's virtual execution time in clocks.
  ScheduleMetrics getScheduleMetrics(const std::vector<SUnit> &InputSchedule);
  ScheduleMetrics getScheduleMetrics(const GCNScheduleDAGMILive &DAG);
  // Computes the cycle at which \p SU becomes ready, caching results in
  // \p ReadyCycles.
  unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
                                  DenseMap<unsigned, unsigned> &ReadyCycles,
                                  const TargetSchedModel &SM);

  // Returns true if scheduling should be reverted.
  virtual bool shouldRevertScheduling(unsigned WavesAfter);

  // Returns true if current region has known excess pressure.
  bool isRegionWithExcessRP() const {
    return DAG.RegionsWithExcessRP[RegionIdx];
  }

  // The region number this stage is currently working on
  unsigned getRegionIdx() { return RegionIdx; }

  // Returns true if the new schedule may result in more spilling.
  bool mayCauseSpilling(unsigned WavesAfter);

  /// Sets the schedule of region \p RegionIdx in block \p MBB to \p MIOrder.
  /// The MIs in \p MIOrder must be exactly the same as the ones currently
  /// existing inside the region, only in a different order that honors def-use
  /// chains.
  void modifyRegionSchedule(unsigned RegionIdx, MachineBasicBlock *MBB,
                            ArrayRef<MachineInstr *> MIOrder);

  // Move on to the next region.
  void advanceRegion() { RegionIdx++; }

  virtual ~GCNSchedStage() = default;
};
431
/// Initial scheduling stage used by the max-occupancy strategy.
class OccInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  OccInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};
439
/// Stage that speculatively rewrites MFMA instructions into an alternate form
/// when the estimated register-pressure/copy-cost trade-off is profitable.
class RewriteMFMAFormStage : public GCNSchedStage {
private:
  // Record regions with excess ArchVGPR register pressure over the physical
  // register limit. Register pressure in these regions usually will result in
  // spilling.
  BitVector RegionsWithExcessArchVGPR;

  const SIInstrInfo *TII;
  const SIRegisterInfo *SRI;

  /// Do a speculative rewrite and collect copy locations. The speculative
  /// rewrite allows us to calculate the RP of the code after the rewrite, and
  /// the copy locations allow us to calculate the total cost of copies required
  /// for the rewrite. Stores the rewritten instructions in \p RewriteCands ,
  /// the copy locations for uses (of the MFMA result) in \p CopyForUse and the
  /// copy locations for defs (of the MFMA operands) in \p CopyForDef
  bool
  initHeuristics(std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
                 DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
                 SmallPtrSetImpl<MachineInstr *> &CopyForDef);

  /// Calculate the rewrite cost and undo the state change (e.g. rewriting) done
  /// in initHeuristics. Uses \p CopyForUse and \p CopyForDef to calculate copy
  /// costs, and \p RewriteCands to undo rewriting.
  int64_t getRewriteCost(
      const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
      const DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
      const SmallPtrSetImpl<MachineInstr *> &CopyForDef);

  /// Do the final rewrite on \p RewriteCands and insert any needed copies.
  bool
  rewrite(const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands);

  /// \returns true if this MI is a rewrite candidate.
  bool isRewriteCandidate(MachineInstr *MI) const;

  /// Finds all the reaching defs of \p UseMO and stores the SlotIndexes into \p
  /// DefIdxs
  void findReachingDefs(MachineOperand &UseMO, LiveIntervals *LIS,
                        SmallVectorImpl<SlotIndex> &DefIdxs);

  /// Finds all the reaching uses of \p DefMI and stores the use operands in \p
  /// ReachingUses
  void findReachingUses(MachineInstr *DefMI, LiveIntervals *LIS,
                        SmallVectorImpl<MachineOperand *> &ReachingUses);

public:
  bool initGCNSchedStage() override;

  RewriteMFMAFormStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};
492
/// Rescheduling stage targeting regions with high register pressure; runs
/// without the clustering mutations of the initial schedule.
class UnclusteredHighRPStage : public GCNSchedStage {
private:
  // Save the initial occupancy before starting this stage.
  unsigned InitialOccupancy;
  // Save the temporary target occupancy before starting this stage.
  unsigned TempTargetOccupancy;
  // Track whether any region was scheduled by this stage.
  bool IsAnyRegionScheduled;

public:
  bool initGCNSchedStage() override;

  void finalizeGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  UnclusteredHighRPStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};
514
// Retry function scheduling if the resulting occupancy turned out lower than
// the occupancy used for other scheduling passes. This gives more freedom to
// schedule low register pressure blocks.
class ClusteredLowOccStage : public GCNSchedStage {
public:
  bool initGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};
529
530/// Attempts to reduce function spilling or, if there is no spilling, to
531/// increase function occupancy by one with respect to register usage by sinking
532/// rematerializable instructions to their use. When the stage estimates that
533/// reducing spilling or increasing occupancy is possible, it tries to
534/// rematerialize as few registers as possible to reduce potential negative
535/// effects on function latency.
536///
537/// The stage only supports rematerializing registers that meet all of the
538/// following constraints.
539/// 1. The register is virtual and has a single defining instruction.
540/// 2. The single defining instruction is either deemed rematerializable by the
541/// target-independent logic, or if not, has no non-constant and
542/// non-ignorable physical register use.
543/// 3 The register has no virtual register use whose live range would be
544/// extended by the rematerialization.
545/// 4. The register has a single non-debug user in a different region from its
546/// defining region.
547/// 5. The register is not used by or using another register that is going to be
548/// rematerialized.
549class PreRARematStage : public GCNSchedStage {
550private:
551 /// A rematerializable register.
552 struct RematReg {
553 /// Single MI defining the rematerializable register.
554 MachineInstr *DefMI;
555 /// Single user of the rematerializable register.
556 MachineInstr *UseMI;
557 /// Regions in which the register is live-in/live-out/live anywhere.
558 BitVector LiveIn, LiveOut, Live;
559 /// The rematerializable register's lane bitmask.
560 LaneBitmask Mask;
561 /// Defining and using regions.
562 unsigned DefRegion, UseRegion;
563
564 RematReg(MachineInstr *DefMI, MachineInstr *UseMI,
565 GCNScheduleDAGMILive &DAG,
566 const DenseMap<MachineInstr *, unsigned> &MIRegion);
567
568 /// Returns the rematerializable register. Do not call after deleting the
569 /// original defining instruction.
570 Register getReg() const { return DefMI->getOperand(i: 0).getReg(); }
571
572 /// Determines whether this rematerialization may be beneficial in at least
573 /// one target region.
574 bool maybeBeneficial(const BitVector &TargetRegions,
575 ArrayRef<GCNRPTarget> RPTargets) const;
576
577 /// Determines if the register is both unused and live-through in region \p
578 /// I. This guarantees that rematerializing it will reduce RP in the region.
579 bool isUnusedLiveThrough(unsigned I) const {
580 assert(I < Live.size() && "region index out of range");
581 return LiveIn[I] && LiveOut[I] && I != UseRegion;
582 }
583
584 /// Updates internal structures following a MI rematerialization. Part of
585 /// the stage instead of the DAG because it makes assumptions that are
586 /// specific to the rematerialization process.
587 void insertMI(unsigned RegionIdx, MachineInstr *RematMI,
588 GCNScheduleDAGMILive &DAG) const;
589 };
590
591 /// A scored rematerialization candidate. Higher scores indicate more
592 /// beneficial rematerializations. A null score indicate the rematerialization
593 /// is not helpful to reduce RP in target regions.
594 struct ScoredRemat {
595 /// The rematerializable register under consideration.
596 RematReg *Remat;
597
598 /// Execution frequency information required by scoring heuristics.
599 /// Frequencies are scaled down if they are high to avoid overflow/underflow
600 /// when combining them.
601 struct FreqInfo {
602 /// Per-region execution frequencies. 0 when unknown.
603 SmallVector<uint64_t> Regions;
604 /// Minimum and maximum observed frequencies.
605 uint64_t MinFreq, MaxFreq;
606
607 FreqInfo(MachineFunction &MF, const GCNScheduleDAGMILive &DAG);
608
609 private:
610 static const uint64_t ScaleFactor = 1024;
611 };
612
613 /// This only initializes state-independent characteristics of \p Remat, not
614 /// the actual score.
615 ScoredRemat(RematReg *Remat, const FreqInfo &Freq,
616 const GCNScheduleDAGMILive &DAG);
617
618 /// Rematerializes the candidate and returns the new MI. This removes the
619 /// rematerialized register from live-in/out lists in the \p DAG and updates
620 /// \p RPTargets in all affected regions. Regions in which RP savings are
621 /// not guaranteed are set in \p RecomputeRP.
622 MachineInstr *rematerialize(BitVector &RecomputeRP,
623 SmallVectorImpl<GCNRPTarget> &RPTargets,
624 GCNScheduleDAGMILive &DAG) const;
625
626 /// Updates the rematerialization's score w.r.t. the current \p RPTargets.
627 /// \p RegionFreq indicates the frequency of each region
628 void update(const BitVector &TargetRegions, ArrayRef<GCNRPTarget> RPTargets,
629 const FreqInfo &Freq, bool ReduceSpill);
630
631 /// Returns whether the current score is null, indicating the
632 /// rematerialization is useless.
633 bool hasNullScore() const { return !RegionImpact; }
634
635 /// Compare score components of non-null scores pair-wise. A null score is
636 /// always strictly lesser than another non-null score.
637 bool operator<(const ScoredRemat &O) const {
638 if (hasNullScore())
639 return !O.hasNullScore();
640 if (O.hasNullScore())
641 return false;
642 if (MaxFreq != O.MaxFreq)
643 return MaxFreq < O.MaxFreq;
644 if (FreqDiff != O.FreqDiff)
645 return FreqDiff < O.FreqDiff;
646 if (RegionImpact != O.RegionImpact)
647 return RegionImpact < O.RegionImpact;
648 // Break ties using pointer to rematerializable register. Rematerializable
649 // registers are collected in instruction order so, within the same
650 // region, this will prefer registers defined earlier that have longer
651 // live ranges in their defining region (since the registers we consider
652 // are always live-out in their defining region).
653 return Remat > O.Remat;
654 }
655
656#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
657 Printable print() const;
658#endif
659
660 private:
661 /// Expected register pressure decrease induced by rematerializing this
662 /// candidate.
663 GCNRegPressure RPSave;
664
665 // The three members below are the scoring components, top to bottom from
666 // most important to least important when comparing candidates.
667
668 /// Frequency of impacted target region with highest known frequency. This
669 /// only matters when the stage is trying to reduce spilling, so it is
670 /// always 0 when it is not.
671 uint64_t MaxFreq;
672 /// Frequency difference between defining and using regions. Negative values
673 /// indicate we are rematerializing to higher frequency regions; positive
674 /// values indicate the contrary.
675 int64_t FreqDiff;
676 /// Expected number of target regions impacted by the rematerialization,
677 /// scaled by the size of the register being rematerialized.
678 unsigned RegionImpact;
679
680 int64_t getFreqDiff(const FreqInfo &Freq) const;
681 };
682
683 /// Parent MBB to each region, in region order.
684 SmallVector<MachineBasicBlock *> RegionBB;
685 /// Register pressure targets for all regions.
686 SmallVector<GCNRPTarget> RPTargets;
687 /// Regions which are above the stage's RP target.
688 BitVector TargetRegions;
689 /// The target occupancy the set is trying to achieve. Empty when the
690 /// objective is spilling reduction.
691 std::optional<unsigned> TargetOcc;
692 /// Achieved occupancy *only* through rematerializations (pre-rescheduling).
693 unsigned AchievedOcc;
694 /// After successful stage initialization, indicates which regions should be
695 /// rescheduled.
696 BitVector RescheduleRegions;
697
698 /// List of rematerializable registers.
699 SmallVector<RematReg> RematRegs;
700
701 /// Holds enough information to rollback a rematerialization decision post
702 /// re-scheduling.
703 struct RollbackInfo {
704 /// The rematerializable register under consideration.
705 const RematReg *Remat;
706 /// The rematerialized MI replacing the original defining MI.
707 MachineInstr *RematMI;
708 /// Maps register machine operand indices to their original register.
709 SmallDenseMap<unsigned, Register, 4> RegMap;
710
711 RollbackInfo(const RematReg *Remat) : Remat(Remat) {}
712 };
713 /// List of rematerializations to rollback if rematerialization does not end
714 /// up being beneficial.
715 SmallVector<RollbackInfo> Rollbacks;
716
717 /// State of a region pre-re-scheduling but post-rematerializations that we
718 /// must keep to be able to revert re-scheduling effects.
719 struct RegionSchedRevert {
720 /// Region number;
721 unsigned RegionIdx;
722 /// Original instruction order (both debug and non-debug MIs).
723 std::vector<MachineInstr *> OrigMIOrder;
724 /// Maximum pressure recorded in the region.
725 GCNRegPressure MaxPressure;
726
727 RegionSchedRevert(unsigned RegionIdx, ArrayRef<MachineInstr *> OrigMIOrder,
728 const GCNRegPressure &MaxPressure)
729 : RegionIdx(RegionIdx), OrigMIOrder(OrigMIOrder),
730 MaxPressure(MaxPressure) {}
731 };
732 /// After re-scheduling, contains pre-re-scheduling data for all re-scheduled
733 /// regions.
734 SmallVector<RegionSchedRevert> RegionReverts;
735
736 /// Returns the occupancy the stage is trying to achieve.
737 unsigned getStageTargetOccupancy() const;
738
739 /// Determines the stage's objective (increasing occupancy or reducing
740 /// spilling, set in \ref TargetOcc). Defines \ref RPTargets in all regions to
741 /// achieve that objective and mark those that don't achieve it in \ref
742 /// TargetRegions. Returns whether there is any target region.
743 bool setObjective();
744
745 /// Unsets target regions in \p Regions whose RP target has been reached.
746 void unsetSatisfiedRPTargets(const BitVector &Regions);
747
748 /// Fully recomputes RP from the DAG in \p Regions. Among those regions, sets
749 /// again all \ref TargetRegions that were optimistically marked as satisfied
750 /// but are actually not, and returns whether there were any such regions.
751 bool updateAndVerifyRPTargets(const BitVector &Regions);
752
753 /// Collects all rematerializable registers and appends them to \ref
754 /// RematRegs. \p MIRegion maps MIs to their region. Returns whether any
755 /// rematerializable register was found.
756 bool collectRematRegs(const DenseMap<MachineInstr *, unsigned> &MIRegion);
757
758 /// Deletes all rematerialized MIs from the MIR when they were kept around for
759 /// potential rollback.
760 void commitRematerializations() const;
761
762 /// Whether the MI is rematerializable
763 bool isReMaterializable(const MachineInstr &MI);
764
765 /// If remat alone did not increase occupancy to the target one, rollbacks all
766 /// rematerializations and resets live-ins/RP in all regions impacted by the
767 /// stage to their pre-stage values.
768 void finalizeGCNSchedStage() override;
769
770public:
771 bool initGCNSchedStage() override;
772
773 bool initGCNRegion() override;
774
775 void finalizeGCNRegion() override;
776
777 bool shouldRevertScheduling(unsigned WavesAfter) override;
778
779 PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
780 : GCNSchedStage(StageID, DAG), TargetRegions(DAG.Regions.size()),
781 RescheduleRegions(DAG.Regions.size()) {
782 const unsigned NumRegions = DAG.Regions.size();
783 RPTargets.reserve(N: NumRegions);
784 RegionBB.reserve(N: NumRegions);
785 }
786};
787
/// Initial scheduling stage used by the max-ILP strategy.
class ILPInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  ILPInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};
795
/// Initial scheduling stage used by the max-memory-clause strategy.
class MemoryClauseInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  MemoryClauseInitialScheduleStage(GCNSchedStageID StageID,
                                   GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};
804
/// Post-RA scheduling DAG for GCN targets.
class GCNPostScheduleDAGMILive final : public ScheduleDAGMI {
private:
  // Saved scheduler DAG mutations (managed by the implementation).
  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  // Whether the function contains IGLP instructions.
  bool HasIGLPInstrs = false;

public:
  void schedule() override;

  void finalizeSchedule() override;

  GCNPostScheduleDAGMILive(MachineSchedContext *C,
                           std::unique_ptr<MachineSchedStrategy> S,
                           bool RemoveKillFlags);
};
820
821} // End namespace llvm
822
823#endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
824