//===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
#define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H

#include "GCNRegPressure.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineScheduler.h"

namespace llvm {

class SIMachineFunctionInfo;
class SIRegisterInfo;
class GCNSubtarget;
class GCNSchedStage;

enum class GCNSchedStageID : unsigned {
  OccInitialSchedule = 0,
  RewriteMFMAForm = 1,
  UnclusteredHighRPReschedule = 2,
  ClusteredLowOccupancyReschedule = 3,
  PreRARematerialize = 4,
  ILPInitialSchedule = 5,
  MemoryClauseInitialSchedule = 6
};

#ifndef NDEBUG
raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
#endif

/// This is a minimal scheduler strategy. The main difference between this
/// and the GenericScheduler is that GCNSchedStrategy uses different
/// heuristics to determine excess/critical pressure sets.
class GCNSchedStrategy : public GenericScheduler {
protected:
  SUnit *pickNodeBidirectional(bool &IsTopNode, bool &PickedPending);

  void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
                         const RegPressureTracker &RPTracker,
                         SchedCandidate &Cand, bool &IsPending,
                         bool IsBottomUp);

  void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
                     const RegPressureTracker &RPTracker,
                     const SIRegisterInfo *SRI, unsigned SGPRPressure,
                     unsigned VGPRPressure, bool IsBottomUp);

  /// Evaluates instructions in the pending queue using a subset of scheduling
  /// heuristics.
  ///
  /// Instructions that cannot be issued due to hardware constraints are placed
  /// in the pending queue rather than the available queue, making them
  /// normally invisible to scheduling heuristics. However, in certain
  /// scenarios (such as avoiding register spilling), it may be beneficial to
  /// consider scheduling these not-yet-ready instructions.
  bool tryPendingCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                           SchedBoundary *Zone) const;

  void printCandidateDecision(const SchedCandidate &Current,
                              const SchedCandidate &Preferred);

  std::vector<unsigned> Pressure;

  std::vector<unsigned> MaxPressure;

  unsigned SGPRExcessLimit;

  unsigned VGPRExcessLimit;

  unsigned TargetOccupancy;

  MachineFunction *MF;

  // Scheduling stages for this strategy.
  SmallVector<GCNSchedStageID, 4> SchedStages;

  // Pointer to the current SchedStageID.
  SmallVectorImpl<GCNSchedStageID>::iterator CurrentStage = nullptr;

  // GCN RP tracker for top-down scheduling.
  mutable GCNDownwardRPTracker DownwardTracker;

  // GCN RP tracker for bottom-up scheduling.
  mutable GCNUpwardRPTracker UpwardTracker;

public:
  // schedule() has seen register pressure over the critical limits and had to
  // track register pressure for actual scheduling heuristics.
  bool HasHighPressure;

  // Schedule known to have excess register pressure. Be more conservative in
  // increasing ILP and preserving VGPRs.
  bool KnownExcessRP = false;

  // An error margin is necessary because the generic RP tracker is not fully
  // accurate; it can be adjusted upward when tuning heuristics to try to
  // reduce register pressure more aggressively.
  unsigned ErrorMargin = 3;

  // Bias for SGPR limits under high register pressure.
  const unsigned HighRPSGPRBias = 7;

  // Bias for VGPR limits under high register pressure.
  const unsigned HighRPVGPRBias = 7;

  unsigned SGPRCriticalLimit;

  unsigned VGPRCriticalLimit;

  unsigned SGPRLimitBias = 0;

  unsigned VGPRLimitBias = 0;

  GCNSchedStrategy(const MachineSchedContext *C);

  SUnit *pickNode(bool &IsTopNode) override;

  void schedNode(SUnit *SU, bool IsTopNode) override;

  void initialize(ScheduleDAGMI *DAG) override;

  unsigned getTargetOccupancy() { return TargetOccupancy; }

  void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }

  GCNSchedStageID getCurrentStage();

  // Advances stage. Returns true if there are remaining stages.
  bool advanceStage();

  bool hasNextStage() const;

  GCNSchedStageID getNextStage() const;

  GCNDownwardRPTracker *getDownwardTracker() { return &DownwardTracker; }

  GCNUpwardRPTracker *getUpwardTracker() { return &UpwardTracker; }
};
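
// Sketch of how the stage interface above is typically driven (a hedged
// outline; the authoritative loop is GCNScheduleDAGMILive::runSchedStages,
// defined in the .cpp file):
//   GCNSchedStrategy &S = ...;
//   while (S.advanceStage()) {
//     std::unique_ptr<GCNSchedStage> Stage =
//         createSchedStage(S.getCurrentStage());
//     if (Stage->initGCNSchedStage()) {
//       // ...schedule every region under this stage...
//       Stage->finalizeGCNSchedStage();
//     }
//   }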

/// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
/// the maximum number of waves per SIMD).
class GCNMaxOccupancySchedStrategy final : public GCNSchedStrategy {
public:
  GCNMaxOccupancySchedStrategy(const MachineSchedContext *C,
                               bool IsLegacyScheduler = false);
};

/// The goal of this scheduling strategy is to maximize ILP for a single wave
/// (i.e. latency hiding).
class GCNMaxILPSchedStrategy final : public GCNSchedStrategy {
protected:
  bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                    SchedBoundary *Zone) const override;

public:
  GCNMaxILPSchedStrategy(const MachineSchedContext *C);
};

/// The goal of this scheduling strategy is to maximize memory clauses for a
/// single wave.
class GCNMaxMemoryClauseSchedStrategy final : public GCNSchedStrategy {
protected:
  bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                    SchedBoundary *Zone) const override;

public:
  GCNMaxMemoryClauseSchedStrategy(const MachineSchedContext *C);
};

class ScheduleMetrics {
  unsigned ScheduleLength;
  unsigned BubbleCycles;

public:
  ScheduleMetrics() = default;
  ScheduleMetrics(unsigned L, unsigned BC)
      : ScheduleLength(L), BubbleCycles(BC) {}
  unsigned getLength() const { return ScheduleLength; }
  unsigned getBubbles() const { return BubbleCycles; }
  unsigned getMetric() const {
    unsigned Metric = (BubbleCycles * ScaleFactor) / ScheduleLength;
    // The metric is zero if bubbles account for less than 1% of the schedule,
    // which is too small to be meaningful. So, return 1.
    return Metric ? Metric : 1;
  }
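  // Worked example, assuming a ScaleFactor of 100 (the actual value is
  // defined in the .cpp file): a 400-cycle schedule with 12 bubble cycles
  // yields (12 * 100) / 400 = 3, i.e. stalls account for ~3% of the schedule.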
  static const unsigned ScaleFactor;
};

inline raw_ostream &operator<<(raw_ostream &OS, const ScheduleMetrics &Sm) {
  OS << "\n Schedule Metric (scaled by " << ScheduleMetrics::ScaleFactor
     << " ) is: " << Sm.getMetric() << " [ " << Sm.getBubbles() << "/"
     << Sm.getLength() << " ]\n";
  return OS;
}

class GCNScheduleDAGMILive;
class RegionPressureMap {
  GCNScheduleDAGMILive *DAG;
  // The live-in/live-out register sets, indexed by the first or last MI in
  // the region before scheduling.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> RegionLiveRegMap;
  // The mapping of RegionIdx to key instruction.
  DenseMap<unsigned, MachineInstr *> IdxToInstruction;
  // Whether we are calculating LiveOuts or LiveIns.
  bool IsLiveOut;

public:
  RegionPressureMap() = default;
  RegionPressureMap(GCNScheduleDAGMILive *GCNDAG, bool LiveOut)
      : DAG(GCNDAG), IsLiveOut(LiveOut) {}
  // Build the Instr->LiveReg and RegionIdx->Instr maps.
  void buildLiveRegMap();

  // Retrieve the LiveReg for a given RegionIdx.
  GCNRPTracker::LiveRegSet &getLiveRegsForRegionIdx(unsigned RegionIdx) {
    assert(IdxToInstruction.contains(RegionIdx));
    MachineInstr *Key = IdxToInstruction[RegionIdx];
    return RegionLiveRegMap[Key];
  }
};
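
// Example use (sketch): a friend scheduling stage can query the live-out set
// of the region it is currently working on through the DAG's RegionLiveOuts
// member:
//   GCNRPTracker::LiveRegSet &LiveOuts =
//       DAG.RegionLiveOuts.getLiveRegsForRegionIdx(RegionIdx);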

/// A region's boundaries, i.e. a pair of instruction bundle iterators. The
/// lower boundary is inclusive, the upper boundary is exclusive.
using RegionBoundaries =
    std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>;
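
// For example, a region covering the first three instructions of a basic
// block MBB is {MBB->begin(), std::next(MBB->begin(), 3)}.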

class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
  friend class GCNSchedStage;
  friend class OccInitialScheduleStage;
  friend class RewriteMFMAFormStage;
  friend class UnclusteredHighRPStage;
  friend class ClusteredLowOccStage;
  friend class PreRARematStage;
  friend class ILPInitialScheduleStage;
  friend class RegionPressureMap;

  const GCNSubtarget &ST;

  SIMachineFunctionInfo &MFI;

  // Occupancy target at the beginning of the function scheduling cycle.
  unsigned StartingOccupancy;

  // Minimal real occupancy recorded for the function.
  unsigned MinOccupancy;

  // Vector of regions recorded for later rescheduling.
  SmallVector<RegionBoundaries, 32> Regions;

  // Record regions with high register pressure.
  BitVector RegionsWithHighRP;

  // Record regions with excess register pressure over the physical register
  // limit. Register pressure in these regions will usually result in
  // spilling.
  BitVector RegionsWithExcessRP;

  // Regions that have IGLP instructions (SCHED_GROUP_BARRIER or IGLP_OPT).
  BitVector RegionsWithIGLPInstrs;

  // Region live-in cache.
  SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns;

  // Region pressure cache.
  SmallVector<GCNRegPressure, 32> Pressure;

  // Temporary basic block live-in cache.
  DenseMap<const MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns;

  // The map of the initial first region instruction to region live-in
  // registers.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;

  // Calculate the map of the initial first region instruction to region
  // live-in registers.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getRegionLiveInMap() const;

  // Calculate the map of the initial last region instruction to region
  // live-out registers.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
  getRegionLiveOutMap() const;

  // The live-out registers per region. These are internally stored as a map
  // of the initial last region instruction to region live-out registers, but
  // can be retrieved by RegionIdx with calls to getLiveRegsForRegionIdx.
  RegionPressureMap RegionLiveOuts;

  // Return current region pressure.
  GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;

  // Compute and cache live-ins and pressure for all regions in the block.
  void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);

  /// Makes the scheduler try to achieve an occupancy of \p TargetOccupancy.
  void setTargetOccupancy(unsigned TargetOccupancy);

  void runSchedStages();

  std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);

  void deleteMI(unsigned RegionIdx, MachineInstr *MI);

public:
  GCNScheduleDAGMILive(MachineSchedContext *C,
                       std::unique_ptr<MachineSchedStrategy> S);

  void schedule() override;

  void finalizeSchedule() override;
};

// GCNSchedStrategy applies multiple scheduling stages to a function.
class GCNSchedStage {
protected:
  GCNScheduleDAGMILive &DAG;

  GCNSchedStrategy &S;

  MachineFunction &MF;

  SIMachineFunctionInfo &MFI;

  const GCNSubtarget &ST;

  const GCNSchedStageID StageID;

  // The current block being scheduled.
  MachineBasicBlock *CurrentMBB = nullptr;

  // Current region index.
  unsigned RegionIdx = 0;

  // Record the original order of instructions before scheduling.
  std::vector<MachineInstr *> Unsched;

  // RP before scheduling the current region.
  GCNRegPressure PressureBefore;

  // RP after scheduling the current region.
  GCNRegPressure PressureAfter;

  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG);

public:
  // Initialize state for a scheduling stage. Returns false if the current
  // stage should be skipped.
  virtual bool initGCNSchedStage();

  // Finalize state after finishing a scheduling pass on the function.
  virtual void finalizeGCNSchedStage();

  // Setup for scheduling a region. Returns false if the current region should
  // be skipped.
  virtual bool initGCNRegion();

  // Finalize state after scheduling a region.
  virtual void finalizeGCNRegion();

  // Track whether a new region is also a new MBB.
  void setupNewBlock();

  // Check the result of scheduling.
  void checkScheduling();

  // Computes the given schedule's virtual execution time in clocks.
  ScheduleMetrics getScheduleMetrics(const std::vector<SUnit> &InputSchedule);
  ScheduleMetrics getScheduleMetrics(const GCNScheduleDAGMILive &DAG);
  unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
                                  DenseMap<unsigned, unsigned> &ReadyCycles,
                                  const TargetSchedModel &SM);

  // Returns true if scheduling should be reverted.
  virtual bool shouldRevertScheduling(unsigned WavesAfter);

  // Returns true if the current region has known excess pressure.
  bool isRegionWithExcessRP() const {
    return DAG.RegionsWithExcessRP[RegionIdx];
  }

  // The region number this stage is currently working on.
  unsigned getRegionIdx() { return RegionIdx; }

  // Returns true if the new schedule may result in more spilling.
  bool mayCauseSpilling(unsigned WavesAfter);

  /// Sets the schedule of region \p RegionIdx in block \p MBB to \p MIOrder.
  /// The MIs in \p MIOrder must be exactly the same as the ones currently
  /// existing inside the region, only in a different order that honors
  /// def-use chains.
  void modifyRegionSchedule(unsigned RegionIdx, MachineBasicBlock *MBB,
                            ArrayRef<MachineInstr *> MIOrder);

  void advanceRegion() { RegionIdx++; }

  virtual ~GCNSchedStage() = default;
};
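
// Sketch of a stage's per-region lifecycle (hedged; the authoritative driver
// is GCNScheduleDAGMILive::runSchedStages in the .cpp file):
//   for each scheduling region:
//     if (Stage->initGCNRegion()) {
//       // schedule the region; revert decisions go through
//       // Stage->shouldRevertScheduling(WavesAfter)
//       Stage->finalizeGCNRegion();
//     }
//     // either path eventually moves on to the next region via
//     // Stage->advanceRegion()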

class OccInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  OccInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class RewriteMFMAFormStage : public GCNSchedStage {
private:
  // Record regions with excess ArchVGPR register pressure over the physical
  // register limit. Register pressure in these regions will usually result in
  // spilling.
  BitVector RegionsWithExcessArchVGPR;

  const SIInstrInfo *TII;
  const SIRegisterInfo *SRI;

  /// Do a speculative rewrite and collect copy locations. The speculative
  /// rewrite allows us to calculate the RP of the code after the rewrite, and
  /// the copy locations allow us to calculate the total cost of copies
  /// required for the rewrite. Stores the rewritten instructions in \p
  /// RewriteCands, the copy locations for uses (of the MFMA result) in \p
  /// CopyForUse and the copy locations for defs (of the MFMA operands) in \p
  /// CopyForDef.
  bool
  initHeuristics(std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
                 DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
                 SmallPtrSetImpl<MachineInstr *> &CopyForDef);

  /// Calculate the rewrite cost and undo the state change (e.g. rewriting)
  /// done in initHeuristics. Uses \p CopyForUse and \p CopyForDef to calculate
  /// copy costs, and \p RewriteCands to undo rewriting.
  int64_t getRewriteCost(
      const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
      const DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
      const SmallPtrSetImpl<MachineInstr *> &CopyForDef);

  /// Do the final rewrite on \p RewriteCands and insert any needed copies.
  bool
  rewrite(const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands);

  /// \returns true if this MI is a rewrite candidate.
  bool isRewriteCandidate(MachineInstr *MI) const;

  /// Finds all the reaching defs of \p UseMO and stores the SlotIndexes in \p
  /// DefIdxs.
  void findReachingDefs(MachineOperand &UseMO, LiveIntervals *LIS,
                        SmallVectorImpl<SlotIndex> &DefIdxs);

  /// Finds all the reaching uses of \p DefMI and stores the use operands in
  /// \p ReachingUses.
  void findReachingUses(MachineInstr *DefMI, LiveIntervals *LIS,
                        SmallVectorImpl<MachineOperand *> &ReachingUses);

public:
  bool initGCNSchedStage() override;

  RewriteMFMAFormStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class UnclusteredHighRPStage : public GCNSchedStage {
private:
  // Save the initial occupancy before starting this stage.
  unsigned InitialOccupancy;
  // Save the temporary target occupancy before starting this stage.
  unsigned TempTargetOccupancy;
  // Track whether any region was scheduled by this stage.
  bool IsAnyRegionScheduled;

public:
  bool initGCNSchedStage() override;

  void finalizeGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  UnclusteredHighRPStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

// Retry function scheduling if the resulting occupancy is lower than the
// occupancy assumed by other scheduling passes. This gives more freedom to
// schedule blocks with low register pressure.
class ClusteredLowOccStage : public GCNSchedStage {
public:
  bool initGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

/// Attempts to reduce function spilling or, if there is no spilling, to
/// increase function occupancy by one with respect to register usage by
/// sinking rematerializable instructions to their use. When the stage
/// estimates that reducing spilling or increasing occupancy is possible, it
/// tries to rematerialize as few registers as possible to reduce potential
/// negative effects on function latency.
///
/// The stage only supports rematerializing registers that meet all of the
/// following constraints.
/// 1. The register is virtual and has a single defining instruction.
/// 2. The single defining instruction is either deemed rematerializable by the
///    target-independent logic, or if not, has no non-constant and
///    non-ignorable physical register use.
/// 3. The register has no virtual register use whose live range would be
///    extended by the rematerialization.
/// 4. The register has a single non-debug user in a different region from its
///    defining region.
/// 5. The register is not used by or using another register that is going to
///    be rematerialized.
class PreRARematStage : public GCNSchedStage {
private:
  /// A rematerializable register.
  struct RematReg {
    /// Single MI defining the rematerializable register.
    MachineInstr *DefMI;
    /// Single user of the rematerializable register.
    MachineInstr *UseMI;
    /// Regions in which the register is live-in/live-out/live anywhere.
    BitVector LiveIn, LiveOut, Live;
    /// The rematerializable register's lane bitmask.
    LaneBitmask Mask;
    /// Defining and using regions.
    unsigned DefRegion, UseRegion;

    RematReg(MachineInstr *DefMI, MachineInstr *UseMI,
             GCNScheduleDAGMILive &DAG,
             const DenseMap<MachineInstr *, unsigned> &MIRegion);

    /// Returns the rematerializable register. Do not call after deleting the
    /// original defining instruction.
    Register getReg() const { return DefMI->getOperand(0).getReg(); }

    /// Determines whether this rematerialization may be beneficial in at
    /// least one target region.
    bool maybeBeneficial(const BitVector &TargetRegions,
                         ArrayRef<GCNRPTarget> RPTargets) const;

    /// Determines if the register is both unused and live-through in region
    /// \p I. This guarantees that rematerializing it will reduce RP in the
    /// region.
    bool isUnusedLiveThrough(unsigned I) const {
      assert(I < Live.size() && "region index out of range");
      return LiveIn[I] && LiveOut[I] && I != UseRegion;
    }

    /// Updates internal structures following an MI rematerialization. Part of
    /// the stage instead of the DAG because it makes assumptions that are
    /// specific to the rematerialization process.
    void insertMI(unsigned RegionIdx, MachineInstr *RematMI,
                  GCNScheduleDAGMILive &DAG) const;
  };

  /// A scored rematerialization candidate. Higher scores indicate more
  /// beneficial rematerializations. A null score indicates the
  /// rematerialization is not helpful for reducing RP in target regions.
  struct ScoredRemat {
    /// The rematerializable register under consideration.
    RematReg *Remat;

    /// Execution frequency information required by scoring heuristics.
    /// Frequencies are scaled down if they are high to avoid
    /// overflow/underflow when combining them.
    struct FreqInfo {
      /// Per-region execution frequencies. 0 when unknown.
      SmallVector<uint64_t> Regions;
      /// Minimum and maximum observed frequencies.
      uint64_t MinFreq, MaxFreq;

      FreqInfo(MachineFunction &MF, const GCNScheduleDAGMILive &DAG);

    private:
      static const uint64_t ScaleFactor = 1024;
    };

    /// This only initializes state-independent characteristics of \p Remat,
    /// not the actual score.
    ScoredRemat(RematReg *Remat, const FreqInfo &Freq,
                const GCNScheduleDAGMILive &DAG);

    /// Updates the rematerialization's score w.r.t. the current \p RPTargets.
    /// \p Freq provides the execution frequency of each region.
    void update(const BitVector &TargetRegions, ArrayRef<GCNRPTarget> RPTargets,
                const FreqInfo &Freq, bool ReduceSpill);

    /// Returns whether the current score is null, indicating the
    /// rematerialization is useless.
    bool hasNullScore() const { return !RegionImpact; }

    /// Compares score components of non-null scores pair-wise. A null score
    /// is always strictly lesser than a non-null score.
    bool operator<(const ScoredRemat &O) const {
      if (hasNullScore())
        return !O.hasNullScore();
      if (O.hasNullScore())
        return false;
      if (MaxFreq != O.MaxFreq)
        return MaxFreq < O.MaxFreq;
      if (FreqDiff != O.FreqDiff)
        return FreqDiff < O.FreqDiff;
      if (RegionImpact != O.RegionImpact)
        return RegionImpact < O.RegionImpact;
      // Break ties using pointer to rematerializable register.
      // Rematerializable registers are collected in instruction order so,
      // within the same region, this will prefer registers defined earlier
      // that have longer live ranges in their defining region (since the
      // registers we consider are always live-out in their defining region).
      return Remat > O.Remat;
    }
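
    // Hypothetical worked example: for candidates A and B with score
    // components (MaxFreq, FreqDiff, RegionImpact) A = (8, -2, 4) and
    // B = (4, 10, 16), A compares greater than B (i.e. A is preferred)
    // because MaxFreq dominates; FreqDiff and RegionImpact only break ties.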

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    Printable print() const;
#endif

  private:
    /// Number of 32-bit registers this rematerialization covers.
    unsigned NumRegs;

    // The three members below are the scoring components, top to bottom from
    // most important to least important when comparing candidates.

    /// Frequency of the impacted target region with the highest known
    /// frequency. This only matters when the stage is trying to reduce
    /// spilling, so it is always 0 when it is not.
    uint64_t MaxFreq;
    /// Frequency difference between defining and using regions. Negative
    /// values indicate we are rematerializing to higher frequency regions;
    /// positive values indicate the contrary.
    int64_t FreqDiff;
    /// Expected number of target regions impacted by the rematerialization,
    /// scaled by the size of the register being rematerialized.
    unsigned RegionImpact;

    unsigned getNumRegs(const GCNScheduleDAGMILive &DAG) const;

    int64_t getFreqDiff(const FreqInfo &Freq) const;
  };

  /// Parent MBB to each region, in region order.
  SmallVector<MachineBasicBlock *> RegionBB;
  /// Register pressure targets for all regions.
  SmallVector<GCNRPTarget> RPTargets;
  /// Regions which are above the stage's RP target.
  BitVector TargetRegions;
  /// The target occupancy the stage is trying to achieve. Empty when the
  /// objective is spilling reduction.
  std::optional<unsigned> TargetOcc;
  /// Achieved occupancy *only* through rematerializations (pre-rescheduling).
  unsigned AchievedOcc;
  /// After successful stage initialization, indicates which regions should be
  /// rescheduled.
  BitVector RescheduleRegions;

  /// List of rematerializable registers.
  SmallVector<RematReg> RematRegs;

  /// Holds enough information to roll back a rematerialization decision
  /// post-rescheduling.
  struct RollbackInfo {
    /// The rematerializable register under consideration.
    const RematReg *Remat;
    /// The rematerialized MI replacing the original defining MI.
    MachineInstr *RematMI;
    /// Maps register machine operand indices to their original register.
    SmallDenseMap<unsigned, Register, 4> RegMap;

    RollbackInfo(const RematReg *Remat) : Remat(Remat) {}
  };
  /// List of rematerializations to roll back if rematerialization does not
  /// end up being beneficial.
  SmallVector<RollbackInfo> Rollbacks;

  /// State of a region pre-rescheduling but post-rematerialization that we
  /// must keep to be able to revert rescheduling effects.
  struct RegionSchedRevert {
    /// Region number.
    unsigned RegionIdx;
    /// Original instruction order (both debug and non-debug MIs).
    std::vector<MachineInstr *> OrigMIOrder;
    /// Maximum pressure recorded in the region.
    GCNRegPressure MaxPressure;

    RegionSchedRevert(unsigned RegionIdx, ArrayRef<MachineInstr *> OrigMIOrder,
                      const GCNRegPressure &MaxPressure)
        : RegionIdx(RegionIdx), OrigMIOrder(OrigMIOrder),
          MaxPressure(MaxPressure) {}
  };
  /// After rescheduling, contains pre-rescheduling data for all rescheduled
  /// regions.
  SmallVector<RegionSchedRevert> RegionReverts;

  /// Returns the occupancy the stage is trying to achieve.
  unsigned getStageTargetOccupancy() const;

  /// Determines the stage's objective (increasing occupancy or reducing
  /// spilling, set in \ref TargetOcc). Defines \ref RPTargets in all regions
  /// to achieve that objective and marks those that don't achieve it in \ref
  /// TargetRegions. Returns whether there is any target region.
  bool setObjective();

  /// Unsets target regions in \p Regions whose RP target has been reached.
  void unsetSatisifedRPTargets(const BitVector &Regions);

  /// Fully recomputes RP from the DAG in \p Regions. Among those regions, sets
  /// again all \ref TargetRegions that were optimistically marked as satisfied
  /// but are actually not, and returns whether there were any such regions.
  bool updateAndVerifyRPTargets(const BitVector &Regions);

  /// Collects all rematerializable registers and appends them to \ref
  /// RematRegs. \p MIRegion maps MIs to their region. Returns whether any
  /// rematerializable register was found.
  bool collectRematRegs(const DenseMap<MachineInstr *, unsigned> &MIRegion);

  /// Rematerializes \p Remat. This removes the rematerialized register from
  /// live-in/out lists in the DAG and updates RP targets in all affected
  /// regions, which are also marked in \ref RescheduleRegions. Regions in
  /// which RP savings are not guaranteed are set in \p RecomputeRP. When \p
  /// Rollback is non-null, fills it with the information required to roll
  /// back the rematerialization post-rescheduling.
  void rematerialize(const RematReg &Remat, BitVector &RecomputeRP,
                     RollbackInfo *Rollback);

  /// Rolls back the rematerialization decision represented by \p Rollback.
  /// This updates live-in/out lists in the DAG but does not update cached
  /// register pressures.
  void rollback(const RollbackInfo &Rollback) const;

  /// Deletes all rematerialized MIs from the MIR when they were kept around
  /// for potential rollback.
  void commitRematerializations() const;

  /// Whether the MI is rematerializable.
  bool isReMaterializable(const MachineInstr &MI);

  /// If rematerialization alone did not increase occupancy to the target,
  /// rolls back all rematerializations and resets live-ins/RP in all regions
  /// impacted by the stage to their pre-stage values.
  void finalizeGCNSchedStage() override;

public:
  bool initGCNSchedStage() override;

  bool initGCNRegion() override;

  void finalizeGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG), TargetRegions(DAG.Regions.size()),
        RescheduleRegions(DAG.Regions.size()) {
    const unsigned NumRegions = DAG.Regions.size();
    RPTargets.reserve(NumRegions);
    RegionBB.reserve(NumRegions);
  }
};

class ILPInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  ILPInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class MemoryClauseInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  MemoryClauseInitialScheduleStage(GCNSchedStageID StageID,
                                   GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class GCNPostScheduleDAGMILive final : public ScheduleDAGMI {
private:
  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  bool HasIGLPInstrs = false;

public:
  void schedule() override;

  void finalizeSchedule() override;

  GCNPostScheduleDAGMILive(MachineSchedContext *C,
                           std::unique_ptr<MachineSchedStrategy> S,
                           bool RemoveKillFlags);
};

} // End namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H