GCNSchedStrategy.h source code [llvm_projects/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h]

1	//===-- GCNSchedStrategy.h - GCN Scheduler Strategy -- C++ --------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	/// \file
10	//
11	//===----------------------------------------------------------------------===//
12
13	#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
14	#define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
15
16	#include "GCNRegPressure.h"
17	#include "llvm/ADT/DenseMap.h"
18	#include "llvm/ADT/MapVector.h"
19	#include "llvm/CodeGen/MachineBasicBlock.h"
20	#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
21	#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
22	#include "llvm/CodeGen/MachineInstr.h"
23	#include "llvm/CodeGen/MachineScheduler.h"
24
25	namespace llvm {
26
27	class SIMachineFunctionInfo;
28	class SIRegisterInfo;
29	class GCNSubtarget;
30	class GCNSchedStage;
31
/// Identifiers of the scheduling stages used by the GCN scheduling strategies.
/// Each enumerator has an explicit numeric value.
enum class GCNSchedStageID : unsigned {
  OccInitialSchedule = 0,
  RewriteMFMAForm = 1,
  UnclusteredHighRPReschedule = 2,
  ClusteredLowOccupancyReschedule = 3,
  PreRARematerialize = 4,
  ILPInitialSchedule = 5,
  MemoryClauseInitialSchedule = 6
};
41
42	#ifndef NDEBUG
43	raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
44	#endif
45
46	/// This is a minimal scheduler strategy. The main difference between this
47	/// and the GenericScheduler is that GCNSchedStrategy uses different
48	/// heuristics to determine excess/critical pressure sets.
49	class GCNSchedStrategy : public GenericScheduler {
50	protected:
51	SUnit pickNodeBidirectional(bool* &IsTopNode, bool &PickedPending);
52
53	void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
54	const RegPressureTracker &RPTracker,
55	SchedCandidate &Cand, bool &IsPending,
56	bool IsBottomUp);
57
58	void initCandidate(SchedCandidate &Cand, SUnit SU, bool* AtTop,
59	const RegPressureTracker &RPTracker,
60	const SIRegisterInfo SRI, unsigned* SGPRPressure,
61	unsigned VGPRPressure, bool IsBottomUp);
62
63	/// Evaluates instructions in the pending queue using a subset of scheduling
64	/// heuristics.
65	///
66	/// Instructions that cannot be issued due to hardware constraints are placed
67	/// in the pending queue rather than the available queue, making them normally
68	/// invisible to scheduling heuristics. However, in certain scenarios (such as
69	/// avoiding register spilling), it may be beneficial to consider scheduling
70	/// these not-yet-ready instructions.
71	bool tryPendingCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
72	SchedBoundary Zone) const*;
73
74	void printCandidateDecision(const SchedCandidate &Current,
75	const SchedCandidate &Preferred);
76
77	std::vector<unsigned> Pressure;
78
79	std::vector<unsigned> MaxPressure;
80
81	unsigned SGPRExcessLimit;
82
83	unsigned VGPRExcessLimit;
84
85	unsigned TargetOccupancy;
86
87	MachineFunction *MF;
88
89	// Scheduling stages for this strategy.
90	SmallVector<GCNSchedStageID, `4`> SchedStages;
91
92	// Pointer to the current SchedStageID.
93	SmallVectorImpl<GCNSchedStageID>::iterator CurrentStage = nullptr;
94
95	// GCN RP Tracker for top-down scheduling
96	mutable GCNDownwardRPTracker DownwardTracker;
97
98	// GCN RP Tracker for botttom-up scheduling
99	mutable GCNUpwardRPTracker UpwardTracker;
100
101	public:
102	// schedule() have seen register pressure over the critical limits and had to
103	// track register pressure for actual scheduling heuristics.
104	bool HasHighPressure;
105
106	// Schedule known to have excess register pressure. Be more conservative in
107	// increasing ILP and preserving VGPRs.
108	bool KnownExcessRP = false;
109
110	// An error margin is necessary because of poor performance of the generic RP
111	// tracker and can be adjusted up for tuning heuristics to try and more
112	// aggressively reduce register pressure.
113	unsigned ErrorMargin = `3`;
114
115	// Bias for SGPR limits under a high register pressure.
116	const unsigned HighRPSGPRBias = `7`;
117
118	// Bias for VGPR limits under a high register pressure.
119	const unsigned HighRPVGPRBias = `7`;
120
121	unsigned SGPRCriticalLimit;
122
123	unsigned VGPRCriticalLimit;
124
125	unsigned SGPRLimitBias = `0`;
126
127	unsigned VGPRLimitBias = `0`;
128
129	GCNSchedStrategy(const MachineSchedContext *C);
130
131	SUnit pickNode(bool* &IsTopNode) override;
132
133	void schedNode(SUnit SU, bool* IsTopNode) override;
134
135	void initialize(ScheduleDAGMI *DAG) override;
136
137	unsigned getTargetOccupancy() { return TargetOccupancy; }
138
139	void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }
140
141	GCNSchedStageID getCurrentStage();
142
143	// Advances stage. Returns true if there are remaining stages.
144	bool advanceStage();
145
146	bool hasNextStage() const;
147
148	GCNSchedStageID getNextStage() const;
149
150	GCNDownwardRPTracker getDownwardTracker() { return* &DownwardTracker; }
151
152	GCNUpwardRPTracker getUpwardTracker() { return* &UpwardTracker; }
153	};
154
155	/// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
156	/// maximum number of waves per simd).
157	class GCNMaxOccupancySchedStrategy final : public GCNSchedStrategy {
158	public:
159	GCNMaxOccupancySchedStrategy(const MachineSchedContext *C,
160	bool IsLegacyScheduler = false);
161	};
162
163	/// The goal of this scheduling strategy is to maximize ILP for a single wave
164	/// (i.e. latency hiding).
165	class GCNMaxILPSchedStrategy final : public GCNSchedStrategy {
166	protected:
167	bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
168	SchedBoundary Zone) const* override;
169
170	public:
171	GCNMaxILPSchedStrategy(const MachineSchedContext *C);
172	};
173
174	/// The goal of this scheduling strategy is to maximize memory clause for a
175	/// single wave.
176	class GCNMaxMemoryClauseSchedStrategy final : public GCNSchedStrategy {
177	protected:
178	bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
179	SchedBoundary Zone) const* override;
180
181	public:
182	GCNMaxMemoryClauseSchedStrategy(const MachineSchedContext *C);
183	};
184
/// Holds a schedule length and a bubble-cycle count, and derives from them a
/// metric scaled by ScaleFactor.
class ScheduleMetrics {
  // Zero-initialized so that a default-constructed object has well-defined
  // contents.
  unsigned ScheduleLength = 0;
  unsigned BubbleCycles = 0;

public:
  ScheduleMetrics() = default;
  ScheduleMetrics(unsigned L, unsigned BC)
      : ScheduleLength(L), BubbleCycles(BC) {}
  unsigned getLength() const { return ScheduleLength; }
  unsigned getBubbles() const { return BubbleCycles; }

  /// Returns the bubble count scaled by ScaleFactor and divided by the
  /// schedule length. The result is never smaller than 1.
  unsigned getMetric() const {
    // A zero-length schedule has no meaningful ratio; return the minimal
    // metric rather than dividing by zero.
    if (ScheduleLength == 0)
      return 1;
    unsigned Metric = (BubbleCycles * ScaleFactor) / ScheduleLength;
    // Metric is zero if the amount of bubbles is less than 1% which is too
    // small. So, return 1.
    return Metric ? Metric : 1;
  }
  static const unsigned ScaleFactor;
};
203
204	inline raw_ostream &operator<<(raw_ostream &OS, const ScheduleMetrics &Sm) {
205	dbgs() << "\n Schedule Metric (scaled by " << ScheduleMetrics::ScaleFactor
206	<< " ) is: " << Sm.getMetric() << " [ " << Sm.getBubbles() << "/"
207	<< Sm.getLength() << " ]\n";
208	return OS;
209	}
210
211	class GCNScheduleDAGMILive;
212	class RegionPressureMap {
213	GCNScheduleDAGMILive *DAG;
214	// The live in/out pressure as indexed by the first or last MI in the region
215	// before scheduling.
216	DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> RegionLiveRegMap;
217	// The mapping of RegionIDx to key instruction
218	DenseMap<unsigned, MachineInstr *> IdxToInstruction;
219	// Whether we are calculating LiveOuts or LiveIns
220	bool IsLiveOut;
221
222	public:
223	RegionPressureMap() = default;
224	RegionPressureMap(GCNScheduleDAGMILive GCNDAG, bool* LiveOut)
225	: DAG(GCNDAG), IsLiveOut(LiveOut) {}
226	// Build the Instr->LiveReg and RegionIdx->Instr maps
227	void buildLiveRegMap();
228
229	// Retrieve the LiveReg for a given RegionIdx
230	GCNRPTracker::LiveRegSet &getLiveRegsForRegionIdx(unsigned RegionIdx) {
231	assert(IdxToInstruction.contains(RegionIdx));
232	MachineInstr *Key = IdxToInstruction [RegionIdx];
233	return RegionLiveRegMap [Key];
234	}
235	};
236
237	/// A region's boundaries i.e. a pair of instruction bundle iterators. The lower
238	/// boundary is inclusive, the upper boundary is exclusive.
239	using RegionBoundaries =
240	std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>;
241
242	class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
243	friend class GCNSchedStage;
244	friend class OccInitialScheduleStage;
245	friend class RewriteMFMAFormStage;
246	friend class UnclusteredHighRPStage;
247	friend class ClusteredLowOccStage;
248	friend class PreRARematStage;
249	friend class ILPInitialScheduleStage;
250	friend class RegionPressureMap;
251
252	const GCNSubtarget &ST;
253
254	SIMachineFunctionInfo &MFI;
255
256	// Occupancy target at the beginning of function scheduling cycle.
257	unsigned StartingOccupancy;
258
259	// Minimal real occupancy recorder for the function.
260	unsigned MinOccupancy;
261
262	// Vector of regions recorder for later rescheduling
263	SmallVector<RegionBoundaries, `32`> Regions;
264
265	// Record regions with high register pressure.
266	BitVector RegionsWithHighRP;
267
268	// Record regions with excess register pressure over the physical register
269	// limit. Register pressure in these regions usually will result in spilling.
270	BitVector RegionsWithExcessRP;
271
272	// Regions that have IGLP instructions (SCHED_GROUP_BARRIER or IGLP_OPT).
273	BitVector RegionsWithIGLPInstrs;
274
275	// Region live-in cache.
276	SmallVector<GCNRPTracker::LiveRegSet, `32`> LiveIns;
277
278	// Region pressure cache.
279	SmallVector<GCNRegPressure, `32`> Pressure;
280
281	// Temporary basic block live-in cache.
282	DenseMap<const MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns;
283
284	// The map of the initial first region instruction to region live in registers
285	DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;
286
287	// Calculate the map of the initial first region instruction to region live in
288	// registers
289	DenseMap<MachineInstr , GCNRPTracker::LiveRegSet> getRegionLiveInMap() const*;
290
291	// Calculate the map of the initial last region instruction to region live out
292	// registers
293	DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
294	getRegionLiveOutMap() const;
295
296	// The live out registers per region. These are internally stored as a map of
297	// the initial last region instruction to region live out registers, but can
298	// be retreived with the regionIdx by calls to getLiveRegsForRegionIdx.
299	RegionPressureMap RegionLiveOuts;
300
301	// Return current region pressure.
302	GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;
303
304	// Compute and cache live-ins and pressure for all regions in block.
305	void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);
306
307	/// Makes the scheduler try to achieve an occupancy of \p TargetOccupancy.
308	void setTargetOccupancy(unsigned TargetOccupancy);
309
310	void runSchedStages();
311
312	std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);
313
314	void deleteMI(unsigned RegionIdx, MachineInstr *MI);
315
316	public:
317	GCNScheduleDAGMILive(MachineSchedContext *C,
318	std::unique_ptr<MachineSchedStrategy> S);
319
320	void schedule() override;
321
322	void finalizeSchedule() override;
323	};
324
325	// GCNSchedStrategy applies multiple scheduling stages to a function.
326	class GCNSchedStage {
327	protected:
328	GCNScheduleDAGMILive &DAG;
329
330	GCNSchedStrategy &S;
331
332	MachineFunction &MF;
333
334	SIMachineFunctionInfo &MFI;
335
336	const GCNSubtarget &ST;
337
338	const GCNSchedStageID StageID;
339
340	// The current block being scheduled.
341	MachineBasicBlock CurrentMBB = nullptr*;
342
343	// Current region index.
344	unsigned RegionIdx = `0`;
345
346	// Record the original order of instructions before scheduling.
347	std::vector<MachineInstr *> Unsched;
348
349	// RP before scheduling the current region.
350	GCNRegPressure PressureBefore;
351
352	// RP after scheduling the current region.
353	GCNRegPressure PressureAfter;
354
355	std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
356
357	GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG);
358
359	public:
360	// Initialize state for a scheduling stage. Returns false if the current stage
361	// should be skipped.
362	virtual bool initGCNSchedStage();
363
364	// Finalize state after finishing a scheduling pass on the function.
365	virtual void finalizeGCNSchedStage();
366
367	// Setup for scheduling a region. Returns false if the current region should
368	// be skipped.
369	virtual bool initGCNRegion();
370
371	// Finalize state after scheduling a region.
372	virtual void finalizeGCNRegion();
373
374	// Track whether a new region is also a new MBB.
375	void setupNewBlock();
376
377	// Check result of scheduling.
378	void checkScheduling();
379
380	// computes the given schedule virtual execution time in clocks
381	ScheduleMetrics getScheduleMetrics(const std::vector<SUnit> &InputSchedule);
382	ScheduleMetrics getScheduleMetrics(const GCNScheduleDAGMILive &DAG);
383	unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
384	DenseMap<unsigned, unsigned> &ReadyCycles,
385	const TargetSchedModel &SM);
386
387	// Returns true if scheduling should be reverted.
388	virtual bool shouldRevertScheduling(unsigned WavesAfter);
389
390	// Returns true if current region has known excess pressure.
391	bool isRegionWithExcessRP() const {
392	return DAG.RegionsWithExcessRP [RegionIdx];
393	}
394
395	// The region number this stage is currently working on
396	unsigned getRegionIdx() { return RegionIdx; }
397
398	// Returns true if the new schedule may result in more spilling.
399	bool mayCauseSpilling(unsigned WavesAfter);
400
401	/// Sets the schedule of region \p RegionIdx in block \p MBB to \p MIOrder.
402	/// The MIs in \p MIOrder must be exactly the same as the ones currently
403	/// existing inside the region, only in a different order that honors def-use
404	/// chains.
405	void modifyRegionSchedule(unsigned RegionIdx, MachineBasicBlock *MBB,
406	ArrayRef<MachineInstr *> MIOrder);
407
408	void advanceRegion() { RegionIdx++; }
409
410	virtual ~GCNSchedStage() = default;
411	};
412
413	class OccInitialScheduleStage : public GCNSchedStage {
414	public:
415	bool shouldRevertScheduling(unsigned WavesAfter) override;
416
417	OccInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
418	: GCNSchedStage (StageID, DAG) {}
419	};
420
421	class RewriteMFMAFormStage : public GCNSchedStage {
422	private:
423	// Record regions with excess archvgpr register pressure over the physical
424	// register limit. Register pressure in these regions usually will result in
425	// spilling.
426	BitVector RegionsWithExcessArchVGPR;
427
428	const SIInstrInfo *TII;
429	const SIRegisterInfo *SRI;
430
431	/// Do a speculative rewrite and collect copy locations. The speculative
432	/// rewrite allows us to calculate the RP of the code after the rewrite, and
433	/// the copy locations allow us to calculate the total cost of copies required
434	/// for the rewrite. Stores the rewritten instructions in \p RewriteCands ,
435	/// the copy locations for uses (of the MFMA result) in \p CopyForUse and the
436	/// copy locations for defs (of the MFMA operands) in \p CopyForDef
437	bool
438	initHeuristics(std::vector<std::pair<MachineInstr , unsigned*>> &RewriteCands,
439	DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
440	SmallPtrSetImpl<MachineInstr *> &CopyForDef);
441
442	/// Calculate the rewrite cost and undo the state change (e.g. rewriting) done
443	/// in initHeuristics. Uses \p CopyForUse and \p CopyForDef to calculate copy
444	/// costs, and \p RewriteCands to undo rewriting.
445	int64_t getRewriteCost(
446	const std::vector<std::pair<MachineInstr , unsigned*>> &RewriteCands,
447	const DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
448	const SmallPtrSetImpl<MachineInstr *> &CopyForDef);
449
450	/// Do the final rewrite on \p RewriteCands and insert any needed copies.
451	bool
452	rewrite(const std::vector<std::pair<MachineInstr , unsigned*>> &RewriteCands);
453
454	/// \returns true if this MI is a rewrite candidate.
455	bool isRewriteCandidate(MachineInstr MI) const*;
456
457	/// Finds all the reaching defs of \p UseMO and stores the SlotIndexes into \p
458	/// DefIdxs
459	void findReachingDefs(MachineOperand &UseMO, LiveIntervals *LIS,
460	SmallVectorImpl<SlotIndex> &DefIdxs);
461
462	/// Finds all the reaching uses of \p DefMI and stores the use operands in \p
463	/// ReachingUses
464	void findReachingUses(MachineInstr DefMI, LiveIntervals LIS,
465	SmallVectorImpl<MachineOperand *> &ReachingUses);
466
467	public:
468	bool initGCNSchedStage() override;
469
470	RewriteMFMAFormStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
471	: GCNSchedStage (StageID, DAG) {}
472	};
473
474	class UnclusteredHighRPStage : public GCNSchedStage {
475	private:
476	// Save the initial occupancy before starting this stage.
477	unsigned InitialOccupancy;
478	// Save the temporary target occupancy before starting this stage.
479	unsigned TempTargetOccupancy;
480	// Track whether any region was scheduled by this stage.
481	bool IsAnyRegionScheduled;
482
483	public:
484	bool initGCNSchedStage() override;
485
486	void finalizeGCNSchedStage() override;
487
488	bool initGCNRegion() override;
489
490	bool shouldRevertScheduling(unsigned WavesAfter) override;
491
492	UnclusteredHighRPStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
493	: GCNSchedStage (StageID, DAG) {}
494	};
495
496	// Retry function scheduling if we found resulting occupancy and it is
497	// lower than used for other scheduling passes. This will give more freedom
498	// to schedule low register pressure blocks.
499	class ClusteredLowOccStage : public GCNSchedStage {
500	public:
501	bool initGCNSchedStage() override;
502
503	bool initGCNRegion() override;
504
505	bool shouldRevertScheduling(unsigned WavesAfter) override;
506
507	ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
508	: GCNSchedStage (StageID, DAG) {}
509	};
510
511	/// Attempts to reduce function spilling or, if there is no spilling, to
512	/// increase function occupancy by one with respect to register usage by sinking
513	/// rematerializable instructions to their use. When the stage estimates that
514	/// reducing spilling or increasing occupancy is possible, it tries to
515	/// rematerialize as few registers as possible to reduce potential negative
516	/// effects on function latency.
517	///
518	/// The stage only supports rematerializing registers that meet all of the
519	/// following constraints.
520	/// 1. The register is virtual and has a single defining instruction.
521	/// 2. The single defining instruction is either deemed rematerializable by the
522	/// target-independent logic, or if not, has no non-constant and
523	/// non-ignorable physical register use.
524	/// 3 The register has no virtual register use whose live range would be
525	/// extended by the rematerialization.
526	/// 4. The register has a single non-debug user in a different region from its
527	/// defining region.
528	/// 5. The register is not used by or using another register that is going to be
529	/// rematerialized.
530	class PreRARematStage : public GCNSchedStage {
531	private:
532	/// A rematerializable register.
533	struct RematReg {
534	/// Single MI defining the rematerializable register.
535	MachineInstr *DefMI;
536	/// Single user of the rematerializable register.
537	MachineInstr *UseMI;
538	/// Regions in which the register is live-in/live-out/live anywhere.
539	BitVector LiveIn, LiveOut, Live;
540	/// The rematerializable register's lane bitmask.
541	LaneBitmask Mask;
542	/// Defining and using regions.
543	unsigned DefRegion, UseRegion;
544
545	RematReg(MachineInstr DefMI, MachineInstr UseMI,
546	GCNScheduleDAGMILive &DAG,
547	const DenseMap<MachineInstr , unsigned*> &MIRegion);
548
549	/// Returns the rematerializable register. Do not call after deleting the
550	/// original defining instruction.
551	Register getReg() const { return DefMI->getOperand(i: `0`).getReg(); }
552
553	/// Determines whether this rematerialization may be beneficial in at least
554	/// one target region.
555	bool maybeBeneficial(const BitVector &TargetRegions,
556	ArrayRef<GCNRPTarget> RPTargets) const;
557
558	/// Determines if the register is both unused and live-through in region \p
559	/// I. This guarantees that rematerializing it will reduce RP in the region.
560	bool isUnusedLiveThrough(unsigned I) const {
561	assert(I < Live.size() && "region index out of range");
562	return LiveIn [I] && LiveOut [I] && I != UseRegion;
563	}
564
565	/// Updates internal structures following a MI rematerialization. Part of
566	/// the stage instead of the DAG because it makes assumptions that are
567	/// specific to the rematerialization process.
568	void insertMI(unsigned RegionIdx, MachineInstr *RematMI,
569	GCNScheduleDAGMILive &DAG) const;
570	};
571
572	/// A scored rematerialization candidate. Higher scores indicate more
573	/// beneficial rematerializations. A null score indicate the rematerialization
574	/// is not helpful to reduce RP in target regions.
575	struct ScoredRemat {
576	/// The rematerializable register under consideration.
577	RematReg *Remat;
578
579	/// Execution frequency information required by scoring heuristics.
580	/// Frequencies are scaled down if they are high to avoid overflow/underflow
581	/// when combining them.
582	struct FreqInfo {
583	/// Per-region execution frequencies. 0 when unknown.
584	SmallVector<uint64_t> Regions;
585	/// Minimum and maximum observed frequencies.
586	uint64_t MinFreq, MaxFreq;
587
588	FreqInfo(MachineFunction &MF, const GCNScheduleDAGMILive &DAG);
589
590	private:
591	static const uint64_t ScaleFactor = `1024`;
592	};
593
594	/// This only initializes state-independent characteristics of \p Remat, not
595	/// the actual score.
596	ScoredRemat(RematReg Remat, const* FreqInfo &Freq,
597	const GCNScheduleDAGMILive &DAG);
598
599	/// Rematerializes the candidate and returns the new MI. This removes the
600	/// rematerialized register from live-in/out lists in the \p DAG and updates
601	/// \p RPTargets in all affected regions. Regions in which RP savings are
602	/// not guaranteed are set in \p RecomputeRP.
603	MachineInstr *rematerialize(BitVector &RecomputeRP,
604	SmallVectorImpl<GCNRPTarget> &RPTargets,
605	GCNScheduleDAGMILive &DAG) const;
606
607	/// Updates the rematerialization's score w.r.t. the current \p RPTargets.
608	/// \p RegionFreq indicates the frequency of each region
609	void update(const BitVector &TargetRegions, ArrayRef<GCNRPTarget> RPTargets,
610	const FreqInfo &Freq, bool ReduceSpill);
611
612	/// Returns whether the current score is null, indicating the
613	/// rematerialization is useless.
614	bool hasNullScore() const { return !RegionImpact; }
615
616	/// Compare score components of non-null scores pair-wise. A null score is
617	/// always strictly lesser than another non-null score.
618	bool operator<(const ScoredRemat &O) const {
619	if (hasNullScore())
620	return !O.hasNullScore();
621	if (O.hasNullScore())
622	return false;
623	if (MaxFreq != O.MaxFreq)
624	return MaxFreq < O.MaxFreq;
625	if (FreqDiff != O.FreqDiff)
626	return FreqDiff < O.FreqDiff;
627	if (RegionImpact != O.RegionImpact)
628	return RegionImpact < O.RegionImpact;
629	// Break ties using pointer to rematerializable register. Rematerializable
630	// registers are collected in instruction order so, within the same
631	// region, this will prefer registers defined earlier that have longer
632	// live ranges in their defining region (since the registers we consider
633	// are always live-out in their defining region).
634	return Remat > O.Remat;
635	}
636
637	#if !defined(NDEBUG) \|\| defined(LLVM_ENABLE_DUMP)
638	Printable print() const;
639	#endif
640
641	private:
642	/// Expected register pressure decrease induced by rematerializing this
643	/// candidate.
644	GCNRegPressure RPSave;
645
646	// The three members below are the scoring components, top to bottom from
647	// most important to least important when comparing candidates.
648
649	/// Frequency of impacted target region with highest known frequency. This
650	/// only matters when the stage is trying to reduce spilling, so it is
651	/// always 0 when it is not.
652	uint64_t MaxFreq;
653	/// Frequency difference between defining and using regions. Negative values
654	/// indicate we are rematerializing to higher frequency regions; positive
655	/// values indicate the contrary.
656	int64_t FreqDiff;
657	/// Expected number of target regions impacted by the rematerialization,
658	/// scaled by the size of the register being rematerialized.
659	unsigned RegionImpact;
660
661	int64_t getFreqDiff(const FreqInfo &Freq) const;
662	};
663
664	/// Parent MBB to each region, in region order.
665	SmallVector<MachineBasicBlock *> RegionBB;
666	/// Register pressure targets for all regions.
667	SmallVector<GCNRPTarget> RPTargets;
668	/// Regions which are above the stage's RP target.
669	BitVector TargetRegions;
670	/// The target occupancy the set is trying to achieve. Empty when the
671	/// objective is spilling reduction.
672	std::optional<unsigned> TargetOcc;
673	/// Achieved occupancy only* through rematerializations (pre-rescheduling).*
674	unsigned AchievedOcc;
675	/// After successful stage initialization, indicates which regions should be
676	/// rescheduled.
677	BitVector RescheduleRegions;
678
679	/// List of rematerializable registers.
680	SmallVector<RematReg> RematRegs;
681
682	/// Holds enough information to rollback a rematerialization decision post
683	/// re-scheduling.
684	struct RollbackInfo {
685	/// The rematerializable register under consideration.
686	const RematReg *Remat;
687	/// The rematerialized MI replacing the original defining MI.
688	MachineInstr *RematMI;
689	/// Maps register machine operand indices to their original register.
690	SmallDenseMap<unsigned, Register, `4`> RegMap;
691
692	RollbackInfo(const RematReg *Remat) : Remat(Remat) {}
693	};
694	/// List of rematerializations to rollback if rematerialization does not end
695	/// up being beneficial.
696	SmallVector<RollbackInfo> Rollbacks;
697
698	/// State of a region pre-re-scheduling but post-rematerializations that we
699	/// must keep to be able to revert re-scheduling effects.
700	struct RegionSchedRevert {
701	/// Region number;
702	unsigned RegionIdx;
703	/// Original instruction order (both debug and non-debug MIs).
704	std::vector<MachineInstr *> OrigMIOrder;
705	/// Maximum pressure recorded in the region.
706	GCNRegPressure MaxPressure;
707
708	RegionSchedRevert(unsigned RegionIdx, ArrayRef<MachineInstr *> OrigMIOrder,
709	const GCNRegPressure &MaxPressure)
710	: RegionIdx(RegionIdx), OrigMIOrder(OrigMIOrder),
711	MaxPressure (MaxPressure) {}
712	};
713	/// After re-scheduling, contains pre-re-scheduling data for all re-scheduled
714	/// regions.
715	SmallVector<RegionSchedRevert> RegionReverts;
716
717	/// Returns the occupancy the stage is trying to achieve.
718	unsigned getStageTargetOccupancy() const;
719
720	/// Determines the stage's objective (increasing occupancy or reducing
721	/// spilling, set in \ref TargetOcc). Defines \ref RPTargets in all regions to
722	/// achieve that objective and mark those that don't achieve it in \ref
723	/// TargetRegions. Returns whether there is any target region.
724	bool setObjective();
725
726	/// Unsets target regions in \p Regions whose RP target has been reached.
727	void unsetSatisfiedRPTargets(const BitVector &Regions);
728
729	/// Fully recomputes RP from the DAG in \p Regions. Among those regions, sets
730	/// again all \ref TargetRegions that were optimistically marked as satisfied
731	/// but are actually not, and returns whether there were any such regions.
732	bool updateAndVerifyRPTargets(const BitVector &Regions);
733
734	/// Collects all rematerializable registers and appends them to \ref
735	/// RematRegs. \p MIRegion maps MIs to their region. Returns whether any
736	/// rematerializable register was found.
737	bool collectRematRegs(const DenseMap<MachineInstr , unsigned*> &MIRegion);
738
739	/// Deletes all rematerialized MIs from the MIR when they were kept around for
740	/// potential rollback.
741	void commitRematerializations() const;
742
743	/// Whether the MI is rematerializable
744	bool isReMaterializable(const MachineInstr &MI);
745
746	/// If remat alone did not increase occupancy to the target one, rollbacks all
747	/// rematerializations and resets live-ins/RP in all regions impacted by the
748	/// stage to their pre-stage values.
749	void finalizeGCNSchedStage() override;
750
751	public:
752	bool initGCNSchedStage() override;
753
754	bool initGCNRegion() override;
755
756	void finalizeGCNRegion() override;
757
758	bool shouldRevertScheduling(unsigned WavesAfter) override;
759
760	PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
761	: GCNSchedStage (StageID, DAG), TargetRegions (DAG.Regions.size()),
762	RescheduleRegions (DAG.Regions.size()) {
763	const unsigned NumRegions = DAG.Regions.size();
764	RPTargets.reserve(N: NumRegions);
765	RegionBB.reserve(N: NumRegions);
766	}
767	};
768
769	class ILPInitialScheduleStage : public GCNSchedStage {
770	public:
771	bool shouldRevertScheduling(unsigned WavesAfter) override;
772
773	ILPInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
774	: GCNSchedStage (StageID, DAG) {}
775	};
776
777	class MemoryClauseInitialScheduleStage : public GCNSchedStage {
778	public:
779	bool shouldRevertScheduling(unsigned WavesAfter) override;
780
781	MemoryClauseInitialScheduleStage(GCNSchedStageID StageID,
782	GCNScheduleDAGMILive &DAG)
783	: GCNSchedStage (StageID, DAG) {}
784	};
785
786	class GCNPostScheduleDAGMILive final : public ScheduleDAGMI {
787	private:
788	std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
789
790	bool HasIGLPInstrs = false;
791
792	public:
793	void schedule() override;
794
795	void finalizeSchedule() override;
796
797	GCNPostScheduleDAGMILive(MachineSchedContext *C,
798	std::unique_ptr<MachineSchedStrategy> S,
799	bool RemoveKillFlags);
800	};
801
802	} // End namespace llvm
803
804	#endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
805

Browse the source code of llvm_projects/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h