//===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
#define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H

#include "GCNRegPressure.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineScheduler.h"

namespace llvm {

class SIMachineFunctionInfo;
class SIRegisterInfo;
class GCNSubtarget;
class GCNSchedStage;

enum class GCNSchedStageID : unsigned {
  OccInitialSchedule = 0,
  UnclusteredHighRPReschedule = 1,
  ClusteredLowOccupancyReschedule = 2,
  PreRARematerialize = 3,
  ILPInitialSchedule = 4,
  MemoryClauseInitialSchedule = 5
};
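
// Illustrative sketch, not part of this header's API: each GCNSchedStrategy
// subclass populates its stage pipeline from these IDs. For example, an
// occupancy-oriented strategy might run, in order:
//
//   SchedStages.push_back(GCNSchedStageID::OccInitialSchedule);
//   SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule);
//   SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule);
//   SchedStages.push_back(GCNSchedStageID::PreRARematerialize);
//
// whereas an ILP-oriented strategy might run ILPInitialSchedule alone.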

#ifndef NDEBUG
raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
#endif

/// This is a minimal scheduler strategy. The main difference between this
/// and the GenericScheduler is that GCNSchedStrategy uses different
/// heuristics to determine excess/critical pressure sets.
class GCNSchedStrategy : public GenericScheduler {
protected:
  SUnit *pickNodeBidirectional(bool &IsTopNode);

  void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
                         const RegPressureTracker &RPTracker,
                         SchedCandidate &Cand, bool IsBottomUp);

  void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
                     const RegPressureTracker &RPTracker,
                     const SIRegisterInfo *SRI, unsigned SGPRPressure,
                     unsigned VGPRPressure, bool IsBottomUp);

  std::vector<unsigned> Pressure;

  std::vector<unsigned> MaxPressure;

  unsigned SGPRExcessLimit;

  unsigned VGPRExcessLimit;

  unsigned TargetOccupancy;

  MachineFunction *MF;

  // Scheduling stages for this strategy.
  SmallVector<GCNSchedStageID, 4> SchedStages;

  // Pointer to the current SchedStageID.
  SmallVectorImpl<GCNSchedStageID>::iterator CurrentStage = nullptr;
  // GCN RP tracker for top-down scheduling.
  mutable GCNDownwardRPTracker DownwardTracker;

  // GCN RP tracker for bottom-up scheduling.
  mutable GCNUpwardRPTracker UpwardTracker;

public:
  // Set if schedule() has seen register pressure above the critical limits and
  // had to track register pressure for the actual scheduling heuristics.
  bool HasHighPressure;

  // Schedule known to have excess register pressure. Be more conservative in
  // increasing ILP and preserving VGPRs.
  bool KnownExcessRP = false;

  // An error margin is necessary because of the inaccuracy of the generic RP
  // tracker; it can be increased when tuning heuristics to reduce register
  // pressure more aggressively.
  unsigned ErrorMargin = 3;

  // Bias for SGPR limits under high register pressure.
  const unsigned HighRPSGPRBias = 7;

  // Bias for VGPR limits under high register pressure.
  const unsigned HighRPVGPRBias = 7;
  unsigned SGPRCriticalLimit;

  unsigned VGPRCriticalLimit;

  unsigned SGPRLimitBias = 0;

  unsigned VGPRLimitBias = 0;

  GCNSchedStrategy(const MachineSchedContext *C);

  SUnit *pickNode(bool &IsTopNode) override;

  void schedNode(SUnit *SU, bool IsTopNode) override;

  void initialize(ScheduleDAGMI *DAG) override;

  unsigned getTargetOccupancy() { return TargetOccupancy; }

  void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }

  GCNSchedStageID getCurrentStage();

  // Advances stage. Returns true if there are remaining stages.
  bool advanceStage();

  bool hasNextStage() const;

  GCNSchedStageID getNextStage() const;

  GCNDownwardRPTracker *getDownwardTracker() { return &DownwardTracker; }

  GCNUpwardRPTracker *getUpwardTracker() { return &UpwardTracker; }
};
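
// A minimal usage sketch of the stage-pipeline API above (illustrative only;
// the real driver is GCNScheduleDAGMILive::runSchedStages, and `S` is a
// placeholder). This assumes the first advanceStage() call positions the
// strategy at its first stage:
//
//   GCNSchedStrategy &S = ...;
//   while (S.advanceStage()) {
//     GCNSchedStageID StageID = S.getCurrentStage();
//     // ... build and run the GCNSchedStage corresponding to StageID ...
//   }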

/// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
/// maximum number of waves per SIMD).
class GCNMaxOccupancySchedStrategy final : public GCNSchedStrategy {
public:
  GCNMaxOccupancySchedStrategy(const MachineSchedContext *C,
                               bool IsLegacyScheduler = false);
};

/// The goal of this scheduling strategy is to maximize ILP for a single wave
/// (i.e. latency hiding).
class GCNMaxILPSchedStrategy final : public GCNSchedStrategy {
protected:
  bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                    SchedBoundary *Zone) const override;

public:
  GCNMaxILPSchedStrategy(const MachineSchedContext *C);
};

/// The goal of this scheduling strategy is to maximize memory clauses for a
/// single wave.
class GCNMaxMemoryClauseSchedStrategy final : public GCNSchedStrategy {
protected:
  bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                    SchedBoundary *Zone) const override;

public:
  GCNMaxMemoryClauseSchedStrategy(const MachineSchedContext *C);
};

class ScheduleMetrics {
  unsigned ScheduleLength;
  unsigned BubbleCycles;

public:
  ScheduleMetrics() {}
  ScheduleMetrics(unsigned L, unsigned BC)
      : ScheduleLength(L), BubbleCycles(BC) {}
  unsigned getLength() const { return ScheduleLength; }
  unsigned getBubbles() const { return BubbleCycles; }
  unsigned getMetric() const {
    unsigned Metric = (BubbleCycles * ScaleFactor) / ScheduleLength;
    // The metric is zero when bubbles account for less than 1% of the
    // schedule, which is too coarse to compare. So, return 1 instead.
    return Metric ? Metric : 1;
  }
  static const unsigned ScaleFactor;
};
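
// Worked example, assuming ScaleFactor == 100 as the "1%" comment above
// implies: a 250-cycle schedule with 20 bubble cycles yields
// (20 * 100) / 250 == 8, i.e. bubbles are ~8% of the schedule. With only 2
// bubble cycles the integer division gives 0, so getMetric() returns 1.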

inline raw_ostream &operator<<(raw_ostream &OS, const ScheduleMetrics &Sm) {
  OS << "\n Schedule Metric (scaled by " << ScheduleMetrics::ScaleFactor
     << ") is: " << Sm.getMetric() << " [ " << Sm.getBubbles() << "/"
     << Sm.getLength() << " ]\n";
  return OS;
}

class GCNScheduleDAGMILive;
class RegionPressureMap {
  GCNScheduleDAGMILive *DAG;
  // The live in/out pressure as indexed by the first or last MI in the region
  // before scheduling.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> RegionLiveRegMap;
  // The mapping of RegionIdx to key instruction.
  DenseMap<unsigned, MachineInstr *> IdxToInstruction;
  // Whether we are calculating LiveOuts or LiveIns.
  bool IsLiveOut;

public:
  RegionPressureMap() {}
  RegionPressureMap(GCNScheduleDAGMILive *GCNDAG, bool LiveOut)
      : DAG(GCNDAG), IsLiveOut(LiveOut) {}
  // Build the Instr->LiveReg and RegionIdx->Instr maps.
  void buildLiveRegMap();

  // Retrieve the LiveReg for a given RegionIdx.
  GCNRPTracker::LiveRegSet &getLiveRegsForRegionIdx(unsigned RegionIdx) {
    assert(IdxToInstruction.contains(RegionIdx));
    MachineInstr *Key = IdxToInstruction[RegionIdx];
    return RegionLiveRegMap[Key];
  }
};
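
// Usage sketch (illustrative; `GCNDAG` and `Idx` are placeholders): the maps
// must be built before any lookup.
//
//   RegionPressureMap LiveOuts(GCNDAG, /*LiveOut=*/true);
//   LiveOuts.buildLiveRegMap();
//   GCNRPTracker::LiveRegSet &Regs = LiveOuts.getLiveRegsForRegionIdx(Idx);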

/// A region's boundaries, i.e. a pair of instruction bundle iterators. The
/// lower boundary is inclusive, the upper boundary is exclusive.
using RegionBoundaries =
    std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>;
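
// Since RegionBoundaries is a half-open range [first, second), walking a
// region is a plain iterator loop (illustrative; `Bounds` is a placeholder):
//
//   for (MachineBasicBlock::iterator I = Bounds.first; I != Bounds.second;
//        ++I) {
//     MachineInstr &MI = *I;
//     // ... inspect MI ...
//   }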

class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
  friend class GCNSchedStage;
  friend class OccInitialScheduleStage;
  friend class UnclusteredHighRPStage;
  friend class ClusteredLowOccStage;
  friend class PreRARematStage;
  friend class ILPInitialScheduleStage;
  friend class RegionPressureMap;

  const GCNSubtarget &ST;

  SIMachineFunctionInfo &MFI;

  // Occupancy target at the beginning of the function scheduling cycle.
  unsigned StartingOccupancy;

  // Minimal real occupancy recorded for the function.
  unsigned MinOccupancy;

  // Vector of regions recorded for later rescheduling.
  SmallVector<RegionBoundaries, 32> Regions;

  // Record regions with high register pressure.
  BitVector RegionsWithHighRP;

  // Record regions with excess register pressure over the physical register
  // limit. Register pressure in these regions usually will result in spilling.
  BitVector RegionsWithExcessRP;

  // Regions that have the same occupancy as the latest MinOccupancy.
  BitVector RegionsWithMinOcc;

  // Regions that have IGLP instructions (SCHED_GROUP_BARRIER or IGLP_OPT).
  BitVector RegionsWithIGLPInstrs;

  // Region live-in cache.
  SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns;

  // Region pressure cache.
  SmallVector<GCNRegPressure, 32> Pressure;

  // Temporary basic block live-in cache.
  DenseMap<const MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns;

  // The map of the initial first region instruction to region live-in
  // registers.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;

  // Calculate the map of the initial first region instruction to region
  // live-in registers.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getRegionLiveInMap() const;

  // Calculate the map of the initial last region instruction to region
  // live-out registers.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
  getRegionLiveOutMap() const;

  // The live-out registers per region. These are internally stored as a map of
  // the initial last region instruction to region live-out registers, but can
  // be retrieved by RegionIdx with calls to getLiveRegsForRegionIdx.
  RegionPressureMap RegionLiveOuts;

  // Return current region pressure.
  GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;

  // Compute and cache live-ins and pressure for all regions in the block.
  void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);

  /// If necessary, updates a region's boundaries following insertion (\p NewMI
  /// != nullptr) or removal (\p NewMI == nullptr) of \p MI in the region. For
  /// an MI removal, this must be called before the MI is actually erased from
  /// its parent MBB.
  void updateRegionBoundaries(RegionBoundaries &RegionBounds,
                              MachineBasicBlock::iterator MI,
                              MachineInstr *NewMI);

  void runSchedStages();

  std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);

public:
  GCNScheduleDAGMILive(MachineSchedContext *C,
                       std::unique_ptr<MachineSchedStrategy> S);

  void schedule() override;

  void finalizeSchedule() override;
};

// GCNSchedStrategy applies multiple scheduling stages to a function.
class GCNSchedStage {
protected:
  GCNScheduleDAGMILive &DAG;

  GCNSchedStrategy &S;

  MachineFunction &MF;

  SIMachineFunctionInfo &MFI;

  const GCNSubtarget &ST;

  const GCNSchedStageID StageID;

  // The current block being scheduled.
  MachineBasicBlock *CurrentMBB = nullptr;

  // Current region index.
  unsigned RegionIdx = 0;

  // Record the original order of instructions before scheduling.
  std::vector<MachineInstr *> Unsched;

  // RP before scheduling the current region.
  GCNRegPressure PressureBefore;

  // RP after scheduling the current region.
  GCNRegPressure PressureAfter;

  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG);

public:
  // Initialize state for a scheduling stage. Returns false if the current
  // stage should be skipped.
  virtual bool initGCNSchedStage();

  // Finalize state after finishing a scheduling pass on the function.
  virtual void finalizeGCNSchedStage();

  // Setup for scheduling a region. Returns false if the current region should
  // be skipped.
  virtual bool initGCNRegion();

  // Track whether a new region is also a new MBB.
  void setupNewBlock();

  // Finalize state after scheduling a region.
  void finalizeGCNRegion();

  // Check the result of scheduling.
  void checkScheduling();

  // Computes the given schedule's virtual execution time in clock cycles.
  ScheduleMetrics getScheduleMetrics(const std::vector<SUnit> &InputSchedule);
  ScheduleMetrics getScheduleMetrics(const GCNScheduleDAGMILive &DAG);
  unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
                                  DenseMap<unsigned, unsigned> &ReadyCycles,
                                  const TargetSchedModel &SM);

  // Returns true if scheduling should be reverted.
  virtual bool shouldRevertScheduling(unsigned WavesAfter);

  // Returns true if the current region has known excess pressure.
  bool isRegionWithExcessRP() const {
    return DAG.RegionsWithExcessRP[RegionIdx];
  }

  // The region number this stage is currently working on.
  unsigned getRegionIdx() { return RegionIdx; }

  // Returns true if the new schedule may result in more spilling.
  bool mayCauseSpilling(unsigned WavesAfter);

  // Attempt to revert scheduling for this region.
  void revertScheduling();

  void advanceRegion() { RegionIdx++; }

  virtual ~GCNSchedStage() = default;
};
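
// A minimal sketch of a stage's lifecycle, pieced together from the comments
// above (illustrative; the real driver is GCNScheduleDAGMILive::runSchedStages
// and `Stage` is a placeholder):
//
//   if (Stage.initGCNSchedStage()) {
//     for (/* each region in the function */) {
//       if (!Stage.initGCNRegion()) {
//         Stage.advanceRegion();
//         continue;
//       }
//       // ... schedule the region ...
//       Stage.finalizeGCNRegion(); // may revert the region's schedule
//     }
//     Stage.finalizeGCNSchedStage();
//   }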

class OccInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  OccInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class UnclusteredHighRPStage : public GCNSchedStage {
private:
  // Save the initial occupancy before starting this stage.
  unsigned InitialOccupancy;

public:
  bool initGCNSchedStage() override;

  void finalizeGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  UnclusteredHighRPStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

// Retries function scheduling if the resulting occupancy turns out to be
// lower than the one assumed by previous scheduling passes. This gives more
// freedom to schedule low register pressure blocks.
class ClusteredLowOccStage : public GCNSchedStage {
public:
  bool initGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

/// Attempts to reduce function spilling or, if there is no spilling, to
/// increase function occupancy by one with respect to ArchVGPR usage by
/// sinking trivially rematerializable instructions to their uses. When the
/// stage estimates that reducing spilling or increasing occupancy is possible,
/// as few instructions as possible are rematerialized to limit potential
/// negative effects on function latency.
class PreRARematStage : public GCNSchedStage {
private:
  /// Useful information about a rematerializable instruction.
  struct RematInstruction {
    /// Single use of the rematerializable instruction's defined register,
    /// located in a different block.
    MachineInstr *UseMI;
    /// Rematerialized version of \p DefMI, set in
    /// PreRARematStage::rematerialize. Used for reverting rematerializations.
    MachineInstr *RematMI;
    /// Set of regions in which the rematerializable instruction's defined
    /// register is a live-in.
    SmallDenseSet<unsigned, 4> LiveInRegions;

    RematInstruction(MachineInstr *UseMI) : UseMI(UseMI) {}
  };

  /// Maps all MIs to their parent region. MI terminators are considered to be
  /// outside the region they delimit, and as such are not stored in the map.
  DenseMap<MachineInstr *, unsigned> MIRegion;
  /// Parent MBB of each region, in region order.
  SmallVector<MachineBasicBlock *> RegionBB;
  /// Collects instructions to rematerialize.
  MapVector<MachineInstr *, RematInstruction> Rematerializations;
  /// Collects regions whose live-ins or register pressure will change due to
  /// rematerializations.
  DenseMap<unsigned, GCNRegPressure> ImpactedRegions;
  /// In case we need to roll back rematerializations, save lane masks for all
  /// rematerialized registers in all regions in which they are live-ins.
  DenseMap<std::pair<unsigned, Register>, LaneBitmask> RegMasks;
  /// After successful stage initialization, indicates which regions should be
  /// rescheduled.
  BitVector RescheduleRegions;
  /// Target occupancy the stage estimates is reachable through
  /// rematerialization. Greater than or equal to the pre-stage min occupancy.
  unsigned TargetOcc;
  /// Occupancy achieved *only* through rematerializations (pre-rescheduling).
  /// Smaller than or equal to the target occupancy.
  unsigned AchievedOcc;
  /// Whether the stage is attempting to increase occupancy in the absence of
  /// spilling.
  bool IncreaseOccupancy;

  /// Returns whether rematerialization can reduce spilling or increase
  /// function occupancy by 1. If either is possible, collects instructions in
  /// PreRARematStage::Rematerializations and sets the target occupancy in
  /// PreRARematStage::TargetOcc.
  bool canIncreaseOccupancyOrReduceSpill();

  /// Returns whether \p MI is trivially rematerializable and has no virtual
  /// register uses.
  bool isTriviallyReMaterializable(const MachineInstr &MI);

  /// Rematerializes all instructions in PreRARematStage::Rematerializations
  /// and stores the achieved occupancy after remat in
  /// PreRARematStage::AchievedOcc.
  void rematerialize();

  /// If remat alone did not increase occupancy to the target one, rolls back
  /// all rematerializations and resets live-ins/RP in all regions impacted by
  /// the stage to their pre-stage values.
  void finalizeGCNSchedStage() override;

  /// Returns true if all the uses in \p InstToRemat defined at \p OriginalIdx
  /// are live at \p RematIdx. This only checks liveness of virtual reg uses.
  bool allUsesAvailableAt(const MachineInstr *InstToRemat,
                          SlotIndex OriginalIdx, SlotIndex RematIdx) const;

public:
  bool initGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG), RescheduleRegions(DAG.Regions.size()) {}
};
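
// Sketch of the stage's flow, pieced together from the doc comments above
// (illustrative, not the actual implementation):
//
//   1. canIncreaseOccupancyOrReduceSpill() decides whether remat can help
//      and, if so, fills Rematerializations and sets TargetOcc.
//   2. rematerialize() performs the remats and records AchievedOcc.
//   3. Regions flagged in RescheduleRegions are rescheduled.
//   4. finalizeGCNSchedStage() rolls the remats back if occupancy was the
//      goal but AchievedOcc still falls short of TargetOcc.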

class ILPInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  ILPInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class MemoryClauseInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  MemoryClauseInitialScheduleStage(GCNSchedStageID StageID,
                                   GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class GCNPostScheduleDAGMILive final : public ScheduleDAGMI {
private:
  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  bool HasIGLPInstrs = false;

public:
  void schedule() override;

  void finalizeSchedule() override;

  GCNPostScheduleDAGMILive(MachineSchedContext *C,
                           std::unique_ptr<MachineSchedStrategy> S,
                           bool RemoveKillFlags);
};

} // End namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H