SIMachineScheduler.h source code [llvm_projects/llvm/lib/Target/AMDGPU/SIMachineScheduler.h]

1	//===-- SIMachineScheduler.h - SI Scheduler Interface ------------ C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	/// \file
10	/// SI Machine Scheduler interface
11	//
12	//===----------------------------------------------------------------------===//
13
14	#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
15	#define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
16
17	#include "llvm/CodeGen/MachineScheduler.h"
18	#include "llvm/CodeGen/RegisterPressure.h"
19	#include "llvm/CodeGen/ScheduleDAG.h"
20	#include <cstdint>
21	#include <set>
22	#include <vector>
23
24	namespace llvm {
25
26	class SIInstrInfo;
27	class SIRegisterInfo;
28	class SIScheduleDAGMI;
29	class SIScheduleBlockCreator;
30
31	enum SIScheduleCandReason {
32	NoCand,
33	RegUsage,
34	Latency,
35	Successor,
36	Depth,
37	NodeOrder
38	};
39
40	struct SISchedulerCandidate {
41	// The reason for this candidate.
42	SIScheduleCandReason Reason = NoCand;
43
44	// Set of reasons that apply to multiple candidates.
45	uint32_t RepeatReasonSet = `0`;
46
47	SISchedulerCandidate() = default;
48
49	bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (`1` << R); }
50	void setRepeat(SIScheduleCandReason R) { RepeatReasonSet \|= (`1` << R); }
51	};
52
53	enum SIScheduleBlockLinkKind {
54	NoData,
55	Data
56	};
57
58	class SIScheduleBlock {
59	SIScheduleDAGMI *DAG;
60	SIScheduleBlockCreator *BC;
61
62	std::vector<SUnit*> SUnits;
63	std::map<unsigned, unsigned> NodeNum2Index;
64	std::vector<SUnit*> TopReadySUs;
65	std::vector<SUnit*> ScheduledSUnits;
66
67	/// The top of the unscheduled zone.
68	IntervalPressure TopPressure;
69	RegPressureTracker TopRPTracker;
70
71	// Pressure: number of said class of registers needed to
72	// store the live virtual and real registers.
73	// We do care only of SGPR32 and VGPR32 and do track only virtual registers.
74	// Pressure of additional registers required inside the block.
75	std::vector<unsigned> InternalAdditionalPressure;
76	// Pressure of input and output registers
77	std::vector<unsigned> LiveInPressure;
78	std::vector<unsigned> LiveOutPressure;
79	// Registers required by the block, and outputs.
80	// We do track only virtual registers.
81	// Note that some registers are not 32 bits,
82	// and thus the pressure is not equal
83	// to the number of live registers.
84	std::set<Register> LiveInRegs;
85	std::set<Register> LiveOutRegs;
86
87	bool Scheduled = false;
88	bool HighLatencyBlock = false;
89
90	std::vector<unsigned> HasLowLatencyNonWaitedParent;
91
92	// Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table.
93	unsigned ID;
94
95	std::vector<SIScheduleBlock> Preds; // All blocks predecessors.*
96	// All blocks successors, and the kind of link
97	std::vector<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>> Succs;
98	unsigned NumHighLatencySuccessors = `0`;
99
100	public:
101	SIScheduleBlock(SIScheduleDAGMI DAG, SIScheduleBlockCreator BC,
102	unsigned ID):
103	DAG(DAG), BC(BC), TopRPTracker (TopPressure), ID(ID) {}
104
105	~SIScheduleBlock() = default;
106
107	unsigned getID() const { return ID; }
108
109	/// Functions for Block construction.
110	void addUnit(SUnit *SU);
111
112	// When all SUs have been added.
113	void finalizeUnits();
114
115	// Add block pred, which has instruction predecessor of SU.
116	void addPred(SIScheduleBlock *Pred);
117	void addSucc(SIScheduleBlock *Succ, SIScheduleBlockLinkKind Kind);
118
119	const std::vector<SIScheduleBlock>& getPreds() const* { return Preds; }
120	ArrayRef<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>>
121	getSuccs() const { return Succs; }
122
123	unsigned Height = `0`; // Maximum topdown path length to block without outputs
124	unsigned Depth = `0`; // Maximum bottomup path length to block without inputs
125
126	unsigned getNumHighLatencySuccessors() const {
127	return NumHighLatencySuccessors;
128	}
129
130	bool isHighLatencyBlock() { return HighLatencyBlock; }
131
132	// This is approximative.
133	// Ideally should take into accounts some instructions (rcp, etc)
134	// are 4 times slower.
135	int getCost() { return SUnits.size(); }
136
137	// The block Predecessors and Successors must be all registered
138	// before fastSchedule().
139	// Fast schedule with no particular requirement.
140	void fastSchedule();
141
142	std::vector<SUnit> getScheduledUnits() { return* ScheduledSUnits; }
143
144	// Complete schedule that will try to minimize reg pressure and
145	// low latencies, and will fill liveins and liveouts.
146	// Needs all MIs to be grouped between BeginBlock and EndBlock.
147	// The MIs can be moved after the scheduling,
148	// it is just used to allow correct track of live registers.
149	void schedule(MachineBasicBlock::iterator BeginBlock,
150	MachineBasicBlock::iterator EndBlock);
151
152	bool isScheduled() { return Scheduled; }
153
154	// Needs the block to be scheduled inside
155	// TODO: find a way to compute it.
156	std::vector<unsigned> &getInternalAdditionalRegUsage() {
157	return InternalAdditionalPressure;
158	}
159
160	std::set<Register> &getInRegs() { return LiveInRegs; }
161	std::set<Register> &getOutRegs() { return LiveOutRegs; }
162
163	void printDebug(bool Full);
164
165	private:
166	struct SISchedCandidate : SISchedulerCandidate {
167	// The best SUnit candidate.
168	SUnit SU = nullptr*;
169
170	unsigned SGPRUsage;
171	unsigned VGPRUsage;
172	bool IsLowLatency;
173	unsigned LowLatencyOffset;
174	bool HasLowLatencyNonWaitedParent;
175
176	SISchedCandidate() = default;
177
178	bool isValid() const { return SU; }
179
180	// Copy the status of another candidate without changing policy.
181	void setBest(SISchedCandidate &Best) {
182	assert(Best.Reason != NoCand && "uninitialized Sched candidate");
183	SU = Best.SU;
184	Reason = Best.Reason;
185	SGPRUsage = Best.SGPRUsage;
186	VGPRUsage = Best.VGPRUsage;
187	IsLowLatency = Best.IsLowLatency;
188	LowLatencyOffset = Best.LowLatencyOffset;
189	HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent;
190	}
191	};
192
193	void undoSchedule();
194
195	void undoReleaseSucc(SUnit SU, SDep SuccEdge);
196	void releaseSucc(SUnit SU, SDep SuccEdge);
197	// InOrOutBlock: restrict to links pointing inside the block (true),
198	// or restrict to links pointing outside the block (false).
199	void releaseSuccessors(SUnit SU, bool* InOrOutBlock);
200
201	void nodeScheduled(SUnit *SU);
202	void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand);
203	void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand);
204	SUnit* pickNode();
205	void traceCandidate(const SISchedCandidate &Cand);
206	void initRegPressure(MachineBasicBlock::iterator BeginBlock,
207	MachineBasicBlock::iterator EndBlock);
208	};
209
210	struct SIScheduleBlocks {
211	std::vector<SIScheduleBlock*> Blocks;
212	std::vector<int> TopDownIndex2Block;
213	std::vector<int> TopDownBlock2Index;
214	};
215
216	enum SISchedulerBlockCreatorVariant {
217	LatenciesAlone,
218	LatenciesGrouped,
219	LatenciesAlonePlusConsecutive
220	};
221
222	class SIScheduleBlockCreator {
223	SIScheduleDAGMI *DAG;
224	// unique_ptr handles freeing memory for us.
225	std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs;
226	std::map<SISchedulerBlockCreatorVariant,
227	SIScheduleBlocks> Blocks;
228	std::vector<SIScheduleBlock*> CurrentBlocks;
229	std::vector<int> Node2CurrentBlock;
230
231	// Topological sort
232	// Maps topological index to the node number.
233	std::vector<int> TopDownIndex2Block;
234	std::vector<int> TopDownBlock2Index;
235	std::vector<int> BottomUpIndex2Block;
236
237	// 0 -> Color not given.
238	// 1 to SUnits.size() -> Reserved group (you should only add elements to them).
239	// Above -> Other groups.
240	int NextReservedID;
241	int NextNonReservedID;
242	std::vector<int> CurrentColoring;
243	std::vector<int> CurrentTopDownReservedDependencyColoring;
244	std::vector<int> CurrentBottomUpReservedDependencyColoring;
245
246	public:
247	SIScheduleBlockCreator(SIScheduleDAGMI *DAG);
248
249	SIScheduleBlocks
250	getBlocks(SISchedulerBlockCreatorVariant BlockVariant);
251
252	bool isSUInBlock(SUnit SU, unsigned* ID);
253
254	private:
255	// Give a Reserved color to every high latency.
256	void colorHighLatenciesAlone();
257
258	// Create groups of high latencies with a Reserved color.
259	void colorHighLatenciesGroups();
260
261	// Compute coloring for topdown and bottom traversals with
262	// different colors depending on dependencies on Reserved colors.
263	void colorComputeReservedDependencies();
264
265	// Give color to all non-colored SUs according to Reserved groups dependencies.
266	void colorAccordingToReservedDependencies();
267
268	// Divides Blocks having no bottom up or top down dependencies on Reserved groups.
269	// The new colors are computed according to the dependencies on the other blocks
270	// formed with colorAccordingToReservedDependencies.
271	void colorEndsAccordingToDependencies();
272
273	// Cut groups into groups with SUs in consecutive order (except for Reserved groups).
274	void colorForceConsecutiveOrderInGroup();
275
276	// Merge Constant loads that have all their users into another group to the group.
277	// (TODO: else if all their users depend on the same group, put them there)
278	void colorMergeConstantLoadsNextGroup();
279
280	// Merge SUs that have all their users into another group to the group
281	void colorMergeIfPossibleNextGroup();
282
283	// Merge SUs that have all their users into another group to the group,
284	// but only for Reserved groups.
285	void colorMergeIfPossibleNextGroupOnlyForReserved();
286
287	// Merge SUs that have all their users into another group to the group,
288	// but only if the group is no more than a few SUs.
289	void colorMergeIfPossibleSmallGroupsToNextGroup();
290
291	// Divides Blocks with important size.
292	// Idea of implementation: attribute new colors depending on topdown and
293	// bottom up links to other blocks.
294	void cutHugeBlocks();
295
296	// Put in one group all instructions with no users in this scheduling region
297	// (we'd want these groups be at the end).
298	void regroupNoUserInstructions();
299
300	// Give Reserved color to export instructions
301	void colorExports();
302
303	void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant);
304
305	void topologicalSort();
306
307	void scheduleInsideBlocks();
308
309	void fillStats();
310	};
311
312	enum SISchedulerBlockSchedulerVariant {
313	BlockLatencyRegUsage,
314	BlockRegUsageLatency,
315	BlockRegUsage
316	};
317
318	class SIScheduleBlockScheduler {
319	SIScheduleDAGMI *DAG;
320	SISchedulerBlockSchedulerVariant Variant;
321	std::vector<SIScheduleBlock*> Blocks;
322
323	std::vector<std::map<Register, unsigned>> LiveOutRegsNumUsages;
324	std::set<Register> LiveRegs;
325	// Num of schedulable unscheduled blocks reading the register.
326	std::map<Register, unsigned> LiveRegsConsumers;
327
328	std::vector<unsigned> LastPosHighLatencyParentScheduled;
329	int LastPosWaitedHighLatency;
330
331	std::vector<SIScheduleBlock*> BlocksScheduled;
332	unsigned NumBlockScheduled;
333	std::vector<SIScheduleBlock*> ReadyBlocks;
334
335	unsigned VregCurrentUsage;
336	unsigned SregCurrentUsage;
337
338	// Currently is only approximation.
339	unsigned maxVregUsage;
340	unsigned maxSregUsage;
341
342	std::vector<unsigned> BlockNumPredsLeft;
343	std::vector<unsigned> BlockNumSuccsLeft;
344
345	public:
346	SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
347	SISchedulerBlockSchedulerVariant Variant,
348	SIScheduleBlocks BlocksStruct);
349	~SIScheduleBlockScheduler() = default;
350
351	std::vector<SIScheduleBlock> getBlocks() { return* BlocksScheduled; }
352
353	unsigned getVGPRUsage() { return maxVregUsage; }
354	unsigned getSGPRUsage() { return maxSregUsage; }
355
356	private:
357	struct SIBlockSchedCandidate : SISchedulerCandidate {
358	// The best Block candidate.
359	SIScheduleBlock Block = nullptr*;
360
361	bool IsHighLatency;
362	int VGPRUsageDiff;
363	unsigned NumSuccessors;
364	unsigned NumHighLatencySuccessors;
365	unsigned LastPosHighLatParentScheduled;
366	unsigned Height;
367
368	SIBlockSchedCandidate() = default;
369
370	bool isValid() const { return Block; }
371
372	// Copy the status of another candidate without changing policy.
373	void setBest(SIBlockSchedCandidate &Best) {
374	assert(Best.Reason != NoCand && "uninitialized Sched candidate");
375	Block = Best.Block;
376	Reason = Best.Reason;
377	IsHighLatency = Best.IsHighLatency;
378	VGPRUsageDiff = Best.VGPRUsageDiff;
379	NumSuccessors = Best.NumSuccessors;
380	NumHighLatencySuccessors = Best.NumHighLatencySuccessors;
381	LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled;
382	Height = Best.Height;
383	}
384	};
385
386	bool tryCandidateLatency(SIBlockSchedCandidate &Cand,
387	SIBlockSchedCandidate &TryCand);
388	bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
389	SIBlockSchedCandidate &TryCand);
390	SIScheduleBlock *pickBlock();
391
392	void addLiveRegs(std::set<Register> &Regs);
393	void decreaseLiveRegs(SIScheduleBlock *Block, std::set<Register> &Regs);
394	void releaseBlockSuccs(SIScheduleBlock *Parent);
395	void blockScheduled(SIScheduleBlock *Block);
396
397	// Check register pressure change
398	// by scheduling a block with these LiveIn and LiveOut.
399	std::vector<int> checkRegUsageImpact(std::set<Register> &InRegs,
400	std::set<Register> &OutRegs);
401
402	void schedule();
403	};
404
405	struct SIScheduleBlockResult {
406	std::vector<unsigned> SUs;
407	unsigned MaxSGPRUsage;
408	unsigned MaxVGPRUsage;
409	};
410
411	class SIScheduler {
412	SIScheduleDAGMI *DAG;
413	SIScheduleBlockCreator BlockCreator;
414
415	public:
416	SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator (DAG) {}
417
418	~SIScheduler() = default;
419
420	struct SIScheduleBlockResult
421	scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
422	SISchedulerBlockSchedulerVariant ScheduleVariant);
423	};
424
425	class SIScheduleDAGMI final : public ScheduleDAGMILive {
426	const SIInstrInfo *SITII;
427	const SIRegisterInfo *SITRI;
428
429	std::vector<SUnit> SUnitsLinksBackup;
430
431	// For moveLowLatencies. After all Scheduling variants are tested.
432	std::vector<unsigned> ScheduledSUnits;
433	std::vector<unsigned> ScheduledSUnitsInv;
434
435	public:
436	SIScheduleDAGMI(MachineSchedContext *C);
437
438	~SIScheduleDAGMI() override;
439
440	// Entry point for the schedule.
441	void schedule() override;
442
443	// To init Block's RPTracker.
444	void initRPTracker(RegPressureTracker &RPTracker) {
445	RPTracker.init(mf: &MF, rci: RegClassInfo, lis: LIS, mbb: BB, pos: RegionBegin, TrackLaneMasks: false, TrackUntiedDefs: false);
446	}
447
448	MachineBasicBlock getBB() { return* BB; }
449	MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; }
450	MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; }
451	LiveIntervals getLIS() { return* LIS; }
452	MachineRegisterInfo getMRI() { return* &MRI; }
453	const TargetRegisterInfo getTRI() { return* TRI; }
454	ScheduleDAGTopologicalSort GetTopo() { return* &Topo; }
455	SUnit &getEntrySU() { return EntrySU; }
456	SUnit& getExitSU() { return ExitSU; }
457
458	void restoreSULinksLeft();
459
460	template<typename _Iterator> void fillVgprSgprCost(_Iterator First,
461	_Iterator End,
462	unsigned &VgprUsage,
463	unsigned &SgprUsage);
464
465	std::set<Register> getInRegs() {
466	std::set<Register> InRegs;
467	for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
468	InRegs.insert(x: RegMaskPair.RegUnit);
469	}
470	return InRegs;
471	}
472
473	std::set<unsigned> getOutRegs() {
474	std::set<unsigned> OutRegs;
475	for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
476	OutRegs.insert(x: RegMaskPair.RegUnit);
477	}
478	return OutRegs;
479	};
480
481	private:
482	void topologicalSort();
483	// After scheduling is done, improve low latency placements.
484	void moveLowLatencies();
485
486	public:
487	// Some stats for scheduling inside blocks.
488	std::vector<unsigned> IsLowLatencySU;
489	std::vector<unsigned> LowLatencyOffset;
490	std::vector<unsigned> IsHighLatencySU;
491	// Topological sort
492	// Maps topological index to the node number.
493	std::vector<int> TopDownIndex2SU;
494	std::vector<int> BottomUpIndex2SU;
495	};
496
497	} // end namespace llvm
498
499	#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
500

Browse the source code of llvm_projects/llvm/lib/Target/AMDGPU/SIMachineScheduler.h