//===- GCNIterativeScheduler.cpp ------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the class GCNIterativeScheduler.
///
//===----------------------------------------------------------------------===//

#include "GCNIterativeScheduler.h"
#include "AMDGPUIGroupLP.h"
#include "GCNSchedStrategy.h"
#include "SIMachineFunctionInfo.h"
18
19using namespace llvm;
20
21#define DEBUG_TYPE "machine-scheduler"
22
23namespace llvm {
24
25std::vector<const SUnit *> makeMinRegSchedule(ArrayRef<const SUnit *> TopRoots,
26 const ScheduleDAG &DAG);
27
28std::vector<const SUnit *> makeGCNILPScheduler(ArrayRef<const SUnit *> BotRoots,
29 const ScheduleDAG &DAG);
30} // namespace llvm
31
32// shim accessors for different order containers
33static inline MachineInstr *getMachineInstr(MachineInstr *MI) {
34 return MI;
35}
36static inline MachineInstr *getMachineInstr(const SUnit *SU) {
37 return SU->getInstr();
38}
39static inline MachineInstr *getMachineInstr(const SUnit &SU) {
40 return SU.getInstr();
41}
42
43#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
44LLVM_DUMP_METHOD
45static void printRegion(raw_ostream &OS,
46 MachineBasicBlock::iterator Begin,
47 MachineBasicBlock::iterator End,
48 const LiveIntervals *LIS,
49 unsigned MaxInstNum =
50 std::numeric_limits<unsigned>::max()) {
51 auto *BB = Begin->getParent();
52 OS << BB->getParent()->getName() << ":" << printMBBReference(*BB) << ' '
53 << BB->getName() << ":\n";
54 auto I = Begin;
55 MaxInstNum = std::max(MaxInstNum, 1u);
56 for (; I != End && MaxInstNum; ++I, --MaxInstNum) {
57 if (!I->isDebugInstr() && LIS)
58 OS << LIS->getInstructionIndex(*I);
59 OS << '\t' << *I;
60 }
61 if (I != End) {
62 OS << "\t...\n";
63 I = std::prev(End);
64 if (!I->isDebugInstr() && LIS)
65 OS << LIS->getInstructionIndex(*I);
66 OS << '\t' << *I;
67 }
68 if (End != BB->end()) { // print boundary inst if present
69 OS << "----\n";
70 if (LIS) OS << LIS->getInstructionIndex(*End) << '\t';
71 OS << *End;
72 }
73}
74
75LLVM_DUMP_METHOD
76static void printLivenessInfo(raw_ostream &OS,
77 MachineBasicBlock::iterator Begin,
78 MachineBasicBlock::iterator End,
79 const LiveIntervals *LIS) {
80 auto *const BB = Begin->getParent();
81 const auto &MRI = BB->getParent()->getRegInfo();
82
83 const auto LiveIns = getLiveRegsBefore(*Begin, *LIS);
84 OS << "LIn RP: " << print(getRegPressure(MRI, LiveIns));
85
86 const auto BottomMI = End == BB->end() ? std::prev(End) : End;
87 const auto LiveOuts = getLiveRegsAfter(*BottomMI, *LIS);
88 OS << "LOt RP: " << print(getRegPressure(MRI, LiveOuts));
89}
90
91LLVM_DUMP_METHOD
92void GCNIterativeScheduler::printRegions(raw_ostream &OS) const {
93 const auto &ST = MF.getSubtarget<GCNSubtarget>();
94 for (auto *const R : Regions) {
95 OS << "Region to schedule ";
96 printRegion(OS, R->Begin, R->End, LIS, 1);
97 printLivenessInfo(OS, R->Begin, R->End, LIS);
98 OS << "Max RP: " << print(R->MaxPressure, &ST);
99 }
100}
101
102LLVM_DUMP_METHOD
103void GCNIterativeScheduler::printSchedResult(raw_ostream &OS,
104 const Region *R,
105 const GCNRegPressure &RP) const {
106 OS << "\nAfter scheduling ";
107 printRegion(OS, R->Begin, R->End, LIS);
108 printSchedRP(OS, R->MaxPressure, RP);
109 OS << '\n';
110}
111
112LLVM_DUMP_METHOD
113void GCNIterativeScheduler::printSchedRP(raw_ostream &OS,
114 const GCNRegPressure &Before,
115 const GCNRegPressure &After) const {
116 const auto &ST = MF.getSubtarget<GCNSubtarget>();
117 OS << "RP before: " << print(Before, &ST)
118 << "RP after: " << print(After, &ST);
119}
120#endif
121
122void GCNIterativeScheduler::swapIGLPMutations(const Region &R, bool IsReentry) {
123 bool HasIGLPInstrs = false;
124 const SIInstrInfo *SII = static_cast<const SIInstrInfo *>(TII);
125 for (MachineBasicBlock::iterator I = R.Begin; I != R.End; I++) {
126 if (SII->isIGLPMutationOnly(Opcode: I->getOpcode())) {
127 HasIGLPInstrs = true;
128 break;
129 }
130 }
131
132 if (HasIGLPInstrs) {
133 SavedMutations.clear();
134 SavedMutations.swap(x&: Mutations);
135 auto SchedPhase = IsReentry ? AMDGPU::SchedulingPhase::PreRAReentry
136 : AMDGPU::SchedulingPhase::Initial;
137
138 addMutation(Mutation: createIGroupLPDAGMutation(Phase: SchedPhase));
139 }
140}
141
142// DAG builder helper
143class GCNIterativeScheduler::BuildDAG {
144 GCNIterativeScheduler &Sch;
145 SmallVector<SUnit *, 8> TopRoots;
146
147 SmallVector<SUnit*, 8> BotRoots;
148public:
149 BuildDAG(const Region &R, GCNIterativeScheduler &_Sch, bool IsReentry = false)
150 : Sch(_Sch) {
151 auto *BB = R.Begin->getParent();
152 Sch.BaseClass::startBlock(bb: BB);
153 Sch.BaseClass::enterRegion(bb: BB, begin: R.Begin, end: R.End, regioninstrs: R.NumRegionInstrs);
154 Sch.swapIGLPMutations(R, IsReentry);
155 Sch.buildSchedGraph(AA: Sch.AA, RPTracker: nullptr, PDiffs: nullptr, LIS: nullptr,
156 /*TrackLaneMask*/TrackLaneMasks: true);
157 Sch.postProcessDAG();
158 Sch.Topo.InitDAGTopologicalSorting();
159 Sch.findRootsAndBiasEdges(TopRoots, BotRoots);
160 }
161
162 ~BuildDAG() {
163 Sch.BaseClass::exitRegion();
164 Sch.BaseClass::finishBlock();
165 }
166
167 ArrayRef<const SUnit *> getTopRoots() const {
168 return TopRoots;
169 }
170 ArrayRef<SUnit*> getBottomRoots() const {
171 return BotRoots;
172 }
173};
174
175class GCNIterativeScheduler::OverrideLegacyStrategy {
176 GCNIterativeScheduler &Sch;
177 Region &Rgn;
178 std::unique_ptr<MachineSchedStrategy> SaveSchedImpl;
179 GCNRegPressure SaveMaxRP;
180
181public:
182 OverrideLegacyStrategy(Region &R,
183 MachineSchedStrategy &OverrideStrategy,
184 GCNIterativeScheduler &_Sch)
185 : Sch(_Sch)
186 , Rgn(R)
187 , SaveSchedImpl(std::move(_Sch.SchedImpl))
188 , SaveMaxRP(R.MaxPressure) {
189 Sch.SchedImpl.reset(p: &OverrideStrategy);
190 auto *BB = R.Begin->getParent();
191 Sch.BaseClass::startBlock(bb: BB);
192 Sch.BaseClass::enterRegion(bb: BB, begin: R.Begin, end: R.End, regioninstrs: R.NumRegionInstrs);
193 }
194
195 ~OverrideLegacyStrategy() {
196 Sch.BaseClass::exitRegion();
197 Sch.BaseClass::finishBlock();
198 Sch.SchedImpl.release();
199 Sch.SchedImpl = std::move(SaveSchedImpl);
200 }
201
202 void schedule() {
203 assert(Sch.RegionBegin == Rgn.Begin && Sch.RegionEnd == Rgn.End);
204 LLVM_DEBUG(dbgs() << "\nScheduling ";
205 printRegion(dbgs(), Rgn.Begin, Rgn.End, Sch.LIS, 2));
206 Sch.BaseClass::schedule();
207
208 // Unfortunately placeDebugValues incorrectly modifies RegionEnd, restore
209 Sch.RegionEnd = Rgn.End;
210 //assert(Rgn.End == Sch.RegionEnd);
211 Rgn.Begin = Sch.RegionBegin;
212 Rgn.MaxPressure.clear();
213 }
214
215 void restoreOrder() {
216 assert(Sch.RegionBegin == Rgn.Begin && Sch.RegionEnd == Rgn.End);
217 // DAG SUnits are stored using original region's order
218 // so just use SUnits as the restoring schedule
219 Sch.scheduleRegion(R&: Rgn, Schedule&: Sch.SUnits, MaxRP: SaveMaxRP);
220 }
221};
222
223namespace {
224
225// just a stub to make base class happy
226class SchedStrategyStub : public MachineSchedStrategy {
227public:
228 bool shouldTrackPressure() const override { return false; }
229 bool shouldTrackLaneMasks() const override { return false; }
230 void initialize(ScheduleDAGMI *DAG) override {}
231 SUnit *pickNode(bool &IsTopNode) override { return nullptr; }
232 void schedNode(SUnit *SU, bool IsTopNode) override {}
233 void releaseTopNode(SUnit *SU) override {}
234 void releaseBottomNode(SUnit *SU) override {}
235};
236
237} // end anonymous namespace
238
239GCNIterativeScheduler::GCNIterativeScheduler(MachineSchedContext *C,
240 StrategyKind S)
241 : BaseClass(C, std::make_unique<SchedStrategyStub>())
242 , Context(C)
243 , Strategy(S)
244 , UPTracker(*LIS) {
245}
246
247// returns max pressure for a region
248GCNRegPressure
249GCNIterativeScheduler::getRegionPressure(MachineBasicBlock::iterator Begin,
250 MachineBasicBlock::iterator End)
251 const {
252 // For the purpose of pressure tracking bottom inst of the region should
253 // be also processed. End is either BB end, BB terminator inst or sched
254 // boundary inst.
255 auto const BBEnd = Begin->getParent()->end();
256 auto const BottomMI = End == BBEnd ? std::prev(x: End) : End;
257
258 // scheduleRegions walks bottom to top, so its likely we just get next
259 // instruction to track
260 auto AfterBottomMI = std::next(x: BottomMI);
261 if (AfterBottomMI == BBEnd ||
262 &*AfterBottomMI != UPTracker.getLastTrackedMI()) {
263 UPTracker.reset(MI: *BottomMI);
264 } else {
265 assert(UPTracker.isValid());
266 }
267
268 for (auto I = BottomMI; I != Begin; --I)
269 UPTracker.recede(MI: *I);
270
271 UPTracker.recede(MI: *Begin);
272
273 assert(UPTracker.isValid() ||
274 (dbgs() << "Tracked region ",
275 printRegion(dbgs(), Begin, End, LIS), false));
276 return UPTracker.getMaxPressureAndReset();
277}
278
279// returns max pressure for a tentative schedule
280template <typename Range> GCNRegPressure
281GCNIterativeScheduler::getSchedulePressure(const Region &R,
282 Range &&Schedule) const {
283 auto const BBEnd = R.Begin->getParent()->end();
284 GCNUpwardRPTracker RPTracker(*LIS);
285 if (R.End != BBEnd) {
286 // R.End points to the boundary instruction but the
287 // schedule doesn't include it
288 RPTracker.reset(MI: *R.End);
289 RPTracker.recede(MI: *R.End);
290 } else {
291 // R.End doesn't point to the boundary instruction
292 RPTracker.reset(MI: *std::prev(x: BBEnd));
293 }
294 for (auto I = Schedule.end(), B = Schedule.begin(); I != B;) {
295 RPTracker.recede(MI: *getMachineInstr(*--I));
296 }
297 return RPTracker.getMaxPressureAndReset();
298}
299
300void GCNIterativeScheduler::enterRegion(MachineBasicBlock *BB, // overridden
301 MachineBasicBlock::iterator Begin,
302 MachineBasicBlock::iterator End,
303 unsigned NumRegionInstrs) {
304 BaseClass::enterRegion(bb: BB, begin: Begin, end: End, regioninstrs: NumRegionInstrs);
305 if (NumRegionInstrs > 2) {
306 Regions.push_back(
307 x: new (Alloc.Allocate())
308 Region { .Begin: Begin, .End: End, .NumRegionInstrs: NumRegionInstrs,
309 .MaxPressure: getRegionPressure(Begin, End), .BestSchedule: nullptr });
310 }
311}
312
313void GCNIterativeScheduler::schedule() { // overridden
314 // do nothing
315 LLVM_DEBUG(printLivenessInfo(dbgs(), RegionBegin, RegionEnd, LIS);
316 if (!Regions.empty() && Regions.back()->Begin == RegionBegin) {
317 dbgs() << "Max RP: "
318 << print(Regions.back()->MaxPressure,
319 &MF.getSubtarget<GCNSubtarget>());
320 } dbgs()
321 << '\n';);
322}
323
324void GCNIterativeScheduler::finalizeSchedule() { // overridden
325 if (Regions.empty())
326 return;
327 switch (Strategy) {
328 case SCHEDULE_MINREGONLY: scheduleMinReg(); break;
329 case SCHEDULE_MINREGFORCED: scheduleMinReg(force: true); break;
330 case SCHEDULE_LEGACYMAXOCCUPANCY: scheduleLegacyMaxOccupancy(); break;
331 case SCHEDULE_ILP: scheduleILP(TryMaximizeOccupancy: false); break;
332 }
333}
334
335// Detach schedule from SUnits and interleave it with debug values.
336// Returned schedule becomes independent of DAG state.
337std::vector<MachineInstr*>
338GCNIterativeScheduler::detachSchedule(ScheduleRef Schedule) const {
339 std::vector<MachineInstr*> Res;
340 Res.reserve(n: Schedule.size() * 2);
341
342 if (FirstDbgValue)
343 Res.push_back(x: FirstDbgValue);
344
345 const auto DbgB = DbgValues.begin(), DbgE = DbgValues.end();
346 for (const auto *SU : Schedule) {
347 Res.push_back(x: SU->getInstr());
348 const auto &D = std::find_if(first: DbgB, last: DbgE, pred: [SU](decltype(*DbgB) &P) {
349 return P.second == SU->getInstr();
350 });
351 if (D != DbgE)
352 Res.push_back(x: D->first);
353 }
354 return Res;
355}
356
357void GCNIterativeScheduler::setBestSchedule(Region &R,
358 ScheduleRef Schedule,
359 const GCNRegPressure &MaxRP) {
360 R.BestSchedule.reset(
361 p: new TentativeSchedule{ .Schedule: detachSchedule(Schedule), .MaxPressure: MaxRP });
362}
363
364void GCNIterativeScheduler::scheduleBest(Region &R) {
365 assert(R.BestSchedule.get() && "No schedule specified");
366 scheduleRegion(R, Schedule&: R.BestSchedule->Schedule, MaxRP: R.BestSchedule->MaxPressure);
367 R.BestSchedule.reset();
368}
369
370// minimal required region scheduler, works for ranges of SUnits*,
371// SUnits or MachineIntrs*
372template <typename Range>
373void GCNIterativeScheduler::scheduleRegion(Region &R, Range &&Schedule,
374 const GCNRegPressure &MaxRP) {
375 assert(RegionBegin == R.Begin && RegionEnd == R.End);
376 assert(LIS != nullptr);
377#ifndef NDEBUG
378 const auto SchedMaxRP = getSchedulePressure(R, Schedule);
379#endif
380 auto *BB = R.Begin->getParent();
381 auto Top = R.Begin;
382 for (const auto &I : Schedule) {
383 auto MI = getMachineInstr(I);
384
385 MachineBasicBlock::iterator MII = MI->getIterator();
386 if (MII != Top) {
387 bool NonDebugReordered =
388 !MI->isDebugInstr() && skipDebugInstructionsForward(It: Top, End: MII) != MII;
389 BB->remove(I: MI);
390 BB->insert(Top, MI);
391 if (NonDebugReordered)
392 LIS->handleMove(MI&: *MI, UpdateFlags: true);
393 }
394 if (!MI->isDebugInstr()) {
395 // Reset read - undef flags and update them later.
396 for (auto &Op : MI->all_defs())
397 Op.setIsUndef(false);
398
399 RegisterOperands RegOpers;
400 RegOpers.collect(MI: *MI, TRI: *TRI, MRI, /*ShouldTrackLaneMasks*/TrackLaneMasks: true,
401 /*IgnoreDead*/false);
402 // Adjust liveness and add missing dead+read-undef flags.
403 auto SlotIdx = LIS->getInstructionIndex(Instr: *MI).getRegSlot();
404 RegOpers.adjustLaneLiveness(LIS: *LIS, MRI, Pos: SlotIdx, AddFlagsMI: MI);
405 }
406 Top = std::next(MI->getIterator());
407 }
408 RegionBegin = getMachineInstr(Schedule.front());
409
410 // Schedule consisting of MachineInstr* is considered 'detached'
411 // and already interleaved with debug values
412 if (!std::is_same_v<decltype(*Schedule.begin()), MachineInstr*>) {
413 placeDebugValues();
414 // Unfortunately placeDebugValues incorrectly modifies RegionEnd, restore
415 // assert(R.End == RegionEnd);
416 RegionEnd = R.End;
417 }
418
419 R.Begin = RegionBegin;
420 R.MaxPressure = MaxRP;
421
422#ifndef NDEBUG
423 const auto RegionMaxRP = getRegionPressure(R);
424 const auto &ST = MF.getSubtarget<GCNSubtarget>();
425#endif
426 assert(
427 (SchedMaxRP == RegionMaxRP && (MaxRP.empty() || SchedMaxRP == MaxRP)) ||
428 (dbgs() << "Max RP mismatch!!!\n"
429 "RP for schedule (calculated): "
430 << print(SchedMaxRP, &ST)
431 << "RP for schedule (reported): " << print(MaxRP, &ST)
432 << "RP after scheduling: " << print(RegionMaxRP, &ST),
433 false));
434}
435
436// Sort recorded regions by pressure - highest at the front
437void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) {
438 llvm::sort(C&: Regions, Comp: [this, TargetOcc](const Region *R1, const Region *R2) {
439 return R2->MaxPressure.less(MF, O: R1->MaxPressure, MaxOccupancy: TargetOcc);
440 });
441}
442
443///////////////////////////////////////////////////////////////////////////////
444// Legacy MaxOccupancy Strategy
445
446// Tries to increase occupancy applying minreg scheduler for a sequence of
447// most demanding regions. Obtained schedules are saved as BestSchedule for a
448// region.
449// TargetOcc is the best achievable occupancy for a kernel.
450// Returns better occupancy on success or current occupancy on fail.
451// BestSchedules aren't deleted on fail.
452unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
453 // TODO: assert Regions are sorted descending by pressure
454 const auto &ST = MF.getSubtarget<GCNSubtarget>();
455 const unsigned DynamicVGPRBlockSize =
456 MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
457 const auto Occ =
458 Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize);
459 LLVM_DEBUG(dbgs() << "Trying to improve occupancy, target = " << TargetOcc
460 << ", current = " << Occ << '\n');
461
462 auto NewOcc = TargetOcc;
463 for (auto *R : Regions) {
464 // Always build the DAG to add mutations
465 BuildDAG DAG(*R, *this);
466
467 if (R->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize) >= NewOcc)
468 continue;
469
470 LLVM_DEBUG(printRegion(dbgs(), R->Begin, R->End, LIS, 3);
471 printLivenessInfo(dbgs(), R->Begin, R->End, LIS));
472
473 const auto MinSchedule = makeMinRegSchedule(TopRoots: DAG.getTopRoots(), DAG: *this);
474 const auto MaxRP = getSchedulePressure(R: *R, Schedule: MinSchedule);
475 LLVM_DEBUG(dbgs() << "Occupancy improvement attempt:\n";
476 printSchedRP(dbgs(), R->MaxPressure, MaxRP));
477
478 NewOcc = std::min(a: NewOcc, b: MaxRP.getOccupancy(ST, DynamicVGPRBlockSize));
479 if (NewOcc <= Occ)
480 break;
481
482 setBestSchedule(R&: *R, Schedule: MinSchedule, MaxRP);
483 }
484 LLVM_DEBUG(dbgs() << "New occupancy = " << NewOcc
485 << ", prev occupancy = " << Occ << '\n');
486 if (NewOcc > Occ) {
487 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
488 MFI->increaseOccupancy(MF, Limit: NewOcc);
489 }
490
491 return std::max(a: NewOcc, b: Occ);
492}
493
494void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
495 bool TryMaximizeOccupancy) {
496 const auto &ST = MF.getSubtarget<GCNSubtarget>();
497 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
498 auto TgtOcc = MFI->getMinAllowedOccupancy();
499 unsigned DynamicVGPRBlockSize = MFI->getDynamicVGPRBlockSize();
500
501 sortRegionsByPressure(TargetOcc: TgtOcc);
502 auto Occ =
503 Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize);
504
505 bool IsReentry = false;
506 if (TryMaximizeOccupancy && Occ < TgtOcc) {
507 Occ = tryMaximizeOccupancy(TargetOcc: TgtOcc);
508 IsReentry = true;
509 }
510
511 // This is really weird but for some magic scheduling regions twice
512 // gives performance improvement
513 const int NumPasses = Occ < TgtOcc ? 2 : 1;
514
515 TgtOcc = std::min(a: Occ, b: TgtOcc);
516 LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, "
517 "target occupancy = "
518 << TgtOcc << '\n');
519 GCNMaxOccupancySchedStrategy LStrgy(Context, /*IsLegacyScheduler=*/true);
520 unsigned FinalOccupancy = std::min(a: Occ, b: MFI->getOccupancy());
521
522 for (int I = 0; I < NumPasses; ++I) {
523 // running first pass with TargetOccupancy = 0 mimics previous scheduling
524 // approach and is a performance magic
525 LStrgy.setTargetOccupancy(I == 0 ? 0 : TgtOcc);
526 for (auto *R : Regions) {
527 OverrideLegacyStrategy Ovr(*R, LStrgy, *this);
528 IsReentry |= I > 0;
529 swapIGLPMutations(R: *R, IsReentry);
530 Ovr.schedule();
531 const auto RP = getRegionPressure(R: *R);
532 LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
533
534 if (RP.getOccupancy(ST, DynamicVGPRBlockSize) < TgtOcc) {
535 LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
536 if (R->BestSchedule.get() && R->BestSchedule->MaxPressure.getOccupancy(
537 ST, DynamicVGPRBlockSize) >= TgtOcc) {
538 LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
539 scheduleBest(R&: *R);
540 } else {
541 LLVM_DEBUG(dbgs() << ", restoring\n");
542 Ovr.restoreOrder();
543 assert(R->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize) >=
544 TgtOcc);
545 }
546 }
547 FinalOccupancy =
548 std::min(a: FinalOccupancy, b: RP.getOccupancy(ST, DynamicVGPRBlockSize));
549 }
550 }
551 MFI->limitOccupancy(Limit: FinalOccupancy);
552}
553
554///////////////////////////////////////////////////////////////////////////////
555// Minimal Register Strategy
556
557void GCNIterativeScheduler::scheduleMinReg(bool force) {
558 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
559 const auto TgtOcc = MFI->getOccupancy();
560 sortRegionsByPressure(TargetOcc: TgtOcc);
561
562 auto MaxPressure = Regions.front()->MaxPressure;
563 for (auto *R : Regions) {
564 if (!force && R->MaxPressure.less(MF, O: MaxPressure, MaxOccupancy: TgtOcc))
565 break;
566
567 BuildDAG DAG(*R, *this);
568 const auto MinSchedule = makeMinRegSchedule(TopRoots: DAG.getTopRoots(), DAG: *this);
569
570 const auto RP = getSchedulePressure(R: *R, Schedule: MinSchedule);
571 LLVM_DEBUG(if (R->MaxPressure.less(MF, RP, TgtOcc)) {
572 dbgs() << "\nWarning: Pressure becomes worse after minreg!";
573 printSchedRP(dbgs(), R->MaxPressure, RP);
574 });
575
576 if (!force && MaxPressure.less(MF, O: RP, MaxOccupancy: TgtOcc))
577 break;
578
579 scheduleRegion(R&: *R, Schedule: MinSchedule, MaxRP: RP);
580 LLVM_DEBUG(printSchedResult(dbgs(), R, RP));
581
582 MaxPressure = RP;
583 }
584}
585
586///////////////////////////////////////////////////////////////////////////////
587// ILP scheduler port
588
589void GCNIterativeScheduler::scheduleILP(
590 bool TryMaximizeOccupancy) {
591 const auto &ST = MF.getSubtarget<GCNSubtarget>();
592 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
593 auto TgtOcc = MFI->getMinAllowedOccupancy();
594 unsigned DynamicVGPRBlockSize = MFI->getDynamicVGPRBlockSize();
595
596 sortRegionsByPressure(TargetOcc: TgtOcc);
597 auto Occ =
598 Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize);
599
600 bool IsReentry = false;
601 if (TryMaximizeOccupancy && Occ < TgtOcc) {
602 Occ = tryMaximizeOccupancy(TargetOcc: TgtOcc);
603 IsReentry = true;
604 }
605
606 TgtOcc = std::min(a: Occ, b: TgtOcc);
607 LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, "
608 "target occupancy = "
609 << TgtOcc << '\n');
610
611 unsigned FinalOccupancy = std::min(a: Occ, b: MFI->getOccupancy());
612 for (auto *R : Regions) {
613 BuildDAG DAG(*R, *this, IsReentry);
614 const auto ILPSchedule = makeGCNILPScheduler(BotRoots: DAG.getBottomRoots(), DAG: *this);
615
616 const auto RP = getSchedulePressure(R: *R, Schedule: ILPSchedule);
617 LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
618
619 if (RP.getOccupancy(ST, DynamicVGPRBlockSize) < TgtOcc) {
620 LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
621 if (R->BestSchedule.get() && R->BestSchedule->MaxPressure.getOccupancy(
622 ST, DynamicVGPRBlockSize) >= TgtOcc) {
623 LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
624 scheduleBest(R&: *R);
625 }
626 } else {
627 scheduleRegion(R&: *R, Schedule: ILPSchedule, MaxRP: RP);
628 LLVM_DEBUG(printSchedResult(dbgs(), R, RP));
629 FinalOccupancy =
630 std::min(a: FinalOccupancy, b: RP.getOccupancy(ST, DynamicVGPRBlockSize));
631 }
632 }
633 MFI->limitOccupancy(Limit: FinalOccupancy);
634}
635