| 1 | //===- GCNIterativeScheduler.cpp ------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | /// |
| 9 | /// \file |
| 10 | /// This file implements the class GCNIterativeScheduler. |
| 11 | /// |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "GCNIterativeScheduler.h" |
| 15 | #include "AMDGPUIGroupLP.h" |
| 16 | #include "GCNSchedStrategy.h" |
| 17 | #include "SIMachineFunctionInfo.h" |
| 18 | |
| 19 | using namespace llvm; |
| 20 | |
| 21 | #define DEBUG_TYPE "machine-scheduler" |
| 22 | |
namespace llvm {

// Builds a top-down schedule minimizing register pressure; defined elsewhere
// in the AMDGPU backend (used by the min-reg strategies below).
std::vector<const SUnit *> makeMinRegSchedule(ArrayRef<const SUnit *> TopRoots,
                                              const ScheduleDAG &DAG);

// Builds a bottom-up ILP-oriented schedule; defined elsewhere in the AMDGPU
// backend (used by scheduleILP()).
std::vector<const SUnit *> makeGCNILPScheduler(ArrayRef<const SUnit *> BotRoots,
                                               const ScheduleDAG &DAG);
} // namespace llvm
| 31 | |
// Shim accessors for the different order containers: scheduleRegion() and
// getSchedulePressure() accept schedules stored as ranges of MachineInstr*,
// const SUnit*, or SUnit; these overloads normalize each element to the
// underlying MachineInstr*.
static inline MachineInstr *getMachineInstr(MachineInstr *MI) {
  return MI;
}
static inline MachineInstr *getMachineInstr(const SUnit *SU) {
  return SU->getInstr();
}
static inline MachineInstr *getMachineInstr(const SUnit &SU) {
  return SU.getInstr();
}
| 42 | |
| 43 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
// Dump the region [Begin, End): block header, then up to MaxInstNum
// instructions prefixed with their SlotIndex (when LIS is available). If the
// region is longer, the middle is elided with "..." and the last instruction
// is still printed. The boundary instruction at End is printed after "----"
// when the region does not reach the end of the block.
LLVM_DUMP_METHOD
static void printRegion(raw_ostream &OS,
                        MachineBasicBlock::iterator Begin,
                        MachineBasicBlock::iterator End,
                        const LiveIntervals *LIS,
                        unsigned MaxInstNum =
                          std::numeric_limits<unsigned>::max()) {
  auto *BB = Begin->getParent();
  OS << BB->getParent()->getName() << ":" << printMBBReference(*BB) << ' '
     << BB->getName() << ":\n" ;
  auto I = Begin;
  // Always print at least one instruction, even if the caller passed 0.
  MaxInstNum = std::max(MaxInstNum, 1u);
  for (; I != End && MaxInstNum; ++I, --MaxInstNum) {
    if (!I->isDebugInstr() && LIS)
      OS << LIS->getInstructionIndex(*I);
    OS << '\t' << *I;
  }
  if (I != End) { // truncated: elide the middle, show the last instruction
    OS << "\t...\n" ;
    I = std::prev(End);
    if (!I->isDebugInstr() && LIS)
      OS << LIS->getInstructionIndex(*I);
    OS << '\t' << *I;
  }
  if (End != BB->end()) { // print boundary inst if present
    OS << "----\n" ;
    if (LIS) OS << LIS->getInstructionIndex(*End) << '\t';
    OS << *End;
  }
}
| 74 | |
// Dump the register pressure of the live-in set at Begin ("LIn RP") and of
// the live-out set after the region's bottom instruction ("LOt RP").
// Requires a valid LIS pointer (dereferenced unconditionally).
LLVM_DUMP_METHOD
static void printLivenessInfo(raw_ostream &OS,
                              MachineBasicBlock::iterator Begin,
                              MachineBasicBlock::iterator End,
                              const LiveIntervals *LIS) {
  auto *const BB = Begin->getParent();
  const auto &MRI = BB->getParent()->getRegInfo();

  const auto LiveIns = getLiveRegsBefore(*Begin, *LIS);
  OS << "LIn RP: " << print(getRegPressure(MRI, LiveIns));

  // If End is the block end, the bottom instruction is the one before it;
  // otherwise End itself is the (excluded) boundary instruction.
  const auto BottomMI = End == BB->end() ? std::prev(End) : End;
  const auto LiveOuts = getLiveRegsAfter(*BottomMI, *LIS);
  OS << "LOt RP: " << print(getRegPressure(MRI, LiveOuts));
}
| 90 | |
// Dump every recorded region: a one-instruction preview, its live-in/out
// pressure and the recorded max pressure.
LLVM_DUMP_METHOD
void GCNIterativeScheduler::printRegions(raw_ostream &OS) const {
  const auto &ST = MF.getSubtarget<GCNSubtarget>();
  for (auto *const R : Regions) {
    OS << "Region to schedule " ;
    printRegion(OS, R->Begin, R->End, LIS, 1);
    printLivenessInfo(OS, R->Begin, R->End, LIS);
    OS << "Max RP: " << print(R->MaxPressure, &ST);
  }
}
| 101 | |
// Dump region R after scheduling together with its pressure change
// (R->MaxPressure before vs. RP after).
LLVM_DUMP_METHOD
void GCNIterativeScheduler::printSchedResult(raw_ostream &OS,
                                             const Region *R,
                                             const GCNRegPressure &RP) const {
  OS << "\nAfter scheduling " ;
  printRegion(OS, R->Begin, R->End, LIS);
  printSchedRP(OS, R->MaxPressure, RP);
  OS << '\n';
}
| 111 | |
// Dump a before/after register-pressure pair for a scheduling attempt.
LLVM_DUMP_METHOD
void GCNIterativeScheduler::printSchedRP(raw_ostream &OS,
                                         const GCNRegPressure &Before,
                                         const GCNRegPressure &After) const {
  const auto &ST = MF.getSubtarget<GCNSubtarget>();
  OS << "RP before: " << print(Before, &ST)
     << "RP after: " << print(After, &ST);
}
| 120 | #endif |
| 121 | |
| 122 | void GCNIterativeScheduler::swapIGLPMutations(const Region &R, bool IsReentry) { |
| 123 | bool HasIGLPInstrs = false; |
| 124 | const SIInstrInfo *SII = static_cast<const SIInstrInfo *>(TII); |
| 125 | for (MachineBasicBlock::iterator I = R.Begin; I != R.End; I++) { |
| 126 | if (SII->isIGLPMutationOnly(Opcode: I->getOpcode())) { |
| 127 | HasIGLPInstrs = true; |
| 128 | break; |
| 129 | } |
| 130 | } |
| 131 | |
| 132 | if (HasIGLPInstrs) { |
| 133 | SavedMutations.clear(); |
| 134 | SavedMutations.swap(x&: Mutations); |
| 135 | auto SchedPhase = IsReentry ? AMDGPU::SchedulingPhase::PreRAReentry |
| 136 | : AMDGPU::SchedulingPhase::Initial; |
| 137 | |
| 138 | addMutation(Mutation: createIGroupLPDAGMutation(Phase: SchedPhase)); |
| 139 | } |
| 140 | } |
| 141 | |
| 142 | // DAG builder helper |
| 143 | class GCNIterativeScheduler::BuildDAG { |
| 144 | GCNIterativeScheduler &Sch; |
| 145 | SmallVector<SUnit *, 8> TopRoots; |
| 146 | |
| 147 | SmallVector<SUnit*, 8> BotRoots; |
| 148 | public: |
| 149 | BuildDAG(const Region &R, GCNIterativeScheduler &_Sch, bool IsReentry = false) |
| 150 | : Sch(_Sch) { |
| 151 | auto *BB = R.Begin->getParent(); |
| 152 | Sch.BaseClass::startBlock(bb: BB); |
| 153 | Sch.BaseClass::enterRegion(bb: BB, begin: R.Begin, end: R.End, regioninstrs: R.NumRegionInstrs); |
| 154 | Sch.swapIGLPMutations(R, IsReentry); |
| 155 | Sch.buildSchedGraph(AA: Sch.AA, RPTracker: nullptr, PDiffs: nullptr, LIS: nullptr, |
| 156 | /*TrackLaneMask*/TrackLaneMasks: true); |
| 157 | Sch.postProcessDAG(); |
| 158 | Sch.Topo.InitDAGTopologicalSorting(); |
| 159 | Sch.findRootsAndBiasEdges(TopRoots, BotRoots); |
| 160 | } |
| 161 | |
| 162 | ~BuildDAG() { |
| 163 | Sch.BaseClass::exitRegion(); |
| 164 | Sch.BaseClass::finishBlock(); |
| 165 | } |
| 166 | |
| 167 | ArrayRef<const SUnit *> getTopRoots() const { |
| 168 | return TopRoots; |
| 169 | } |
| 170 | ArrayRef<SUnit*> getBottomRoots() const { |
| 171 | return BotRoots; |
| 172 | } |
| 173 | }; |
| 174 | |
| 175 | class GCNIterativeScheduler::OverrideLegacyStrategy { |
| 176 | GCNIterativeScheduler &Sch; |
| 177 | Region &Rgn; |
| 178 | std::unique_ptr<MachineSchedStrategy> SaveSchedImpl; |
| 179 | GCNRegPressure SaveMaxRP; |
| 180 | |
| 181 | public: |
| 182 | OverrideLegacyStrategy(Region &R, |
| 183 | MachineSchedStrategy &OverrideStrategy, |
| 184 | GCNIterativeScheduler &_Sch) |
| 185 | : Sch(_Sch) |
| 186 | , Rgn(R) |
| 187 | , SaveSchedImpl(std::move(_Sch.SchedImpl)) |
| 188 | , SaveMaxRP(R.MaxPressure) { |
| 189 | Sch.SchedImpl.reset(p: &OverrideStrategy); |
| 190 | auto *BB = R.Begin->getParent(); |
| 191 | Sch.BaseClass::startBlock(bb: BB); |
| 192 | Sch.BaseClass::enterRegion(bb: BB, begin: R.Begin, end: R.End, regioninstrs: R.NumRegionInstrs); |
| 193 | } |
| 194 | |
| 195 | ~OverrideLegacyStrategy() { |
| 196 | Sch.BaseClass::exitRegion(); |
| 197 | Sch.BaseClass::finishBlock(); |
| 198 | Sch.SchedImpl.release(); |
| 199 | Sch.SchedImpl = std::move(SaveSchedImpl); |
| 200 | } |
| 201 | |
| 202 | void schedule() { |
| 203 | assert(Sch.RegionBegin == Rgn.Begin && Sch.RegionEnd == Rgn.End); |
| 204 | LLVM_DEBUG(dbgs() << "\nScheduling " ; |
| 205 | printRegion(dbgs(), Rgn.Begin, Rgn.End, Sch.LIS, 2)); |
| 206 | Sch.BaseClass::schedule(); |
| 207 | |
| 208 | // Unfortunately placeDebugValues incorrectly modifies RegionEnd, restore |
| 209 | Sch.RegionEnd = Rgn.End; |
| 210 | //assert(Rgn.End == Sch.RegionEnd); |
| 211 | Rgn.Begin = Sch.RegionBegin; |
| 212 | Rgn.MaxPressure.clear(); |
| 213 | } |
| 214 | |
| 215 | void restoreOrder() { |
| 216 | assert(Sch.RegionBegin == Rgn.Begin && Sch.RegionEnd == Rgn.End); |
| 217 | // DAG SUnits are stored using original region's order |
| 218 | // so just use SUnits as the restoring schedule |
| 219 | Sch.scheduleRegion(R&: Rgn, Schedule&: Sch.SUnits, MaxRP: SaveMaxRP); |
| 220 | } |
| 221 | }; |
| 222 | |
| 223 | namespace { |
| 224 | |
// just a stub to make base class happy
// (ScheduleDAGMILive requires a MachineSchedStrategy; all real scheduling is
// driven explicitly by GCNIterativeScheduler, so every hook is a no-op.)
class SchedStrategyStub : public MachineSchedStrategy {
public:
  bool shouldTrackPressure() const override { return false; }
  bool shouldTrackLaneMasks() const override { return false; }
  void initialize(ScheduleDAGMI *DAG) override {}
  SUnit *pickNode(bool &IsTopNode) override { return nullptr; }
  void schedNode(SUnit *SU, bool IsTopNode) override {}
  void releaseTopNode(SUnit *SU) override {}
  void releaseBottomNode(SUnit *SU) override {}
};
| 236 | |
| 237 | } // end anonymous namespace |
| 238 | |
// Construct with a stub strategy (real scheduling happens in
// finalizeSchedule()); the upward pressure tracker is bound to the
// LiveIntervals set up by the base class.
GCNIterativeScheduler::GCNIterativeScheduler(MachineSchedContext *C,
                                             StrategyKind S)
  : BaseClass(C, std::make_unique<SchedStrategyStub>())
  , Context(C)
  , Strategy(S)
  , UPTracker(*LIS) {
}
| 246 | |
| 247 | // returns max pressure for a region |
| 248 | GCNRegPressure |
| 249 | GCNIterativeScheduler::getRegionPressure(MachineBasicBlock::iterator Begin, |
| 250 | MachineBasicBlock::iterator End) |
| 251 | const { |
| 252 | // For the purpose of pressure tracking bottom inst of the region should |
| 253 | // be also processed. End is either BB end, BB terminator inst or sched |
| 254 | // boundary inst. |
| 255 | auto const BBEnd = Begin->getParent()->end(); |
| 256 | auto const BottomMI = End == BBEnd ? std::prev(x: End) : End; |
| 257 | |
| 258 | // scheduleRegions walks bottom to top, so its likely we just get next |
| 259 | // instruction to track |
| 260 | auto AfterBottomMI = std::next(x: BottomMI); |
| 261 | if (AfterBottomMI == BBEnd || |
| 262 | &*AfterBottomMI != UPTracker.getLastTrackedMI()) { |
| 263 | UPTracker.reset(MI: *BottomMI); |
| 264 | } else { |
| 265 | assert(UPTracker.isValid()); |
| 266 | } |
| 267 | |
| 268 | for (auto I = BottomMI; I != Begin; --I) |
| 269 | UPTracker.recede(MI: *I); |
| 270 | |
| 271 | UPTracker.recede(MI: *Begin); |
| 272 | |
| 273 | assert(UPTracker.isValid() || |
| 274 | (dbgs() << "Tracked region " , |
| 275 | printRegion(dbgs(), Begin, End, LIS), false)); |
| 276 | return UPTracker.getMaxPressureAndReset(); |
| 277 | } |
| 278 | |
| 279 | // returns max pressure for a tentative schedule |
| 280 | template <typename Range> GCNRegPressure |
| 281 | GCNIterativeScheduler::getSchedulePressure(const Region &R, |
| 282 | Range &&Schedule) const { |
| 283 | auto const BBEnd = R.Begin->getParent()->end(); |
| 284 | GCNUpwardRPTracker RPTracker(*LIS); |
| 285 | if (R.End != BBEnd) { |
| 286 | // R.End points to the boundary instruction but the |
| 287 | // schedule doesn't include it |
| 288 | RPTracker.reset(MI: *R.End); |
| 289 | RPTracker.recede(MI: *R.End); |
| 290 | } else { |
| 291 | // R.End doesn't point to the boundary instruction |
| 292 | RPTracker.reset(MI: *std::prev(x: BBEnd)); |
| 293 | } |
| 294 | for (auto I = Schedule.end(), B = Schedule.begin(); I != B;) { |
| 295 | RPTracker.recede(MI: *getMachineInstr(*--I)); |
| 296 | } |
| 297 | return RPTracker.getMaxPressureAndReset(); |
| 298 | } |
| 299 | |
| 300 | void GCNIterativeScheduler::enterRegion(MachineBasicBlock *BB, // overridden |
| 301 | MachineBasicBlock::iterator Begin, |
| 302 | MachineBasicBlock::iterator End, |
| 303 | unsigned NumRegionInstrs) { |
| 304 | BaseClass::enterRegion(bb: BB, begin: Begin, end: End, regioninstrs: NumRegionInstrs); |
| 305 | if (NumRegionInstrs > 2) { |
| 306 | Regions.push_back( |
| 307 | x: new (Alloc.Allocate()) |
| 308 | Region { .Begin: Begin, .End: End, .NumRegionInstrs: NumRegionInstrs, |
| 309 | .MaxPressure: getRegionPressure(Begin, End), .BestSchedule: nullptr }); |
| 310 | } |
| 311 | } |
| 312 | |
// Overridden ScheduleDAGMI hook: deliberately schedules nothing here —
// regions were recorded by enterRegion() and are scheduled later in
// finalizeSchedule(). Only emits liveness/pressure debug output.
void GCNIterativeScheduler::schedule() { // overridden
  // do nothing
  LLVM_DEBUG(printLivenessInfo(dbgs(), RegionBegin, RegionEnd, LIS);
             if (!Regions.empty() && Regions.back()->Begin == RegionBegin) {
               dbgs() << "Max RP: "
                      << print(Regions.back()->MaxPressure,
                               &MF.getSubtarget<GCNSubtarget>());
             } dbgs()
             << '\n';);
}
| 323 | |
| 324 | void GCNIterativeScheduler::finalizeSchedule() { // overridden |
| 325 | if (Regions.empty()) |
| 326 | return; |
| 327 | switch (Strategy) { |
| 328 | case SCHEDULE_MINREGONLY: scheduleMinReg(); break; |
| 329 | case SCHEDULE_MINREGFORCED: scheduleMinReg(force: true); break; |
| 330 | case SCHEDULE_LEGACYMAXOCCUPANCY: scheduleLegacyMaxOccupancy(); break; |
| 331 | case SCHEDULE_ILP: scheduleILP(TryMaximizeOccupancy: false); break; |
| 332 | } |
| 333 | } |
| 334 | |
| 335 | // Detach schedule from SUnits and interleave it with debug values. |
| 336 | // Returned schedule becomes independent of DAG state. |
| 337 | std::vector<MachineInstr*> |
| 338 | GCNIterativeScheduler::detachSchedule(ScheduleRef Schedule) const { |
| 339 | std::vector<MachineInstr*> Res; |
| 340 | Res.reserve(n: Schedule.size() * 2); |
| 341 | |
| 342 | if (FirstDbgValue) |
| 343 | Res.push_back(x: FirstDbgValue); |
| 344 | |
| 345 | const auto DbgB = DbgValues.begin(), DbgE = DbgValues.end(); |
| 346 | for (const auto *SU : Schedule) { |
| 347 | Res.push_back(x: SU->getInstr()); |
| 348 | const auto &D = std::find_if(first: DbgB, last: DbgE, pred: [SU](decltype(*DbgB) &P) { |
| 349 | return P.second == SU->getInstr(); |
| 350 | }); |
| 351 | if (D != DbgE) |
| 352 | Res.push_back(x: D->first); |
| 353 | } |
| 354 | return Res; |
| 355 | } |
| 356 | |
| 357 | void GCNIterativeScheduler::setBestSchedule(Region &R, |
| 358 | ScheduleRef Schedule, |
| 359 | const GCNRegPressure &MaxRP) { |
| 360 | R.BestSchedule.reset( |
| 361 | p: new TentativeSchedule{ .Schedule: detachSchedule(Schedule), .MaxPressure: MaxRP }); |
| 362 | } |
| 363 | |
| 364 | void GCNIterativeScheduler::scheduleBest(Region &R) { |
| 365 | assert(R.BestSchedule.get() && "No schedule specified" ); |
| 366 | scheduleRegion(R, Schedule&: R.BestSchedule->Schedule, MaxRP: R.BestSchedule->MaxPressure); |
| 367 | R.BestSchedule.reset(); |
| 368 | } |
| 369 | |
| 370 | // minimal required region scheduler, works for ranges of SUnits*, |
| 371 | // SUnits or MachineIntrs* |
| 372 | template <typename Range> |
| 373 | void GCNIterativeScheduler::scheduleRegion(Region &R, Range &&Schedule, |
| 374 | const GCNRegPressure &MaxRP) { |
| 375 | assert(RegionBegin == R.Begin && RegionEnd == R.End); |
| 376 | assert(LIS != nullptr); |
| 377 | #ifndef NDEBUG |
| 378 | const auto SchedMaxRP = getSchedulePressure(R, Schedule); |
| 379 | #endif |
| 380 | auto *BB = R.Begin->getParent(); |
| 381 | auto Top = R.Begin; |
| 382 | for (const auto &I : Schedule) { |
| 383 | auto MI = getMachineInstr(I); |
| 384 | if (MI != &*Top) { |
| 385 | BB->remove(I: MI); |
| 386 | BB->insert(Top, MI); |
| 387 | if (!MI->isDebugInstr()) |
| 388 | LIS->handleMove(MI&: *MI, UpdateFlags: true); |
| 389 | } |
| 390 | if (!MI->isDebugInstr()) { |
| 391 | // Reset read - undef flags and update them later. |
| 392 | for (auto &Op : MI->all_defs()) |
| 393 | Op.setIsUndef(false); |
| 394 | |
| 395 | RegisterOperands RegOpers; |
| 396 | RegOpers.collect(MI: *MI, TRI: *TRI, MRI, /*ShouldTrackLaneMasks*/TrackLaneMasks: true, |
| 397 | /*IgnoreDead*/false); |
| 398 | // Adjust liveness and add missing dead+read-undef flags. |
| 399 | auto SlotIdx = LIS->getInstructionIndex(Instr: *MI).getRegSlot(); |
| 400 | RegOpers.adjustLaneLiveness(LIS: *LIS, MRI, Pos: SlotIdx, AddFlagsMI: MI); |
| 401 | } |
| 402 | Top = std::next(MI->getIterator()); |
| 403 | } |
| 404 | RegionBegin = getMachineInstr(Schedule.front()); |
| 405 | |
| 406 | // Schedule consisting of MachineInstr* is considered 'detached' |
| 407 | // and already interleaved with debug values |
| 408 | if (!std::is_same_v<decltype(*Schedule.begin()), MachineInstr*>) { |
| 409 | placeDebugValues(); |
| 410 | // Unfortunately placeDebugValues incorrectly modifies RegionEnd, restore |
| 411 | // assert(R.End == RegionEnd); |
| 412 | RegionEnd = R.End; |
| 413 | } |
| 414 | |
| 415 | R.Begin = RegionBegin; |
| 416 | R.MaxPressure = MaxRP; |
| 417 | |
| 418 | #ifndef NDEBUG |
| 419 | const auto RegionMaxRP = getRegionPressure(R); |
| 420 | const auto &ST = MF.getSubtarget<GCNSubtarget>(); |
| 421 | #endif |
| 422 | assert( |
| 423 | (SchedMaxRP == RegionMaxRP && (MaxRP.empty() || SchedMaxRP == MaxRP)) || |
| 424 | (dbgs() << "Max RP mismatch!!!\n" |
| 425 | "RP for schedule (calculated): " |
| 426 | << print(SchedMaxRP, &ST) |
| 427 | << "RP for schedule (reported): " << print(MaxRP, &ST) |
| 428 | << "RP after scheduling: " << print(RegionMaxRP, &ST), |
| 429 | false)); |
| 430 | } |
| 431 | |
| 432 | // Sort recorded regions by pressure - highest at the front |
| 433 | void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) { |
| 434 | llvm::sort(C&: Regions, Comp: [this, TargetOcc](const Region *R1, const Region *R2) { |
| 435 | return R2->MaxPressure.less(MF, O: R1->MaxPressure, MaxOccupancy: TargetOcc); |
| 436 | }); |
| 437 | } |
| 438 | |
| 439 | /////////////////////////////////////////////////////////////////////////////// |
| 440 | // Legacy MaxOccupancy Strategy |
| 441 | |
| 442 | // Tries to increase occupancy applying minreg scheduler for a sequence of |
| 443 | // most demanding regions. Obtained schedules are saved as BestSchedule for a |
| 444 | // region. |
| 445 | // TargetOcc is the best achievable occupancy for a kernel. |
| 446 | // Returns better occupancy on success or current occupancy on fail. |
| 447 | // BestSchedules aren't deleted on fail. |
| 448 | unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) { |
| 449 | // TODO: assert Regions are sorted descending by pressure |
| 450 | const auto &ST = MF.getSubtarget<GCNSubtarget>(); |
| 451 | const unsigned DynamicVGPRBlockSize = |
| 452 | MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize(); |
| 453 | const auto Occ = |
| 454 | Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize); |
| 455 | LLVM_DEBUG(dbgs() << "Trying to improve occupancy, target = " << TargetOcc |
| 456 | << ", current = " << Occ << '\n'); |
| 457 | |
| 458 | auto NewOcc = TargetOcc; |
| 459 | for (auto *R : Regions) { |
| 460 | // Always build the DAG to add mutations |
| 461 | BuildDAG DAG(*R, *this); |
| 462 | |
| 463 | if (R->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize) >= NewOcc) |
| 464 | continue; |
| 465 | |
| 466 | LLVM_DEBUG(printRegion(dbgs(), R->Begin, R->End, LIS, 3); |
| 467 | printLivenessInfo(dbgs(), R->Begin, R->End, LIS)); |
| 468 | |
| 469 | const auto MinSchedule = makeMinRegSchedule(TopRoots: DAG.getTopRoots(), DAG: *this); |
| 470 | const auto MaxRP = getSchedulePressure(R: *R, Schedule: MinSchedule); |
| 471 | LLVM_DEBUG(dbgs() << "Occupancy improvement attempt:\n" ; |
| 472 | printSchedRP(dbgs(), R->MaxPressure, MaxRP)); |
| 473 | |
| 474 | NewOcc = std::min(a: NewOcc, b: MaxRP.getOccupancy(ST, DynamicVGPRBlockSize)); |
| 475 | if (NewOcc <= Occ) |
| 476 | break; |
| 477 | |
| 478 | setBestSchedule(R&: *R, Schedule: MinSchedule, MaxRP); |
| 479 | } |
| 480 | LLVM_DEBUG(dbgs() << "New occupancy = " << NewOcc |
| 481 | << ", prev occupancy = " << Occ << '\n'); |
| 482 | if (NewOcc > Occ) { |
| 483 | SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
| 484 | MFI->increaseOccupancy(MF, Limit: NewOcc); |
| 485 | } |
| 486 | |
| 487 | return std::max(a: NewOcc, b: Occ); |
| 488 | } |
| 489 | |
| 490 | void GCNIterativeScheduler::scheduleLegacyMaxOccupancy( |
| 491 | bool TryMaximizeOccupancy) { |
| 492 | const auto &ST = MF.getSubtarget<GCNSubtarget>(); |
| 493 | SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
| 494 | auto TgtOcc = MFI->getMinAllowedOccupancy(); |
| 495 | unsigned DynamicVGPRBlockSize = MFI->getDynamicVGPRBlockSize(); |
| 496 | |
| 497 | sortRegionsByPressure(TargetOcc: TgtOcc); |
| 498 | auto Occ = |
| 499 | Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize); |
| 500 | |
| 501 | bool IsReentry = false; |
| 502 | if (TryMaximizeOccupancy && Occ < TgtOcc) { |
| 503 | Occ = tryMaximizeOccupancy(TargetOcc: TgtOcc); |
| 504 | IsReentry = true; |
| 505 | } |
| 506 | |
| 507 | // This is really weird but for some magic scheduling regions twice |
| 508 | // gives performance improvement |
| 509 | const int NumPasses = Occ < TgtOcc ? 2 : 1; |
| 510 | |
| 511 | TgtOcc = std::min(a: Occ, b: TgtOcc); |
| 512 | LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, " |
| 513 | "target occupancy = " |
| 514 | << TgtOcc << '\n'); |
| 515 | GCNMaxOccupancySchedStrategy LStrgy(Context, /*IsLegacyScheduler=*/true); |
| 516 | unsigned FinalOccupancy = std::min(a: Occ, b: MFI->getOccupancy()); |
| 517 | |
| 518 | for (int I = 0; I < NumPasses; ++I) { |
| 519 | // running first pass with TargetOccupancy = 0 mimics previous scheduling |
| 520 | // approach and is a performance magic |
| 521 | LStrgy.setTargetOccupancy(I == 0 ? 0 : TgtOcc); |
| 522 | for (auto *R : Regions) { |
| 523 | OverrideLegacyStrategy Ovr(*R, LStrgy, *this); |
| 524 | IsReentry |= I > 0; |
| 525 | swapIGLPMutations(R: *R, IsReentry); |
| 526 | Ovr.schedule(); |
| 527 | const auto RP = getRegionPressure(R: *R); |
| 528 | LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP)); |
| 529 | |
| 530 | if (RP.getOccupancy(ST, DynamicVGPRBlockSize) < TgtOcc) { |
| 531 | LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc); |
| 532 | if (R->BestSchedule.get() && R->BestSchedule->MaxPressure.getOccupancy( |
| 533 | ST, DynamicVGPRBlockSize) >= TgtOcc) { |
| 534 | LLVM_DEBUG(dbgs() << ", scheduling minimal register\n" ); |
| 535 | scheduleBest(R&: *R); |
| 536 | } else { |
| 537 | LLVM_DEBUG(dbgs() << ", restoring\n" ); |
| 538 | Ovr.restoreOrder(); |
| 539 | assert(R->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize) >= |
| 540 | TgtOcc); |
| 541 | } |
| 542 | } |
| 543 | FinalOccupancy = |
| 544 | std::min(a: FinalOccupancy, b: RP.getOccupancy(ST, DynamicVGPRBlockSize)); |
| 545 | } |
| 546 | } |
| 547 | MFI->limitOccupancy(Limit: FinalOccupancy); |
| 548 | } |
| 549 | |
| 550 | /////////////////////////////////////////////////////////////////////////////// |
| 551 | // Minimal Register Strategy |
| 552 | |
| 553 | void GCNIterativeScheduler::scheduleMinReg(bool force) { |
| 554 | const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
| 555 | const auto TgtOcc = MFI->getOccupancy(); |
| 556 | sortRegionsByPressure(TargetOcc: TgtOcc); |
| 557 | |
| 558 | auto MaxPressure = Regions.front()->MaxPressure; |
| 559 | for (auto *R : Regions) { |
| 560 | if (!force && R->MaxPressure.less(MF, O: MaxPressure, MaxOccupancy: TgtOcc)) |
| 561 | break; |
| 562 | |
| 563 | BuildDAG DAG(*R, *this); |
| 564 | const auto MinSchedule = makeMinRegSchedule(TopRoots: DAG.getTopRoots(), DAG: *this); |
| 565 | |
| 566 | const auto RP = getSchedulePressure(R: *R, Schedule: MinSchedule); |
| 567 | LLVM_DEBUG(if (R->MaxPressure.less(MF, RP, TgtOcc)) { |
| 568 | dbgs() << "\nWarning: Pressure becomes worse after minreg!" ; |
| 569 | printSchedRP(dbgs(), R->MaxPressure, RP); |
| 570 | }); |
| 571 | |
| 572 | if (!force && MaxPressure.less(MF, O: RP, MaxOccupancy: TgtOcc)) |
| 573 | break; |
| 574 | |
| 575 | scheduleRegion(R&: *R, Schedule: MinSchedule, MaxRP: RP); |
| 576 | LLVM_DEBUG(printSchedResult(dbgs(), R, RP)); |
| 577 | |
| 578 | MaxPressure = RP; |
| 579 | } |
| 580 | } |
| 581 | |
| 582 | /////////////////////////////////////////////////////////////////////////////// |
| 583 | // ILP scheduler port |
| 584 | |
| 585 | void GCNIterativeScheduler::scheduleILP( |
| 586 | bool TryMaximizeOccupancy) { |
| 587 | const auto &ST = MF.getSubtarget<GCNSubtarget>(); |
| 588 | SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
| 589 | auto TgtOcc = MFI->getMinAllowedOccupancy(); |
| 590 | unsigned DynamicVGPRBlockSize = MFI->getDynamicVGPRBlockSize(); |
| 591 | |
| 592 | sortRegionsByPressure(TargetOcc: TgtOcc); |
| 593 | auto Occ = |
| 594 | Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize); |
| 595 | |
| 596 | bool IsReentry = false; |
| 597 | if (TryMaximizeOccupancy && Occ < TgtOcc) { |
| 598 | Occ = tryMaximizeOccupancy(TargetOcc: TgtOcc); |
| 599 | IsReentry = true; |
| 600 | } |
| 601 | |
| 602 | TgtOcc = std::min(a: Occ, b: TgtOcc); |
| 603 | LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, " |
| 604 | "target occupancy = " |
| 605 | << TgtOcc << '\n'); |
| 606 | |
| 607 | unsigned FinalOccupancy = std::min(a: Occ, b: MFI->getOccupancy()); |
| 608 | for (auto *R : Regions) { |
| 609 | BuildDAG DAG(*R, *this, IsReentry); |
| 610 | const auto ILPSchedule = makeGCNILPScheduler(BotRoots: DAG.getBottomRoots(), DAG: *this); |
| 611 | |
| 612 | const auto RP = getSchedulePressure(R: *R, Schedule: ILPSchedule); |
| 613 | LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP)); |
| 614 | |
| 615 | if (RP.getOccupancy(ST, DynamicVGPRBlockSize) < TgtOcc) { |
| 616 | LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc); |
| 617 | if (R->BestSchedule.get() && R->BestSchedule->MaxPressure.getOccupancy( |
| 618 | ST, DynamicVGPRBlockSize) >= TgtOcc) { |
| 619 | LLVM_DEBUG(dbgs() << ", scheduling minimal register\n" ); |
| 620 | scheduleBest(R&: *R); |
| 621 | } |
| 622 | } else { |
| 623 | scheduleRegion(R&: *R, Schedule: ILPSchedule, MaxRP: RP); |
| 624 | LLVM_DEBUG(printSchedResult(dbgs(), R, RP)); |
| 625 | FinalOccupancy = |
| 626 | std::min(a: FinalOccupancy, b: RP.getOccupancy(ST, DynamicVGPRBlockSize)); |
| 627 | } |
| 628 | } |
| 629 | MFI->limitOccupancy(Limit: FinalOccupancy); |
| 630 | } |
| 631 | |