//===-- GCNNSAReassign.cpp - Reassign registers in NSA instructions -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Try to reassign registers on GFX10+ from non-sequential to
/// sequential in NSA image instructions. The SIShrinkInstructions pass will
/// later replace NSA encodings with sequential versions where possible.
///
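/// For illustration only (hypothetical machine IR, not from a real
/// compilation): an NSA image instruction whose address operands were
/// allocated to, say, $vgpr7, $vgpr3 and $vgpr5 may be reassigned to
/// $vgpr4, $vgpr5 and $vgpr6, after which SIShrinkInstructions can rewrite
/// it to the shorter sequential (non-NSA) encoding.
///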
//===----------------------------------------------------------------------===//

#include "GCNNSAReassign.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/InitializePasses.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-nsa-reassign"

STATISTIC(NumNSAInstructions,
          "Number of NSA instructions with non-sequential address found");
STATISTIC(NumNSAConverted,
          "Number of NSA instructions changed to sequential");

namespace {
class GCNNSAReassignImpl {
public:
  GCNNSAReassignImpl(VirtRegMap *VM, LiveRegMatrix *LM, LiveIntervals *LS)
      : VRM(VM), LRM(LM), LIS(LS) {}

  bool run(MachineFunction &MF);

private:
  using NSA_Status = enum {
    NOT_NSA,        // Not an NSA instruction
    FIXED,          // NSA which we cannot modify
    NON_CONTIGUOUS, // NSA with non-sequential address which we can try
                    // to optimize.
    CONTIGUOUS      // NSA with all sequential address registers
  };

  const GCNSubtarget *ST;

  const MachineRegisterInfo *MRI;

  const SIRegisterInfo *TRI;

  VirtRegMap *VRM;

  LiveRegMatrix *LRM;

  LiveIntervals *LIS;

  unsigned MaxNumVGPRs;

  const MCPhysReg *CSRegs;

  NSA_Status CheckNSA(const MachineInstr &MI, bool Fast = false) const;

  bool tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,
                          unsigned StartReg) const;

  bool canAssign(unsigned StartReg, unsigned NumRegs) const;

  bool scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const;
};

class GCNNSAReassignLegacy : public MachineFunctionPass {
public:
  static char ID;

  GCNNSAReassignLegacy() : MachineFunctionPass(ID) {
    initializeGCNNSAReassignLegacyPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "GCN NSA Reassign"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LiveIntervalsWrapperPass>();
    AU.addRequired<VirtRegMapWrapperLegacy>();
    AU.addRequired<LiveRegMatrixWrapperLegacy>();
    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.

INITIALIZE_PASS_BEGIN(GCNNSAReassignLegacy, DEBUG_TYPE, "GCN NSA Reassign",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy)
INITIALIZE_PASS_END(GCNNSAReassignLegacy, DEBUG_TYPE, "GCN NSA Reassign", false,
                    false)

char GCNNSAReassignLegacy::ID = 0;

char &llvm::GCNNSAReassignID = GCNNSAReassignLegacy::ID;

bool GCNNSAReassignImpl::tryAssignRegisters(
    SmallVectorImpl<LiveInterval *> &Intervals, unsigned StartReg) const {
  unsigned NumRegs = Intervals.size();

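  // Temporarily unassign all the intervals first, so that the interference
  // checks below are not confused by these intervals' own current locations.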
  for (unsigned N = 0; N < NumRegs; ++N)
    if (VRM->hasPhys(Intervals[N]->reg()))
      LRM->unassign(*Intervals[N]);

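  // Verify that the entire candidate sequential range is interference-free.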
  for (unsigned N = 0; N < NumRegs; ++N)
    if (LRM->checkInterference(*Intervals[N], MCRegister::from(StartReg + N)))
      return false;

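  // All registers in the range are free; commit the sequential assignment.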
  for (unsigned N = 0; N < NumRegs; ++N)
    LRM->assign(*Intervals[N], MCRegister::from(StartReg + N));

  return true;
}

bool GCNNSAReassignImpl::canAssign(unsigned StartReg, unsigned NumRegs) const {
  for (unsigned N = 0; N < NumRegs; ++N) {
    unsigned Reg = StartReg + N;
    if (!MRI->isAllocatable(Reg))
      return false;

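    // Refuse to allocate into a callee-saved register (or any register
    // aliasing one) that is not already used in this function, since that
    // would create a new save/restore obligation.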
    for (unsigned I = 0; CSRegs[I]; ++I)
      if (TRI->isSubRegisterEq(Reg, CSRegs[I]) &&
          !LRM->isPhysRegUsed(CSRegs[I]))
        return false;
  }

  return true;
}

bool GCNNSAReassignImpl::scavengeRegs(
    SmallVectorImpl<LiveInterval *> &Intervals) const {
  unsigned NumRegs = Intervals.size();

  if (NumRegs > MaxNumVGPRs)
    return false;
  unsigned MaxReg = MaxNumVGPRs - NumRegs + AMDGPU::VGPR0;

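  // Linearly scan every possible sequential start position in the VGPR file.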
  for (unsigned Reg = AMDGPU::VGPR0; Reg <= MaxReg; ++Reg) {
    if (!canAssign(Reg, NumRegs))
      continue;

    if (tryAssignRegisters(Intervals, Reg))
      return true;
  }

  return false;
}

GCNNSAReassignImpl::NSA_Status
GCNNSAReassignImpl::CheckNSA(const MachineInstr &MI, bool Fast) const {
  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  if (!Info)
    return NSA_Status::NOT_NSA;

  switch (Info->MIMGEncoding) {
  case AMDGPU::MIMGEncGfx10NSA:
  case AMDGPU::MIMGEncGfx11NSA:
    break;
  default:
    return NSA_Status::NOT_NSA;
  }

  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);

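  // Walk the address operands, remembering the physical register of the
  // first one; any later operand not allocated at VgprBase + I makes the
  // address non-sequential.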
  unsigned VgprBase = 0;
  bool NSA = false;
  for (unsigned I = 0; I < Info->VAddrOperands; ++I) {
    const MachineOperand &Op = MI.getOperand(VAddr0Idx + I);
    Register Reg = Op.getReg();
    if (Reg.isPhysical() || !VRM->isAssignedReg(Reg))
      return NSA_Status::FIXED;

    Register PhysReg = VRM->getPhys(Reg);

    if (!Fast) {
      if (!PhysReg)
        return NSA_Status::FIXED;

      // TODO: Address the limitation below to handle GFX11 BVH instructions.
      // Bail if the address is not a VGPR32. It should be possible to extend
      // the optimization to work with subregs of wider register tuples, but
      // the logic to find free registers would be much more complicated, with
      // much less chance of success. It seems reasonable to assume that in
      // most cases a tuple is used because a vector variable contains
      // different parts of an address, which is either already consecutive or
      // cannot be reassigned if it is not. If needed, it is better to rely on
      // the register coalescer to process such address tuples.
      if (TRI->getRegSizeInBits(*MRI->getRegClass(Reg)) != 32 ||
          Op.getSubReg())
        return NSA_Status::FIXED;
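      // For illustration only (hypothetical example): an address operand like
      // %10.sub1, where %10 is a 64-bit register tuple, is rejected here;
      // only plain 32-bit virtual registers without a subregister index are
      // considered.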

      // InlineSpiller does not call LRM::assign() after an LI split, leaving
      // it in an inconsistent state, so we cannot call LRM::unassign().
      // See LLVM bug #48911.
      // Skip reassignment if a register originated from such a split.
      // FIXME: Remove the workaround when bug #48911 is fixed.
      if (VRM->getPreSplitReg(Reg))
        return NSA_Status::FIXED;

      const MachineInstr *Def = MRI->getUniqueVRegDef(Reg);

      if (Def && Def->isCopy() && Def->getOperand(1).getReg() == PhysReg)
        return NSA_Status::FIXED;

      for (auto U : MRI->use_nodbg_operands(Reg)) {
        if (U.isImplicit())
          return NSA_Status::FIXED;
        const MachineInstr *UseInst = U.getParent();
        if (UseInst->isCopy() && UseInst->getOperand(0).getReg() == PhysReg)
          return NSA_Status::FIXED;
      }

      if (!LIS->hasInterval(Reg))
        return NSA_Status::FIXED;
    }

    if (I == 0)
      VgprBase = PhysReg;
    else if (VgprBase + I != PhysReg)
      NSA = true;
  }

  return NSA ? NSA_Status::NON_CONTIGUOUS : NSA_Status::CONTIGUOUS;
}

bool GCNNSAReassignImpl::run(MachineFunction &MF) {
  ST = &MF.getSubtarget<GCNSubtarget>();
  if (!ST->hasNSAEncoding() || !ST->hasNonNSAEncoding())
    return false;

  MRI = &MF.getRegInfo();
  TRI = ST->getRegisterInfo();

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MaxNumVGPRs = ST->getMaxNumVGPRs(MF);
  MaxNumVGPRs = std::min(
      ST->getMaxNumVGPRs(MFI->getOccupancy(), MFI->getDynamicVGPRBlockSize()),
      MaxNumVGPRs);
  CSRegs = MRI->getCalleeSavedRegs();

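  // First collect all NSA instructions in the function, recording for each
  // one whether its address registers are already contiguous.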
  using Candidate = std::pair<const MachineInstr *, bool>;
  SmallVector<Candidate, 32> Candidates;
  for (const MachineBasicBlock &MBB : MF) {
    for (const MachineInstr &MI : MBB) {
      switch (CheckNSA(MI)) {
      default:
        continue;
      case NSA_Status::CONTIGUOUS:
        Candidates.push_back(std::pair(&MI, true));
        break;
      case NSA_Status::NON_CONTIGUOUS:
        Candidates.push_back(std::pair(&MI, false));
        ++NumNSAInstructions;
        break;
      }
    }
  }

  bool Changed = false;
  for (auto &C : Candidates) {
    if (C.second)
      continue;

    const MachineInstr *MI = C.first;
    if (CheckNSA(*MI, true) == NSA_Status::CONTIGUOUS) {
      // The address already happens to be contiguous.
      C.second = true;
      ++NumNSAConverted;
      continue;
    }

    const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI->getOpcode());
    int VAddr0Idx =
        AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr0);

    SmallVector<LiveInterval *, 16> Intervals;
    SmallVector<MCRegister, 16> OrigRegs;
    SlotIndex MinInd, MaxInd;
    for (unsigned I = 0; I < Info->VAddrOperands; ++I) {
      const MachineOperand &Op = MI->getOperand(VAddr0Idx + I);
      Register Reg = Op.getReg();
      LiveInterval *LI = &LIS->getInterval(Reg);
      if (llvm::is_contained(Intervals, LI)) {
        // The same register is used more than once; the addresses cannot be
        // made sequential.
        Intervals.clear();
        break;
      }
      Intervals.push_back(LI);
      OrigRegs.push_back(VRM->getPhys(Reg));
      if (LI->empty()) {
        // The address input is undef, so it doesn't contribute to the relevant
        // range. Seed a reasonable index range if required.
        if (I == 0)
          MinInd = MaxInd = LIS->getInstructionIndex(*MI);
        continue;
      }
      MinInd = I != 0 ? std::min(MinInd, LI->beginIndex()) : LI->beginIndex();
      MaxInd = I != 0 ? std::max(MaxInd, LI->endIndex()) : LI->endIndex();
    }

    if (Intervals.empty())
      continue;

    LLVM_DEBUG(dbgs() << "Attempting to reassign NSA: " << *MI
                      << "\tOriginal allocation:\t";
               for (auto *LI : Intervals)
                 dbgs() << " " << llvm::printReg(VRM->getPhys(LI->reg()), TRI);
               dbgs() << '\n');

    bool Success = scavengeRegs(Intervals);
    if (!Success) {
      LLVM_DEBUG(dbgs() << "\tCannot reallocate.\n");
      if (VRM->hasPhys(Intervals.back()->reg())) // Did not change allocation.
        continue;
    } else {
      // Check that we did not make the allocation worse for other
      // instructions.
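      // Candidates were collected in program order, so their slot indexes
      // are sorted; binary-search for the first candidate that may overlap
      // the [MinInd, MaxInd) window of the intervals we just moved.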
      auto *I =
          std::lower_bound(Candidates.begin(), &C, MinInd,
                           [this](const Candidate &C, SlotIndex I) {
                             return LIS->getInstructionIndex(*C.first) < I;
                           });
      for (auto *E = Candidates.end();
           Success && I != E && LIS->getInstructionIndex(*I->first) < MaxInd;
           ++I) {
        if (I->second && CheckNSA(*I->first, true) < NSA_Status::CONTIGUOUS) {
          Success = false;
          LLVM_DEBUG(dbgs() << "\tNSA conversion conflict with " << *I->first);
        }
      }
    }

    if (!Success) {
      for (unsigned I = 0; I < Info->VAddrOperands; ++I)
        if (VRM->hasPhys(Intervals[I]->reg()))
          LRM->unassign(*Intervals[I]);

      for (unsigned I = 0; I < Info->VAddrOperands; ++I)
        LRM->assign(*Intervals[I], OrigRegs[I]);

      continue;
    }

    C.second = true;
    ++NumNSAConverted;
    LLVM_DEBUG(
        dbgs() << "\tNew allocation:\t\t ["
               << llvm::printReg(VRM->getPhys(Intervals.front()->reg()), TRI)
               << " : "
               << llvm::printReg(VRM->getPhys(Intervals.back()->reg()), TRI)
               << "]\n");
    Changed = true;
  }

  return Changed;
}

bool GCNNSAReassignLegacy::runOnMachineFunction(MachineFunction &MF) {
  auto *VRM = &getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
  auto *LRM = &getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
  auto *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();

  GCNNSAReassignImpl Impl(VRM, LRM, LIS);
  return Impl.run(MF);
}

PreservedAnalyses
GCNNSAReassignPass::run(MachineFunction &MF,
                        MachineFunctionAnalysisManager &MFAM) {
  auto &VRM = MFAM.getResult<VirtRegMapAnalysis>(MF);
  auto &LRM = MFAM.getResult<LiveRegMatrixAnalysis>(MF);
  auto &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF);

  GCNNSAReassignImpl Impl(&VRM, &LRM, &LIS);
  Impl.run(MF);
  return PreservedAnalyses::all();
}