//===-- GCNPreRAOptimizations.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass combines split register tuple initialization into a single pseudo:
///
///   undef %0.sub1:sreg_64 = S_MOV_B32 1
///   %0.sub0:sreg_64 = S_MOV_B32 2
/// =>
///   %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 0x100000002
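/// (sub0 supplies bits [31:0] and sub1 bits [63:32] of the combined
/// immediate, matching the accumulation done in processReg below.)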
///
/// This is to allow rematerialization of a value instead of spilling. It is
/// intended to run after the register coalescer, so the coalescer can do its
/// job first, and before actual register allocation, so the value can still
/// be rematerialized.
///
/// Right now the pass only handles 64-bit SGPRs with immediate initializers,
/// although the same should be possible for other register classes and
/// instructions if necessary.
///
/// This pass also adds register allocation hints to COPY.
/// The hints will be post-processed by SIRegisterInfo::getRegAllocationHints.
/// When using True16, we often see COPY moving a 16-bit value between a
/// VGPR_32 and a VGPR_16. If we use the VGPR_16 that corresponds to the lo16
/// bits of the VGPR_32, the COPY can be completely eliminated.
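///
/// For example (illustrative MIR, register numbers hypothetical):
///
///   %1:vgpr_16 = COPY %0:vgpr_32
///
/// If %1 is allocated to the lo16 half of the register chosen for %0, the
/// COPY becomes a same-register move and can be folded away.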
///
//===----------------------------------------------------------------------===//

#include "GCNPreRAOptimizations.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-pre-ra-optimizations"

namespace {

class GCNPreRAOptimizationsImpl {
private:
  const SIInstrInfo *TII;
  const SIRegisterInfo *TRI;
  MachineRegisterInfo *MRI;
  LiveIntervals *LIS;

  bool processReg(Register Reg);

public:
  GCNPreRAOptimizationsImpl(LiveIntervals *LS) : LIS(LS) {}
  bool run(MachineFunction &MF);
};

class GCNPreRAOptimizationsLegacy : public MachineFunctionPass {
public:
  static char ID;

  GCNPreRAOptimizationsLegacy() : MachineFunctionPass(ID) {
    initializeGCNPreRAOptimizationsLegacyPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "AMDGPU Pre-RA optimizations";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LiveIntervalsWrapperPass>();
    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};
} // End anonymous namespace.

INITIALIZE_PASS_BEGIN(GCNPreRAOptimizationsLegacy, DEBUG_TYPE,
                      "AMDGPU Pre-RA optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_END(GCNPreRAOptimizationsLegacy, DEBUG_TYPE,
                    "AMDGPU Pre-RA optimizations", false, false)

char GCNPreRAOptimizationsLegacy::ID = 0;

char &llvm::GCNPreRAOptimizationsID = GCNPreRAOptimizationsLegacy::ID;

FunctionPass *llvm::createGCNPreRAOptimizationsLegacyPass() {
  return new GCNPreRAOptimizationsLegacy();
}

bool GCNPreRAOptimizationsImpl::processReg(Register Reg) {
  MachineInstr *Def0 = nullptr;
  MachineInstr *Def1 = nullptr;
  uint64_t Init = 0;
  bool Changed = false;
  SmallSet<Register, 32> ModifiedRegs;
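  // Virtual registers whose operands were rewritten by the AGPR COPY
  // propagation below; their live intervals are recomputed at the end.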
  bool IsAGPRDst = TRI->isAGPRClass(MRI->getRegClass(Reg));

  for (MachineInstr &I : MRI->def_instructions(Reg)) {
    switch (I.getOpcode()) {
    default:
      return false;
    case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
      break;
    case AMDGPU::COPY: {
      // Some subtargets cannot do an AGPR to AGPR copy directly, and need an
      // intermediate temporary VGPR register. Try to find the defining
      // accvgpr_write to avoid temporary registers.
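      //
      // For example (illustrative MIR, register numbers hypothetical):
      //
      //   %1:agpr_32 = V_ACCVGPR_WRITE_B32_e64 %0:vgpr_32
      //   %2:agpr_32 = COPY %1
      // =>
      //   %2:agpr_32 = COPY %0
      //
      // The rewritten COPY reads the original VGPR directly, so no temporary
      // register is needed to implement the AGPR-to-AGPR move.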

      if (!IsAGPRDst)
        return false;

      Register SrcReg = I.getOperand(1).getReg();

      if (!SrcReg.isVirtual())
        break;

      // Check if source of copy is from another AGPR.
      bool IsAGPRSrc = TRI->isAGPRClass(MRI->getRegClass(SrcReg));
      if (!IsAGPRSrc)
        break;

      // def_instructions() does not look at subregs, so it may return an
      // instruction that defines the same vreg but a different subreg; check
      // the subreg manually.
      Register SrcSubReg = I.getOperand(1).getSubReg();
      for (auto &Def : MRI->def_instructions(SrcReg)) {
        if (SrcSubReg != Def.getOperand(0).getSubReg())
          continue;

        if (Def.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64) {
          MachineOperand DefSrcMO = Def.getOperand(1);

          // Immediates are not an issue and can be propagated in the
          // postrapseudos pass. Only handle cases where the defining
          // accvgpr_write source is a vreg.
          if (DefSrcMO.isReg() && DefSrcMO.getReg().isVirtual()) {
            // Propagate the source reg of the accvgpr_write to this COPY.
            I.getOperand(1).setReg(DefSrcMO.getReg());
            I.getOperand(1).setSubReg(DefSrcMO.getSubReg());

            // Reg uses were changed; collect the unique set of registers to
            // update live intervals at the end.
            ModifiedRegs.insert(DefSrcMO.getReg());
            ModifiedRegs.insert(SrcReg);

            Changed = true;
          }

          // Found the defining accvgpr_write, stop looking any further.
          break;
        }
      }
      break;
    }
    case AMDGPU::S_MOV_B32:
      if (I.getOperand(0).getReg() != Reg || !I.getOperand(1).isImm() ||
          I.getNumOperands() != 2)
        return false;
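
      // Accumulate the 64-bit value with sub0 in bits [31:0] and sub1 in
      // bits [63:32], matching the subregister layout of an sreg_64 tuple.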
      switch (I.getOperand(0).getSubReg()) {
      default:
        return false;
      case AMDGPU::sub0:
        if (Def0)
          return false;
        Def0 = &I;
        Init |= Lo_32(I.getOperand(1).getImm());
        break;
      case AMDGPU::sub1:
        if (Def1)
          return false;
        Def1 = &I;
        Init |= static_cast<uint64_t>(I.getOperand(1).getImm()) << 32;
        break;
      }
      break;
    }
  }

  // For AGPR reg, check if live intervals need to be updated.
  if (IsAGPRDst) {
    if (Changed) {
      for (Register RegToUpdate : ModifiedRegs) {
        LIS->removeInterval(RegToUpdate);
        LIS->createAndComputeVirtRegInterval(RegToUpdate);
      }
    }

    return Changed;
  }

  // For SGPR reg, check if we can combine instructions.
  if (!Def0 || !Def1 || Def0->getParent() != Def1->getParent())
    return Changed;

  LLVM_DEBUG(dbgs() << "Combining:\n " << *Def0 << " " << *Def1 << " =>\n");
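
  // Make sure Def0 is the earlier of the two defs, so the combined move is
  // built at the earlier program point (BuildMI inserts before Def0).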
  if (SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*Def1),
                                LIS->getInstructionIndex(*Def0)))
    std::swap(Def0, Def1);

  LIS->RemoveMachineInstrFromMaps(*Def0);
  LIS->RemoveMachineInstrFromMaps(*Def1);
  auto NewI = BuildMI(*Def0->getParent(), *Def0, Def0->getDebugLoc(),
                      TII->get(AMDGPU::S_MOV_B64_IMM_PSEUDO), Reg)
                  .addImm(Init);

  Def0->eraseFromParent();
  Def1->eraseFromParent();
  LIS->InsertMachineInstrInMaps(*NewI);
  LIS->removeInterval(Reg);
  LIS->createAndComputeVirtRegInterval(Reg);

  LLVM_DEBUG(dbgs() << " " << *NewI);

  return true;
}

bool GCNPreRAOptimizationsLegacy::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;
  LiveIntervals *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
  return GCNPreRAOptimizationsImpl(LIS).run(MF);
}

PreservedAnalyses
GCNPreRAOptimizationsPass::run(MachineFunction &MF,
                               MachineFunctionAnalysisManager &MFAM) {
  LiveIntervals *LIS = &MFAM.getResult<LiveIntervalsAnalysis>(MF);
  GCNPreRAOptimizationsImpl(LIS).run(MF);
  return PreservedAnalyses::all();
}

bool GCNPreRAOptimizationsImpl::run(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  TII = ST.getInstrInfo();
  MRI = &MF.getRegInfo();
  TRI = ST.getRegisterInfo();

  bool Changed = false;

  for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
    Register Reg = Register::index2VirtReg(I);
    if (!LIS->hasInterval(Reg))
      continue;
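    // Only 64-bit SGPR tuples (candidates for the S_MOV combine) and, on
    // subtargets without gfx90a instructions (which lack a direct
    // AGPR-to-AGPR copy), AGPR classes are of interest here.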
    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
    if ((RC->MC->getSizeInBits() != 64 || !TRI->isSGPRClass(RC)) &&
        (ST.hasGFX90AInsts() || !TRI->isAGPRClass(RC)))
      continue;

    Changed |= processReg(Reg);
  }

  if (!ST.useRealTrue16Insts())
    return Changed;

  // Add RA hints to improve True16 COPY elimination.
  for (const MachineBasicBlock &MBB : MF) {
    for (const MachineInstr &MI : MBB) {
      if (MI.getOpcode() != AMDGPU::COPY)
        continue;
      Register Dst = MI.getOperand(0).getReg();
      Register Src = MI.getOperand(1).getReg();
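      // A virtual 16-bit dst copied from a physical 32-bit src: hint the dst
      // towards the lo16 half of the src register.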
      if (Dst.isVirtual() &&
          MRI->getRegClass(Dst) == &AMDGPU::VGPR_16RegClass &&
          Src.isPhysical() &&
          TRI->getRegClassForReg(*MRI, Src) == &AMDGPU::VGPR_32RegClass)
        MRI->setRegAllocationHint(Dst, 0, TRI->getSubReg(Src, AMDGPU::lo16));
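      // The symmetric case: a virtual 16-bit src copied into a physical
      // 32-bit dst; hint the src towards the lo16 half of the dst register.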
      if (Src.isVirtual() &&
          MRI->getRegClass(Src) == &AMDGPU::VGPR_16RegClass &&
          Dst.isPhysical() &&
          TRI->getRegClassForReg(*MRI, Dst) == &AMDGPU::VGPR_32RegClass)
        MRI->setRegAllocationHint(Src, 0, TRI->getSubReg(Dst, AMDGPU::lo16));
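      // The remaining hints require both registers to be virtual: record
      // mutual size hints so the allocator can place the 16-bit value in the
      // lo16 half of the 32-bit register.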
      if (!Dst.isVirtual() || !Src.isVirtual())
        continue;
      if (MRI->getRegClass(Dst) == &AMDGPU::VGPR_32RegClass &&
          MRI->getRegClass(Src) == &AMDGPU::VGPR_16RegClass) {
        MRI->setRegAllocationHint(Dst, AMDGPURI::Size32, Src);
        MRI->setRegAllocationHint(Src, AMDGPURI::Size16, Dst);
      }
      if (MRI->getRegClass(Dst) == &AMDGPU::VGPR_16RegClass &&
          MRI->getRegClass(Src) == &AMDGPU::VGPR_32RegClass)
        MRI->setRegAllocationHint(Dst, AMDGPURI::Size16, Src);
    }
  }

  return Changed;
}