GCNPreRAOptimizations.cpp source code [llvm_projects/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp]

1	//===-- GCNPreRAOptimizations.cpp -----------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	/// \file
10	/// This pass combines split register tuple initialization into a single pseudo:
11	///
12	/// undef %0.sub1:sreg_64 = S_MOV_B32 1
13	/// %0.sub0:sreg_64 = S_MOV_B32 2
14	/// =>
15	/// %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 0x200000001
16	///
17	/// This is to allow rematerialization of a value instead of spilling. It is
18	/// supposed to be done after register coalescer to allow it to do its job and
19	/// before actual register allocation to allow rematerialization.
20	///
21	/// Right now the pass only handles 64 bit SGPRs with immediate initializers,
22	/// although the same shall be possible with other register classes and
23	/// instructions if necessary.
24	///
25	/// This pass also adds register allocation hints to COPY.
26	/// The hints will be post-processed by SIRegisterInfo::getRegAllocationHints.
27	/// When using True16, we often see COPY moving a 16-bit value between a VGPR_32
28	/// and a VGPR_16. If we use the VGPR_16 that corresponds to the lo16 bits of
29	/// the VGPR_32, the COPY can be completely eliminated.
30	///
31	//===----------------------------------------------------------------------===//
32
33	#include "GCNPreRAOptimizations.h"
34	#include "AMDGPU.h"
35	#include "GCNSubtarget.h"
36	#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
37	#include "SIRegisterInfo.h"
38	#include "llvm/CodeGen/LiveIntervals.h"
39	#include "llvm/CodeGen/MachineFunctionPass.h"
40	#include "llvm/InitializePasses.h"
41
42	using namespace llvm;
43
44	#define DEBUG_TYPE "amdgpu-pre-ra-optimizations"
45
46	namespace {
47
48	class GCNPreRAOptimizationsImpl {
49	private:
50	const SIInstrInfo *TII;
51	const SIRegisterInfo *TRI;
52	MachineRegisterInfo *MRI;
53	LiveIntervals *LIS;
54
55	bool processReg(Register Reg);
56
57	public:
58	GCNPreRAOptimizationsImpl(LiveIntervals *LS) : LIS(LS) {}
59	bool run(MachineFunction &MF);
60	};
61
62	class GCNPreRAOptimizationsLegacy : public MachineFunctionPass {
63	public:
64	static char ID;
65
66	GCNPreRAOptimizationsLegacy() : MachineFunctionPass (ID) {}
67
68	bool runOnMachineFunction(MachineFunction &MF) override;
69
70	StringRef getPassName() const override {
71	return "AMDGPU Pre-RA optimizations";
72	}
73
74	void getAnalysisUsage(AnalysisUsage &AU) const override {
75	AU.addRequired<LiveIntervalsWrapperPass>();
76	AU.setPreservesAll();
77	MachineFunctionPass::getAnalysisUsage(AU);
78	}
79	};
80	} // End anonymous namespace.
81
82	INITIALIZE_PASS_BEGIN(GCNPreRAOptimizationsLegacy, DEBUG_TYPE,
83	"AMDGPU Pre-RA optimizations", false, false)
84	INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
85	INITIALIZE_PASS_END(GCNPreRAOptimizationsLegacy, DEBUG_TYPE,
86	"Pre-RA optimizations", false, false)
87
88	char GCNPreRAOptimizationsLegacy::ID = `0`;
89
90	char &llvm::GCNPreRAOptimizationsID = GCNPreRAOptimizationsLegacy::ID;
91
92	FunctionPass *llvm::createGCNPreRAOptimizationsLegacyPass() {
93	return new GCNPreRAOptimizationsLegacy ();
94	}
95
96	bool GCNPreRAOptimizationsImpl::processReg(Register Reg) {
97	MachineInstr Def0 = nullptr*;
98	MachineInstr Def1 = nullptr*;
99	uint64_t Init = `0`;
100	bool Changed = false;
101	SmallSet<Register, `32`> ModifiedRegs;
102	bool IsAGPRDst = TRI->isAGPRClass(RC: MRI->getRegClass(Reg));
103
104	for (MachineInstr &I : MRI->def_instructions(Reg)) {
105	switch (I.getOpcode()) {
106	default:
107	return false;
108	case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
109	break;
110	case AMDGPU::COPY: {
111	// Some subtargets cannot do an AGPR to AGPR copy directly, and need an
112	// intermdiate temporary VGPR register. Try to find the defining
113	// accvgpr_write to avoid temporary registers.
114
115	if (!IsAGPRDst)
116	return false;
117
118	Register SrcReg = I.getOperand(i: `1`).getReg();
119
120	if (!SrcReg.isVirtual())
121	break;
122
123	// Check if source of copy is from another AGPR.
124	bool IsAGPRSrc = TRI->isAGPRClass(RC: MRI->getRegClass(Reg: SrcReg));
125	if (!IsAGPRSrc)
126	break;
127
128	// def_instructions() does not look at subregs so it may give us a
129	// different instruction that defines the same vreg but different subreg
130	// so we have to manually check subreg.
131	Register SrcSubReg = I.getOperand(i: `1`).getSubReg();
132	for (auto &Def : MRI->def_instructions(Reg: SrcReg)) {
133	if (SrcSubReg != Def.getOperand(i: `0`).getSubReg())
134	continue;
135
136	if (Def.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64) {
137	const MachineOperand &DefSrcMO = Def.getOperand(i: `1`);
138
139	// Immediates are not an issue and can be propagated in
140	// postrapseudos pass. Only handle cases where defining
141	// accvgpr_write source is a vreg.
142	if (DefSrcMO.isReg() && DefSrcMO.getReg().isVirtual()) {
143	// Propagate source reg of accvgpr write to this copy instruction
144	I.getOperand(i: `1`).setReg(DefSrcMO.getReg());
145	I.getOperand(i: `1`).setSubReg(DefSrcMO.getSubReg());
146
147	// Reg uses were changed, collect unique set of registers to update
148	// live intervals at the end.
149	ModifiedRegs.insert(V: DefSrcMO.getReg());
150	ModifiedRegs.insert(V: SrcReg);
151
152	Changed = true;
153	}
154
155	// Found the defining accvgpr_write, stop looking any further.
156	break;
157	}
158	}
159	break;
160	}
161	case AMDGPU::S_MOV_B32:
162	if (I.getOperand(i: `0`).getReg() != Reg \|\| !I.getOperand(i: `1`).isImm() \|\|
163	I.getNumOperands() != `2`)
164	return false;
165
166	switch (I.getOperand(i: `0`).getSubReg()) {
167	default:
168	return false;
169	case AMDGPU::sub0:
170	if (Def0)
171	return false;
172	Def0 = &I;
173	Init \|= Lo_32(Value: I.getOperand(i: `1`).getImm());
174	break;
175	case AMDGPU::sub1:
176	if (Def1)
177	return false;
178	Def1 = &I;
179	Init \|= static_cast<uint64_t>(I.getOperand(i: `1`).getImm()) << `32`;
180	break;
181	}
182	break;
183	}
184	}
185
186	// For AGPR reg, check if live intervals need to be updated.
187	if (IsAGPRDst) {
188	if (Changed) {
189	for (Register RegToUpdate : ModifiedRegs) {
190	LIS->removeInterval(Reg: RegToUpdate);
191	LIS->createAndComputeVirtRegInterval(Reg: RegToUpdate);
192	}
193	}
194
195	return Changed;
196	}
197
198	// For SGPR reg, check if we can combine instructions.
199	if (!Def0 \|\| !Def1 \|\| Def0->getParent() != Def1->getParent())
200	return Changed;
201
202	LLVM_DEBUG(dbgs() << "Combining:\n " << Def0 << " " << Def1
203	<< " =>\n");
204
205	if (SlotIndex::isEarlierInstr(A: LIS->getInstructionIndex(Instr: *Def1),
206	B: LIS->getInstructionIndex(Instr: *Def0)))
207	std::swap(a&: Def0, b&: Def1);
208
209	LIS->RemoveMachineInstrFromMaps(MI&: *Def0);
210	LIS->RemoveMachineInstrFromMaps(MI&: *Def1);
211	auto NewI = BuildMI(BB&: Def0->getParent(), I&: Def0, MIMD: Def0->getDebugLoc(),
212	MCID: TII->get(Opcode: AMDGPU::S_MOV_B64_IMM_PSEUDO), DestReg: Reg)
213	.addImm(Val: Init);
214
215	Def0->eraseFromParent();
216	Def1->eraseFromParent();
217	LIS->InsertMachineInstrInMaps(MI&: *NewI);
218	LIS->removeInterval(Reg);
219	LIS->createAndComputeVirtRegInterval(Reg);
220
221	LLVM_DEBUG(dbgs() << " " << *NewI);
222
223	return true;
224	}
225
226	bool GCNPreRAOptimizationsLegacy::runOnMachineFunction(MachineFunction &MF) {
227	if (skipFunction(F: MF.getFunction()))
228	return false;
229	LiveIntervals *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
230	return GCNPreRAOptimizationsImpl (LIS).run(MF);
231	}
232
233	PreservedAnalyses
234	GCNPreRAOptimizationsPass::run(MachineFunction &MF,
235	MachineFunctionAnalysisManager &MFAM) {
236	LiveIntervals *LIS = &MFAM.getResult<LiveIntervalsAnalysis>(IR&: MF);
237	GCNPreRAOptimizationsImpl (LIS).run(MF);
238	return PreservedAnalyses::all();
239	}
240
241	bool GCNPreRAOptimizationsImpl::run(MachineFunction &MF) {
242	const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
243	TII = ST.getInstrInfo();
244	MRI = &MF.getRegInfo();
245	TRI = ST.getRegisterInfo();
246
247	bool Changed = false;
248
249	for (unsigned I = `0`, E = MRI->getNumVirtRegs(); I != E; ++I) {
250	Register Reg = Register::index2VirtReg(Index: I);
251	if (!LIS->hasInterval(Reg))
252	continue;
253	const TargetRegisterClass *RC = MRI->getRegClass(Reg);
254	if ((RC->MC->getSizeInBits() != `64` \|\| !TRI->isSGPRClass(RC)) &&
255	(ST.hasGFX90AInsts() \|\| !TRI->isAGPRClass(RC)))
256	continue;
257
258	Changed \|= processReg(Reg);
259	}
260
261	if (!ST.useRealTrue16Insts())
262	return Changed;
263
264	// Add RA hints to improve True16 COPY elimination.
265	for (const MachineBasicBlock &MBB : MF) {
266	for (const MachineInstr &MI : MBB) {
267	if (MI.getOpcode() != AMDGPU::COPY)
268	continue;
269	Register Dst = MI.getOperand(i: `0`).getReg();
270	Register Src = MI.getOperand(i: `1`).getReg();
271	const TargetRegisterClass DstRC = TRI->getRegClassForReg(MRI: MRI, Reg: Dst);
272	bool IsDst16Bit = AMDGPU::VGPR_16RegClass.hasSubClassEq(RC: DstRC);
273	if (Dst.isVirtual() && IsDst16Bit && Src.isPhysical() &&
274	TRI->getRegClassForReg(MRI: *MRI, Reg: Src) == &AMDGPU::VGPR_32RegClass)
275	MRI->setRegAllocationHint(VReg: Dst, Type: `0`, PrefReg: TRI->getSubReg(Reg: Src, Idx: AMDGPU::lo16));
276	if (Src.isVirtual() &&
277	MRI->getRegClass(Reg: Src) == &AMDGPU::VGPR_16RegClass &&
278	Dst.isPhysical() && DstRC == &AMDGPU::VGPR_32RegClass)
279	MRI->setRegAllocationHint(VReg: Src, Type: `0`, PrefReg: TRI->getSubReg(Reg: Dst, Idx: AMDGPU::lo16));
280	if (!Dst.isVirtual() \|\| !Src.isVirtual())
281	continue;
282	if (MRI->getRegClass(Reg: Dst) == &AMDGPU::VGPR_32RegClass &&
283	MRI->getRegClass(Reg: Src) == &AMDGPU::VGPR_16RegClass) {
284	MRI->setRegAllocationHint(VReg: Dst, Type: AMDGPURI::Size32, PrefReg: Src);
285	MRI->setRegAllocationHint(VReg: Src, Type: AMDGPURI::Size16, PrefReg: Dst);
286	}
287	if (IsDst16Bit && MRI->getRegClass(Reg: Src) == &AMDGPU::VGPR_32RegClass)
288	MRI->setRegAllocationHint(VReg: Dst, Type: AMDGPURI::Size16, PrefReg: Src);
289	}
290	}
291
292	return Changed;
293	}
294

Browse the source code of llvm_projects/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp