AMDGPURewriteAGPRCopyMFMA.cpp source code [llvm_projects/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp]

1	//===-- AMDGPURewriteAGPRCopyMFMA.cpp -------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	/// \file \brief Try to replace MFMA instructions using VGPRs with MFMA
10	/// instructions using AGPRs. We expect MFMAs to be selected using VGPRs, and
11	/// only use AGPRs if it helps avoid spilling. In this case, the MFMA will have
12	/// copies between AGPRs and VGPRs and the AGPR variant of an MFMA pseudo. This
13	/// pass will attempt to delete the cross register bank copy and replace the
14	/// MFMA opcode.
15	///
16	/// TODO:
17	/// - Handle rewrites of phis. This must be more careful than normal about the
18	/// reassignment. We do not want to introduce an AGPR-to-AGPR copy inside of a
19	/// loop, so it depends on the exact assignment of the copy.
20	///
21	/// - Update LiveIntervals incrementally instead of recomputing from scratch
22	///
23	//===----------------------------------------------------------------------===//
24
25	#include "AMDGPU.h"
26	#include "GCNSubtarget.h"
27	#include "SIMachineFunctionInfo.h"
28	#include "SIRegisterInfo.h"
29	#include "llvm/ADT/Statistic.h"
30	#include "llvm/CodeGen/LiveIntervals.h"
31	#include "llvm/CodeGen/LiveRegMatrix.h"
32	#include "llvm/CodeGen/LiveStacks.h"
33	#include "llvm/CodeGen/MachineFrameInfo.h"
34	#include "llvm/CodeGen/MachineFunctionPass.h"
35	#include "llvm/CodeGen/SlotIndexes.h"
36	#include "llvm/CodeGen/VirtRegMap.h"
37	#include "llvm/InitializePasses.h"
38
39	using namespace llvm;
40
41	#define DEBUG_TYPE "amdgpu-rewrite-agpr-copy-mfma"
42
43	namespace {
44
45	STATISTIC(NumMFMAsRewrittenToAGPR,
46	"Number of MFMA instructions rewritten to use AGPR form");
47
48	/// Map from spill slot frame index to list of instructions which reference it.
49	using SpillReferenceMap = DenseMap<int, SmallVector<MachineInstr *, `4`>>;
50
51	class AMDGPURewriteAGPRCopyMFMAImpl {
52	MachineFunction &MF;
53	const GCNSubtarget &ST;
54	const SIInstrInfo &TII;
55	const SIRegisterInfo &TRI;
56	MachineRegisterInfo &MRI;
57	VirtRegMap &VRM;
58	LiveRegMatrix &LRM;
59	LiveIntervals &LIS;
60	LiveStacks &LSS;
61	const RegisterClassInfo &RegClassInfo;
62
63	bool attemptReassignmentsToAGPR(SmallSetVector<Register, `4`> &InterferingRegs,
64	MCPhysReg PrefPhysReg) const;
65
66	public:
67	AMDGPURewriteAGPRCopyMFMAImpl(MachineFunction &MF, VirtRegMap &VRM,
68	LiveRegMatrix &LRM, LiveIntervals &LIS,
69	LiveStacks &LSS,
70	const RegisterClassInfo &RegClassInfo)
71	: MF(MF), ST(MF.getSubtarget<GCNSubtarget>()), TII(*ST.getInstrInfo()),
72	TRI(*ST.getRegisterInfo()), MRI(MF.getRegInfo()), VRM(VRM), LRM(LRM),
73	LIS(LIS), LSS(LSS), RegClassInfo(RegClassInfo) {}
74
75	bool isRewriteCandidate(const MachineInstr &MI) const {
76	return TII.isMAI(MI) && AMDGPU::getMFMASrcCVDstAGPROp(Opcode: MI.getOpcode()) != -`1`;
77	}
78
79	/// Find AV_ registers assigned to AGPRs (or virtual registers which were*
80	/// already required to be AGPR).
81	///
82	/// \return the assigned physical register that \p VReg is assigned to if it
83	/// is an AGPR, otherwise MCRegister().
84	MCRegister getAssignedAGPR(Register VReg) const {
85	MCRegister PhysReg = VRM.getPhys(virtReg: VReg);
86	if (!PhysReg)
87	return MCRegister ();
88
89	// If this is an AV register, we have to check if the actual assignment is
90	// to an AGPR
91	const TargetRegisterClass *AssignedRC = TRI.getPhysRegBaseClass(Reg: PhysReg);
92	return TRI.isAGPRClass(RC: AssignedRC) ? PhysReg : MCRegister ();
93	}
94
95	bool tryReassigningMFMAChain(MachineInstr &MFMA, Register MFMAHintReg,
96	MCPhysReg PhysRegHint) const;
97
98	/// Compute the register class constraints based on the uses of \p Reg,
99	/// excluding MFMA uses from which can be rewritten to change the register
100	/// class constraint. MFMA scale operands need to be constraint checked.
101	/// This should be nearly identical to MachineRegisterInfo::recomputeRegClass.
102
103	/// \p RewriteCandidates will collect the set of MFMA instructions that need
104	/// to have the opcode mutated to perform the replacement.
105	///
106	/// \p RewriteRegs will accumulate the set of register used by those MFMAs
107	/// that need to have the register classes adjusted.
108	bool recomputeRegClassExceptRewritable(
109	Register Reg, SmallVectorImpl<MachineInstr *> &RewriteCandidates,
110	SmallSetVector<Register, `4`> &RewriteRegs) const;
111
112	bool tryFoldCopiesToAGPR(Register VReg, MCRegister AssignedAGPR) const;
113	bool tryFoldCopiesFromAGPR(Register VReg, MCRegister AssignedAGPR) const;
114
115	/// Replace spill instruction \p SpillMI which loads/stores from/to \p SpillFI
116	/// with a COPY to the replacement register value \p VReg.
117	void replaceSpillWithCopyToVReg(MachineInstr &SpillMI, int SpillFI,
118	Register VReg) const;
119
120	/// Create a map from frame index to use instructions for spills. If a use of
121	/// the frame index does not consist only of spill instructions, it will not
122	/// be included in the map.
123	void collectSpillIndexUses(ArrayRef<LiveInterval *> StackIntervals,
124	SpillReferenceMap &Map) const;
125
126	/// Attempt to unspill VGPRs by finding a free register and replacing the
127	/// spill instructions with copies.
128	void eliminateSpillsOfReassignedVGPRs() const;
129
130	bool run(MachineFunction &MF) const;
131	};
132
133	bool AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(
134	Register StartReg, SmallVectorImpl<MachineInstr *> &RewriteCandidates,
135	SmallSetVector<Register, `4`> &RewriteRegs) const {
136	SmallVector<Register, `8`> Worklist = {StartReg};
137
138	// Recursively visit all transitive MFMA users
139	while (!Worklist.empty()) {
140	Register Reg = Worklist.pop_back_val();
141	const TargetRegisterClass *OldRC = MRI.getRegClass(Reg);
142
143	// Inflate to the equivalent AV_ class.*
144	const TargetRegisterClass *NewRC = TRI.getLargestLegalSuperClass(RC: OldRC, MF);
145	if (OldRC == NewRC)
146	return false;
147
148	// Accumulate constraints from all uses.
149	for (MachineOperand &MO : MRI.reg_nodbg_operands(Reg)) {
150	// Apply the effect of the given operand to NewRC.
151	MachineInstr *MI = MO.getParent();
152
153	// We can swap the classes of dst + src2 as a pair to AGPR, so ignore the
154	// effects of rewrite candidates. It just so happens that we can use
155	// either AGPR or VGPR in src0/src1. We still need to check constraint
156	// effects for scale variant, which does not allow AGPR.
157	if (isRewriteCandidate(MI: *MI)) {
158	int AGPROp = AMDGPU::getMFMASrcCVDstAGPROp(Opcode: MI->getOpcode());
159	const MCInstrDesc &AGPRDesc = TII.get(Opcode: AGPROp);
160	const TargetRegisterClass *NewRC =
161	TII.getRegClass(MCID: AGPRDesc, OpNum: MO.getOperandNo());
162	if (!TRI.hasAGPRs(RC: NewRC))
163	return false;
164
165	const MachineOperand *VDst =
166	TII.getNamedOperand(MI&: *MI, OperandName: AMDGPU::OpName::vdst);
167	const MachineOperand *Src2 =
168	TII.getNamedOperand(MI&: *MI, OperandName: AMDGPU::OpName::src2);
169	for (const MachineOperand *Op : {VDst, Src2}) {
170	if (!Op->isReg())
171	continue;
172
173	Register OtherReg = Op->getReg();
174	if (OtherReg.isPhysical())
175	return false;
176
177	if (OtherReg != Reg && RewriteRegs.insert(X: OtherReg))
178	Worklist.push_back(Elt: OtherReg);
179	}
180
181	if (!is_contained(Range&: RewriteCandidates, Element: MI)) {
182	LLVM_DEBUG({
183	Register VDstPhysReg = VRM.getPhys(VDst->getReg());
184	dbgs() << "Attempting to replace VGPR MFMA with AGPR version:"
185	<< " Dst=[" << printReg(VDst->getReg()) << " => "
186	<< printReg(VDstPhysReg, &TRI);
187
188	if (Src2->isReg()) {
189	Register Src2PhysReg = VRM.getPhys(Src2->getReg());
190	dbgs() << "], Src2=[" << printReg(Src2->getReg(), &TRI) << " => "
191	<< printReg(Src2PhysReg, &TRI);
192	}
193
194	dbgs() << "]: " << MI;
195	});
196
197	RewriteCandidates.push_back(Elt: MI);
198	}
199
200	continue;
201	}
202
203	unsigned OpNo = &MO - &MI->getOperand(i: `0`);
204	NewRC = MI->getRegClassConstraintEffect(OpIdx: OpNo, CurRC: NewRC, TII: &TII, TRI: &TRI);
205	if (!NewRC \|\| NewRC == OldRC) {
206	LLVM_DEBUG(dbgs() << "User of " << printReg(Reg, &TRI)
207	<< " cannot be reassigned to "
208	<< TRI.getRegClassName(NewRC) << ": " << *MI);
209	return false;
210	}
211	}
212	}
213
214	return true;
215	}
216
217	bool AMDGPURewriteAGPRCopyMFMAImpl::tryReassigningMFMAChain(
218	MachineInstr &MFMA, Register MFMAHintReg, MCPhysReg PhysRegHint) const {
219	// src2 and dst have the same physical class constraint; try to preserve
220	// the original src2 subclass if one were to exist.
221	SmallVector<MachineInstr *, `4`> RewriteCandidates = {&MFMA};
222	SmallSetVector<Register, `4`> RewriteRegs;
223
224	// Make sure we reassign the MFMA we found the copy from first. We want
225	// to ensure dst ends up in the physreg we were originally copying to.
226	RewriteRegs.insert(X: MFMAHintReg);
227
228	// We've found av = COPY (MFMA) (or MFMA (v = COPY av)) and need to verify
229	// that we can trivially rewrite src2 to use the new AGPR. If we can't
230	// trivially replace it, we're going to induce as many copies as we would have
231	// emitted in the first place, as well as need to assign another register, and
232	// need to figure out where to put them. The live range splitting is smarter
233	// than anything we're doing here, so trust it did something reasonable.
234	//
235	// Note recomputeRegClassExceptRewritable will consider the constraints of
236	// this MFMA's src2 as well as the src2/dst of any transitive MFMA users.
237	if (!recomputeRegClassExceptRewritable(StartReg: MFMAHintReg, RewriteCandidates,
238	RewriteRegs)) {
239	LLVM_DEBUG(dbgs() << "Could not recompute the regclass of dst reg "
240	<< printReg(MFMAHintReg, &TRI) << `'\n'`);
241	return false;
242	}
243
244	// If src2 and dst are different registers, we need to also reassign the
245	// input to an available AGPR if it is compatible with all other uses.
246	//
247	// If we can't reassign it, we'd need to introduce a different copy
248	// which is likely worse than the copy we'd be saving.
249	//
250	// It's likely that the MFMA is used in sequence with other MFMAs; if we
251	// cannot migrate the full use/def chain of MFMAs, we would need to
252	// introduce intermediate copies somewhere. So we only make the
253	// transform if all the interfering MFMAs can also be migrated. Collect
254	// the set of rewritable MFMAs and check if we can assign an AGPR at
255	// that point.
256	//
257	// If any of the MFMAs aren't reassignable, we give up and rollback to
258	// the original register assignments.
259
260	using RecoloringStack =
261	SmallVector<std::pair<const LiveInterval *, MCRegister>, `8`>;
262	RecoloringStack TentativeReassignments;
263
264	for (Register RewriteReg : RewriteRegs) {
265	LiveInterval &LI = LIS.getInterval(Reg: RewriteReg);
266	TentativeReassignments.push_back(Elt: {&LI, VRM.getPhys(virtReg: RewriteReg)});
267	LRM.unassign(VirtReg: LI);
268	}
269
270	if (!attemptReassignmentsToAGPR(InterferingRegs&: RewriteRegs, PrefPhysReg: PhysRegHint)) {
271	// Roll back the register assignments to the original state.
272	for (auto [LI, OldAssign] : TentativeReassignments) {
273	if (VRM.hasPhys(virtReg: LI->reg()))
274	LRM.unassign(VirtReg: *LI);
275	LRM.assign(VirtReg: *LI, PhysReg: OldAssign);
276	}
277
278	return false;
279	}
280
281	// Fixup the register classes of the virtual registers now that we've
282	// committed to the reassignments.
283	for (Register InterferingReg : RewriteRegs) {
284	const TargetRegisterClass *EquivalentAGPRRegClass =
285	TRI.getEquivalentAGPRClass(SRC: MRI.getRegClass(Reg: InterferingReg));
286	MRI.setRegClass(Reg: InterferingReg, RC: EquivalentAGPRRegClass);
287	}
288
289	for (MachineInstr *RewriteCandidate : RewriteCandidates) {
290	int NewMFMAOp =
291	AMDGPU::getMFMASrcCVDstAGPROp(Opcode: RewriteCandidate->getOpcode());
292	RewriteCandidate->setDesc(TII.get(Opcode: NewMFMAOp));
293	++NumMFMAsRewrittenToAGPR;
294	}
295
296	return true;
297	}
298
299	/// Attempt to reassign the registers in \p InterferingRegs to be AGPRs, with a
300	/// preference to use \p PhysReg first. Returns false if the reassignments
301	/// cannot be trivially performed.
302	bool AMDGPURewriteAGPRCopyMFMAImpl::attemptReassignmentsToAGPR(
303	SmallSetVector<Register, `4`> &InterferingRegs, MCPhysReg PrefPhysReg) const {
304	// FIXME: The ordering may matter here, but we're just taking uselistorder
305	// with the special case of ensuring to process the starting instruction
306	// first. We probably should extract the priority advisor out of greedy and
307	// use that ordering.
308	for (Register InterferingReg : InterferingRegs) {
309	LiveInterval &ReassignLI = LIS.getInterval(Reg: InterferingReg);
310	const TargetRegisterClass *EquivalentAGPRRegClass =
311	TRI.getEquivalentAGPRClass(SRC: MRI.getRegClass(Reg: InterferingReg));
312
313	MCPhysReg Assignable = AMDGPU::NoRegister;
314	if (EquivalentAGPRRegClass->contains(Reg: PrefPhysReg) &&
315	LRM.checkInterference(VirtReg: ReassignLI, PhysReg: PrefPhysReg) ==
316	LiveRegMatrix::IK_Free) {
317	// First try to assign to the AGPR we were already copying to. This
318	// should be the first assignment we attempt. We have to guard
319	// against the use being a subregister (which doesn't have an exact
320	// class match).
321
322	// TODO: If this does happen to be a subregister use, we should
323	// still try to assign to a subregister of the original copy result.
324	Assignable = PrefPhysReg;
325	} else {
326	ArrayRef<MCPhysReg> AllocOrder =
327	RegClassInfo.getOrder(RC: EquivalentAGPRRegClass);
328	for (MCPhysReg Reg : AllocOrder) {
329	if (LRM.checkInterference(VirtReg: ReassignLI, PhysReg: Reg) == LiveRegMatrix::IK_Free) {
330	Assignable = Reg;
331	break;
332	}
333	}
334	}
335
336	if (!Assignable) {
337	LLVM_DEBUG(dbgs() << "Unable to reassign VGPR "
338	<< printReg(InterferingReg, &TRI)
339	<< " to a free AGPR\n");
340	return false;
341	}
342
343	LLVM_DEBUG(dbgs() << "Reassigning VGPR " << printReg(InterferingReg, &TRI)
344	<< " to " << printReg(Assignable, &TRI) << `'\n'`);
345	LRM.assign(VirtReg: ReassignLI, PhysReg: Assignable);
346	}
347
348	return true;
349	}
350
351	/// Identify copies that look like:
352	/// %vdst:vgpr = V_MFMA_.. %src0:av, %src1:av, %src2:vgpr
353	/// %agpr = COPY %vgpr
354	///
355	/// Then try to replace the transitive uses of %src2 and %vdst with the AGPR
356	/// versions of the MFMA. This should cover the common case.
357	bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesToAGPR(
358	Register VReg, MCRegister AssignedAGPR) const {
359	bool MadeChange = false;
360	for (MachineInstr &UseMI : MRI.def_instructions(Reg: VReg)) {
361	if (!UseMI.isCopy())
362	continue;
363
364	Register CopySrcReg = UseMI.getOperand(i: `1`).getReg();
365	if (!CopySrcReg.isVirtual())
366	continue;
367
368	// TODO: Handle loop phis copied to AGPR. e.g.
369	//
370	// loop:
371	// %phi:vgpr = COPY %mfma:vgpr
372	// %mfma:vgpr = V_MFMA_xxx_vgprcd_e64 %a, %b, %phi
373	// s_cbranch_vccnz loop
374	//
375	// endloop:
376	// %agpr = mfma
377	//
378	// We need to be sure that %phi is assigned to the same physical register as
379	// %mfma, or else we will just be moving copies into the loop.
380
381	for (MachineInstr &CopySrcDefMI : MRI.def_instructions(Reg: CopySrcReg)) {
382	if (isRewriteCandidate(MI: CopySrcDefMI) &&
383	tryReassigningMFMAChain(
384	MFMA&: CopySrcDefMI, MFMAHintReg: CopySrcDefMI.getOperand(i: `0`).getReg(), PhysRegHint: AssignedAGPR))
385	MadeChange = true;
386	}
387	}
388
389	return MadeChange;
390	}
391
392	/// Identify copies that look like:
393	/// %src:vgpr = COPY %src:agpr
394	/// %vdst:vgpr = V_MFMA_... %src0:av, %src1:av, %src:vgpr
395	///
396	/// Then try to replace the transitive uses of %src2 and %vdst with the AGPR
397	/// versions of the MFMA. This should cover rarer cases, and will generally be
398	/// redundant with tryFoldCopiesToAGPR.
399	bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesFromAGPR(
400	Register VReg, MCRegister AssignedAGPR) const {
401	bool MadeChange = false;
402	for (MachineInstr &UseMI : MRI.use_instructions(Reg: VReg)) {
403	if (!UseMI.isCopy())
404	continue;
405
406	Register CopyDstReg = UseMI.getOperand(i: `0`).getReg();
407	if (!CopyDstReg.isVirtual())
408	continue;
409	for (MachineOperand &CopyUseMO : MRI.reg_nodbg_operands(Reg: CopyDstReg)) {
410	if (!CopyUseMO.readsReg())
411	continue;
412
413	MachineInstr &CopyUseMI = *CopyUseMO.getParent();
414	if (isRewriteCandidate(MI: CopyUseMI)) {
415	if (tryReassigningMFMAChain(MFMA&: CopyUseMI, MFMAHintReg: CopyDstReg,
416	PhysRegHint: VRM.getPhys(virtReg: CopyDstReg)))
417	MadeChange = true;
418	}
419	}
420	}
421
422	return MadeChange;
423	}
424
425	void AMDGPURewriteAGPRCopyMFMAImpl::replaceSpillWithCopyToVReg(
426	MachineInstr &SpillMI, int SpillFI, Register VReg) const {
427	const DebugLoc &DL = SpillMI.getDebugLoc();
428	MachineBasicBlock &MBB = *SpillMI.getParent();
429	MachineInstr *NewCopy;
430	if (SpillMI.mayStore()) {
431	NewCopy = BuildMI(BB&: MBB, I&: SpillMI, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: VReg)
432	.add(MO: SpillMI.getOperand(i: `0`));
433	} else {
434	NewCopy = BuildMI(BB&: MBB, I&: SpillMI, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::COPY))
435	.add(MO: SpillMI.getOperand(i: `0`))
436	.addReg(RegNo: VReg);
437	}
438
439	LIS.ReplaceMachineInstrInMaps(MI&: SpillMI, NewMI&: *NewCopy);
440	SpillMI.eraseFromParent();
441	}
442
443	void AMDGPURewriteAGPRCopyMFMAImpl::collectSpillIndexUses(
444	ArrayRef<LiveInterval > StackIntervals, SpillReferenceMap &Map) const* {
445
446	SmallSet<int, `4`> NeededFrameIndexes;
447	for (const LiveInterval *LI : StackIntervals)
448	NeededFrameIndexes.insert(V: LI->reg().stackSlotIndex());
449
450	for (MachineBasicBlock &MBB : MF) {
451	for (MachineInstr &MI : MBB) {
452	for (MachineOperand &MO : MI.operands()) {
453	if (!MO.isFI() \|\| !NeededFrameIndexes.count(V: MO.getIndex()))
454	continue;
455
456	if (TII.isVGPRSpill(MI)) {
457	SmallVector<MachineInstr *, `4`> &References = Map [MO.getIndex()];
458	References.push_back(Elt: &MI);
459	break;
460	}
461
462	// Verify this was really a spill instruction, if it's not just ignore
463	// all uses.
464
465	// TODO: This should probably be verifier enforced.
466	NeededFrameIndexes.erase(V: MO.getIndex());
467	Map.erase(Val: MO.getIndex());
468	}
469	}
470	}
471	}
472
473	void AMDGPURewriteAGPRCopyMFMAImpl::eliminateSpillsOfReassignedVGPRs() const {
474	unsigned NumSlots = LSS.getNumIntervals();
475	if (NumSlots == `0`)
476	return;
477
478	MachineFrameInfo &MFI = MF.getFrameInfo();
479
480	SmallVector<LiveInterval *, `32`> StackIntervals;
481	StackIntervals.reserve(N: NumSlots);
482
483	for (auto &[Slot, LI] : LSS) {
484	if (!MFI.isSpillSlotObjectIndex(ObjectIdx: Slot) \|\| MFI.isDeadObjectIndex(ObjectIdx: Slot))
485	continue;
486
487	const TargetRegisterClass *RC = LSS.getIntervalRegClass(Slot);
488	if (TRI.hasVGPRs(RC))
489	StackIntervals.push_back(Elt: &LI);
490	}
491
492	sort(C&: StackIntervals, Comp: [](const LiveInterval A, const* LiveInterval *B) {
493	// The ordering has to be strictly weak.
494	/// Sort heaviest intervals first to prioritize their unspilling
495	if (A->weight() != B->weight())
496	return A->weight() > B->weight();
497
498	if (A->getSize() != B->getSize())
499	return A->getSize() > B->getSize();
500
501	// Tie breaker by number to avoid need for stable sort
502	return A->reg().stackSlotIndex() < B->reg().stackSlotIndex();
503	});
504
505	// FIXME: The APIs for dealing with the LiveInterval of a frame index are
506	// cumbersome. LiveStacks owns its LiveIntervals which refer to stack
507	// slots. We cannot use the usual LiveRegMatrix::assign and unassign on these,
508	// and must create a substitute virtual register to do so. This makes
509	// incremental updating here difficult; we need to actually perform the IR
510	// mutation to get the new vreg references in place to compute the register
511	// LiveInterval to perform an assignment to track the new interference
512	// correctly, and we can't simply migrate the LiveInterval we already have.
513	//
514	// To avoid walking through the entire function for each index, pre-collect
515	// all the instructions slot referencess.
516
517	DenseMap<int, SmallVector<MachineInstr *, `4`>> SpillSlotReferences;
518	collectSpillIndexUses(StackIntervals, Map&: SpillSlotReferences);
519
520	for (LiveInterval *LI : StackIntervals) {
521	int Slot = LI->reg().stackSlotIndex();
522	auto SpillReferences = SpillSlotReferences.find(Val: Slot);
523	if (SpillReferences == SpillSlotReferences.end())
524	continue;
525
526	const TargetRegisterClass *RC = LSS.getIntervalRegClass(Slot);
527
528	LLVM_DEBUG(dbgs() << "Trying to eliminate " << printReg(Slot, &TRI)
529	<< " by reassigning\n");
530
531	ArrayRef<MCPhysReg> AllocOrder = RegClassInfo.getOrder(RC);
532
533	for (MCPhysReg PhysReg : AllocOrder) {
534	if (LRM.checkInterference(VirtReg: *LI, PhysReg) != LiveRegMatrix::IK_Free)
535	continue;
536
537	LLVM_DEBUG(dbgs() << "Reassigning " << *LI << " to "
538	<< printReg(PhysReg, &TRI) << `'\n'`);
539
540	const TargetRegisterClass *RC = LSS.getIntervalRegClass(Slot);
541	Register NewVReg = MRI.createVirtualRegister(RegClass: RC);
542
543	for (MachineInstr *SpillMI : SpillReferences ->second)
544	replaceSpillWithCopyToVReg(SpillMI&: *SpillMI, SpillFI: Slot, VReg: NewVReg);
545
546	// TODO: We should be able to transfer the information from the stack
547	// slot's LiveInterval without recomputing from scratch with the
548	// replacement vreg uses.
549	LiveInterval &NewLI = LIS.createAndComputeVirtRegInterval(Reg: NewVReg);
550	VRM.grow();
551	LRM.assign(VirtReg: NewLI, PhysReg);
552	MFI.RemoveStackObject(ObjectIdx: Slot);
553	break;
554	}
555	}
556	}
557
558	bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
559	// This only applies on subtargets that have a configurable AGPR vs. VGPR
560	// allocation.
561	if (!ST.hasGFX90AInsts())
562	return false;
563
564	// Early exit if no AGPRs were assigned.
565	if (!LRM.isPhysRegUsed(PhysReg: AMDGPU::AGPR0)) {
566	LLVM_DEBUG(dbgs() << "skipping function that did not allocate AGPRs\n");
567	return false;
568	}
569
570	bool MadeChange = false;
571
572	for (unsigned I = `0`, E = MRI.getNumVirtRegs(); I != E; ++I) {
573	Register VReg = Register::index2VirtReg(Index: I);
574	MCRegister AssignedAGPR = getAssignedAGPR(VReg);
575	if (!AssignedAGPR)
576	continue;
577
578	if (tryFoldCopiesToAGPR(VReg, AssignedAGPR))
579	MadeChange = true;
580	if (tryFoldCopiesFromAGPR(VReg, AssignedAGPR))
581	MadeChange = true;
582	}
583
584	// If we've successfully rewritten some MFMAs, we've alleviated some VGPR
585	// pressure. See if we can eliminate some spills now that those registers are
586	// more available.
587	if (MadeChange)
588	eliminateSpillsOfReassignedVGPRs();
589
590	return MadeChange;
591	}
592
593	class AMDGPURewriteAGPRCopyMFMALegacy : public MachineFunctionPass {
594	public:
595	static char ID;
596	RegisterClassInfo RegClassInfo;
597
598	AMDGPURewriteAGPRCopyMFMALegacy() : MachineFunctionPass (ID) {}
599
600	bool runOnMachineFunction(MachineFunction &MF) override;
601
602	StringRef getPassName() const override {
603	return "AMDGPU Rewrite AGPR-Copy-MFMA";
604	}
605
606	void getAnalysisUsage(AnalysisUsage &AU) const override {
607	AU.addRequired<LiveIntervalsWrapperPass>();
608	AU.addRequired<VirtRegMapWrapperLegacy>();
609	AU.addRequired<LiveRegMatrixWrapperLegacy>();
610	AU.addRequired<LiveStacksWrapperLegacy>();
611
612	AU.addPreserved<LiveIntervalsWrapperPass>();
613	AU.addPreserved<VirtRegMapWrapperLegacy>();
614	AU.addPreserved<LiveRegMatrixWrapperLegacy>();
615	AU.addPreserved<LiveStacksWrapperLegacy>();
616
617	AU.setPreservesAll();
618	MachineFunctionPass::getAnalysisUsage(AU);
619	}
620	};
621
622	} // End anonymous namespace.
623
624	INITIALIZE_PASS_BEGIN(AMDGPURewriteAGPRCopyMFMALegacy, DEBUG_TYPE,
625	"AMDGPU Rewrite AGPR-Copy-MFMA", false, false)
626	INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
627	INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy)
628	INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy)
629	INITIALIZE_PASS_DEPENDENCY(LiveStacksWrapperLegacy)
630	INITIALIZE_PASS_END(AMDGPURewriteAGPRCopyMFMALegacy, DEBUG_TYPE,
631	"AMDGPU Rewrite AGPR-Copy-MFMA", false, false)
632
633	char AMDGPURewriteAGPRCopyMFMALegacy::ID = `0`;
634
635	char &llvm::AMDGPURewriteAGPRCopyMFMALegacyID =
636	AMDGPURewriteAGPRCopyMFMALegacy::ID;
637
638	bool AMDGPURewriteAGPRCopyMFMALegacy::runOnMachineFunction(
639	MachineFunction &MF) {
640	if (skipFunction(F: MF.getFunction()))
641	return false;
642
643	RegClassInfo.runOnMachineFunction(MF);
644
645	auto &VRM = getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
646	auto &LRM = getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
647	auto &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
648	auto &LSS = getAnalysis<LiveStacksWrapperLegacy>().getLS();
649	AMDGPURewriteAGPRCopyMFMAImpl Impl(MF, VRM, LRM, LIS, LSS, RegClassInfo);
650	return Impl.run(MF);
651	}
652
653	PreservedAnalyses
654	AMDGPURewriteAGPRCopyMFMAPass::run(MachineFunction &MF,
655	MachineFunctionAnalysisManager &MFAM) {
656	VirtRegMap &VRM = MFAM.getResult<VirtRegMapAnalysis>(IR&: MF);
657	LiveRegMatrix &LRM = MFAM.getResult<LiveRegMatrixAnalysis>(IR&: MF);
658	LiveIntervals &LIS = MFAM.getResult<LiveIntervalsAnalysis>(IR&: MF);
659	LiveStacks &LSS = MFAM.getResult<LiveStacksAnalysis>(IR&: MF);
660	RegisterClassInfo RegClassInfo;
661	RegClassInfo.runOnMachineFunction(MF);
662
663	AMDGPURewriteAGPRCopyMFMAImpl Impl(MF, VRM, LRM, LIS, LSS, RegClassInfo);
664	if (!Impl.run(MF))
665	return PreservedAnalyses::all();
666	auto PA = getMachineFunctionPassPreservedAnalyses();
667	PA.preserveSet<CFGAnalyses>()
668	.preserve<LiveStacksAnalysis>()
669	.preserve<VirtRegMapAnalysis>()
670	.preserve<SlotIndexesAnalysis>()
671	.preserve<LiveIntervalsAnalysis>()
672	.preserve<LiveRegMatrixAnalysis>();
673	return PA;
674	}
675

Browse the source code of llvm_projects/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp