//===-- AMDGPURewriteAGPRCopyMFMA.cpp -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file \brief Try to replace MFMA instructions using VGPRs with MFMA
/// instructions using AGPRs. We expect MFMAs to be selected using VGPRs, and
/// only use AGPRs if it helps avoid spilling. In this case, the MFMA will have
/// copies between AGPRs and VGPRs and the AGPR variant of an MFMA pseudo. This
/// pass will attempt to delete the cross register bank copy and replace the
/// MFMA opcode.
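///
/// A sketch of the targeted pattern (opcode and register names are
/// illustrative):
///
///   %src2:vgpr = COPY %init:agpr
///   %vdst:vgpr = V_MFMA_.._vgprcd_e64 %src0:av, %src1:av, %src2:vgpr
///   %result:agpr = COPY %vdst
///
/// which, when the operands can be reassigned to AGPRs, is rewritten to:
///
///   %vdst:agpr = V_MFMA_.._e64 %src0:av, %src1:av, %src2:agpr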
///
/// TODO:
/// - Handle rewrites of phis. This must be more careful than normal about the
/// reassignment. We do not want to introduce an AGPR-to-AGPR copy inside of a
/// loop, so it depends on the exact assignment of the copy.
///
/// - Update LiveIntervals incrementally instead of recomputing from scratch
///
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/InitializePasses.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-rewrite-agpr-copy-mfma"

namespace {

STATISTIC(NumMFMAsRewrittenToAGPR,
          "Number of MFMA instructions rewritten to use AGPR form");

/// Map from spill slot frame index to list of instructions which reference it.
using SpillReferenceMap = DenseMap<int, SmallVector<MachineInstr *, 4>>;

class AMDGPURewriteAGPRCopyMFMAImpl {
  MachineFunction &MF;
  const GCNSubtarget &ST;
  const SIInstrInfo &TII;
  const SIRegisterInfo &TRI;
  MachineRegisterInfo &MRI;
  VirtRegMap &VRM;
  LiveRegMatrix &LRM;
  LiveIntervals &LIS;
  LiveStacks &LSS;
  const RegisterClassInfo &RegClassInfo;

  bool attemptReassignmentsToAGPR(SmallSetVector<Register, 4> &InterferingRegs,
                                  MCPhysReg PrefPhysReg) const;

public:
  AMDGPURewriteAGPRCopyMFMAImpl(MachineFunction &MF, VirtRegMap &VRM,
                                LiveRegMatrix &LRM, LiveIntervals &LIS,
                                LiveStacks &LSS,
                                const RegisterClassInfo &RegClassInfo)
      : MF(MF), ST(MF.getSubtarget<GCNSubtarget>()), TII(*ST.getInstrInfo()),
        TRI(*ST.getRegisterInfo()), MRI(MF.getRegInfo()), VRM(VRM), LRM(LRM),
        LIS(LIS), LSS(LSS), RegClassInfo(RegClassInfo) {}

  bool isRewriteCandidate(const MachineInstr &MI) const {
    return TII.isMAI(MI) &&
           AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode()) != -1;
  }

  /// Find AV_* registers assigned to AGPRs (or virtual registers which were
  /// already required to be AGPR).
  ///
  /// \return the physical register that \p VReg is assigned to if it is an
  /// AGPR, otherwise MCRegister().
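  ///
  /// For example (a sketch; registers illustrative): if %0:av_32 was assigned
  /// to $agpr3, this returns $agpr3; if %0 was assigned to $vgpr5, it returns
  /// MCRegister().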
  MCRegister getAssignedAGPR(Register VReg) const {
    MCRegister PhysReg = VRM.getPhys(VReg);
    if (!PhysReg)
      return MCRegister();

    // If this is an AV register, we have to check if the actual assignment is
    // to an AGPR.
    const TargetRegisterClass *AssignedRC = TRI.getPhysRegBaseClass(PhysReg);
    return TRI.isAGPRClass(AssignedRC) ? PhysReg : MCRegister();
  }

  bool tryReassigningMFMAChain(MachineInstr &MFMA, Register MFMAHintReg,
                               MCPhysReg PhysRegHint) const;

  /// Compute the register class constraints based on the uses of \p Reg,
  /// excluding uses in MFMAs which can be rewritten to change the register
  /// class constraint. MFMA scale operands still need their constraints
  /// checked. This should be nearly identical to
  /// MachineRegisterInfo::recomputeRegClass.
  ///
  /// \p RewriteCandidates will collect the set of MFMA instructions that need
  /// to have the opcode mutated to perform the replacement.
  ///
  /// \p RewriteRegs will accumulate the set of registers used by those MFMAs
  /// that need to have the register classes adjusted.
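  ///
  /// For example, in a chain like (a sketch; names illustrative):
  ///
  ///   %0:av = V_MFMA_.._vgprcd_e64 %a:av, %b:av, %x:vgpr
  ///   %1:av = V_MFMA_.._vgprcd_e64 %a:av, %b:av, %0:av
  ///   %2:agpr = COPY %1
  ///
  /// starting from %1, both MFMAs end up in \p RewriteCandidates and both %0
  /// and %1 in \p RewriteRegs, since dst and src2 must change class as a pair.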
  bool recomputeRegClassExceptRewritable(
      Register Reg, SmallVectorImpl<MachineInstr *> &RewriteCandidates,
      SmallSetVector<Register, 4> &RewriteRegs) const;

  bool tryFoldCopiesToAGPR(Register VReg, MCRegister AssignedAGPR) const;
  bool tryFoldCopiesFromAGPR(Register VReg, MCRegister AssignedAGPR) const;

  /// Replace spill instruction \p SpillMI, which loads/stores from/to
  /// \p SpillFI, with a COPY to/from the replacement virtual register \p VReg.
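  ///
  /// A sketch of the replacement, assuming the 32-bit spill pseudos (operand
  /// details elided):
  ///
  ///   SI_SPILL_V32_SAVE %src, %stack.0, ...     =>  %vreg = COPY %src
  ///   %dst = SI_SPILL_V32_RESTORE %stack.0, ... =>  %dst = COPY %vreg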
  void replaceSpillWithCopyToVReg(MachineInstr &SpillMI, int SpillFI,
                                  Register VReg) const;

  /// Create a map from frame index to use instructions for spills. If the
  /// uses of a frame index do not consist only of spill instructions, it will
  /// not be included in the map.
  void collectSpillIndexUses(ArrayRef<LiveInterval *> StackIntervals,
                             SpillReferenceMap &Map) const;

  /// Attempt to unspill VGPRs by finding a free register and replacing the
  /// spill instructions with copies.
  void eliminateSpillsOfReassignedVGPRs() const;

  bool run(MachineFunction &MF) const;
};

bool AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(
    Register StartReg, SmallVectorImpl<MachineInstr *> &RewriteCandidates,
    SmallSetVector<Register, 4> &RewriteRegs) const {
  SmallVector<Register, 8> Worklist = {StartReg};

  // Recursively visit all transitive MFMA users.
  while (!Worklist.empty()) {
    Register Reg = Worklist.pop_back_val();
    const TargetRegisterClass *OldRC = MRI.getRegClass(Reg);

    // Inflate to the equivalent AV_* class.
    const TargetRegisterClass *NewRC =
        TRI.getLargestLegalSuperClass(OldRC, MF);
    if (OldRC == NewRC)
      return false;

    // Accumulate constraints from all uses.
    for (MachineOperand &MO : MRI.reg_nodbg_operands(Reg)) {
      // Apply the effect of the given operand to NewRC.
      MachineInstr *MI = MO.getParent();

      // We can swap the classes of dst + src2 as a pair to AGPR, so ignore
      // the effects of rewrite candidates. It just so happens that we can use
      // either AGPR or VGPR in src0/src1. We still need to check the
      // constraint effects for the scale variants, which do not allow AGPRs.
      if (isRewriteCandidate(*MI)) {
        int AGPROp = AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode());
        const MCInstrDesc &AGPRDesc = TII.get(AGPROp);
        const TargetRegisterClass *OpRC =
            TII.getRegClass(AGPRDesc, MO.getOperandNo());
        if (!TRI.hasAGPRs(OpRC))
          return false;

        const MachineOperand *VDst =
            TII.getNamedOperand(*MI, AMDGPU::OpName::vdst);
        const MachineOperand *Src2 =
            TII.getNamedOperand(*MI, AMDGPU::OpName::src2);
        for (const MachineOperand *Op : {VDst, Src2}) {
          if (!Op->isReg())
            continue;

          Register OtherReg = Op->getReg();
          if (OtherReg.isPhysical())
            return false;

          if (OtherReg != Reg && RewriteRegs.insert(OtherReg))
            Worklist.push_back(OtherReg);
        }

        if (!is_contained(RewriteCandidates, MI)) {
          LLVM_DEBUG({
            Register VDstPhysReg = VRM.getPhys(VDst->getReg());
            dbgs() << "Attempting to replace VGPR MFMA with AGPR version:"
                   << " Dst=[" << printReg(VDst->getReg()) << " => "
                   << printReg(VDstPhysReg, &TRI);

            if (Src2->isReg()) {
              Register Src2PhysReg = VRM.getPhys(Src2->getReg());
              dbgs() << "], Src2=[" << printReg(Src2->getReg(), &TRI) << " => "
                     << printReg(Src2PhysReg, &TRI);
            }

            dbgs() << "]: " << *MI;
          });

          RewriteCandidates.push_back(MI);
        }

        continue;
      }

      unsigned OpNo = &MO - &MI->getOperand(0);
      NewRC = MI->getRegClassConstraintEffect(OpNo, NewRC, &TII, &TRI);
      if (!NewRC || NewRC == OldRC) {
        LLVM_DEBUG(dbgs() << "User of " << printReg(Reg, &TRI)
                          << " cannot be reassigned to "
                          << TRI.getRegClassName(NewRC) << ": " << *MI);
        return false;
      }
    }
  }

  return true;
}

bool AMDGPURewriteAGPRCopyMFMAImpl::tryReassigningMFMAChain(
    MachineInstr &MFMA, Register MFMAHintReg, MCPhysReg PhysRegHint) const {
  // src2 and dst have the same physical class constraint; try to preserve
  // the original src2 subclass if one exists.
  SmallVector<MachineInstr *, 4> RewriteCandidates = {&MFMA};
  SmallSetVector<Register, 4> RewriteRegs;

  // Make sure we reassign the MFMA we found the copy from first. We want
  // to ensure dst ends up in the physreg we were originally copying to.
  RewriteRegs.insert(MFMAHintReg);

  // We've found av = COPY (MFMA) (or MFMA (v = COPY av)) and need to verify
  // that we can trivially rewrite src2 to use the new AGPR. If we can't
  // trivially replace it, we're going to induce as many copies as we would
  // have emitted in the first place, as well as need to assign another
  // register, and need to figure out where to put them. The live range
  // splitting is smarter than anything we're doing here, so trust it did
  // something reasonable.
  //
  // Note recomputeRegClassExceptRewritable will consider the constraints of
  // this MFMA's src2 as well as the src2/dst of any transitive MFMA users.
  if (!recomputeRegClassExceptRewritable(MFMAHintReg, RewriteCandidates,
                                         RewriteRegs)) {
    LLVM_DEBUG(dbgs() << "Could not recompute the regclass of dst reg "
                      << printReg(MFMAHintReg, &TRI) << '\n');
    return false;
  }

  // If src2 and dst are different registers, we need to also reassign the
  // input to an available AGPR if it is compatible with all other uses.
  //
  // If we can't reassign it, we'd need to introduce a different copy
  // which is likely worse than the copy we'd be saving.
  //
  // It's likely that the MFMA is used in sequence with other MFMAs; if we
  // cannot migrate the full use/def chain of MFMAs, we would need to
  // introduce intermediate copies somewhere. So we only make the
  // transform if all the interfering MFMAs can also be migrated. Collect
  // the set of rewritable MFMAs and check if we can assign an AGPR at
  // that point.
  //
  // If any of the MFMAs aren't reassignable, we give up and roll back to
  // the original register assignments.

  using RecoloringStack =
      SmallVector<std::pair<const LiveInterval *, MCRegister>, 8>;
  RecoloringStack TentativeReassignments;

  for (Register RewriteReg : RewriteRegs) {
    LiveInterval &LI = LIS.getInterval(RewriteReg);
    TentativeReassignments.push_back({&LI, VRM.getPhys(RewriteReg)});
    LRM.unassign(LI);
  }

  if (!attemptReassignmentsToAGPR(RewriteRegs, PhysRegHint)) {
    // Roll back the register assignments to the original state.
    for (auto [LI, OldAssign] : TentativeReassignments) {
      if (VRM.hasPhys(LI->reg()))
        LRM.unassign(*LI);
      LRM.assign(*LI, OldAssign);
    }

    return false;
  }

  // Fixup the register classes of the virtual registers now that we've
  // committed to the reassignments.
  for (Register InterferingReg : RewriteRegs) {
    const TargetRegisterClass *EquivalentAGPRRegClass =
        TRI.getEquivalentAGPRClass(MRI.getRegClass(InterferingReg));
    MRI.setRegClass(InterferingReg, EquivalentAGPRRegClass);
  }

  for (MachineInstr *RewriteCandidate : RewriteCandidates) {
    int NewMFMAOp =
        AMDGPU::getMFMASrcCVDstAGPROp(RewriteCandidate->getOpcode());
    RewriteCandidate->setDesc(TII.get(NewMFMAOp));
    ++NumMFMAsRewrittenToAGPR;
  }

  return true;
}

/// Attempt to reassign the registers in \p InterferingRegs to be AGPRs, with
/// a preference to use \p PrefPhysReg first. Returns false if the
/// reassignments cannot be trivially performed.
bool AMDGPURewriteAGPRCopyMFMAImpl::attemptReassignmentsToAGPR(
    SmallSetVector<Register, 4> &InterferingRegs, MCPhysReg PrefPhysReg) const {
  // FIXME: The ordering may matter here, but we're just taking uselistorder
  // with the special case of ensuring to process the starting instruction
  // first. We probably should extract the priority advisor out of greedy and
  // use that ordering.
  for (Register InterferingReg : InterferingRegs) {
    LiveInterval &ReassignLI = LIS.getInterval(InterferingReg);
    const TargetRegisterClass *EquivalentAGPRRegClass =
        TRI.getEquivalentAGPRClass(MRI.getRegClass(InterferingReg));

    MCPhysReg Assignable = AMDGPU::NoRegister;
    if (EquivalentAGPRRegClass->contains(PrefPhysReg) &&
        LRM.checkInterference(ReassignLI, PrefPhysReg) ==
            LiveRegMatrix::IK_Free) {
      // First try to assign to the AGPR we were already copying to. This
      // should be the first assignment we attempt. We have to guard
      // against the use being a subregister (which doesn't have an exact
      // class match).

      // TODO: If this does happen to be a subregister use, we should
      // still try to assign to a subregister of the original copy result.
      Assignable = PrefPhysReg;
    } else {
      ArrayRef<MCPhysReg> AllocOrder =
          RegClassInfo.getOrder(EquivalentAGPRRegClass);
      for (MCPhysReg Reg : AllocOrder) {
        if (LRM.checkInterference(ReassignLI, Reg) == LiveRegMatrix::IK_Free) {
          Assignable = Reg;
          break;
        }
      }
    }

    if (!Assignable) {
      LLVM_DEBUG(dbgs() << "Unable to reassign VGPR "
                        << printReg(InterferingReg, &TRI)
                        << " to a free AGPR\n");
      return false;
    }

    LLVM_DEBUG(dbgs() << "Reassigning VGPR " << printReg(InterferingReg, &TRI)
                      << " to " << printReg(Assignable, &TRI) << '\n');
    LRM.assign(ReassignLI, Assignable);
  }

  return true;
}

/// Identify copies that look like:
///   %vdst:vgpr = V_MFMA_.. %src0:av, %src1:av, %src2:vgpr
///   %agpr = COPY %vdst
///
/// Then try to replace the transitive uses of %src2 and %vdst with the AGPR
/// versions of the MFMA. This should cover the common case.
bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesToAGPR(
    Register VReg, MCRegister AssignedAGPR) const {
  bool MadeChange = false;
  for (MachineInstr &UseMI : MRI.def_instructions(VReg)) {
    if (!UseMI.isCopy())
      continue;

    Register CopySrcReg = UseMI.getOperand(1).getReg();
    if (!CopySrcReg.isVirtual())
      continue;

    // TODO: Handle loop phis copied to AGPR. e.g.
    //
    // loop:
    //   %phi:vgpr = COPY %mfma:vgpr
    //   %mfma:vgpr = V_MFMA_xxx_vgprcd_e64 %a, %b, %phi
    //   s_cbranch_vccnz loop
    //
    // endloop:
    //   %agpr = mfma
    //
    // We need to be sure that %phi is assigned to the same physical register
    // as %mfma, or else we will just be moving copies into the loop.

    for (MachineInstr &CopySrcDefMI : MRI.def_instructions(CopySrcReg)) {
      if (isRewriteCandidate(CopySrcDefMI) &&
          tryReassigningMFMAChain(
              CopySrcDefMI, CopySrcDefMI.getOperand(0).getReg(), AssignedAGPR))
        MadeChange = true;
    }
  }

  return MadeChange;
}

/// Identify copies that look like:
///   %src2:vgpr = COPY %other:agpr
///   %vdst:vgpr = V_MFMA_... %src0:av, %src1:av, %src2:vgpr
///
/// Then try to replace the transitive uses of %src2 and %vdst with the AGPR
/// versions of the MFMA. This should cover rarer cases, and will generally be
/// redundant with tryFoldCopiesToAGPR.
bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesFromAGPR(
    Register VReg, MCRegister AssignedAGPR) const {
  bool MadeChange = false;
  for (MachineInstr &UseMI : MRI.use_instructions(VReg)) {
    if (!UseMI.isCopy())
      continue;

    Register CopyDstReg = UseMI.getOperand(0).getReg();
    if (!CopyDstReg.isVirtual())
      continue;
    for (MachineOperand &CopyUseMO : MRI.reg_nodbg_operands(CopyDstReg)) {
      if (!CopyUseMO.readsReg())
        continue;

      MachineInstr &CopyUseMI = *CopyUseMO.getParent();
      if (isRewriteCandidate(CopyUseMI)) {
        if (tryReassigningMFMAChain(CopyUseMI, CopyDstReg,
                                    VRM.getPhys(CopyDstReg)))
          MadeChange = true;
      }
    }
  }

  return MadeChange;
}

void AMDGPURewriteAGPRCopyMFMAImpl::replaceSpillWithCopyToVReg(
    MachineInstr &SpillMI, int SpillFI, Register VReg) const {
  const DebugLoc &DL = SpillMI.getDebugLoc();
  MachineBasicBlock &MBB = *SpillMI.getParent();
  MachineInstr *NewCopy;
  if (SpillMI.mayStore()) {
    NewCopy = BuildMI(MBB, SpillMI, DL, TII.get(TargetOpcode::COPY), VReg)
                  .add(SpillMI.getOperand(0));
  } else {
    NewCopy = BuildMI(MBB, SpillMI, DL, TII.get(TargetOpcode::COPY))
                  .add(SpillMI.getOperand(0))
                  .addReg(VReg);
  }

  LIS.ReplaceMachineInstrInMaps(SpillMI, *NewCopy);
  SpillMI.eraseFromParent();
}

void AMDGPURewriteAGPRCopyMFMAImpl::collectSpillIndexUses(
    ArrayRef<LiveInterval *> StackIntervals, SpillReferenceMap &Map) const {
  SmallSet<int, 4> NeededFrameIndexes;
  for (const LiveInterval *LI : StackIntervals)
    NeededFrameIndexes.insert(LI->reg().stackSlotIndex());

  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      for (MachineOperand &MO : MI.operands()) {
        if (!MO.isFI() || !NeededFrameIndexes.count(MO.getIndex()))
          continue;

        if (TII.isVGPRSpill(MI)) {
          SmallVector<MachineInstr *, 4> &References = Map[MO.getIndex()];
          References.push_back(&MI);
          break;
        }

        // Verify this was really a spill instruction; if it's not, just
        // ignore all uses of this frame index.

        // TODO: This should probably be verifier enforced.
        NeededFrameIndexes.erase(MO.getIndex());
        Map.erase(MO.getIndex());
      }
    }
  }
}

void AMDGPURewriteAGPRCopyMFMAImpl::eliminateSpillsOfReassignedVGPRs() const {
  unsigned NumSlots = LSS.getNumIntervals();
  if (NumSlots == 0)
    return;

  MachineFrameInfo &MFI = MF.getFrameInfo();

  SmallVector<LiveInterval *, 32> StackIntervals;
  StackIntervals.reserve(NumSlots);

  for (auto &[Slot, LI] : LSS) {
    if (!MFI.isSpillSlotObjectIndex(Slot) || MFI.isDeadObjectIndex(Slot))
      continue;

    const TargetRegisterClass *RC = LSS.getIntervalRegClass(Slot);
    if (TRI.hasVGPRs(RC))
      StackIntervals.push_back(&LI);
  }

  sort(StackIntervals, [](const LiveInterval *A, const LiveInterval *B) {
    // The ordering has to be strictly weak. Sort the heaviest intervals first
    // to prioritize their unspilling.
    if (A->weight() != B->weight())
      return A->weight() > B->weight();

    if (A->getSize() != B->getSize())
      return A->getSize() > B->getSize();

    // Tie break by slot number to avoid needing a stable sort.
    return A->reg().stackSlotIndex() < B->reg().stackSlotIndex();
  });

  // FIXME: The APIs for dealing with the LiveInterval of a frame index are
  // cumbersome. LiveStacks owns its LiveIntervals which refer to stack
  // slots. We cannot use the usual LiveRegMatrix::assign and unassign on
  // these, and must create a substitute virtual register to do so. This makes
  // incremental updating here difficult; we need to actually perform the IR
  // mutation to get the new vreg references in place to compute the register
  // LiveInterval to perform an assignment to track the new interference
  // correctly, and we can't simply migrate the LiveInterval we already have.
  //
  // To avoid walking through the entire function for each index, pre-collect
  // all the spill slot references.

  SpillReferenceMap SpillSlotReferences;
  collectSpillIndexUses(StackIntervals, SpillSlotReferences);

  for (LiveInterval *LI : StackIntervals) {
    int Slot = LI->reg().stackSlotIndex();
    auto SpillReferences = SpillSlotReferences.find(Slot);
    if (SpillReferences == SpillSlotReferences.end())
      continue;

    const TargetRegisterClass *RC = LSS.getIntervalRegClass(Slot);

    LLVM_DEBUG(dbgs() << "Trying to eliminate " << printReg(Slot, &TRI)
                      << " by reassigning\n");

    ArrayRef<MCPhysReg> AllocOrder = RegClassInfo.getOrder(RC);

    for (MCPhysReg PhysReg : AllocOrder) {
      if (LRM.checkInterference(*LI, PhysReg) != LiveRegMatrix::IK_Free)
        continue;

      LLVM_DEBUG(dbgs() << "Reassigning " << *LI << " to "
                        << printReg(PhysReg, &TRI) << '\n');

      Register NewVReg = MRI.createVirtualRegister(RC);

      for (MachineInstr *SpillMI : SpillReferences->second)
        replaceSpillWithCopyToVReg(*SpillMI, Slot, NewVReg);

      // TODO: We should be able to transfer the information from the stack
      // slot's LiveInterval without recomputing from scratch with the
      // replacement vreg uses.
      LiveInterval &NewLI = LIS.createAndComputeVirtRegInterval(NewVReg);
      VRM.grow();
      LRM.assign(NewLI, PhysReg);
      MFI.RemoveStackObject(Slot);
      break;
    }
  }
}

bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
  // This only applies on subtargets that have a configurable AGPR vs. VGPR
  // allocation.
  if (!ST.hasGFX90AInsts())
    return false;

  // Early exit if no AGPRs were assigned.
  if (!LRM.isPhysRegUsed(AMDGPU::AGPR0)) {
    LLVM_DEBUG(dbgs() << "skipping function that did not allocate AGPRs\n");
    return false;
  }

  bool MadeChange = false;

  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
    Register VReg = Register::index2VirtReg(I);
    MCRegister AssignedAGPR = getAssignedAGPR(VReg);
    if (!AssignedAGPR)
      continue;

    if (tryFoldCopiesToAGPR(VReg, AssignedAGPR))
      MadeChange = true;
    if (tryFoldCopiesFromAGPR(VReg, AssignedAGPR))
      MadeChange = true;
  }

  // If we've successfully rewritten some MFMAs, we've alleviated some VGPR
  // pressure. See if we can eliminate some spills now that those registers
  // are more available.
  if (MadeChange)
    eliminateSpillsOfReassignedVGPRs();

  return MadeChange;
}

class AMDGPURewriteAGPRCopyMFMALegacy : public MachineFunctionPass {
public:
  static char ID;
  RegisterClassInfo RegClassInfo;

  AMDGPURewriteAGPRCopyMFMALegacy() : MachineFunctionPass(ID) {
    initializeAMDGPURewriteAGPRCopyMFMALegacyPass(
        *PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "AMDGPU Rewrite AGPR-Copy-MFMA";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LiveIntervalsWrapperPass>();
    AU.addRequired<VirtRegMapWrapperLegacy>();
    AU.addRequired<LiveRegMatrixWrapperLegacy>();
    AU.addRequired<LiveStacksWrapperLegacy>();

    AU.addPreserved<LiveIntervalsWrapperPass>();
    AU.addPreserved<VirtRegMapWrapperLegacy>();
    AU.addPreserved<LiveRegMatrixWrapperLegacy>();
    AU.addPreserved<LiveStacksWrapperLegacy>();

    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.

INITIALIZE_PASS_BEGIN(AMDGPURewriteAGPRCopyMFMALegacy, DEBUG_TYPE,
                      "AMDGPU Rewrite AGPR-Copy-MFMA", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy)
INITIALIZE_PASS_DEPENDENCY(LiveStacksWrapperLegacy)
INITIALIZE_PASS_END(AMDGPURewriteAGPRCopyMFMALegacy, DEBUG_TYPE,
                    "AMDGPU Rewrite AGPR-Copy-MFMA", false, false)

char AMDGPURewriteAGPRCopyMFMALegacy::ID = 0;

char &llvm::AMDGPURewriteAGPRCopyMFMALegacyID =
    AMDGPURewriteAGPRCopyMFMALegacy::ID;

bool AMDGPURewriteAGPRCopyMFMALegacy::runOnMachineFunction(
    MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  RegClassInfo.runOnMachineFunction(MF);

  auto &VRM = getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
  auto &LRM = getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
  auto &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
  auto &LSS = getAnalysis<LiveStacksWrapperLegacy>().getLS();
  AMDGPURewriteAGPRCopyMFMAImpl Impl(MF, VRM, LRM, LIS, LSS, RegClassInfo);
  return Impl.run(MF);
}

PreservedAnalyses
AMDGPURewriteAGPRCopyMFMAPass::run(MachineFunction &MF,
                                   MachineFunctionAnalysisManager &MFAM) {
  VirtRegMap &VRM = MFAM.getResult<VirtRegMapAnalysis>(MF);
  LiveRegMatrix &LRM = MFAM.getResult<LiveRegMatrixAnalysis>(MF);
  LiveIntervals &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF);
  LiveStacks &LSS = MFAM.getResult<LiveStacksAnalysis>(MF);
  RegisterClassInfo RegClassInfo;
  RegClassInfo.runOnMachineFunction(MF);

  AMDGPURewriteAGPRCopyMFMAImpl Impl(MF, VRM, LRM, LIS, LSS, RegClassInfo);
  if (!Impl.run(MF))
    return PreservedAnalyses::all();
  auto PA = getMachineFunctionPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<LiveStacksAnalysis>();
  PA.preserve<VirtRegMapAnalysis>();
  PA.preserve<SlotIndexesAnalysis>();
  PA.preserve<LiveIntervalsAnalysis>();
  PA.preserve<LiveRegMatrixAnalysis>();
  return PA;
}