1//===-- AMDGPURewriteAGPRCopyMFMA.cpp -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file \brief Try to replace MFMA instructions using VGPRs with MFMA
10/// instructions using AGPRs. We expect MFMAs to be selected using VGPRs, and
11/// only use AGPRs if it helps avoid spilling. In this case, the MFMA will have
12/// copies between AGPRs and VGPRs and the AGPR variant of an MFMA pseudo. This
13/// pass will attempt to delete the cross register bank copy and replace the
14/// MFMA opcode.
15///
16/// TODO:
17/// - Handle rewrites of phis. This must be more careful than normal about the
18/// reassignment. We do not want to introduce an AGPR-to-AGPR copy inside of a
19/// loop, so it depends on the exact assignment of the copy.
20///
21/// - Update LiveIntervals incrementally instead of recomputing from scratch
22///
23//===----------------------------------------------------------------------===//
24
25#include "AMDGPU.h"
26#include "GCNSubtarget.h"
27#include "SIMachineFunctionInfo.h"
28#include "SIRegisterInfo.h"
29#include "llvm/ADT/Statistic.h"
30#include "llvm/CodeGen/LiveIntervals.h"
31#include "llvm/CodeGen/LiveRegMatrix.h"
32#include "llvm/CodeGen/LiveStacks.h"
33#include "llvm/CodeGen/MachineFrameInfo.h"
34#include "llvm/CodeGen/MachineFunctionPass.h"
35#include "llvm/CodeGen/SlotIndexes.h"
36#include "llvm/CodeGen/VirtRegMap.h"
37#include "llvm/InitializePasses.h"
38#include "llvm/Support/DebugCounter.h"
39
40using namespace llvm;
41
42#define DEBUG_TYPE "amdgpu-rewrite-agpr-copy-mfma"
43
// Debug counter consulted through DebugCounter::shouldExecute once per
// attempted MFMA chain rewrite; when it declines, the tentative register
// unassignments for that chain are rolled back and the chain is left as-is.
DEBUG_COUNTER(RewriteAGPRCopyMFMACounter, DEBUG_TYPE,
              "Controls which MFMA chains are rewritten to AGPR form");
46
47namespace {
48
// Incremented once for every MFMA whose opcode is switched to the AGPR
// src2/dst form after a chain reassignment has been committed.
STATISTIC(NumMFMAsRewrittenToAGPR,
          "Number of MFMA instructions rewritten to use AGPR form");
51
/// Map from a spill slot's frame index to the list of instructions that
/// reference that frame index.
using SpillReferenceMap = DenseMap<int, SmallVector<MachineInstr *, 4>>;
54
55class AMDGPURewriteAGPRCopyMFMAImpl {
56 MachineFunction &MF;
57 const GCNSubtarget &ST;
58 const SIInstrInfo &TII;
59 const SIRegisterInfo &TRI;
60 MachineRegisterInfo &MRI;
61 VirtRegMap &VRM;
62 LiveRegMatrix &LRM;
63 LiveIntervals &LIS;
64 LiveStacks &LSS;
65 const RegisterClassInfo &RegClassInfo;
66
67 bool attemptReassignmentsToAGPR(SmallSetVector<Register, 4> &InterferingRegs,
68 MCPhysReg PrefPhysReg) const;
69
70public:
71 AMDGPURewriteAGPRCopyMFMAImpl(MachineFunction &MF, VirtRegMap &VRM,
72 LiveRegMatrix &LRM, LiveIntervals &LIS,
73 LiveStacks &LSS,
74 const RegisterClassInfo &RegClassInfo)
75 : MF(MF), ST(MF.getSubtarget<GCNSubtarget>()), TII(*ST.getInstrInfo()),
76 TRI(*ST.getRegisterInfo()), MRI(MF.getRegInfo()), VRM(VRM), LRM(LRM),
77 LIS(LIS), LSS(LSS), RegClassInfo(RegClassInfo) {}
78
79 bool isRewriteCandidate(const MachineInstr &MI) const {
80 return TII.isMAI(MI) && AMDGPU::getMFMASrcCVDstAGPROp(Opcode: MI.getOpcode()) != -1;
81 }
82
83 /// Find AV_* registers assigned to AGPRs (or virtual registers which were
84 /// already required to be AGPR).
85 ///
86 /// \return the assigned physical register that \p VReg is assigned to if it
87 /// is an AGPR, otherwise MCRegister().
88 MCRegister getAssignedAGPR(Register VReg) const {
89 MCRegister PhysReg = VRM.getPhys(virtReg: VReg);
90 if (!PhysReg)
91 return MCRegister();
92
93 // If this is an AV register, we have to check if the actual assignment is
94 // to an AGPR
95 const TargetRegisterClass *AssignedRC = TRI.getPhysRegBaseClass(Reg: PhysReg);
96 return TRI.isAGPRClass(RC: AssignedRC) ? PhysReg : MCRegister();
97 }
98
99 bool tryReassigningMFMAChain(MachineInstr &MFMA, Register MFMAHintReg,
100 MCPhysReg PhysRegHint) const;
101
102 /// Compute the register class constraints based on the uses of \p Reg,
103 /// excluding MFMA uses from which can be rewritten to change the register
104 /// class constraint. MFMA scale operands need to be constraint checked.
105 /// This should be nearly identical to MachineRegisterInfo::recomputeRegClass.
106
107 /// \p RewriteCandidates will collect the set of MFMA instructions that need
108 /// to have the opcode mutated to perform the replacement.
109 ///
110 /// \p RewriteRegs will accumulate the set of register used by those MFMAs
111 /// that need to have the register classes adjusted.
112 bool recomputeRegClassExceptRewritable(
113 Register Reg, SmallVectorImpl<MachineInstr *> &RewriteCandidates,
114 SmallSetVector<Register, 4> &RewriteRegs) const;
115
116 bool tryFoldCopiesToAGPR(Register VReg, MCRegister AssignedAGPR) const;
117 bool tryFoldCopiesFromAGPR(Register VReg, MCRegister AssignedAGPR) const;
118
119 /// Replace spill instruction \p SpillMI which loads/stores from/to \p SpillFI
120 /// with a COPY to the replacement register value \p VReg.
121 void replaceSpillWithCopyToVReg(MachineInstr &SpillMI, int SpillFI,
122 Register VReg) const;
123
124 /// Create a map from frame index to use instructions for spills. If a use of
125 /// the frame index does not consist only of spill instructions, it will not
126 /// be included in the map.
127 void collectSpillIndexUses(ArrayRef<LiveInterval *> StackIntervals,
128 SpillReferenceMap &Map) const;
129
130 /// Attempt to unspill VGPRs by finding a free register and replacing the
131 /// spill instructions with copies.
132 void eliminateSpillsOfReassignedVGPRs() const;
133
134 bool run(MachineFunction &MF) const;
135};
136
137bool AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable(
138 Register StartReg, SmallVectorImpl<MachineInstr *> &RewriteCandidates,
139 SmallSetVector<Register, 4> &RewriteRegs) const {
140 SmallVector<Register, 8> Worklist = {StartReg};
141
142 // Recursively visit all transitive MFMA users
143 while (!Worklist.empty()) {
144 Register Reg = Worklist.pop_back_val();
145 const TargetRegisterClass *OldRC = MRI.getRegClass(Reg);
146
147 // Inflate to the equivalent AV_* class.
148 const TargetRegisterClass *NewRC = TRI.getLargestLegalSuperClass(RC: OldRC, MF);
149 if (OldRC == NewRC)
150 return false;
151
152 // Accumulate constraints from all uses.
153 for (MachineOperand &MO : MRI.reg_nodbg_operands(Reg)) {
154 // Apply the effect of the given operand to NewRC.
155 MachineInstr *MI = MO.getParent();
156
157 // We can swap the classes of dst + src2 as a pair to AGPR, so ignore the
158 // effects of rewrite candidates. It just so happens that we can use
159 // either AGPR or VGPR in src0/src1. We still need to check constraint
160 // effects for scale variant, which does not allow AGPR.
161 if (isRewriteCandidate(MI: *MI)) {
162 int AGPROp = AMDGPU::getMFMASrcCVDstAGPROp(Opcode: MI->getOpcode());
163 const MCInstrDesc &AGPRDesc = TII.get(Opcode: AGPROp);
164 const TargetRegisterClass *NewRC =
165 TII.getRegClass(MCID: AGPRDesc, OpNum: MO.getOperandNo());
166 if (!TRI.hasAGPRs(RC: NewRC))
167 return false;
168
169 const MachineOperand *VDst =
170 TII.getNamedOperand(MI&: *MI, OperandName: AMDGPU::OpName::vdst);
171 const MachineOperand *Src2 =
172 TII.getNamedOperand(MI&: *MI, OperandName: AMDGPU::OpName::src2);
173 for (const MachineOperand *Op : {VDst, Src2}) {
174 if (!Op->isReg())
175 continue;
176
177 Register OtherReg = Op->getReg();
178 if (OtherReg.isPhysical())
179 return false;
180
181 if (OtherReg != Reg && RewriteRegs.insert(X: OtherReg))
182 Worklist.push_back(Elt: OtherReg);
183 }
184
185 if (!is_contained(Range&: RewriteCandidates, Element: MI)) {
186 LLVM_DEBUG({
187 Register VDstPhysReg = VRM.getPhys(VDst->getReg());
188 dbgs() << "Attempting to replace VGPR MFMA with AGPR version:"
189 << " Dst=[" << printReg(VDst->getReg()) << " => "
190 << printReg(VDstPhysReg, &TRI);
191
192 if (Src2->isReg()) {
193 Register Src2PhysReg = VRM.getPhys(Src2->getReg());
194 dbgs() << "], Src2=[" << printReg(Src2->getReg(), &TRI) << " => "
195 << printReg(Src2PhysReg, &TRI);
196 }
197
198 dbgs() << "]: " << MI;
199 });
200
201 RewriteCandidates.push_back(Elt: MI);
202 }
203
204 continue;
205 }
206
207 unsigned OpNo = &MO - &MI->getOperand(i: 0);
208 NewRC = MI->getRegClassConstraintEffect(OpIdx: OpNo, CurRC: NewRC, TII: &TII, TRI: &TRI);
209 if (!NewRC || NewRC == OldRC) {
210 LLVM_DEBUG(dbgs() << "User of " << printReg(Reg, &TRI)
211 << " cannot be reassigned to "
212 << (NewRC ? TRI.getRegClassName(NewRC) : "NULL")
213 << ": " << *MI);
214 return false;
215 }
216 }
217 }
218
219 return true;
220}
221
222bool AMDGPURewriteAGPRCopyMFMAImpl::tryReassigningMFMAChain(
223 MachineInstr &MFMA, Register MFMAHintReg, MCPhysReg PhysRegHint) const {
224 // src2 and dst have the same physical class constraint; try to preserve
225 // the original src2 subclass if one were to exist.
226 SmallVector<MachineInstr *, 4> RewriteCandidates = {&MFMA};
227 SmallSetVector<Register, 4> RewriteRegs;
228
229 // Make sure we reassign the MFMA we found the copy from first. We want
230 // to ensure dst ends up in the physreg we were originally copying to.
231 RewriteRegs.insert(X: MFMAHintReg);
232
233 // We've found av = COPY (MFMA) (or MFMA (v = COPY av)) and need to verify
234 // that we can trivially rewrite src2 to use the new AGPR. If we can't
235 // trivially replace it, we're going to induce as many copies as we would have
236 // emitted in the first place, as well as need to assign another register, and
237 // need to figure out where to put them. The live range splitting is smarter
238 // than anything we're doing here, so trust it did something reasonable.
239 //
240 // Note recomputeRegClassExceptRewritable will consider the constraints of
241 // this MFMA's src2 as well as the src2/dst of any transitive MFMA users.
242 if (!recomputeRegClassExceptRewritable(StartReg: MFMAHintReg, RewriteCandidates,
243 RewriteRegs)) {
244 LLVM_DEBUG(dbgs() << "Could not recompute the regclass of dst reg "
245 << printReg(MFMAHintReg, &TRI) << '\n');
246 return false;
247 }
248
249 // If src2 and dst are different registers, we need to also reassign the
250 // input to an available AGPR if it is compatible with all other uses.
251 //
252 // If we can't reassign it, we'd need to introduce a different copy
253 // which is likely worse than the copy we'd be saving.
254 //
255 // It's likely that the MFMA is used in sequence with other MFMAs; if we
256 // cannot migrate the full use/def chain of MFMAs, we would need to
257 // introduce intermediate copies somewhere. So we only make the
258 // transform if all the interfering MFMAs can also be migrated. Collect
259 // the set of rewritable MFMAs and check if we can assign an AGPR at
260 // that point.
261 //
262 // If any of the MFMAs aren't reassignable, we give up and rollback to
263 // the original register assignments.
264
265 using RecoloringStack =
266 SmallVector<std::pair<const LiveInterval *, MCRegister>, 8>;
267 RecoloringStack TentativeReassignments;
268
269 for (Register RewriteReg : RewriteRegs) {
270 LiveInterval &LI = LIS.getInterval(Reg: RewriteReg);
271 TentativeReassignments.push_back(Elt: {&LI, VRM.getPhys(virtReg: RewriteReg)});
272 LRM.unassign(VirtReg: LI);
273 }
274
275 if (!DebugCounter::shouldExecute(Counter&: RewriteAGPRCopyMFMACounter) ||
276 !attemptReassignmentsToAGPR(InterferingRegs&: RewriteRegs, PrefPhysReg: PhysRegHint)) {
277 // Roll back the register assignments to the original state.
278 for (auto [LI, OldAssign] : TentativeReassignments) {
279 if (VRM.hasPhys(virtReg: LI->reg()))
280 LRM.unassign(VirtReg: *LI);
281 LRM.assign(VirtReg: *LI, PhysReg: OldAssign);
282 }
283
284 return false;
285 }
286
287 // Fixup the register classes of the virtual registers now that we've
288 // committed to the reassignments.
289 for (Register InterferingReg : RewriteRegs) {
290 const TargetRegisterClass *EquivalentAGPRRegClass =
291 TRI.getEquivalentAGPRClass(SRC: MRI.getRegClass(Reg: InterferingReg));
292 MRI.setRegClass(Reg: InterferingReg, RC: EquivalentAGPRRegClass);
293 }
294
295 for (MachineInstr *RewriteCandidate : RewriteCandidates) {
296 int NewMFMAOp =
297 AMDGPU::getMFMASrcCVDstAGPROp(Opcode: RewriteCandidate->getOpcode());
298 RewriteCandidate->setDesc(TII.get(Opcode: NewMFMAOp));
299 ++NumMFMAsRewrittenToAGPR;
300 }
301
302 return true;
303}
304
305/// Attempt to reassign the registers in \p InterferingRegs to be AGPRs, with a
306/// preference to use \p PhysReg first. Returns false if the reassignments
307/// cannot be trivially performed.
308bool AMDGPURewriteAGPRCopyMFMAImpl::attemptReassignmentsToAGPR(
309 SmallSetVector<Register, 4> &InterferingRegs, MCPhysReg PrefPhysReg) const {
310 // FIXME: The ordering may matter here, but we're just taking uselistorder
311 // with the special case of ensuring to process the starting instruction
312 // first. We probably should extract the priority advisor out of greedy and
313 // use that ordering.
314 for (Register InterferingReg : InterferingRegs) {
315 LiveInterval &ReassignLI = LIS.getInterval(Reg: InterferingReg);
316 const TargetRegisterClass *EquivalentAGPRRegClass =
317 TRI.getEquivalentAGPRClass(SRC: MRI.getRegClass(Reg: InterferingReg));
318
319 MCPhysReg Assignable = AMDGPU::NoRegister;
320 if (EquivalentAGPRRegClass->contains(Reg: PrefPhysReg) &&
321 LRM.checkInterference(VirtReg: ReassignLI, PhysReg: PrefPhysReg) ==
322 LiveRegMatrix::IK_Free) {
323 // First try to assign to the AGPR we were already copying to. This
324 // should be the first assignment we attempt. We have to guard
325 // against the use being a subregister (which doesn't have an exact
326 // class match).
327
328 // TODO: If this does happen to be a subregister use, we should
329 // still try to assign to a subregister of the original copy result.
330 Assignable = PrefPhysReg;
331 } else {
332 ArrayRef<MCPhysReg> AllocOrder =
333 RegClassInfo.getOrder(RC: EquivalentAGPRRegClass);
334 for (MCPhysReg Reg : AllocOrder) {
335 if (LRM.checkInterference(VirtReg: ReassignLI, PhysReg: Reg) == LiveRegMatrix::IK_Free) {
336 Assignable = Reg;
337 break;
338 }
339 }
340 }
341
342 if (!Assignable) {
343 LLVM_DEBUG(dbgs() << "Unable to reassign VGPR "
344 << printReg(InterferingReg, &TRI)
345 << " to a free AGPR\n");
346 return false;
347 }
348
349 LLVM_DEBUG(dbgs() << "Reassigning VGPR " << printReg(InterferingReg, &TRI)
350 << " to " << printReg(Assignable, &TRI) << '\n');
351 LRM.assign(VirtReg: ReassignLI, PhysReg: Assignable);
352 }
353
354 return true;
355}
356
357/// Identify copies that look like:
358/// %vdst:vgpr = V_MFMA_.. %src0:av, %src1:av, %src2:vgpr
359/// %agpr = COPY %vgpr
360///
361/// Then try to replace the transitive uses of %src2 and %vdst with the AGPR
362/// versions of the MFMA. This should cover the common case.
363bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesToAGPR(
364 Register VReg, MCRegister AssignedAGPR) const {
365 bool MadeChange = false;
366 for (MachineInstr &UseMI : MRI.def_instructions(Reg: VReg)) {
367 if (!UseMI.isCopy())
368 continue;
369
370 Register CopySrcReg = UseMI.getOperand(i: 1).getReg();
371 if (!CopySrcReg.isVirtual())
372 continue;
373
374 // TODO: Handle loop phis copied to AGPR. e.g.
375 //
376 // loop:
377 // %phi:vgpr = COPY %mfma:vgpr
378 // %mfma:vgpr = V_MFMA_xxx_vgprcd_e64 %a, %b, %phi
379 // s_cbranch_vccnz loop
380 //
381 // endloop:
382 // %agpr = mfma
383 //
384 // We need to be sure that %phi is assigned to the same physical register as
385 // %mfma, or else we will just be moving copies into the loop.
386
387 for (MachineInstr &CopySrcDefMI : MRI.def_instructions(Reg: CopySrcReg)) {
388 if (isRewriteCandidate(MI: CopySrcDefMI) &&
389 tryReassigningMFMAChain(
390 MFMA&: CopySrcDefMI, MFMAHintReg: CopySrcDefMI.getOperand(i: 0).getReg(), PhysRegHint: AssignedAGPR))
391 MadeChange = true;
392 }
393 }
394
395 return MadeChange;
396}
397
398/// Identify copies that look like:
399/// %src:vgpr = COPY %src:agpr
400/// %vdst:vgpr = V_MFMA_... %src0:av, %src1:av, %src:vgpr
401///
402/// Then try to replace the transitive uses of %src2 and %vdst with the AGPR
403/// versions of the MFMA. This should cover rarer cases, and will generally be
404/// redundant with tryFoldCopiesToAGPR.
405bool AMDGPURewriteAGPRCopyMFMAImpl::tryFoldCopiesFromAGPR(
406 Register VReg, MCRegister AssignedAGPR) const {
407 bool MadeChange = false;
408 for (MachineInstr &UseMI : MRI.use_instructions(Reg: VReg)) {
409 if (!UseMI.isCopy())
410 continue;
411
412 Register CopyDstReg = UseMI.getOperand(i: 0).getReg();
413 if (!CopyDstReg.isVirtual())
414 continue;
415 for (MachineOperand &CopyUseMO : MRI.reg_nodbg_operands(Reg: CopyDstReg)) {
416 if (!CopyUseMO.readsReg())
417 continue;
418
419 MachineInstr &CopyUseMI = *CopyUseMO.getParent();
420 if (isRewriteCandidate(MI: CopyUseMI)) {
421 if (tryReassigningMFMAChain(MFMA&: CopyUseMI, MFMAHintReg: CopyDstReg,
422 PhysRegHint: VRM.getPhys(virtReg: CopyDstReg)))
423 MadeChange = true;
424 }
425 }
426 }
427
428 return MadeChange;
429}
430
431void AMDGPURewriteAGPRCopyMFMAImpl::replaceSpillWithCopyToVReg(
432 MachineInstr &SpillMI, int SpillFI, Register VReg) const {
433 const DebugLoc &DL = SpillMI.getDebugLoc();
434 MachineBasicBlock &MBB = *SpillMI.getParent();
435 MachineInstr *NewCopy;
436 if (SpillMI.mayStore()) {
437 NewCopy = BuildMI(BB&: MBB, I&: SpillMI, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: VReg)
438 .add(MO: SpillMI.getOperand(i: 0));
439 } else {
440 NewCopy = BuildMI(BB&: MBB, I&: SpillMI, MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::COPY))
441 .add(MO: SpillMI.getOperand(i: 0))
442 .addReg(RegNo: VReg);
443 }
444
445 LIS.ReplaceMachineInstrInMaps(MI&: SpillMI, NewMI&: *NewCopy);
446 SpillMI.eraseFromParent();
447}
448
449void AMDGPURewriteAGPRCopyMFMAImpl::collectSpillIndexUses(
450 ArrayRef<LiveInterval *> StackIntervals, SpillReferenceMap &Map) const {
451
452 SmallSet<int, 4> NeededFrameIndexes;
453 for (const LiveInterval *LI : StackIntervals)
454 NeededFrameIndexes.insert(V: LI->reg().stackSlotIndex());
455
456 for (MachineBasicBlock &MBB : MF) {
457 for (MachineInstr &MI : MBB) {
458 for (MachineOperand &MO : MI.operands()) {
459 if (!MO.isFI() || !NeededFrameIndexes.count(V: MO.getIndex()))
460 continue;
461
462 if (TII.isVGPRSpill(MI)) {
463 SmallVector<MachineInstr *, 4> &References = Map[MO.getIndex()];
464 References.push_back(Elt: &MI);
465 break;
466 }
467
468 // Verify this was really a spill instruction, if it's not just ignore
469 // all uses.
470
471 // TODO: This should probably be verifier enforced.
472 NeededFrameIndexes.erase(V: MO.getIndex());
473 Map.erase(Val: MO.getIndex());
474 }
475 }
476 }
477}
478
479void AMDGPURewriteAGPRCopyMFMAImpl::eliminateSpillsOfReassignedVGPRs() const {
480 unsigned NumSlots = LSS.getNumIntervals();
481 if (NumSlots == 0)
482 return;
483
484 MachineFrameInfo &MFI = MF.getFrameInfo();
485
486 SmallVector<LiveInterval *, 32> StackIntervals;
487 StackIntervals.reserve(N: NumSlots);
488
489 for (auto &[Slot, LI] : LSS) {
490 if (!MFI.isSpillSlotObjectIndex(ObjectIdx: Slot) || MFI.isDeadObjectIndex(ObjectIdx: Slot))
491 continue;
492
493 const TargetRegisterClass *RC = LSS.getIntervalRegClass(Slot);
494 if (TRI.hasVGPRs(RC))
495 StackIntervals.push_back(Elt: &LI);
496 }
497
498 sort(C&: StackIntervals, Comp: [](const LiveInterval *A, const LiveInterval *B) {
499 // The ordering has to be strictly weak.
500 /// Sort heaviest intervals first to prioritize their unspilling
501 if (A->weight() != B->weight())
502 return A->weight() > B->weight();
503
504 if (A->getSize() != B->getSize())
505 return A->getSize() > B->getSize();
506
507 // Tie breaker by number to avoid need for stable sort
508 return A->reg().stackSlotIndex() < B->reg().stackSlotIndex();
509 });
510
511 // FIXME: The APIs for dealing with the LiveInterval of a frame index are
512 // cumbersome. LiveStacks owns its LiveIntervals which refer to stack
513 // slots. We cannot use the usual LiveRegMatrix::assign and unassign on these,
514 // and must create a substitute virtual register to do so. This makes
515 // incremental updating here difficult; we need to actually perform the IR
516 // mutation to get the new vreg references in place to compute the register
517 // LiveInterval to perform an assignment to track the new interference
518 // correctly, and we can't simply migrate the LiveInterval we already have.
519 //
520 // To avoid walking through the entire function for each index, pre-collect
521 // all the instructions slot referencess.
522
523 DenseMap<int, SmallVector<MachineInstr *, 4>> SpillSlotReferences;
524 collectSpillIndexUses(StackIntervals, Map&: SpillSlotReferences);
525
526 for (LiveInterval *LI : StackIntervals) {
527 int Slot = LI->reg().stackSlotIndex();
528 auto SpillReferences = SpillSlotReferences.find(Val: Slot);
529 if (SpillReferences == SpillSlotReferences.end())
530 continue;
531
532 const TargetRegisterClass *RC = LSS.getIntervalRegClass(Slot);
533
534 LLVM_DEBUG(dbgs() << "Trying to eliminate " << printReg(Slot, &TRI)
535 << " by reassigning\n");
536
537 ArrayRef<MCPhysReg> AllocOrder = RegClassInfo.getOrder(RC);
538
539 for (MCPhysReg PhysReg : AllocOrder) {
540 if (LRM.checkInterference(VirtReg: *LI, PhysReg) != LiveRegMatrix::IK_Free)
541 continue;
542
543 LLVM_DEBUG(dbgs() << "Reassigning " << *LI << " to "
544 << printReg(PhysReg, &TRI) << '\n');
545
546 const TargetRegisterClass *RC = LSS.getIntervalRegClass(Slot);
547 Register NewVReg = MRI.createVirtualRegister(RegClass: RC);
548
549 for (MachineInstr *SpillMI : SpillReferences->second)
550 replaceSpillWithCopyToVReg(SpillMI&: *SpillMI, SpillFI: Slot, VReg: NewVReg);
551
552 // TODO: We should be able to transfer the information from the stack
553 // slot's LiveInterval without recomputing from scratch with the
554 // replacement vreg uses.
555 LiveInterval &NewLI = LIS.createAndComputeVirtRegInterval(Reg: NewVReg);
556 VRM.grow();
557
558 // A spill slot can be stored to multiple times, so the replacement
559 // vreg may have multiple disconnected live range components. Split
560 // them into separate vregs to maintain the single-component invariant.
561 SmallVector<LiveInterval *, 4> SplitLIs;
562 LIS.splitSeparateComponents(LI&: NewLI, SplitLIs);
563
564 LLVM_DEBUG({
565 if (!SplitLIs.empty()) {
566 dbgs() << "Split unspilled interval into " << (SplitLIs.size() + 1)
567 << " components\n";
568 }
569 });
570
571 LRM.assign(VirtReg: NewLI, PhysReg);
572 for (LiveInterval *SplitLI : SplitLIs) {
573 VRM.grow();
574 LRM.assign(VirtReg: *SplitLI, PhysReg);
575 }
576
577 MFI.RemoveStackObject(ObjectIdx: Slot);
578 break;
579 }
580 }
581}
582
583bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
584 // This only applies on subtargets that have a configurable AGPR vs. VGPR
585 // allocation.
586 if (!ST.hasGFX90AInsts())
587 return false;
588
589 // Early exit if no AGPRs were assigned.
590 if (!LRM.isPhysRegUsed(PhysReg: AMDGPU::AGPR0)) {
591 LLVM_DEBUG(dbgs() << "skipping function that did not allocate AGPRs\n");
592 return false;
593 }
594
595 bool MadeChange = false;
596
597 for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
598 Register VReg = Register::index2VirtReg(Index: I);
599 MCRegister AssignedAGPR = getAssignedAGPR(VReg);
600 if (!AssignedAGPR)
601 continue;
602
603 if (tryFoldCopiesToAGPR(VReg, AssignedAGPR))
604 MadeChange = true;
605 if (tryFoldCopiesFromAGPR(VReg, AssignedAGPR))
606 MadeChange = true;
607 }
608
609 // If we've successfully rewritten some MFMAs, we've alleviated some VGPR
610 // pressure. See if we can eliminate some spills now that those registers are
611 // more available.
612 if (MadeChange)
613 eliminateSpillsOfReassignedVGPRs();
614
615 return MadeChange;
616}
617
/// Legacy pass manager wrapper. It fetches the required analyses and forwards
/// to AMDGPURewriteAGPRCopyMFMAImpl.
class AMDGPURewriteAGPRCopyMFMALegacy : public MachineFunctionPass {
public:
  static char ID;
  // Recomputed for each function in runOnMachineFunction.
  RegisterClassInfo RegClassInfo;

  AMDGPURewriteAGPRCopyMFMALegacy() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "AMDGPU Rewrite AGPR-Copy-MFMA";
  }

  /// Declare the analyses this pass requires and preserves.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LiveIntervalsWrapperPass>();
    AU.addRequired<VirtRegMapWrapperLegacy>();
    AU.addRequired<LiveRegMatrixWrapperLegacy>();
    AU.addRequired<LiveStacksWrapperLegacy>();

    AU.addPreserved<LiveIntervalsWrapperPass>();
    AU.addPreserved<VirtRegMapWrapperLegacy>();
    AU.addPreserved<LiveRegMatrixWrapperLegacy>();
    AU.addPreserved<LiveStacksWrapperLegacy>();

    // NOTE(review): setPreservesAll presumably subsumes the explicit
    // addPreserved calls above — confirm whether both are intended.
    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};
646
647} // End anonymous namespace.
648
// Register the legacy pass together with the analyses it depends on.
INITIALIZE_PASS_BEGIN(AMDGPURewriteAGPRCopyMFMALegacy, DEBUG_TYPE,
                      "AMDGPU Rewrite AGPR-Copy-MFMA", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy)
INITIALIZE_PASS_DEPENDENCY(LiveStacksWrapperLegacy)
INITIALIZE_PASS_END(AMDGPURewriteAGPRCopyMFMALegacy, DEBUG_TYPE,
                    "AMDGPU Rewrite AGPR-Copy-MFMA", false, false)

// Storage for the pass ID declared in the class.
char AMDGPURewriteAGPRCopyMFMALegacy::ID = 0;

// Reference to the pass ID exposed in the llvm namespace.
char &llvm::AMDGPURewriteAGPRCopyMFMALegacyID =
    AMDGPURewriteAGPRCopyMFMALegacy::ID;
662
663bool AMDGPURewriteAGPRCopyMFMALegacy::runOnMachineFunction(
664 MachineFunction &MF) {
665 if (skipFunction(F: MF.getFunction()))
666 return false;
667
668 RegClassInfo.runOnMachineFunction(MF);
669
670 auto &VRM = getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
671 auto &LRM = getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
672 auto &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
673 auto &LSS = getAnalysis<LiveStacksWrapperLegacy>().getLS();
674 AMDGPURewriteAGPRCopyMFMAImpl Impl(MF, VRM, LRM, LIS, LSS, RegClassInfo);
675 return Impl.run(MF);
676}
677
678PreservedAnalyses
679AMDGPURewriteAGPRCopyMFMAPass::run(MachineFunction &MF,
680 MachineFunctionAnalysisManager &MFAM) {
681 VirtRegMap &VRM = MFAM.getResult<VirtRegMapAnalysis>(IR&: MF);
682 LiveRegMatrix &LRM = MFAM.getResult<LiveRegMatrixAnalysis>(IR&: MF);
683 LiveIntervals &LIS = MFAM.getResult<LiveIntervalsAnalysis>(IR&: MF);
684 LiveStacks &LSS = MFAM.getResult<LiveStacksAnalysis>(IR&: MF);
685 RegisterClassInfo RegClassInfo;
686 RegClassInfo.runOnMachineFunction(MF);
687
688 AMDGPURewriteAGPRCopyMFMAImpl Impl(MF, VRM, LRM, LIS, LSS, RegClassInfo);
689 if (!Impl.run(MF))
690 return PreservedAnalyses::all();
691 auto PA = getMachineFunctionPassPreservedAnalyses();
692 PA.preserveSet<CFGAnalyses>()
693 .preserve<LiveStacksAnalysis>()
694 .preserve<VirtRegMapAnalysis>()
695 .preserve<SlotIndexesAnalysis>()
696 .preserve<LiveIntervalsAnalysis>()
697 .preserve<LiveRegMatrixAnalysis>();
698 return PA;
699}
700