1//===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
/// Pass to pre-allocate WWM registers
11//
12//===----------------------------------------------------------------------===//
13
14#include "SIPreAllocateWWMRegs.h"
15#include "AMDGPU.h"
16#include "GCNSubtarget.h"
17#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
18#include "SIMachineFunctionInfo.h"
19#include "llvm/ADT/PostOrderIterator.h"
20#include "llvm/CodeGen/LiveIntervals.h"
21#include "llvm/CodeGen/LiveRegMatrix.h"
22#include "llvm/CodeGen/MachineFrameInfo.h"
23#include "llvm/CodeGen/MachineFunctionPass.h"
24#include "llvm/CodeGen/RegisterClassInfo.h"
25#include "llvm/CodeGen/VirtRegMap.h"
26
27using namespace llvm;
28
29#define DEBUG_TYPE "si-pre-allocate-wwm-regs"
30
31static cl::opt<bool>
32 EnablePreallocateSGPRSpillVGPRs("amdgpu-prealloc-sgpr-spill-vgprs",
33 cl::init(Val: false), cl::Hidden);
34
35namespace {
36
37class SIPreAllocateWWMRegs {
38private:
39 const SIInstrInfo *TII;
40 const SIRegisterInfo *TRI;
41 MachineRegisterInfo *MRI;
42 LiveIntervals *LIS;
43 LiveRegMatrix *Matrix;
44 VirtRegMap *VRM;
45 RegisterClassInfo RegClassInfo;
46
47 std::vector<unsigned> RegsToRewrite;
48#ifndef NDEBUG
49 void printWWMInfo(const MachineInstr &MI);
50#endif
51 bool processDef(MachineOperand &MO);
52 void rewriteRegs(MachineFunction &MF);
53
54public:
55 SIPreAllocateWWMRegs(LiveIntervals *LIS, LiveRegMatrix *Matrix,
56 VirtRegMap *VRM)
57 : LIS(LIS), Matrix(Matrix), VRM(VRM) {}
58 bool run(MachineFunction &MF);
59};
60
61class SIPreAllocateWWMRegsLegacy : public MachineFunctionPass {
62public:
63 static char ID;
64
65 SIPreAllocateWWMRegsLegacy() : MachineFunctionPass(ID) {}
66
67 bool runOnMachineFunction(MachineFunction &MF) override;
68
69 void getAnalysisUsage(AnalysisUsage &AU) const override {
70 AU.addRequired<LiveIntervalsWrapperPass>();
71 AU.addRequired<VirtRegMapWrapperLegacy>();
72 AU.addRequired<LiveRegMatrixWrapperLegacy>();
73 AU.setPreservesAll();
74 MachineFunctionPass::getAnalysisUsage(AU);
75 }
76};
77
78} // End anonymous namespace.
79
80INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegsLegacy, DEBUG_TYPE,
81 "SI Pre-allocate WWM Registers", false, false)
82INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
83INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy)
84INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy)
85INITIALIZE_PASS_END(SIPreAllocateWWMRegsLegacy, DEBUG_TYPE,
86 "SI Pre-allocate WWM Registers", false, false)
87
88char SIPreAllocateWWMRegsLegacy::ID = 0;
89
90char &llvm::SIPreAllocateWWMRegsLegacyID = SIPreAllocateWWMRegsLegacy::ID;
91
92FunctionPass *llvm::createSIPreAllocateWWMRegsLegacyPass() {
93 return new SIPreAllocateWWMRegsLegacy();
94}
95
96bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
97 Register Reg = MO.getReg();
98 if (Reg.isPhysical())
99 return false;
100
101 if (!TRI->isVGPR(MRI: *MRI, Reg))
102 return false;
103
104 if (VRM->hasPhys(virtReg: Reg))
105 return false;
106
107 LiveInterval &LI = LIS->getInterval(Reg);
108
109 for (MCRegister PhysReg : RegClassInfo.getOrder(RC: MRI->getRegClass(Reg))) {
110 if (!MRI->isPhysRegUsed(PhysReg, /*SkipRegMaskTest=*/true) &&
111 Matrix->checkInterference(VirtReg: LI, PhysReg) == LiveRegMatrix::IK_Free) {
112 Matrix->assign(VirtReg: LI, PhysReg);
113 assert(PhysReg != 0);
114 RegsToRewrite.push_back(x: Reg);
115 return true;
116 }
117 }
118
119 llvm_unreachable("physreg not found for WWM expression");
120}
121
122void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
123 for (MachineBasicBlock &MBB : MF) {
124 for (MachineInstr &MI : MBB) {
125 for (MachineOperand &MO : MI.operands()) {
126 if (!MO.isReg())
127 continue;
128
129 const Register VirtReg = MO.getReg();
130 if (VirtReg.isPhysical())
131 continue;
132
133 if (!VRM->hasPhys(virtReg: VirtReg))
134 continue;
135
136 Register PhysReg = VRM->getPhys(virtReg: VirtReg);
137 const unsigned SubReg = MO.getSubReg();
138 if (SubReg != 0) {
139 PhysReg = TRI->getSubReg(Reg: PhysReg, Idx: SubReg);
140 MO.setSubReg(0);
141 }
142
143 MO.setReg(PhysReg);
144 MO.setIsRenamable(false);
145 }
146 }
147 }
148
149 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
150
151 for (unsigned Reg : RegsToRewrite) {
152 LIS->removeInterval(Reg);
153
154 const Register PhysReg = VRM->getPhys(virtReg: Reg);
155 assert(PhysReg != 0);
156
157 MFI->reserveWWMRegister(Reg: PhysReg);
158 }
159
160 RegsToRewrite.clear();
161
162 // Update the set of reserved registers to include WWM ones.
163 MRI->freezeReservedRegs();
164}
165
166#ifndef NDEBUG
167LLVM_DUMP_METHOD void
168SIPreAllocateWWMRegs::printWWMInfo(const MachineInstr &MI) {
169
170 unsigned Opc = MI.getOpcode();
171
172 if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::ENTER_STRICT_WQM) {
173 dbgs() << "Entering ";
174 } else {
175 assert(Opc == AMDGPU::EXIT_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WQM);
176 dbgs() << "Exiting ";
177 }
178
179 if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WWM) {
180 dbgs() << "Strict WWM ";
181 } else {
182 assert(Opc == AMDGPU::ENTER_STRICT_WQM || Opc == AMDGPU::EXIT_STRICT_WQM);
183 dbgs() << "Strict WQM ";
184 }
185
186 dbgs() << "region: " << MI;
187}
188
189#endif
190
191bool SIPreAllocateWWMRegsLegacy::runOnMachineFunction(MachineFunction &MF) {
192 auto *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
193 auto *Matrix = &getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
194 auto *VRM = &getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
195 return SIPreAllocateWWMRegs(LIS, Matrix, VRM).run(MF);
196}
197
198bool SIPreAllocateWWMRegs::run(MachineFunction &MF) {
199 LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n");
200
201 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
202
203 TII = ST.getInstrInfo();
204 TRI = &TII->getRegisterInfo();
205 MRI = &MF.getRegInfo();
206
207 RegClassInfo.runOnMachineFunction(MF);
208
209 bool PreallocateSGPRSpillVGPRs =
210 EnablePreallocateSGPRSpillVGPRs ||
211 MF.getFunction().hasFnAttribute(Kind: "amdgpu-prealloc-sgpr-spill-vgprs");
212
213 bool RegsAssigned = false;
214
215 // We use a reverse post-order traversal of the control-flow graph to
216 // guarantee that we visit definitions in dominance order. Since WWM
217 // expressions are guaranteed to never involve phi nodes, and we can only
218 // escape WWM through the special WWM instruction, this means that this is a
219 // perfect elimination order, so we can never do any better.
220 ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
221
222 for (MachineBasicBlock *MBB : RPOT) {
223 bool InWWM = false;
224 for (MachineInstr &MI : *MBB) {
225 if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR) {
226 if (PreallocateSGPRSpillVGPRs)
227 RegsAssigned |= processDef(MO&: MI.getOperand(i: 0));
228 continue;
229 }
230
231 if (MI.getOpcode() == AMDGPU::ENTER_STRICT_WWM ||
232 MI.getOpcode() == AMDGPU::ENTER_STRICT_WQM) {
233 LLVM_DEBUG(printWWMInfo(MI));
234 InWWM = true;
235 continue;
236 }
237
238 if (MI.getOpcode() == AMDGPU::EXIT_STRICT_WWM ||
239 MI.getOpcode() == AMDGPU::EXIT_STRICT_WQM) {
240 LLVM_DEBUG(printWWMInfo(MI));
241 InWWM = false;
242 }
243
244 if (!InWWM)
245 continue;
246
247 LLVM_DEBUG(dbgs() << "Processing " << MI);
248
249 for (MachineOperand &DefOpnd : MI.defs()) {
250 RegsAssigned |= processDef(MO&: DefOpnd);
251 }
252 }
253 }
254
255 if (!RegsAssigned)
256 return false;
257
258 rewriteRegs(MF);
259 return true;
260}
261
262PreservedAnalyses
263SIPreAllocateWWMRegsPass::run(MachineFunction &MF,
264 MachineFunctionAnalysisManager &MFAM) {
265 auto *LIS = &MFAM.getResult<LiveIntervalsAnalysis>(IR&: MF);
266 auto *Matrix = &MFAM.getResult<LiveRegMatrixAnalysis>(IR&: MF);
267 auto *VRM = &MFAM.getResult<VirtRegMapAnalysis>(IR&: MF);
268 SIPreAllocateWWMRegs(LIS, Matrix, VRM).run(MF);
269 return PreservedAnalyses::all();
270}
271