1//===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
/// Pass to pre-allocate WWM registers
11//
12//===----------------------------------------------------------------------===//
13
14#include "SIPreAllocateWWMRegs.h"
15#include "AMDGPU.h"
16#include "GCNSubtarget.h"
17#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
18#include "SIMachineFunctionInfo.h"
19#include "llvm/ADT/PostOrderIterator.h"
20#include "llvm/CodeGen/LiveIntervals.h"
21#include "llvm/CodeGen/LiveRegMatrix.h"
22#include "llvm/CodeGen/MachineFrameInfo.h"
23#include "llvm/CodeGen/MachineFunctionPass.h"
24#include "llvm/CodeGen/RegisterClassInfo.h"
25#include "llvm/CodeGen/VirtRegMap.h"
26#include "llvm/InitializePasses.h"
27
28using namespace llvm;
29
30#define DEBUG_TYPE "si-pre-allocate-wwm-regs"
31
32static cl::opt<bool>
33 EnablePreallocateSGPRSpillVGPRs("amdgpu-prealloc-sgpr-spill-vgprs",
34 cl::init(Val: false), cl::Hidden);
35
36namespace {
37
38class SIPreAllocateWWMRegs {
39private:
40 const SIInstrInfo *TII;
41 const SIRegisterInfo *TRI;
42 MachineRegisterInfo *MRI;
43 LiveIntervals *LIS;
44 LiveRegMatrix *Matrix;
45 VirtRegMap *VRM;
46 RegisterClassInfo RegClassInfo;
47
48 std::vector<unsigned> RegsToRewrite;
49#ifndef NDEBUG
50 void printWWMInfo(const MachineInstr &MI);
51#endif
52 bool processDef(MachineOperand &MO);
53 void rewriteRegs(MachineFunction &MF);
54
55public:
56 SIPreAllocateWWMRegs(LiveIntervals *LIS, LiveRegMatrix *Matrix,
57 VirtRegMap *VRM)
58 : LIS(LIS), Matrix(Matrix), VRM(VRM) {}
59 bool run(MachineFunction &MF);
60};
61
62class SIPreAllocateWWMRegsLegacy : public MachineFunctionPass {
63public:
64 static char ID;
65
66 SIPreAllocateWWMRegsLegacy() : MachineFunctionPass(ID) {}
67
68 bool runOnMachineFunction(MachineFunction &MF) override;
69
70 void getAnalysisUsage(AnalysisUsage &AU) const override {
71 AU.addRequired<LiveIntervalsWrapperPass>();
72 AU.addRequired<VirtRegMapWrapperLegacy>();
73 AU.addRequired<LiveRegMatrixWrapperLegacy>();
74 AU.setPreservesAll();
75 MachineFunctionPass::getAnalysisUsage(AU);
76 }
77};
78
79} // End anonymous namespace.
80
81INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegsLegacy, DEBUG_TYPE,
82 "SI Pre-allocate WWM Registers", false, false)
83INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
84INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy)
85INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy)
86INITIALIZE_PASS_END(SIPreAllocateWWMRegsLegacy, DEBUG_TYPE,
87 "SI Pre-allocate WWM Registers", false, false)
88
89char SIPreAllocateWWMRegsLegacy::ID = 0;
90
91char &llvm::SIPreAllocateWWMRegsLegacyID = SIPreAllocateWWMRegsLegacy::ID;
92
93FunctionPass *llvm::createSIPreAllocateWWMRegsLegacyPass() {
94 return new SIPreAllocateWWMRegsLegacy();
95}
96
97bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
98 Register Reg = MO.getReg();
99 if (Reg.isPhysical())
100 return false;
101
102 if (!TRI->isVGPR(MRI: *MRI, Reg))
103 return false;
104
105 if (VRM->hasPhys(virtReg: Reg))
106 return false;
107
108 LiveInterval &LI = LIS->getInterval(Reg);
109
110 for (MCRegister PhysReg : RegClassInfo.getOrder(RC: MRI->getRegClass(Reg))) {
111 if (!MRI->isPhysRegUsed(PhysReg, /*SkipRegMaskTest=*/true) &&
112 Matrix->checkInterference(VirtReg: LI, PhysReg) == LiveRegMatrix::IK_Free) {
113 Matrix->assign(VirtReg: LI, PhysReg);
114 assert(PhysReg != 0);
115 RegsToRewrite.push_back(x: Reg);
116 return true;
117 }
118 }
119
120 llvm_unreachable("physreg not found for WWM expression");
121}
122
123void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
124 for (MachineBasicBlock &MBB : MF) {
125 for (MachineInstr &MI : MBB) {
126 for (MachineOperand &MO : MI.operands()) {
127 if (!MO.isReg())
128 continue;
129
130 const Register VirtReg = MO.getReg();
131 if (VirtReg.isPhysical())
132 continue;
133
134 if (!VirtReg.isValid())
135 continue;
136
137 if (!VRM->hasPhys(virtReg: VirtReg))
138 continue;
139
140 Register PhysReg = VRM->getPhys(virtReg: VirtReg);
141 const unsigned SubReg = MO.getSubReg();
142 if (SubReg != 0) {
143 PhysReg = TRI->getSubReg(Reg: PhysReg, Idx: SubReg);
144 MO.setSubReg(0);
145 }
146
147 MO.setReg(PhysReg);
148 MO.setIsRenamable(false);
149 }
150 }
151 }
152
153 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
154
155 for (unsigned Reg : RegsToRewrite) {
156 const Register PhysReg = VRM->getPhys(virtReg: Reg);
157 assert(PhysReg != 0);
158
159 LiveInterval &LI = LIS->getInterval(Reg);
160 Matrix->unassign(VirtReg: LI, /*ClearAllReferencingSegments=*/true);
161 LIS->removeInterval(Reg);
162
163 MFI->reserveWWMRegister(Reg: PhysReg);
164 }
165
166 RegsToRewrite.clear();
167
168 // Update the set of reserved registers to include WWM ones.
169 MRI->freezeReservedRegs();
170}
171
172#ifndef NDEBUG
173LLVM_DUMP_METHOD void
174SIPreAllocateWWMRegs::printWWMInfo(const MachineInstr &MI) {
175
176 unsigned Opc = MI.getOpcode();
177
178 if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::ENTER_STRICT_WQM) {
179 dbgs() << "Entering ";
180 } else {
181 assert(Opc == AMDGPU::EXIT_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WQM);
182 dbgs() << "Exiting ";
183 }
184
185 if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WWM) {
186 dbgs() << "Strict WWM ";
187 } else {
188 assert(Opc == AMDGPU::ENTER_STRICT_WQM || Opc == AMDGPU::EXIT_STRICT_WQM);
189 dbgs() << "Strict WQM ";
190 }
191
192 dbgs() << "region: " << MI;
193}
194
195#endif
196
197bool SIPreAllocateWWMRegsLegacy::runOnMachineFunction(MachineFunction &MF) {
198 auto *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
199 auto *Matrix = &getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
200 auto *VRM = &getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
201 return SIPreAllocateWWMRegs(LIS, Matrix, VRM).run(MF);
202}
203
204bool SIPreAllocateWWMRegs::run(MachineFunction &MF) {
205 LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n");
206
207 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
208
209 TII = ST.getInstrInfo();
210 TRI = &TII->getRegisterInfo();
211 MRI = &MF.getRegInfo();
212
213 RegClassInfo.runOnMachineFunction(MF);
214
215 bool PreallocateSGPRSpillVGPRs =
216 EnablePreallocateSGPRSpillVGPRs ||
217 MF.getFunction().hasFnAttribute(Kind: "amdgpu-prealloc-sgpr-spill-vgprs");
218
219 bool RegsAssigned = false;
220
221 // We use a reverse post-order traversal of the control-flow graph to
222 // guarantee that we visit definitions in dominance order. Since WWM
223 // expressions are guaranteed to never involve phi nodes, and we can only
224 // escape WWM through the special WWM instruction, this means that this is a
225 // perfect elimination order, so we can never do any better.
226 ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
227
228 for (MachineBasicBlock *MBB : RPOT) {
229 bool InWWM = false;
230 for (MachineInstr &MI : *MBB) {
231 if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR) {
232 if (PreallocateSGPRSpillVGPRs)
233 RegsAssigned |= processDef(MO&: MI.getOperand(i: 0));
234 continue;
235 }
236
237 if (MI.getOpcode() == AMDGPU::ENTER_STRICT_WWM ||
238 MI.getOpcode() == AMDGPU::ENTER_STRICT_WQM) {
239 LLVM_DEBUG(printWWMInfo(MI));
240 InWWM = true;
241 continue;
242 }
243
244 if (MI.getOpcode() == AMDGPU::EXIT_STRICT_WWM ||
245 MI.getOpcode() == AMDGPU::EXIT_STRICT_WQM) {
246 LLVM_DEBUG(printWWMInfo(MI));
247 InWWM = false;
248 }
249
250 if (!InWWM)
251 continue;
252
253 LLVM_DEBUG(dbgs() << "Processing " << MI);
254
255 for (MachineOperand &DefOpnd : MI.defs()) {
256 RegsAssigned |= processDef(MO&: DefOpnd);
257 }
258 }
259 }
260
261 if (!RegsAssigned)
262 return false;
263
264 rewriteRegs(MF);
265 return true;
266}
267
268PreservedAnalyses
269SIPreAllocateWWMRegsPass::run(MachineFunction &MF,
270 MachineFunctionAnalysisManager &MFAM) {
271 auto *LIS = &MFAM.getResult<LiveIntervalsAnalysis>(IR&: MF);
272 auto *Matrix = &MFAM.getResult<LiveRegMatrixAnalysis>(IR&: MF);
273 auto *VRM = &MFAM.getResult<VirtRegMapAnalysis>(IR&: MF);
274 SIPreAllocateWWMRegs(LIS, Matrix, VRM).run(MF);
275 return PreservedAnalyses::all();
276}
277