1 | //===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
/// Pass to pre-allocate WWM registers.
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "SIPreAllocateWWMRegs.h" |
15 | #include "AMDGPU.h" |
16 | #include "GCNSubtarget.h" |
17 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
18 | #include "SIMachineFunctionInfo.h" |
19 | #include "llvm/ADT/PostOrderIterator.h" |
20 | #include "llvm/CodeGen/LiveIntervals.h" |
21 | #include "llvm/CodeGen/LiveRegMatrix.h" |
22 | #include "llvm/CodeGen/MachineFrameInfo.h" |
23 | #include "llvm/CodeGen/MachineFunctionPass.h" |
24 | #include "llvm/CodeGen/RegisterClassInfo.h" |
25 | #include "llvm/CodeGen/VirtRegMap.h" |
26 | |
27 | using namespace llvm; |
28 | |
29 | #define DEBUG_TYPE "si-pre-allocate-wwm-regs" |
30 | |
31 | static cl::opt<bool> |
32 | EnablePreallocateSGPRSpillVGPRs("amdgpu-prealloc-sgpr-spill-vgprs" , |
33 | cl::init(Val: false), cl::Hidden); |
34 | |
35 | namespace { |
36 | |
37 | class SIPreAllocateWWMRegs { |
38 | private: |
39 | const SIInstrInfo *TII; |
40 | const SIRegisterInfo *TRI; |
41 | MachineRegisterInfo *MRI; |
42 | LiveIntervals *LIS; |
43 | LiveRegMatrix *Matrix; |
44 | VirtRegMap *VRM; |
45 | RegisterClassInfo RegClassInfo; |
46 | |
47 | std::vector<unsigned> RegsToRewrite; |
48 | #ifndef NDEBUG |
49 | void printWWMInfo(const MachineInstr &MI); |
50 | #endif |
51 | bool processDef(MachineOperand &MO); |
52 | void rewriteRegs(MachineFunction &MF); |
53 | |
54 | public: |
55 | SIPreAllocateWWMRegs(LiveIntervals *LIS, LiveRegMatrix *Matrix, |
56 | VirtRegMap *VRM) |
57 | : LIS(LIS), Matrix(Matrix), VRM(VRM) {} |
58 | bool run(MachineFunction &MF); |
59 | }; |
60 | |
61 | class SIPreAllocateWWMRegsLegacy : public MachineFunctionPass { |
62 | public: |
63 | static char ID; |
64 | |
65 | SIPreAllocateWWMRegsLegacy() : MachineFunctionPass(ID) {} |
66 | |
67 | bool runOnMachineFunction(MachineFunction &MF) override; |
68 | |
69 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
70 | AU.addRequired<LiveIntervalsWrapperPass>(); |
71 | AU.addRequired<VirtRegMapWrapperLegacy>(); |
72 | AU.addRequired<LiveRegMatrixWrapperLegacy>(); |
73 | AU.setPreservesAll(); |
74 | MachineFunctionPass::getAnalysisUsage(AU); |
75 | } |
76 | }; |
77 | |
78 | } // End anonymous namespace. |
79 | |
80 | INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegsLegacy, DEBUG_TYPE, |
81 | "SI Pre-allocate WWM Registers" , false, false) |
82 | INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) |
83 | INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy) |
84 | INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy) |
85 | INITIALIZE_PASS_END(SIPreAllocateWWMRegsLegacy, DEBUG_TYPE, |
86 | "SI Pre-allocate WWM Registers" , false, false) |
87 | |
88 | char SIPreAllocateWWMRegsLegacy::ID = 0; |
89 | |
90 | char &llvm::SIPreAllocateWWMRegsLegacyID = SIPreAllocateWWMRegsLegacy::ID; |
91 | |
92 | FunctionPass *llvm::createSIPreAllocateWWMRegsLegacyPass() { |
93 | return new SIPreAllocateWWMRegsLegacy(); |
94 | } |
95 | |
96 | bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) { |
97 | Register Reg = MO.getReg(); |
98 | if (Reg.isPhysical()) |
99 | return false; |
100 | |
101 | if (!TRI->isVGPR(MRI: *MRI, Reg)) |
102 | return false; |
103 | |
104 | if (VRM->hasPhys(virtReg: Reg)) |
105 | return false; |
106 | |
107 | LiveInterval &LI = LIS->getInterval(Reg); |
108 | |
109 | for (MCRegister PhysReg : RegClassInfo.getOrder(RC: MRI->getRegClass(Reg))) { |
110 | if (!MRI->isPhysRegUsed(PhysReg, /*SkipRegMaskTest=*/true) && |
111 | Matrix->checkInterference(VirtReg: LI, PhysReg) == LiveRegMatrix::IK_Free) { |
112 | Matrix->assign(VirtReg: LI, PhysReg); |
113 | assert(PhysReg != 0); |
114 | RegsToRewrite.push_back(x: Reg); |
115 | return true; |
116 | } |
117 | } |
118 | |
119 | llvm_unreachable("physreg not found for WWM expression" ); |
120 | } |
121 | |
122 | void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) { |
123 | for (MachineBasicBlock &MBB : MF) { |
124 | for (MachineInstr &MI : MBB) { |
125 | for (MachineOperand &MO : MI.operands()) { |
126 | if (!MO.isReg()) |
127 | continue; |
128 | |
129 | const Register VirtReg = MO.getReg(); |
130 | if (VirtReg.isPhysical()) |
131 | continue; |
132 | |
133 | if (!VRM->hasPhys(virtReg: VirtReg)) |
134 | continue; |
135 | |
136 | Register PhysReg = VRM->getPhys(virtReg: VirtReg); |
137 | const unsigned SubReg = MO.getSubReg(); |
138 | if (SubReg != 0) { |
139 | PhysReg = TRI->getSubReg(Reg: PhysReg, Idx: SubReg); |
140 | MO.setSubReg(0); |
141 | } |
142 | |
143 | MO.setReg(PhysReg); |
144 | MO.setIsRenamable(false); |
145 | } |
146 | } |
147 | } |
148 | |
149 | SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
150 | |
151 | for (unsigned Reg : RegsToRewrite) { |
152 | LIS->removeInterval(Reg); |
153 | |
154 | const Register PhysReg = VRM->getPhys(virtReg: Reg); |
155 | assert(PhysReg != 0); |
156 | |
157 | MFI->reserveWWMRegister(Reg: PhysReg); |
158 | } |
159 | |
160 | RegsToRewrite.clear(); |
161 | |
162 | // Update the set of reserved registers to include WWM ones. |
163 | MRI->freezeReservedRegs(); |
164 | } |
165 | |
166 | #ifndef NDEBUG |
167 | LLVM_DUMP_METHOD void |
168 | SIPreAllocateWWMRegs::printWWMInfo(const MachineInstr &MI) { |
169 | |
170 | unsigned Opc = MI.getOpcode(); |
171 | |
172 | if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::ENTER_STRICT_WQM) { |
173 | dbgs() << "Entering " ; |
174 | } else { |
175 | assert(Opc == AMDGPU::EXIT_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WQM); |
176 | dbgs() << "Exiting " ; |
177 | } |
178 | |
179 | if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WWM) { |
180 | dbgs() << "Strict WWM " ; |
181 | } else { |
182 | assert(Opc == AMDGPU::ENTER_STRICT_WQM || Opc == AMDGPU::EXIT_STRICT_WQM); |
183 | dbgs() << "Strict WQM " ; |
184 | } |
185 | |
186 | dbgs() << "region: " << MI; |
187 | } |
188 | |
189 | #endif |
190 | |
191 | bool SIPreAllocateWWMRegsLegacy::runOnMachineFunction(MachineFunction &MF) { |
192 | auto *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS(); |
193 | auto *Matrix = &getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM(); |
194 | auto *VRM = &getAnalysis<VirtRegMapWrapperLegacy>().getVRM(); |
195 | return SIPreAllocateWWMRegs(LIS, Matrix, VRM).run(MF); |
196 | } |
197 | |
198 | bool SIPreAllocateWWMRegs::run(MachineFunction &MF) { |
199 | LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n" ); |
200 | |
201 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
202 | |
203 | TII = ST.getInstrInfo(); |
204 | TRI = &TII->getRegisterInfo(); |
205 | MRI = &MF.getRegInfo(); |
206 | |
207 | RegClassInfo.runOnMachineFunction(MF); |
208 | |
209 | bool PreallocateSGPRSpillVGPRs = |
210 | EnablePreallocateSGPRSpillVGPRs || |
211 | MF.getFunction().hasFnAttribute(Kind: "amdgpu-prealloc-sgpr-spill-vgprs" ); |
212 | |
213 | bool RegsAssigned = false; |
214 | |
215 | // We use a reverse post-order traversal of the control-flow graph to |
216 | // guarantee that we visit definitions in dominance order. Since WWM |
217 | // expressions are guaranteed to never involve phi nodes, and we can only |
218 | // escape WWM through the special WWM instruction, this means that this is a |
219 | // perfect elimination order, so we can never do any better. |
220 | ReversePostOrderTraversal<MachineFunction*> RPOT(&MF); |
221 | |
222 | for (MachineBasicBlock *MBB : RPOT) { |
223 | bool InWWM = false; |
224 | for (MachineInstr &MI : *MBB) { |
225 | if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR) { |
226 | if (PreallocateSGPRSpillVGPRs) |
227 | RegsAssigned |= processDef(MO&: MI.getOperand(i: 0)); |
228 | continue; |
229 | } |
230 | |
231 | if (MI.getOpcode() == AMDGPU::ENTER_STRICT_WWM || |
232 | MI.getOpcode() == AMDGPU::ENTER_STRICT_WQM) { |
233 | LLVM_DEBUG(printWWMInfo(MI)); |
234 | InWWM = true; |
235 | continue; |
236 | } |
237 | |
238 | if (MI.getOpcode() == AMDGPU::EXIT_STRICT_WWM || |
239 | MI.getOpcode() == AMDGPU::EXIT_STRICT_WQM) { |
240 | LLVM_DEBUG(printWWMInfo(MI)); |
241 | InWWM = false; |
242 | } |
243 | |
244 | if (!InWWM) |
245 | continue; |
246 | |
247 | LLVM_DEBUG(dbgs() << "Processing " << MI); |
248 | |
249 | for (MachineOperand &DefOpnd : MI.defs()) { |
250 | RegsAssigned |= processDef(MO&: DefOpnd); |
251 | } |
252 | } |
253 | } |
254 | |
255 | if (!RegsAssigned) |
256 | return false; |
257 | |
258 | rewriteRegs(MF); |
259 | return true; |
260 | } |
261 | |
262 | PreservedAnalyses |
263 | SIPreAllocateWWMRegsPass::run(MachineFunction &MF, |
264 | MachineFunctionAnalysisManager &MFAM) { |
265 | auto *LIS = &MFAM.getResult<LiveIntervalsAnalysis>(IR&: MF); |
266 | auto *Matrix = &MFAM.getResult<LiveRegMatrixAnalysis>(IR&: MF); |
267 | auto *VRM = &MFAM.getResult<VirtRegMapAnalysis>(IR&: MF); |
268 | SIPreAllocateWWMRegs(LIS, Matrix, VRM).run(MF); |
269 | return PreservedAnalyses::all(); |
270 | } |
271 | |