1 | //===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
/// Pass to pre-allocate WWM registers
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "AMDGPU.h" |
15 | #include "GCNSubtarget.h" |
16 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
17 | #include "SIMachineFunctionInfo.h" |
18 | #include "llvm/ADT/PostOrderIterator.h" |
19 | #include "llvm/CodeGen/LiveIntervals.h" |
20 | #include "llvm/CodeGen/LiveRegMatrix.h" |
21 | #include "llvm/CodeGen/MachineFrameInfo.h" |
22 | #include "llvm/CodeGen/MachineFunctionPass.h" |
23 | #include "llvm/CodeGen/RegisterClassInfo.h" |
24 | #include "llvm/CodeGen/VirtRegMap.h" |
25 | #include "llvm/InitializePasses.h" |
26 | |
27 | using namespace llvm; |
28 | |
29 | #define DEBUG_TYPE "si-pre-allocate-wwm-regs" |
30 | |
31 | static cl::opt<bool> |
32 | EnablePreallocateSGPRSpillVGPRs("amdgpu-prealloc-sgpr-spill-vgprs" , |
33 | cl::init(Val: false), cl::Hidden); |
34 | |
35 | namespace { |
36 | |
37 | class SIPreAllocateWWMRegs : public MachineFunctionPass { |
38 | private: |
39 | const SIInstrInfo *TII; |
40 | const SIRegisterInfo *TRI; |
41 | MachineRegisterInfo *MRI; |
42 | LiveIntervals *LIS; |
43 | LiveRegMatrix *Matrix; |
44 | VirtRegMap *VRM; |
45 | RegisterClassInfo RegClassInfo; |
46 | |
47 | std::vector<unsigned> RegsToRewrite; |
48 | #ifndef NDEBUG |
49 | void printWWMInfo(const MachineInstr &MI); |
50 | #endif |
51 | |
52 | public: |
53 | static char ID; |
54 | |
55 | SIPreAllocateWWMRegs() : MachineFunctionPass(ID) { |
56 | initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry()); |
57 | } |
58 | |
59 | bool runOnMachineFunction(MachineFunction &MF) override; |
60 | |
61 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
62 | AU.addRequired<LiveIntervalsWrapperPass>(); |
63 | AU.addRequired<VirtRegMap>(); |
64 | AU.addRequired<LiveRegMatrix>(); |
65 | AU.setPreservesAll(); |
66 | MachineFunctionPass::getAnalysisUsage(AU); |
67 | } |
68 | |
69 | private: |
70 | bool processDef(MachineOperand &MO); |
71 | void rewriteRegs(MachineFunction &MF); |
72 | }; |
73 | |
74 | } // End anonymous namespace. |
75 | |
76 | INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE, |
77 | "SI Pre-allocate WWM Registers" , false, false) |
78 | INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) |
79 | INITIALIZE_PASS_DEPENDENCY(VirtRegMap) |
80 | INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix) |
81 | INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE, |
82 | "SI Pre-allocate WWM Registers" , false, false) |
83 | |
84 | char SIPreAllocateWWMRegs::ID = 0; |
85 | |
86 | char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID; |
87 | |
88 | FunctionPass *llvm::createSIPreAllocateWWMRegsPass() { |
89 | return new SIPreAllocateWWMRegs(); |
90 | } |
91 | |
92 | bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) { |
93 | Register Reg = MO.getReg(); |
94 | if (Reg.isPhysical()) |
95 | return false; |
96 | |
97 | if (!TRI->isVGPR(MRI: *MRI, Reg)) |
98 | return false; |
99 | |
100 | if (VRM->hasPhys(virtReg: Reg)) |
101 | return false; |
102 | |
103 | LiveInterval &LI = LIS->getInterval(Reg); |
104 | |
105 | for (MCRegister PhysReg : RegClassInfo.getOrder(RC: MRI->getRegClass(Reg))) { |
106 | if (!MRI->isPhysRegUsed(PhysReg, /*SkipRegMaskTest=*/true) && |
107 | Matrix->checkInterference(VirtReg: LI, PhysReg) == LiveRegMatrix::IK_Free) { |
108 | Matrix->assign(VirtReg: LI, PhysReg); |
109 | assert(PhysReg != 0); |
110 | RegsToRewrite.push_back(x: Reg); |
111 | return true; |
112 | } |
113 | } |
114 | |
115 | llvm_unreachable("physreg not found for WWM expression" ); |
116 | } |
117 | |
118 | void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) { |
119 | for (MachineBasicBlock &MBB : MF) { |
120 | for (MachineInstr &MI : MBB) { |
121 | for (MachineOperand &MO : MI.operands()) { |
122 | if (!MO.isReg()) |
123 | continue; |
124 | |
125 | const Register VirtReg = MO.getReg(); |
126 | if (VirtReg.isPhysical()) |
127 | continue; |
128 | |
129 | if (!VRM->hasPhys(virtReg: VirtReg)) |
130 | continue; |
131 | |
132 | Register PhysReg = VRM->getPhys(virtReg: VirtReg); |
133 | const unsigned SubReg = MO.getSubReg(); |
134 | if (SubReg != 0) { |
135 | PhysReg = TRI->getSubReg(Reg: PhysReg, Idx: SubReg); |
136 | MO.setSubReg(0); |
137 | } |
138 | |
139 | MO.setReg(PhysReg); |
140 | MO.setIsRenamable(false); |
141 | } |
142 | } |
143 | } |
144 | |
145 | SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); |
146 | |
147 | for (unsigned Reg : RegsToRewrite) { |
148 | LIS->removeInterval(Reg); |
149 | |
150 | const Register PhysReg = VRM->getPhys(virtReg: Reg); |
151 | assert(PhysReg != 0); |
152 | |
153 | MFI->reserveWWMRegister(Reg: PhysReg); |
154 | } |
155 | |
156 | RegsToRewrite.clear(); |
157 | |
158 | // Update the set of reserved registers to include WWM ones. |
159 | MRI->freezeReservedRegs(); |
160 | } |
161 | |
162 | #ifndef NDEBUG |
163 | LLVM_DUMP_METHOD void |
164 | SIPreAllocateWWMRegs::printWWMInfo(const MachineInstr &MI) { |
165 | |
166 | unsigned Opc = MI.getOpcode(); |
167 | |
168 | if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::ENTER_STRICT_WQM) { |
169 | dbgs() << "Entering " ; |
170 | } else { |
171 | assert(Opc == AMDGPU::EXIT_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WQM); |
172 | dbgs() << "Exiting " ; |
173 | } |
174 | |
175 | if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WWM) { |
176 | dbgs() << "Strict WWM " ; |
177 | } else { |
178 | assert(Opc == AMDGPU::ENTER_STRICT_WQM || Opc == AMDGPU::EXIT_STRICT_WQM); |
179 | dbgs() << "Strict WQM " ; |
180 | } |
181 | |
182 | dbgs() << "region: " << MI; |
183 | } |
184 | |
185 | #endif |
186 | |
187 | bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) { |
188 | LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n" ); |
189 | |
190 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
191 | |
192 | TII = ST.getInstrInfo(); |
193 | TRI = &TII->getRegisterInfo(); |
194 | MRI = &MF.getRegInfo(); |
195 | |
196 | LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS(); |
197 | Matrix = &getAnalysis<LiveRegMatrix>(); |
198 | VRM = &getAnalysis<VirtRegMap>(); |
199 | |
200 | RegClassInfo.runOnMachineFunction(MF); |
201 | |
202 | bool PreallocateSGPRSpillVGPRs = |
203 | EnablePreallocateSGPRSpillVGPRs || |
204 | MF.getFunction().hasFnAttribute(Kind: "amdgpu-prealloc-sgpr-spill-vgprs" ); |
205 | |
206 | bool RegsAssigned = false; |
207 | |
208 | // We use a reverse post-order traversal of the control-flow graph to |
209 | // guarantee that we visit definitions in dominance order. Since WWM |
210 | // expressions are guaranteed to never involve phi nodes, and we can only |
211 | // escape WWM through the special WWM instruction, this means that this is a |
212 | // perfect elimination order, so we can never do any better. |
213 | ReversePostOrderTraversal<MachineFunction*> RPOT(&MF); |
214 | |
215 | for (MachineBasicBlock *MBB : RPOT) { |
216 | bool InWWM = false; |
217 | for (MachineInstr &MI : *MBB) { |
218 | if (MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B32 || |
219 | MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B64) |
220 | RegsAssigned |= processDef(MO&: MI.getOperand(i: 0)); |
221 | |
222 | if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR) { |
223 | if (!PreallocateSGPRSpillVGPRs) |
224 | continue; |
225 | RegsAssigned |= processDef(MO&: MI.getOperand(i: 0)); |
226 | } |
227 | |
228 | if (MI.getOpcode() == AMDGPU::ENTER_STRICT_WWM || |
229 | MI.getOpcode() == AMDGPU::ENTER_STRICT_WQM) { |
230 | LLVM_DEBUG(printWWMInfo(MI)); |
231 | InWWM = true; |
232 | continue; |
233 | } |
234 | |
235 | if (MI.getOpcode() == AMDGPU::EXIT_STRICT_WWM || |
236 | MI.getOpcode() == AMDGPU::EXIT_STRICT_WQM) { |
237 | LLVM_DEBUG(printWWMInfo(MI)); |
238 | InWWM = false; |
239 | } |
240 | |
241 | if (!InWWM) |
242 | continue; |
243 | |
244 | LLVM_DEBUG(dbgs() << "Processing " << MI); |
245 | |
246 | for (MachineOperand &DefOpnd : MI.defs()) { |
247 | RegsAssigned |= processDef(MO&: DefOpnd); |
248 | } |
249 | } |
250 | } |
251 | |
252 | if (!RegsAssigned) |
253 | return false; |
254 | |
255 | rewriteRegs(MF); |
256 | return true; |
257 | } |
258 | |