1 | //===-- AMDGPURegBankSelect.cpp -------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// Assign register banks to all register operands of G_ instructions using |
10 | /// machine uniformity analysis. |
11 | /// Sgpr - uniform values and some lane masks |
12 | /// Vgpr - divergent, non S1, values |
13 | /// Vcc - divergent S1 values(lane masks) |
14 | /// However in some cases G_ instructions with this register bank assignment |
15 | /// can't be inst-selected. This is solved in AMDGPURegBankLegalize. |
16 | //===----------------------------------------------------------------------===// |
17 | |
18 | #include "AMDGPU.h" |
19 | #include "AMDGPUGlobalISelUtils.h" |
20 | #include "GCNSubtarget.h" |
21 | #include "llvm/CodeGen/GlobalISel/CSEInfo.h" |
22 | #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" |
23 | #include "llvm/CodeGen/MachineUniformityAnalysis.h" |
24 | #include "llvm/CodeGen/TargetPassConfig.h" |
25 | #include "llvm/InitializePasses.h" |
26 | |
27 | #define DEBUG_TYPE "amdgpu-regbankselect" |
28 | |
29 | using namespace llvm; |
30 | using namespace AMDGPU; |
31 | |
32 | namespace { |
33 | |
34 | class AMDGPURegBankSelect : public MachineFunctionPass { |
35 | public: |
36 | static char ID; |
37 | |
38 | AMDGPURegBankSelect() : MachineFunctionPass(ID) {} |
39 | |
40 | bool runOnMachineFunction(MachineFunction &MF) override; |
41 | |
42 | StringRef getPassName() const override { |
43 | return "AMDGPU Register Bank Select" ; |
44 | } |
45 | |
46 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
47 | AU.addRequired<TargetPassConfig>(); |
48 | AU.addRequired<GISelCSEAnalysisWrapperPass>(); |
49 | AU.addRequired<MachineUniformityAnalysisPass>(); |
50 | MachineFunctionPass::getAnalysisUsage(AU); |
51 | } |
52 | |
53 | // This pass assigns register banks to all virtual registers, and we maintain |
54 | // this property in subsequent passes |
55 | MachineFunctionProperties getSetProperties() const override { |
56 | return MachineFunctionProperties().setRegBankSelected(); |
57 | } |
58 | }; |
59 | |
60 | } // End anonymous namespace. |
61 | |
62 | INITIALIZE_PASS_BEGIN(AMDGPURegBankSelect, DEBUG_TYPE, |
63 | "AMDGPU Register Bank Select" , false, false) |
64 | INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) |
65 | INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass) |
66 | INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass) |
67 | INITIALIZE_PASS_END(AMDGPURegBankSelect, DEBUG_TYPE, |
68 | "AMDGPU Register Bank Select" , false, false) |
69 | |
70 | char AMDGPURegBankSelect::ID = 0; |
71 | |
72 | char &llvm::AMDGPURegBankSelectID = AMDGPURegBankSelect::ID; |
73 | |
74 | FunctionPass *llvm::createAMDGPURegBankSelectPass() { |
75 | return new AMDGPURegBankSelect(); |
76 | } |
77 | |
78 | class RegBankSelectHelper { |
79 | MachineIRBuilder &B; |
80 | MachineRegisterInfo &MRI; |
81 | AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA; |
82 | const MachineUniformityInfo &MUI; |
83 | const SIRegisterInfo &TRI; |
84 | const RegisterBank *SgprRB; |
85 | const RegisterBank *VgprRB; |
86 | const RegisterBank *VccRB; |
87 | |
88 | public: |
89 | RegBankSelectHelper(MachineIRBuilder &B, |
90 | AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA, |
91 | const MachineUniformityInfo &MUI, |
92 | const SIRegisterInfo &TRI, const RegisterBankInfo &RBI) |
93 | : B(B), MRI(*B.getMRI()), ILMA(ILMA), MUI(MUI), TRI(TRI), |
94 | SgprRB(&RBI.getRegBank(ID: AMDGPU::SGPRRegBankID)), |
95 | VgprRB(&RBI.getRegBank(ID: AMDGPU::VGPRRegBankID)), |
96 | VccRB(&RBI.getRegBank(ID: AMDGPU::VCCRegBankID)) {} |
97 | |
98 | // Temporal divergence copy: COPY to vgpr with implicit use of $exec inside of |
99 | // the cycle |
100 | // Note: uniformity analysis does not consider that registers with vgpr def |
101 | // are divergent (you can have uniform value in vgpr). |
102 | // - TODO: implicit use of $exec could be implemented as indicator that |
103 | // instruction is divergent |
104 | bool isTemporalDivergenceCopy(Register Reg) { |
105 | MachineInstr *MI = MRI.getVRegDef(Reg); |
106 | if (!MI->isCopy() || MI->getNumImplicitOperands() != 1) |
107 | return false; |
108 | |
109 | return MI->implicit_operands().begin()->getReg() == TRI.getExec(); |
110 | } |
111 | |
112 | const RegisterBank *getRegBankToAssign(Register Reg) { |
113 | if (!isTemporalDivergenceCopy(Reg) && |
114 | (MUI.isUniform(V: Reg) || ILMA.isS32S64LaneMask(Reg))) |
115 | return SgprRB; |
116 | if (MRI.getType(Reg) == LLT::scalar(SizeInBits: 1)) |
117 | return VccRB; |
118 | return VgprRB; |
119 | } |
120 | |
121 | // %rc:RegClass(s32) = G_ ... |
122 | // ... |
123 | // %a = G_ ..., %rc |
124 | // -> |
125 | // %rb:RegBank(s32) = G_ ... |
126 | // %rc:RegClass(s32) = COPY %rb |
127 | // ... |
128 | // %a = G_ ..., %rb |
129 | void reAssignRegBankOnDef(MachineInstr &MI, MachineOperand &DefOP, |
130 | const RegisterBank *RB) { |
131 | // Register that already has Register class got it during pre-inst selection |
132 | // of another instruction. Maybe cross bank copy was required so we insert a |
133 | // copy that can be removed later. This simplifies post regbanklegalize |
134 | // combiner and avoids need to special case some patterns. |
135 | Register Reg = DefOP.getReg(); |
136 | LLT Ty = MRI.getType(Reg); |
137 | Register NewReg = MRI.createVirtualRegister(RegAttr: {.RCOrRB: RB, .Ty: Ty}); |
138 | DefOP.setReg(NewReg); |
139 | |
140 | auto &MBB = *MI.getParent(); |
141 | B.setInsertPt(MBB, II: MBB.SkipPHIsAndLabels(I: std::next(x: MI.getIterator()))); |
142 | B.buildCopy(Res: Reg, Op: NewReg); |
143 | |
144 | // The problem was discovered for uniform S1 that was used as both |
145 | // lane mask(vcc) and regular sgpr S1. |
146 | // - lane-mask(vcc) use was by si_if, this use is divergent and requires |
147 | // non-trivial sgpr-S1-to-vcc copy. But pre-inst-selection of si_if sets |
148 | // sreg_64_xexec(S1) on def of uniform S1 making it lane-mask. |
149 | // - the regular sgpr S1(uniform) instruction is now broken since |
150 | // it uses sreg_64_xexec(S1) which is divergent. |
151 | |
152 | // Replace virtual registers with register class on generic instructions |
153 | // uses with virtual registers with register bank. |
154 | for (auto &UseMI : make_early_inc_range(Range: MRI.use_instructions(Reg))) { |
155 | if (UseMI.isPreISelOpcode()) { |
156 | for (MachineOperand &Op : UseMI.operands()) { |
157 | if (Op.isReg() && Op.getReg() == Reg) |
158 | Op.setReg(NewReg); |
159 | } |
160 | } |
161 | } |
162 | } |
163 | |
164 | // %a = G_ ..., %rc |
165 | // -> |
166 | // %rb:RegBank(s32) = COPY %rc |
167 | // %a = G_ ..., %rb |
168 | void constrainRegBankUse(MachineInstr &MI, MachineOperand &UseOP, |
169 | const RegisterBank *RB) { |
170 | Register Reg = UseOP.getReg(); |
171 | |
172 | LLT Ty = MRI.getType(Reg); |
173 | Register NewReg = MRI.createVirtualRegister(RegAttr: {.RCOrRB: RB, .Ty: Ty}); |
174 | UseOP.setReg(NewReg); |
175 | |
176 | if (MI.isPHI()) { |
177 | auto DefMI = MRI.getVRegDef(Reg)->getIterator(); |
178 | MachineBasicBlock *DefMBB = DefMI->getParent(); |
179 | B.setInsertPt(MBB&: *DefMBB, II: DefMBB->SkipPHIsAndLabels(I: std::next(x: DefMI))); |
180 | } else { |
181 | B.setInstr(MI); |
182 | } |
183 | |
184 | B.buildCopy(Res: NewReg, Op: Reg); |
185 | } |
186 | }; |
187 | |
188 | static Register getVReg(MachineOperand &Op) { |
189 | if (!Op.isReg()) |
190 | return {}; |
191 | |
192 | // Operands of COPY and G_SI_CALL can be physical registers. |
193 | Register Reg = Op.getReg(); |
194 | if (!Reg.isVirtual()) |
195 | return {}; |
196 | |
197 | return Reg; |
198 | } |
199 | |
200 | bool AMDGPURegBankSelect::runOnMachineFunction(MachineFunction &MF) { |
201 | if (MF.getProperties().hasFailedISel()) |
202 | return false; |
203 | |
204 | // Setup the instruction builder with CSE. |
205 | const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>(); |
206 | GISelCSEAnalysisWrapper &Wrapper = |
207 | getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper(); |
208 | GISelCSEInfo &CSEInfo = Wrapper.get(CSEOpt: TPC.getCSEConfig()); |
209 | GISelObserverWrapper Observer; |
210 | Observer.addObserver(O: &CSEInfo); |
211 | |
212 | CSEMIRBuilder B(MF); |
213 | B.setCSEInfo(&CSEInfo); |
214 | B.setChangeObserver(Observer); |
215 | |
216 | RAIIDelegateInstaller DelegateInstaller(MF, &Observer); |
217 | RAIIMFObserverInstaller MFObserverInstaller(MF, Observer); |
218 | |
219 | IntrinsicLaneMaskAnalyzer ILMA(MF); |
220 | MachineUniformityInfo &MUI = |
221 | getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo(); |
222 | MachineRegisterInfo &MRI = *B.getMRI(); |
223 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
224 | RegBankSelectHelper RBSHelper(B, ILMA, MUI, *ST.getRegisterInfo(), |
225 | *ST.getRegBankInfo()); |
226 | // Virtual registers at this point don't have register banks. |
227 | // Virtual registers in def and use operands of already inst-selected |
228 | // instruction have register class. |
229 | |
230 | for (MachineBasicBlock &MBB : MF) { |
231 | for (MachineInstr &MI : MBB) { |
232 | // Vregs in def and use operands of COPY can have either register class |
233 | // or bank. If there is neither on vreg in def operand, assign bank. |
234 | if (MI.isCopy()) { |
235 | Register DefReg = getVReg(Op&: MI.getOperand(i: 0)); |
236 | if (!DefReg.isValid() || MRI.getRegClassOrNull(Reg: DefReg)) |
237 | continue; |
238 | |
239 | assert(!MRI.getRegBankOrNull(DefReg)); |
240 | MRI.setRegBank(Reg: DefReg, RegBank: *RBSHelper.getRegBankToAssign(Reg: DefReg)); |
241 | continue; |
242 | } |
243 | |
244 | if (!MI.isPreISelOpcode()) |
245 | continue; |
246 | |
247 | // Vregs in def and use operands of G_ instructions need to have register |
248 | // banks assigned. Before this loop possible case are |
249 | // - (1) vreg without register class or bank in def or use operand |
250 | // - (2) vreg with register class in def operand |
251 | // - (3) vreg, defined by G_ instruction, in use operand |
252 | // - (4) vreg, defined by pre-inst-selected instruction, in use operand |
253 | |
254 | // First three cases are handled in loop through all def operands of G_ |
255 | // instructions. For case (1) simply setRegBank. Cases (2) and (3) are |
256 | // handled by reAssignRegBankOnDef. |
257 | for (MachineOperand &DefOP : MI.defs()) { |
258 | Register DefReg = getVReg(Op&: DefOP); |
259 | if (!DefReg.isValid()) |
260 | continue; |
261 | |
262 | const RegisterBank *RB = RBSHelper.getRegBankToAssign(Reg: DefReg); |
263 | if (MRI.getRegClassOrNull(Reg: DefReg)) |
264 | RBSHelper.reAssignRegBankOnDef(MI, DefOP, RB); |
265 | else { |
266 | assert(!MRI.getRegBankOrNull(DefReg)); |
267 | MRI.setRegBank(Reg: DefReg, RegBank: *RB); |
268 | } |
269 | } |
270 | |
271 | // Register bank select doesn't modify pre-inst-selected instructions. |
272 | // For case (4) need to insert a copy, handled by constrainRegBankUse. |
273 | for (MachineOperand &UseOP : MI.uses()) { |
274 | Register UseReg = getVReg(Op&: UseOP); |
275 | if (!UseReg.isValid()) |
276 | continue; |
277 | |
278 | // Skip case (3). |
279 | if (!MRI.getRegClassOrNull(Reg: UseReg) || |
280 | MRI.getVRegDef(Reg: UseReg)->isPreISelOpcode()) |
281 | continue; |
282 | |
283 | // Use with register class defined by pre-inst-selected instruction. |
284 | const RegisterBank *RB = RBSHelper.getRegBankToAssign(Reg: UseReg); |
285 | RBSHelper.constrainRegBankUse(MI, UseOP, RB); |
286 | } |
287 | } |
288 | } |
289 | |
290 | return true; |
291 | } |
292 | |