| 1 | //===-- AMDGPURegBankSelect.cpp -------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | /// Assign register banks to all register operands of G_ instructions using |
| 10 | /// machine uniformity analysis. |
| 11 | /// Sgpr - uniform values and some lane masks |
| 12 | /// Vgpr - divergent, non S1, values |
| 13 | /// Vcc - divergent S1 values(lane masks) |
| 14 | /// However in some cases G_ instructions with this register bank assignment |
| 15 | /// can't be inst-selected. This is solved in AMDGPURegBankLegalize. |
| 16 | //===----------------------------------------------------------------------===// |
| 17 | |
| 18 | #include "AMDGPU.h" |
| 19 | #include "AMDGPUGlobalISelUtils.h" |
| 20 | #include "GCNSubtarget.h" |
| 21 | #include "llvm/CodeGen/GlobalISel/CSEInfo.h" |
| 22 | #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" |
| 23 | #include "llvm/CodeGen/MachineUniformityAnalysis.h" |
| 24 | #include "llvm/CodeGen/TargetPassConfig.h" |
| 25 | #include "llvm/InitializePasses.h" |
| 26 | |
| 27 | #define DEBUG_TYPE "amdgpu-regbankselect" |
| 28 | |
| 29 | using namespace llvm; |
| 30 | using namespace AMDGPU; |
| 31 | |
| 32 | namespace { |
| 33 | |
| 34 | class AMDGPURegBankSelect : public MachineFunctionPass { |
| 35 | public: |
| 36 | static char ID; |
| 37 | |
| 38 | AMDGPURegBankSelect() : MachineFunctionPass(ID) {} |
| 39 | |
| 40 | bool runOnMachineFunction(MachineFunction &MF) override; |
| 41 | |
| 42 | StringRef getPassName() const override { |
| 43 | return "AMDGPU Register Bank Select" ; |
| 44 | } |
| 45 | |
| 46 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 47 | AU.addRequired<TargetPassConfig>(); |
| 48 | AU.addRequired<GISelCSEAnalysisWrapperPass>(); |
| 49 | AU.addRequired<MachineUniformityAnalysisPass>(); |
| 50 | MachineFunctionPass::getAnalysisUsage(AU); |
| 51 | } |
| 52 | |
| 53 | // This pass assigns register banks to all virtual registers, and we maintain |
| 54 | // this property in subsequent passes |
| 55 | MachineFunctionProperties getSetProperties() const override { |
| 56 | return MachineFunctionProperties().setRegBankSelected(); |
| 57 | } |
| 58 | }; |
| 59 | |
| 60 | } // End anonymous namespace. |
| 61 | |
| 62 | INITIALIZE_PASS_BEGIN(AMDGPURegBankSelect, DEBUG_TYPE, |
| 63 | "AMDGPU Register Bank Select" , false, false) |
| 64 | INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) |
| 65 | INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass) |
| 66 | INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass) |
| 67 | INITIALIZE_PASS_END(AMDGPURegBankSelect, DEBUG_TYPE, |
| 68 | "AMDGPU Register Bank Select" , false, false) |
| 69 | |
| 70 | char AMDGPURegBankSelect::ID = 0; |
| 71 | |
| 72 | char &llvm::AMDGPURegBankSelectID = AMDGPURegBankSelect::ID; |
| 73 | |
| 74 | FunctionPass *llvm::createAMDGPURegBankSelectPass() { |
| 75 | return new AMDGPURegBankSelect(); |
| 76 | } |
| 77 | |
| 78 | class RegBankSelectHelper { |
| 79 | MachineIRBuilder &B; |
| 80 | MachineRegisterInfo &MRI; |
| 81 | AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA; |
| 82 | const MachineUniformityInfo &MUI; |
| 83 | const SIRegisterInfo &TRI; |
| 84 | const RegisterBank *SgprRB; |
| 85 | const RegisterBank *VgprRB; |
| 86 | const RegisterBank *VccRB; |
| 87 | |
| 88 | public: |
| 89 | RegBankSelectHelper(MachineIRBuilder &B, |
| 90 | AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA, |
| 91 | const MachineUniformityInfo &MUI, |
| 92 | const SIRegisterInfo &TRI, const RegisterBankInfo &RBI) |
| 93 | : B(B), MRI(*B.getMRI()), ILMA(ILMA), MUI(MUI), TRI(TRI), |
| 94 | SgprRB(&RBI.getRegBank(ID: AMDGPU::SGPRRegBankID)), |
| 95 | VgprRB(&RBI.getRegBank(ID: AMDGPU::VGPRRegBankID)), |
| 96 | VccRB(&RBI.getRegBank(ID: AMDGPU::VCCRegBankID)) {} |
| 97 | |
| 98 | // Temporal divergence copy: COPY to vgpr with implicit use of $exec inside of |
| 99 | // the cycle |
| 100 | // Note: uniformity analysis does not consider that registers with vgpr def |
| 101 | // are divergent (you can have uniform value in vgpr). |
| 102 | // - TODO: implicit use of $exec could be implemented as indicator that |
| 103 | // instruction is divergent |
| 104 | bool isTemporalDivergenceCopy(Register Reg) { |
| 105 | MachineInstr *MI = MRI.getVRegDef(Reg); |
| 106 | if (!MI->isCopy() || MI->getNumImplicitOperands() != 1) |
| 107 | return false; |
| 108 | |
| 109 | return MI->implicit_operands().begin()->getReg() == TRI.getExec(); |
| 110 | } |
| 111 | |
| 112 | const RegisterBank *getRegBankToAssign(Register Reg) { |
| 113 | if (!isTemporalDivergenceCopy(Reg) && |
| 114 | (MUI.isUniform(V: Reg) || ILMA.isS32S64LaneMask(Reg))) |
| 115 | return SgprRB; |
| 116 | if (MRI.getType(Reg) == LLT::scalar(SizeInBits: 1)) |
| 117 | return VccRB; |
| 118 | return VgprRB; |
| 119 | } |
| 120 | |
| 121 | // %rc:RegClass(s32) = G_ ... |
| 122 | // ... |
| 123 | // %a = G_ ..., %rc |
| 124 | // -> |
| 125 | // %rb:RegBank(s32) = G_ ... |
| 126 | // %rc:RegClass(s32) = COPY %rb |
| 127 | // ... |
| 128 | // %a = G_ ..., %rb |
| 129 | void reAssignRegBankOnDef(MachineInstr &MI, MachineOperand &DefOP, |
| 130 | const RegisterBank *RB) { |
| 131 | // Register that already has Register class got it during pre-inst selection |
| 132 | // of another instruction. Maybe cross bank copy was required so we insert a |
| 133 | // copy that can be removed later. This simplifies post regbanklegalize |
| 134 | // combiner and avoids need to special case some patterns. |
| 135 | Register Reg = DefOP.getReg(); |
| 136 | LLT Ty = MRI.getType(Reg); |
| 137 | Register NewReg = MRI.createVirtualRegister(RegAttr: {.RCOrRB: RB, .Ty: Ty}); |
| 138 | DefOP.setReg(NewReg); |
| 139 | |
| 140 | auto &MBB = *MI.getParent(); |
| 141 | B.setInsertPt(MBB, II: MBB.SkipPHIsAndLabels(I: std::next(x: MI.getIterator()))); |
| 142 | B.buildCopy(Res: Reg, Op: NewReg); |
| 143 | |
| 144 | // The problem was discovered for uniform S1 that was used as both |
| 145 | // lane mask(vcc) and regular sgpr S1. |
| 146 | // - lane-mask(vcc) use was by si_if, this use is divergent and requires |
| 147 | // non-trivial sgpr-S1-to-vcc copy. But pre-inst-selection of si_if sets |
| 148 | // sreg_64_xexec(S1) on def of uniform S1 making it lane-mask. |
| 149 | // - the regular sgpr S1(uniform) instruction is now broken since |
| 150 | // it uses sreg_64_xexec(S1) which is divergent. |
| 151 | |
| 152 | // Replace virtual registers with register class on generic instructions |
| 153 | // uses with virtual registers with register bank. |
| 154 | for (auto &UseMI : make_early_inc_range(Range: MRI.use_instructions(Reg))) { |
| 155 | if (UseMI.isPreISelOpcode()) { |
| 156 | for (MachineOperand &Op : UseMI.operands()) { |
| 157 | if (Op.isReg() && Op.getReg() == Reg) |
| 158 | Op.setReg(NewReg); |
| 159 | } |
| 160 | } |
| 161 | } |
| 162 | } |
| 163 | |
| 164 | // %a = G_ ..., %rc |
| 165 | // -> |
| 166 | // %rb:RegBank(s32) = COPY %rc |
| 167 | // %a = G_ ..., %rb |
| 168 | void constrainRegBankUse(MachineInstr &MI, MachineOperand &UseOP, |
| 169 | const RegisterBank *RB) { |
| 170 | Register Reg = UseOP.getReg(); |
| 171 | |
| 172 | LLT Ty = MRI.getType(Reg); |
| 173 | Register NewReg = MRI.createVirtualRegister(RegAttr: {.RCOrRB: RB, .Ty: Ty}); |
| 174 | UseOP.setReg(NewReg); |
| 175 | |
| 176 | if (MI.isPHI()) { |
| 177 | auto DefMI = MRI.getVRegDef(Reg)->getIterator(); |
| 178 | MachineBasicBlock *DefMBB = DefMI->getParent(); |
| 179 | B.setInsertPt(MBB&: *DefMBB, II: DefMBB->SkipPHIsAndLabels(I: std::next(x: DefMI))); |
| 180 | } else { |
| 181 | B.setInstr(MI); |
| 182 | } |
| 183 | |
| 184 | B.buildCopy(Res: NewReg, Op: Reg); |
| 185 | } |
| 186 | }; |
| 187 | |
| 188 | static Register getVReg(MachineOperand &Op) { |
| 189 | if (!Op.isReg()) |
| 190 | return {}; |
| 191 | |
| 192 | // Operands of COPY and G_SI_CALL can be physical registers. |
| 193 | Register Reg = Op.getReg(); |
| 194 | if (!Reg.isVirtual()) |
| 195 | return {}; |
| 196 | |
| 197 | return Reg; |
| 198 | } |
| 199 | |
| 200 | bool AMDGPURegBankSelect::runOnMachineFunction(MachineFunction &MF) { |
| 201 | if (MF.getProperties().hasFailedISel()) |
| 202 | return false; |
| 203 | |
| 204 | // Setup the instruction builder with CSE. |
| 205 | const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>(); |
| 206 | GISelCSEAnalysisWrapper &Wrapper = |
| 207 | getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper(); |
| 208 | GISelCSEInfo &CSEInfo = Wrapper.get(CSEOpt: TPC.getCSEConfig()); |
| 209 | GISelObserverWrapper Observer; |
| 210 | Observer.addObserver(O: &CSEInfo); |
| 211 | |
| 212 | CSEMIRBuilder B(MF); |
| 213 | B.setCSEInfo(&CSEInfo); |
| 214 | B.setChangeObserver(Observer); |
| 215 | |
| 216 | RAIIDelegateInstaller DelegateInstaller(MF, &Observer); |
| 217 | RAIIMFObserverInstaller MFObserverInstaller(MF, Observer); |
| 218 | |
| 219 | IntrinsicLaneMaskAnalyzer ILMA(MF); |
| 220 | MachineUniformityInfo &MUI = |
| 221 | getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo(); |
| 222 | MachineRegisterInfo &MRI = *B.getMRI(); |
| 223 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
| 224 | RegBankSelectHelper RBSHelper(B, ILMA, MUI, *ST.getRegisterInfo(), |
| 225 | *ST.getRegBankInfo()); |
| 226 | // Virtual registers at this point don't have register banks. |
| 227 | // Virtual registers in def and use operands of already inst-selected |
| 228 | // instruction have register class. |
| 229 | |
| 230 | for (MachineBasicBlock &MBB : MF) { |
| 231 | for (MachineInstr &MI : MBB) { |
| 232 | // Vregs in def and use operands of COPY can have either register class |
| 233 | // or bank. If there is neither on vreg in def operand, assign bank. |
| 234 | if (MI.isCopy()) { |
| 235 | Register DefReg = getVReg(Op&: MI.getOperand(i: 0)); |
| 236 | if (!DefReg.isValid() || MRI.getRegClassOrNull(Reg: DefReg)) |
| 237 | continue; |
| 238 | |
| 239 | assert(!MRI.getRegBankOrNull(DefReg)); |
| 240 | MRI.setRegBank(Reg: DefReg, RegBank: *RBSHelper.getRegBankToAssign(Reg: DefReg)); |
| 241 | continue; |
| 242 | } |
| 243 | |
| 244 | if (!MI.isPreISelOpcode()) |
| 245 | continue; |
| 246 | |
| 247 | // Vregs in def and use operands of G_ instructions need to have register |
| 248 | // banks assigned. Before this loop possible case are |
| 249 | // - (1) vreg without register class or bank in def or use operand |
| 250 | // - (2) vreg with register class in def operand |
| 251 | // - (3) vreg, defined by G_ instruction, in use operand |
| 252 | // - (4) vreg, defined by pre-inst-selected instruction, in use operand |
| 253 | |
| 254 | // First three cases are handled in loop through all def operands of G_ |
| 255 | // instructions. For case (1) simply setRegBank. Cases (2) and (3) are |
| 256 | // handled by reAssignRegBankOnDef. |
| 257 | for (MachineOperand &DefOP : MI.defs()) { |
| 258 | Register DefReg = getVReg(Op&: DefOP); |
| 259 | if (!DefReg.isValid()) |
| 260 | continue; |
| 261 | |
| 262 | const RegisterBank *RB = RBSHelper.getRegBankToAssign(Reg: DefReg); |
| 263 | if (MRI.getRegClassOrNull(Reg: DefReg)) |
| 264 | RBSHelper.reAssignRegBankOnDef(MI, DefOP, RB); |
| 265 | else { |
| 266 | assert(!MRI.getRegBankOrNull(DefReg)); |
| 267 | MRI.setRegBank(Reg: DefReg, RegBank: *RB); |
| 268 | } |
| 269 | } |
| 270 | |
| 271 | // Register bank select doesn't modify pre-inst-selected instructions. |
| 272 | // For case (4) need to insert a copy, handled by constrainRegBankUse. |
| 273 | for (MachineOperand &UseOP : MI.uses()) { |
| 274 | Register UseReg = getVReg(Op&: UseOP); |
| 275 | if (!UseReg.isValid()) |
| 276 | continue; |
| 277 | |
| 278 | // Skip case (3). |
| 279 | if (!MRI.getRegClassOrNull(Reg: UseReg) || |
| 280 | MRI.getVRegDef(Reg: UseReg)->isPreISelOpcode()) |
| 281 | continue; |
| 282 | |
| 283 | // Use with register class defined by pre-inst-selected instruction. |
| 284 | const RegisterBank *RB = RBSHelper.getRegBankToAssign(Reg: UseReg); |
| 285 | RBSHelper.constrainRegBankUse(MI, UseOP, RB); |
| 286 | } |
| 287 | } |
| 288 | } |
| 289 | |
| 290 | return true; |
| 291 | } |
| 292 | |