| 1 | //===- GCNCreateVOPD.cpp - Create VOPD Instructions ----------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
/// \file
/// Combine pairs of VALU instructions into VOPD instructions.
/// This only works on wave32.
/// VOPD has register requirements; a pair is not combined if those
/// requirements are not met.
/// The shouldCombineVOPD mutator in the postRA machine scheduler puts
/// candidate instructions for VOPD back-to-back.
///
| 17 | // |
| 18 | //===----------------------------------------------------------------------===// |
| 19 | |
| 20 | #include "AMDGPU.h" |
| 21 | #include "GCNSubtarget.h" |
| 22 | #include "GCNVOPDUtils.h" |
| 23 | #include "SIInstrInfo.h" |
| 24 | #include "Utils/AMDGPUBaseInfo.h" |
| 25 | #include "llvm/ADT/SmallVector.h" |
| 26 | #include "llvm/ADT/Statistic.h" |
| 27 | #include "llvm/CodeGen/MachineBasicBlock.h" |
| 28 | #include "llvm/CodeGen/MachineInstr.h" |
| 29 | #include "llvm/CodeGen/MachineOperand.h" |
| 30 | #include "llvm/CodeGen/MachinePassManager.h" |
| 31 | #include "llvm/Support/Debug.h" |
| 32 | |
| 33 | #define DEBUG_TYPE "gcn-create-vopd" |
| 34 | STATISTIC(NumVOPDCreated, "Number of VOPD Insts Created." ); |
| 35 | |
| 36 | using namespace llvm; |
| 37 | |
| 38 | namespace { |
| 39 | |
| 40 | class GCNCreateVOPD { |
| 41 | private: |
| 42 | class VOPDCombineInfo { |
| 43 | public: |
| 44 | VOPDCombineInfo() = default; |
| 45 | VOPDCombineInfo(MachineInstr *First, MachineInstr *Second, |
| 46 | bool VOPD3 = false) |
| 47 | : FirstMI(First), SecondMI(Second), IsVOPD3(VOPD3) {} |
| 48 | |
| 49 | MachineInstr *FirstMI; |
| 50 | MachineInstr *SecondMI; |
| 51 | bool IsVOPD3; |
| 52 | }; |
| 53 | |
| 54 | public: |
| 55 | const GCNSubtarget *ST = nullptr; |
| 56 | |
| 57 | bool doReplace(const SIInstrInfo *SII, VOPDCombineInfo &CI) { |
| 58 | auto *FirstMI = CI.FirstMI; |
| 59 | auto *SecondMI = CI.SecondMI; |
| 60 | unsigned Opc1 = FirstMI->getOpcode(); |
| 61 | unsigned Opc2 = SecondMI->getOpcode(); |
| 62 | unsigned EncodingFamily = |
| 63 | AMDGPU::getVOPDEncodingFamily(ST: SII->getSubtarget()); |
| 64 | int NewOpcode = AMDGPU::getVOPDFull(OpX: AMDGPU::getVOPDOpcode(Opc: Opc1, VOPD3: CI.IsVOPD3), |
| 65 | OpY: AMDGPU::getVOPDOpcode(Opc: Opc2, VOPD3: CI.IsVOPD3), |
| 66 | EncodingFamily, VOPD3: CI.IsVOPD3); |
| 67 | assert(NewOpcode != -1 && |
| 68 | "Should have previously determined this as a possible VOPD\n" ); |
| 69 | |
| 70 | auto VOPDInst = BuildMI(BB&: *FirstMI->getParent(), I: FirstMI, |
| 71 | MIMD: FirstMI->getDebugLoc(), MCID: SII->get(Opcode: NewOpcode)) |
| 72 | .setMIFlags(FirstMI->getFlags() | SecondMI->getFlags()); |
| 73 | |
| 74 | namespace VOPD = AMDGPU::VOPD; |
| 75 | MachineInstr *MI[] = {FirstMI, SecondMI}; |
| 76 | auto InstInfo = |
| 77 | AMDGPU::getVOPDInstInfo(OpX: FirstMI->getDesc(), OpY: SecondMI->getDesc()); |
| 78 | |
| 79 | for (auto CompIdx : VOPD::COMPONENTS) { |
| 80 | auto MCOprIdx = InstInfo[CompIdx].getIndexOfDstInMCOperands(); |
| 81 | VOPDInst.add(MO: MI[CompIdx]->getOperand(i: MCOprIdx)); |
| 82 | } |
| 83 | |
| 84 | const AMDGPU::OpName Mods[2][3] = { |
| 85 | {AMDGPU::OpName::src0X_modifiers, AMDGPU::OpName::vsrc1X_modifiers, |
| 86 | AMDGPU::OpName::vsrc2X_modifiers}, |
| 87 | {AMDGPU::OpName::src0Y_modifiers, AMDGPU::OpName::vsrc1Y_modifiers, |
| 88 | AMDGPU::OpName::vsrc2Y_modifiers}}; |
| 89 | const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers, |
| 90 | AMDGPU::OpName::src1_modifiers, |
| 91 | AMDGPU::OpName::src2_modifiers}; |
| 92 | const unsigned VOPDOpc = VOPDInst->getOpcode(); |
| 93 | |
| 94 | for (auto CompIdx : VOPD::COMPONENTS) { |
| 95 | auto CompSrcOprNum = InstInfo[CompIdx].getCompSrcOperandsNum(); |
| 96 | bool IsVOP3 = SII->isVOP3(MI: *MI[CompIdx]); |
| 97 | for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOprNum; ++CompSrcIdx) { |
| 98 | if (AMDGPU::hasNamedOperand(Opcode: VOPDOpc, NamedIdx: Mods[CompIdx][CompSrcIdx])) { |
| 99 | const MachineOperand *Mod = |
| 100 | SII->getNamedOperand(MI&: *MI[CompIdx], OperandName: SrcMods[CompSrcIdx]); |
| 101 | VOPDInst.addImm(Val: Mod ? Mod->getImm() : 0); |
| 102 | } |
| 103 | auto MCOprIdx = |
| 104 | InstInfo[CompIdx].getIndexOfSrcInMCOperands(CompSrcIdx, VOPD3: IsVOP3); |
| 105 | VOPDInst.add(MO: MI[CompIdx]->getOperand(i: MCOprIdx)); |
| 106 | } |
| 107 | if (MI[CompIdx]->getOpcode() == AMDGPU::V_CNDMASK_B32_e32 && CI.IsVOPD3) |
| 108 | VOPDInst.addReg(RegNo: AMDGPU::VCC_LO); |
| 109 | } |
| 110 | |
| 111 | if (CI.IsVOPD3) { |
| 112 | if (unsigned BitOp2 = AMDGPU::getBitOp2(Opc: Opc2)) |
| 113 | VOPDInst.addImm(Val: BitOp2); |
| 114 | } |
| 115 | |
| 116 | SII->fixImplicitOperands(MI&: *VOPDInst); |
| 117 | for (auto CompIdx : VOPD::COMPONENTS) |
| 118 | VOPDInst.copyImplicitOps(OtherMI: *MI[CompIdx]); |
| 119 | |
| 120 | LLVM_DEBUG(dbgs() << "VOPD Fused: " << *VOPDInst << " from\tX: " |
| 121 | << *CI.FirstMI << "\tY: " << *CI.SecondMI << "\n" ); |
| 122 | |
| 123 | for (auto CompIdx : VOPD::COMPONENTS) |
| 124 | MI[CompIdx]->eraseFromParent(); |
| 125 | |
| 126 | ++NumVOPDCreated; |
| 127 | return true; |
| 128 | } |
| 129 | |
| 130 | bool run(MachineFunction &MF) { |
| 131 | ST = &MF.getSubtarget<GCNSubtarget>(); |
| 132 | if (!AMDGPU::hasVOPD(STI: *ST) || !ST->isWave32()) |
| 133 | return false; |
| 134 | LLVM_DEBUG(dbgs() << "CreateVOPD Pass:\n" ); |
| 135 | |
| 136 | const SIInstrInfo *SII = ST->getInstrInfo(); |
| 137 | const SIRegisterInfo *TRI = ST->getRegisterInfo(); |
| 138 | bool Changed = false; |
| 139 | unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(ST: *ST); |
| 140 | bool HasVOPD3 = ST->hasVOPD3(); |
| 141 | |
| 142 | SmallVector<VOPDCombineInfo> ReplaceCandidates; |
| 143 | |
| 144 | for (auto &MBB : MF) { |
| 145 | auto MII = MBB.begin(), E = MBB.end(); |
| 146 | while (MII != E) { |
| 147 | auto *FirstMI = &*MII; |
| 148 | MII = next_nodbg(It: MII, End: MBB.end()); |
| 149 | if (MII == MBB.end()) |
| 150 | break; |
| 151 | if (FirstMI->isDebugInstr()) |
| 152 | continue; |
| 153 | auto *SecondMI = &*MII; |
| 154 | unsigned Opc = FirstMI->getOpcode(); |
| 155 | unsigned Opc2 = SecondMI->getOpcode(); |
| 156 | VOPDCombineInfo CI; |
| 157 | |
| 158 | const auto checkVOPD = [&](bool VOPD3) -> bool { |
| 159 | llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD = |
| 160 | AMDGPU::getCanBeVOPD(Opc, EncodingFamily, VOPD3); |
| 161 | llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD = |
| 162 | AMDGPU::getCanBeVOPD(Opc: Opc2, EncodingFamily, VOPD3); |
| 163 | |
| 164 | if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y && |
| 165 | llvm::checkVOPDRegConstraints(TII: *SII, FirstMI: *FirstMI, SecondMI: *SecondMI, IsVOPD3: VOPD3)) { |
| 166 | CI = VOPDCombineInfo(FirstMI, SecondMI, VOPD3); |
| 167 | return true; |
| 168 | } |
| 169 | // We can try swapping the order of the instructions, but in that case |
| 170 | // neither instruction can write to a register the other reads from. |
| 171 | // OpX cannot write something OpY reads because that is the hardware |
| 172 | // rule, and OpY cannot write what OpX reads because that would |
| 173 | // violate the data dependency in the original order. |
| 174 | for (const auto &Use : SecondMI->uses()) |
| 175 | if (Use.isReg() && FirstMI->modifiesRegister(Reg: Use.getReg(), TRI)) |
| 176 | return false; |
| 177 | if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X && |
| 178 | llvm::checkVOPDRegConstraints(TII: *SII, FirstMI: *SecondMI, SecondMI: *FirstMI, IsVOPD3: VOPD3)) { |
| 179 | CI = VOPDCombineInfo(SecondMI, FirstMI, VOPD3); |
| 180 | return true; |
| 181 | } |
| 182 | return false; |
| 183 | }; |
| 184 | |
| 185 | if (checkVOPD(false) || (HasVOPD3 && checkVOPD(true))) { |
| 186 | ReplaceCandidates.push_back(Elt: CI); |
| 187 | ++MII; |
| 188 | } |
| 189 | } |
| 190 | } |
| 191 | for (auto &CI : ReplaceCandidates) { |
| 192 | Changed |= doReplace(SII, CI); |
| 193 | } |
| 194 | |
| 195 | return Changed; |
| 196 | } |
| 197 | }; |
| 198 | |
| 199 | class GCNCreateVOPDLegacy : public MachineFunctionPass { |
| 200 | public: |
| 201 | static char ID; |
| 202 | GCNCreateVOPDLegacy() : MachineFunctionPass(ID) {} |
| 203 | |
| 204 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 205 | AU.setPreservesCFG(); |
| 206 | MachineFunctionPass::getAnalysisUsage(AU); |
| 207 | } |
| 208 | |
| 209 | StringRef getPassName() const override { |
| 210 | return "GCN Create VOPD Instructions" ; |
| 211 | } |
| 212 | bool runOnMachineFunction(MachineFunction &MF) override { |
| 213 | if (skipFunction(F: MF.getFunction())) |
| 214 | return false; |
| 215 | |
| 216 | return GCNCreateVOPD().run(MF); |
| 217 | } |
| 218 | }; |
| 219 | |
| 220 | } // namespace |
| 221 | |
| 222 | PreservedAnalyses |
| 223 | llvm::GCNCreateVOPDPass::run(MachineFunction &MF, |
| 224 | MachineFunctionAnalysisManager &AM) { |
| 225 | if (!GCNCreateVOPD().run(MF)) |
| 226 | return PreservedAnalyses::all(); |
| 227 | return getMachineFunctionPassPreservedAnalyses().preserveSet<CFGAnalyses>(); |
| 228 | } |
| 229 | |
| 230 | char GCNCreateVOPDLegacy::ID = 0; |
| 231 | |
| 232 | char &llvm::GCNCreateVOPDID = GCNCreateVOPDLegacy::ID; |
| 233 | |
| 234 | INITIALIZE_PASS(GCNCreateVOPDLegacy, DEBUG_TYPE, "GCN Create VOPD Instructions" , |
| 235 | false, false) |
| 236 | |