| 1 | //===------------------ AMDGPUCustomBehaviour.cpp ---------------*-C++ -* -===// | 
|---|
| 2 | // | 
|---|
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|---|
| 4 | // See https://llvm.org/LICENSE.txt for license information. | 
|---|
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|---|
| 6 | // | 
|---|
| 7 | //===----------------------------------------------------------------------===// | 
|---|
| 8 | /// \file | 
|---|
| 9 | /// | 
|---|
| 10 | /// This file implements methods from the AMDGPUCustomBehaviour class. | 
|---|
| 11 | /// | 
|---|
| 12 | //===----------------------------------------------------------------------===// | 
|---|
| 13 |  | 
|---|
| 14 | #include "AMDGPUCustomBehaviour.h" | 
|---|
| 15 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" | 
|---|
| 16 | #include "TargetInfo/AMDGPUTargetInfo.h" | 
|---|
| 17 | #include "Utils/AMDGPUBaseInfo.h" | 
|---|
| 18 | #include "llvm/MC/TargetRegistry.h" | 
|---|
| 19 | #include "llvm/Support/Compiler.h" | 
|---|
| 20 | #include "llvm/Support/WithColor.h" | 
|---|
| 21 |  | 
|---|
| 22 | namespace llvm::mca { | 
|---|
| 23 |  | 
|---|
| 24 | void AMDGPUInstrPostProcess::postProcessInstruction( | 
|---|
| 25 | std::unique_ptr<Instruction> &Inst, const MCInst &MCI) { | 
|---|
| 26 | switch (MCI.getOpcode()) { | 
|---|
| 27 | case AMDGPU::S_WAITCNT: | 
|---|
| 28 | case AMDGPU::S_WAITCNT_soft: | 
|---|
| 29 | case AMDGPU::S_WAITCNT_EXPCNT: | 
|---|
| 30 | case AMDGPU::S_WAITCNT_LGKMCNT: | 
|---|
| 31 | case AMDGPU::S_WAITCNT_VMCNT: | 
|---|
| 32 | case AMDGPU::S_WAITCNT_VSCNT: | 
|---|
| 33 | case AMDGPU::S_WAITCNT_VSCNT_soft: | 
|---|
| 34 | case AMDGPU::S_WAITCNT_EXPCNT_gfx10: | 
|---|
| 35 | case AMDGPU::S_WAITCNT_LGKMCNT_gfx10: | 
|---|
| 36 | case AMDGPU::S_WAITCNT_VMCNT_gfx10: | 
|---|
| 37 | case AMDGPU::S_WAITCNT_VSCNT_gfx10: | 
|---|
| 38 | case AMDGPU::S_WAITCNT_gfx10: | 
|---|
| 39 | case AMDGPU::S_WAITCNT_gfx6_gfx7: | 
|---|
| 40 | case AMDGPU::S_WAITCNT_vi: | 
|---|
| 41 | return processWaitCnt(Inst, MCI); | 
|---|
| 42 | } | 
|---|
| 43 | } | 
|---|
| 44 |  | 
|---|
| 45 | // s_waitcnt instructions encode important information as immediate operands | 
|---|
| 46 | // which are lost during the MCInst -> mca::Instruction lowering. | 
|---|
| 47 | void AMDGPUInstrPostProcess::processWaitCnt(std::unique_ptr<Instruction> &Inst, | 
|---|
| 48 | const MCInst &MCI) { | 
|---|
| 49 | for (int Idx = 0, N = MCI.size(); Idx < N; Idx++) { | 
|---|
| 50 | MCAOperand Op; | 
|---|
| 51 | const MCOperand &MCOp = MCI.getOperand(i: Idx); | 
|---|
| 52 | if (MCOp.isReg()) { | 
|---|
| 53 | Op = MCAOperand::createReg(Reg: MCOp.getReg()); | 
|---|
| 54 | } else if (MCOp.isImm()) { | 
|---|
| 55 | Op = MCAOperand::createImm(Val: MCOp.getImm()); | 
|---|
| 56 | } | 
|---|
| 57 | Op.setIndex(Idx); | 
|---|
| 58 | Inst->addOperand(Op); | 
|---|
| 59 | } | 
|---|
| 60 | } | 
|---|
| 61 |  | 
|---|
| 62 | AMDGPUCustomBehaviour::AMDGPUCustomBehaviour(const MCSubtargetInfo &STI, | 
|---|
| 63 | const mca::SourceMgr &SrcMgr, | 
|---|
| 64 | const MCInstrInfo &MCII) | 
|---|
| 65 | : CustomBehaviour(STI, SrcMgr, MCII) { | 
|---|
| 66 | generateWaitCntInfo(); | 
|---|
| 67 | } | 
|---|
| 68 |  | 
|---|
| 69 | unsigned AMDGPUCustomBehaviour::checkCustomHazard(ArrayRef<InstRef> IssuedInst, | 
|---|
| 70 | const InstRef &IR) { | 
|---|
| 71 | const Instruction &Inst = *IR.getInstruction(); | 
|---|
| 72 | unsigned Opcode = Inst.getOpcode(); | 
|---|
| 73 |  | 
|---|
| 74 | // llvm-mca is generally run on fully compiled assembly so we wouldn't see any | 
|---|
| 75 | // pseudo instructions here. However, there are plans for the future to make | 
|---|
| 76 | // it possible to use mca within backend passes. As such, I have left the | 
|---|
| 77 | // pseudo version of s_waitcnt within this switch statement. | 
|---|
| 78 | switch (Opcode) { | 
|---|
| 79 | default: | 
|---|
| 80 | return 0; | 
|---|
| 81 | case AMDGPU::S_WAITCNT: // This instruction | 
|---|
| 82 | case AMDGPU::S_WAITCNT_soft: | 
|---|
| 83 | case AMDGPU::S_WAITCNT_EXPCNT: | 
|---|
| 84 | case AMDGPU::S_WAITCNT_LGKMCNT: | 
|---|
| 85 | case AMDGPU::S_WAITCNT_VMCNT: | 
|---|
| 86 | case AMDGPU::S_WAITCNT_VSCNT: | 
|---|
| 87 | case AMDGPU::S_WAITCNT_VSCNT_soft: // to this instruction are all pseudo. | 
|---|
| 88 | case AMDGPU::S_WAITCNT_EXPCNT_gfx10: | 
|---|
| 89 | case AMDGPU::S_WAITCNT_LGKMCNT_gfx10: | 
|---|
| 90 | case AMDGPU::S_WAITCNT_VMCNT_gfx10: | 
|---|
| 91 | case AMDGPU::S_WAITCNT_VSCNT_gfx10: | 
|---|
| 92 | case AMDGPU::S_WAITCNT_gfx10: | 
|---|
| 93 | case AMDGPU::S_WAITCNT_gfx6_gfx7: | 
|---|
| 94 | case AMDGPU::S_WAITCNT_vi: | 
|---|
| 95 | // s_endpgm also behaves as if there is an implicit | 
|---|
| 96 | // s_waitcnt 0, but I'm not sure if it would be appropriate | 
|---|
| 97 | // to model this in llvm-mca based on how the iterations work | 
|---|
| 98 | // while simulating the pipeline over and over. | 
|---|
| 99 | return handleWaitCnt(IssuedInst, IR); | 
|---|
| 100 | } | 
|---|
| 101 |  | 
|---|
| 102 | return 0; | 
|---|
| 103 | } | 
|---|
| 104 |  | 
|---|
| 105 | unsigned AMDGPUCustomBehaviour::handleWaitCnt(ArrayRef<InstRef> IssuedInst, | 
|---|
| 106 | const InstRef &IR) { | 
|---|
| 107 | // Currently, all s_waitcnt instructions are handled except s_waitcnt_depctr. | 
|---|
| 108 | // I do not know how that instruction works so I did not attempt to model it. | 
|---|
| 109 | // set the max values to begin | 
|---|
| 110 | unsigned Vmcnt = 63; | 
|---|
| 111 | unsigned Expcnt = 7; | 
|---|
| 112 | unsigned Lgkmcnt = 31; | 
|---|
| 113 | unsigned Vscnt = 63; | 
|---|
| 114 | unsigned CurrVmcnt = 0; | 
|---|
| 115 | unsigned CurrExpcnt = 0; | 
|---|
| 116 | unsigned CurrLgkmcnt = 0; | 
|---|
| 117 | unsigned CurrVscnt = 0; | 
|---|
| 118 | unsigned CyclesToWaitVm = ~0U; | 
|---|
| 119 | unsigned CyclesToWaitExp = ~0U; | 
|---|
| 120 | unsigned CyclesToWaitLgkm = ~0U; | 
|---|
| 121 | unsigned CyclesToWaitVs = ~0U; | 
|---|
| 122 |  | 
|---|
| 123 | computeWaitCnt(IR, Vmcnt, Expcnt, Lgkmcnt, Vscnt); | 
|---|
| 124 |  | 
|---|
| 125 | // We will now look at each of the currently executing instructions | 
|---|
| 126 | // to find out if this wait instruction still needs to wait. | 
|---|
| 127 | for (const InstRef &PrevIR : IssuedInst) { | 
|---|
| 128 | const Instruction &PrevInst = *PrevIR.getInstruction(); | 
|---|
| 129 | const unsigned PrevInstIndex = PrevIR.getSourceIndex() % SrcMgr.size(); | 
|---|
| 130 | const WaitCntInfo &PrevInstWaitInfo = InstrWaitCntInfo[PrevInstIndex]; | 
|---|
| 131 | const int CyclesLeft = PrevInst.getCyclesLeft(); | 
|---|
| 132 | assert(CyclesLeft != UNKNOWN_CYCLES && | 
|---|
| 133 | "We should know how many cycles are left for this instruction"); | 
|---|
| 134 | if (PrevInstWaitInfo.VmCnt) { | 
|---|
| 135 | CurrVmcnt++; | 
|---|
| 136 | if ((unsigned)CyclesLeft < CyclesToWaitVm) | 
|---|
| 137 | CyclesToWaitVm = CyclesLeft; | 
|---|
| 138 | } | 
|---|
| 139 | if (PrevInstWaitInfo.ExpCnt) { | 
|---|
| 140 | CurrExpcnt++; | 
|---|
| 141 | if ((unsigned)CyclesLeft < CyclesToWaitExp) | 
|---|
| 142 | CyclesToWaitExp = CyclesLeft; | 
|---|
| 143 | } | 
|---|
| 144 | if (PrevInstWaitInfo.LgkmCnt) { | 
|---|
| 145 | CurrLgkmcnt++; | 
|---|
| 146 | if ((unsigned)CyclesLeft < CyclesToWaitLgkm) | 
|---|
| 147 | CyclesToWaitLgkm = CyclesLeft; | 
|---|
| 148 | } | 
|---|
| 149 | if (PrevInstWaitInfo.VsCnt) { | 
|---|
| 150 | CurrVscnt++; | 
|---|
| 151 | if ((unsigned)CyclesLeft < CyclesToWaitVs) | 
|---|
| 152 | CyclesToWaitVs = CyclesLeft; | 
|---|
| 153 | } | 
|---|
| 154 | } | 
|---|
| 155 |  | 
|---|
| 156 | unsigned CyclesToWait = ~0U; | 
|---|
| 157 | if (CurrVmcnt > Vmcnt && CyclesToWaitVm < CyclesToWait) | 
|---|
| 158 | CyclesToWait = CyclesToWaitVm; | 
|---|
| 159 | if (CurrExpcnt > Expcnt && CyclesToWaitExp < CyclesToWait) | 
|---|
| 160 | CyclesToWait = CyclesToWaitExp; | 
|---|
| 161 | if (CurrLgkmcnt > Lgkmcnt && CyclesToWaitLgkm < CyclesToWait) | 
|---|
| 162 | CyclesToWait = CyclesToWaitLgkm; | 
|---|
| 163 | if (CurrVscnt > Vscnt && CyclesToWaitVs < CyclesToWait) | 
|---|
| 164 | CyclesToWait = CyclesToWaitVs; | 
|---|
| 165 |  | 
|---|
| 166 | // We may underestimate how many cycles we need to wait, but this | 
|---|
| 167 | // isn't a big deal. Our return value is just how many cycles until | 
|---|
| 168 | // this function gets run again. So as long as we don't overestimate | 
|---|
| 169 | // the wait time, we'll still end up stalling at this instruction | 
|---|
| 170 | // for the correct number of cycles. | 
|---|
| 171 |  | 
|---|
| 172 | if (CyclesToWait == ~0U) | 
|---|
| 173 | return 0; | 
|---|
| 174 | return CyclesToWait; | 
|---|
| 175 | } | 
|---|
| 176 |  | 
|---|
| 177 | void AMDGPUCustomBehaviour::computeWaitCnt(const InstRef &IR, unsigned &Vmcnt, | 
|---|
| 178 | unsigned &Expcnt, unsigned &Lgkmcnt, | 
|---|
| 179 | unsigned &Vscnt) { | 
|---|
| 180 | AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(GPU: STI.getCPU()); | 
|---|
| 181 | const Instruction &Inst = *IR.getInstruction(); | 
|---|
| 182 | unsigned Opcode = Inst.getOpcode(); | 
|---|
| 183 |  | 
|---|
| 184 | switch (Opcode) { | 
|---|
| 185 | case AMDGPU::S_WAITCNT_EXPCNT_gfx10: | 
|---|
| 186 | case AMDGPU::S_WAITCNT_LGKMCNT_gfx10: | 
|---|
| 187 | case AMDGPU::S_WAITCNT_VMCNT_gfx10: | 
|---|
| 188 | case AMDGPU::S_WAITCNT_VSCNT_gfx10: { | 
|---|
| 189 | // Should probably be checking for nullptr | 
|---|
| 190 | // here, but I'm not sure how I should handle the case | 
|---|
| 191 | // where we see a nullptr. | 
|---|
| 192 | const MCAOperand *OpReg = Inst.getOperand(Idx: 0); | 
|---|
| 193 | const MCAOperand *OpImm = Inst.getOperand(Idx: 1); | 
|---|
| 194 | assert(OpReg && OpReg->isReg() && "First operand should be a register."); | 
|---|
| 195 | assert(OpImm && OpImm->isImm() && "Second operand should be an immediate."); | 
|---|
| 196 | if (OpReg->getReg() != AMDGPU::SGPR_NULL) { | 
|---|
| 197 | // Instruction is using a real register. | 
|---|
| 198 | // Since we can't know what value this register will have, | 
|---|
| 199 | // we can't compute what the value of this wait should be. | 
|---|
| 200 | WithColor::warning() << "The register component of " | 
|---|
| 201 | << MCII.getName(Opcode) << " will be completely " | 
|---|
| 202 | << "ignored. So the wait may not be accurate.\n"; | 
|---|
| 203 | } | 
|---|
| 204 | switch (Opcode) { | 
|---|
| 205 | // Redundant switch so I don't have to repeat the code above | 
|---|
| 206 | // for each case. There are more clever ways to avoid this | 
|---|
| 207 | // extra switch and anyone can feel free to implement one of them. | 
|---|
| 208 | case AMDGPU::S_WAITCNT_EXPCNT_gfx10: | 
|---|
| 209 | Expcnt = OpImm->getImm(); | 
|---|
| 210 | break; | 
|---|
| 211 | case AMDGPU::S_WAITCNT_LGKMCNT_gfx10: | 
|---|
| 212 | Lgkmcnt = OpImm->getImm(); | 
|---|
| 213 | break; | 
|---|
| 214 | case AMDGPU::S_WAITCNT_VMCNT_gfx10: | 
|---|
| 215 | Vmcnt = OpImm->getImm(); | 
|---|
| 216 | break; | 
|---|
| 217 | case AMDGPU::S_WAITCNT_VSCNT_gfx10: | 
|---|
| 218 | Vscnt = OpImm->getImm(); | 
|---|
| 219 | break; | 
|---|
| 220 | } | 
|---|
| 221 | return; | 
|---|
| 222 | } | 
|---|
| 223 | case AMDGPU::S_WAITCNT_gfx10: | 
|---|
| 224 | case AMDGPU::S_WAITCNT_gfx6_gfx7: | 
|---|
| 225 | case AMDGPU::S_WAITCNT_vi: | 
|---|
| 226 | unsigned WaitCnt = Inst.getOperand(Idx: 0)->getImm(); | 
|---|
| 227 | AMDGPU::decodeWaitcnt(Version: IV, Waitcnt: WaitCnt, Vmcnt, Expcnt, Lgkmcnt); | 
|---|
| 228 | return; | 
|---|
| 229 | } | 
|---|
| 230 | } | 
|---|
| 231 |  | 
|---|
| 232 | void AMDGPUCustomBehaviour::generateWaitCntInfo() { | 
|---|
| 233 | // The core logic from this function is taken from | 
|---|
| 234 | // SIInsertWaitcnts::updateEventWaitcntAfter() In that pass, the instructions | 
|---|
| 235 | // that are being looked at are in the MachineInstr format, whereas we have | 
|---|
| 236 | // access to the MCInst format. The side effects of this are that we can't use | 
|---|
| 237 | // the mayAccessVMEMThroughFlat(Inst) or mayAccessLDSThroughFlat(Inst) | 
|---|
| 238 | // functions. Therefore, we conservatively assume that these functions will | 
|---|
| 239 | // return true. This may cause a few instructions to be incorrectly tagged | 
|---|
| 240 | // with an extra CNT. However, these are instructions that do interact with at | 
|---|
| 241 | // least one CNT so giving them an extra CNT shouldn't cause issues in most | 
|---|
| 242 | // scenarios. | 
|---|
| 243 | AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(GPU: STI.getCPU()); | 
|---|
| 244 | InstrWaitCntInfo.resize(new_size: SrcMgr.size()); | 
|---|
| 245 |  | 
|---|
| 246 | for (const auto &EN : llvm::enumerate(First: SrcMgr.getInstructions())) { | 
|---|
| 247 | const std::unique_ptr<Instruction> &Inst = EN.value(); | 
|---|
| 248 | unsigned Index = EN.index(); | 
|---|
| 249 | unsigned Opcode = Inst->getOpcode(); | 
|---|
| 250 | const MCInstrDesc &MCID = MCII.get(Opcode); | 
|---|
| 251 | if ((MCID.TSFlags & SIInstrFlags::DS) && | 
|---|
| 252 | (MCID.TSFlags & SIInstrFlags::LGKM_CNT)) { | 
|---|
| 253 | InstrWaitCntInfo[Index].LgkmCnt = true; | 
|---|
| 254 | if (isAlwaysGDS(Opcode) || hasModifiersSet(Inst, OpName: AMDGPU::OpName::gds)) | 
|---|
| 255 | InstrWaitCntInfo[Index].ExpCnt = true; | 
|---|
| 256 | } else if (MCID.TSFlags & SIInstrFlags::FLAT) { | 
|---|
| 257 | // We conservatively assume that mayAccessVMEMThroughFlat(Inst) | 
|---|
| 258 | // and mayAccessLDSThroughFlat(Inst) would both return true for this | 
|---|
| 259 | // instruction. We have to do this because those functions use | 
|---|
| 260 | // information about the memory operands that we don't have access to. | 
|---|
| 261 | InstrWaitCntInfo[Index].LgkmCnt = true; | 
|---|
| 262 | if (!STI.hasFeature(Feature: AMDGPU::FeatureVscnt)) | 
|---|
| 263 | InstrWaitCntInfo[Index].VmCnt = true; | 
|---|
| 264 | else if (MCID.mayLoad() && !(MCID.TSFlags & SIInstrFlags::IsAtomicNoRet)) | 
|---|
| 265 | InstrWaitCntInfo[Index].VmCnt = true; | 
|---|
| 266 | else | 
|---|
| 267 | InstrWaitCntInfo[Index].VsCnt = true; | 
|---|
| 268 | } else if (isVMEM(MCID) && !AMDGPU::getMUBUFIsBufferInv(Opc: Opcode)) { | 
|---|
| 269 | if (!STI.hasFeature(Feature: AMDGPU::FeatureVscnt)) | 
|---|
| 270 | InstrWaitCntInfo[Index].VmCnt = true; | 
|---|
| 271 | else if ((MCID.mayLoad() && | 
|---|
| 272 | !(MCID.TSFlags & SIInstrFlags::IsAtomicNoRet)) || | 
|---|
| 273 | ((MCID.TSFlags & SIInstrFlags::MIMG) && !MCID.mayLoad() && | 
|---|
| 274 | !MCID.mayStore())) | 
|---|
| 275 | InstrWaitCntInfo[Index].VmCnt = true; | 
|---|
| 276 | else if (MCID.mayStore()) | 
|---|
| 277 | InstrWaitCntInfo[Index].VsCnt = true; | 
|---|
| 278 |  | 
|---|
| 279 | // (IV.Major < 7) is meant to represent | 
|---|
| 280 | // GCNTarget.vmemWriteNeedsExpWaitcnt() | 
|---|
| 281 | // which is defined as | 
|---|
| 282 | // { return getGeneration() < SEA_ISLANDS; } | 
|---|
| 283 | if (IV.Major < 7 && | 
|---|
| 284 | (MCID.mayStore() || (MCID.TSFlags & SIInstrFlags::IsAtomicRet))) | 
|---|
| 285 | InstrWaitCntInfo[Index].ExpCnt = true; | 
|---|
| 286 | } else if (MCID.TSFlags & SIInstrFlags::SMRD) { | 
|---|
| 287 | InstrWaitCntInfo[Index].LgkmCnt = true; | 
|---|
| 288 | } else if (MCID.TSFlags & SIInstrFlags::EXP) { | 
|---|
| 289 | InstrWaitCntInfo[Index].ExpCnt = true; | 
|---|
| 290 | } else { | 
|---|
| 291 | switch (Opcode) { | 
|---|
| 292 | case AMDGPU::S_SENDMSG: | 
|---|
| 293 | case AMDGPU::S_SENDMSGHALT: | 
|---|
| 294 | case AMDGPU::S_MEMTIME: | 
|---|
| 295 | case AMDGPU::S_MEMREALTIME: | 
|---|
| 296 | InstrWaitCntInfo[Index].LgkmCnt = true; | 
|---|
| 297 | break; | 
|---|
| 298 | } | 
|---|
| 299 | } | 
|---|
| 300 | } | 
|---|
| 301 | } | 
|---|
| 302 |  | 
|---|
| 303 | // taken from SIInstrInfo::isVMEM() | 
|---|
| 304 | bool AMDGPUCustomBehaviour::isVMEM(const MCInstrDesc &MCID) { | 
|---|
| 305 | return MCID.TSFlags & SIInstrFlags::MUBUF || | 
|---|
| 306 | MCID.TSFlags & SIInstrFlags::MTBUF || | 
|---|
| 307 | MCID.TSFlags & SIInstrFlags::MIMG || MCID.TSFlags & SIInstrFlags::FLAT; | 
|---|
| 308 | } | 
|---|
| 309 |  | 
|---|
| 310 | // taken from SIInstrInfo::hasModifiersSet() | 
|---|
| 311 | bool AMDGPUCustomBehaviour::( | 
|---|
| 312 | const std::unique_ptr<Instruction> &Inst, AMDGPU::OpName OpName) const { | 
|---|
| 313 | int Idx = AMDGPU::getNamedOperandIdx(Opcode: Inst->getOpcode(), Name: OpName); | 
|---|
| 314 | if (Idx == -1) | 
|---|
| 315 | return false; | 
|---|
| 316 |  | 
|---|
| 317 | const MCAOperand *Op = Inst->getOperand(Idx); | 
|---|
| 318 | if (Op == nullptr || !Op->isImm() || !Op->getImm()) | 
|---|
| 319 | return false; | 
|---|
| 320 |  | 
|---|
| 321 | return true; | 
|---|
| 322 | } | 
|---|
| 323 |  | 
|---|
| 324 | // taken from SIInstrInfo::isGWS() | 
|---|
| 325 | bool AMDGPUCustomBehaviour::isGWS(uint16_t Opcode) const { | 
|---|
| 326 | const MCInstrDesc &MCID = MCII.get(Opcode); | 
|---|
| 327 | return MCID.TSFlags & SIInstrFlags::GWS; | 
|---|
| 328 | } | 
|---|
| 329 |  | 
|---|
| 330 | // taken from SIInstrInfo::isAlwaysGDS() | 
|---|
| 331 | bool AMDGPUCustomBehaviour::isAlwaysGDS(uint16_t Opcode) const { | 
|---|
| 332 | return Opcode == AMDGPU::DS_ORDERED_COUNT || | 
|---|
| 333 | Opcode == AMDGPU::DS_ADD_GS_REG_RTN || | 
|---|
| 334 | Opcode == AMDGPU::DS_SUB_GS_REG_RTN || isGWS(Opcode); | 
|---|
| 335 | } | 
|---|
| 336 |  | 
|---|
| 337 | } // namespace llvm::mca | 
|---|
| 338 |  | 
|---|
| 339 | using namespace llvm; | 
|---|
| 340 | using namespace mca; | 
|---|
| 341 |  | 
|---|
| 342 | static CustomBehaviour * | 
|---|
| 343 | createAMDGPUCustomBehaviour(const MCSubtargetInfo &STI, | 
|---|
| 344 | const mca::SourceMgr &SrcMgr, | 
|---|
| 345 | const MCInstrInfo &MCII) { | 
|---|
| 346 | return new AMDGPUCustomBehaviour(STI, SrcMgr, MCII); | 
|---|
| 347 | } | 
|---|
| 348 |  | 
|---|
| 349 | static InstrPostProcess * | 
|---|
| 350 | createAMDGPUInstrPostProcess(const MCSubtargetInfo &STI, | 
|---|
| 351 | const MCInstrInfo &MCII) { | 
|---|
| 352 | return new AMDGPUInstrPostProcess(STI, MCII); | 
|---|
| 353 | } | 
|---|
| 354 |  | 
|---|
| 355 | /// Extern function to initialize the targets for the AMDGPU backend | 
|---|
| 356 |  | 
|---|
| 357 | extern "C"LLVM_ABI LLVM_EXTERNAL_VISIBILITY void | 
|---|
| 358 | LLVMInitializeAMDGPUTargetMCA() { | 
|---|
| 359 | TargetRegistry::RegisterCustomBehaviour(T&: getTheR600Target(), | 
|---|
| 360 | Fn: createAMDGPUCustomBehaviour); | 
|---|
| 361 | TargetRegistry::RegisterInstrPostProcess(T&: getTheR600Target(), | 
|---|
| 362 | Fn: createAMDGPUInstrPostProcess); | 
|---|
| 363 |  | 
|---|
| 364 | TargetRegistry::RegisterCustomBehaviour(T&: getTheGCNTarget(), | 
|---|
| 365 | Fn: createAMDGPUCustomBehaviour); | 
|---|
| 366 | TargetRegistry::RegisterInstrPostProcess(T&: getTheGCNTarget(), | 
|---|
| 367 | Fn: createAMDGPUInstrPostProcess); | 
|---|
| 368 | } | 
|---|
| 369 |  | 
|---|