//===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Code to lower AMDGPU MachineInstrs to their corresponding MCInst.
//
//===----------------------------------------------------------------------===//
//

#include "AMDGPUMCInstLower.h"
#include "AMDGPU.h"
#include "AMDGPUAsmPrinter.h"
#include "AMDGPUMachineFunction.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCExpr.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include <algorithm>

using namespace llvm;

#include "AMDGPUGenMCPseudoLowering.inc"

AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx,
                                     const TargetSubtargetInfo &st,
                                     const AsmPrinter &ap):
  Ctx(ctx), ST(st), AP(ap) { }

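// Map a machine-operand target flag (SIInstrInfo::MO_*) to the MC expression
// specifier used when emitting the corresponding symbol reference.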
static AMDGPUMCExpr::Specifier getSpecifier(unsigned MOFlags) {
  switch (MOFlags) {
  default:
    return AMDGPUMCExpr::S_None;
  case SIInstrInfo::MO_GOTPCREL:
  case SIInstrInfo::MO_GOTPCREL64:
    return AMDGPUMCExpr::S_GOTPCREL;
  case SIInstrInfo::MO_GOTPCREL32_LO:
    return AMDGPUMCExpr::S_GOTPCREL32_LO;
  case SIInstrInfo::MO_GOTPCREL32_HI:
    return AMDGPUMCExpr::S_GOTPCREL32_HI;
  case SIInstrInfo::MO_REL32_LO:
    return AMDGPUMCExpr::S_REL32_LO;
  case SIInstrInfo::MO_REL32_HI:
    return AMDGPUMCExpr::S_REL32_HI;
  case SIInstrInfo::MO_REL64:
    return AMDGPUMCExpr::S_REL64;
  case SIInstrInfo::MO_ABS32_LO:
    return AMDGPUMCExpr::S_ABS32_LO;
  case SIInstrInfo::MO_ABS32_HI:
    return AMDGPUMCExpr::S_ABS32_HI;
  case SIInstrInfo::MO_ABS64:
    return AMDGPUMCExpr::S_ABS64;
  }
}

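// Lower a single MachineOperand to an MCOperand. Returns true and sets MCOp
// on success; returns false for operands with no MC equivalent (register
// masks). Unknown operand types are fatal.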
bool AMDGPUMCInstLower::lowerOperand(const MachineOperand &MO,
                                     MCOperand &MCOp) const {
  switch (MO.getType()) {
  default:
    break;
  case MachineOperand::MO_Immediate:
    MCOp = MCOperand::createImm(MO.getImm());
    return true;
  case MachineOperand::MO_Register:
    MCOp = MCOperand::createReg(AMDGPU::getMCReg(MO.getReg(), ST));
    return true;
  case MachineOperand::MO_MachineBasicBlock:
    MCOp = MCOperand::createExpr(
        MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx));
    return true;
  case MachineOperand::MO_GlobalAddress: {
    const GlobalValue *GV = MO.getGlobal();
    SmallString<128> SymbolName;
    AP.getNameWithPrefix(SymbolName, GV);
    MCSymbol *Sym = Ctx.getOrCreateSymbol(SymbolName);
    const MCExpr *Expr =
        MCSymbolRefExpr::create(Sym, getSpecifier(MO.getTargetFlags()), Ctx);
    int64_t Offset = MO.getOffset();
    if (Offset != 0) {
      Expr = MCBinaryExpr::createAdd(Expr,
                                     MCConstantExpr::create(Offset, Ctx), Ctx);
    }
    MCOp = MCOperand::createExpr(Expr);
    return true;
  }
  case MachineOperand::MO_ExternalSymbol: {
    MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(MO.getSymbolName()));
    const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
    MCOp = MCOperand::createExpr(Expr);
    return true;
  }
  case MachineOperand::MO_RegisterMask:
    // Regmasks are like implicit defs.
    return false;
  case MachineOperand::MO_MCSymbol:
    if (MO.getTargetFlags() == SIInstrInfo::MO_FAR_BRANCH_OFFSET) {
      MCSymbol *Sym = MO.getMCSymbol();
      MCOp = MCOperand::createExpr(Sym->getVariableValue());
      return true;
    }
    break;
  }
  llvm_unreachable("unknown operand type");
}

// Lower a true16 D16 pseudo instruction to the d16_lo/d16_hi MCInst based on
// the .l/.h selection of the Dst/Data operand.
void AMDGPUMCInstLower::lowerT16D16Helper(const MachineInstr *MI,
                                          MCInst &OutMI) const {
  unsigned Opcode = MI->getOpcode();
  const auto *TII = static_cast<const SIInstrInfo *>(ST.getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  const auto *Info = AMDGPU::getT16D16Helper(Opcode);

  llvm::AMDGPU::OpName OpName;
  if (TII->isDS(Opcode)) {
    if (MI->mayLoad())
      OpName = llvm::AMDGPU::OpName::vdst;
    else if (MI->mayStore())
      OpName = llvm::AMDGPU::OpName::data0;
    else
      llvm_unreachable("LDS load or store expected");
  } else {
    OpName = AMDGPU::hasNamedOperand(Opcode, llvm::AMDGPU::OpName::vdata)
                 ? llvm::AMDGPU::OpName::vdata
                 : llvm::AMDGPU::OpName::vdst;
  }

  // Select the Dst/Data operand.
  int VDstOrVDataIdx = AMDGPU::getNamedOperandIdx(Opcode, OpName);
  const MachineOperand &MIVDstOrVData = MI->getOperand(VDstOrVDataIdx);

  // Select the hi/lo MCInst.
  bool IsHi = AMDGPU::isHi16Reg(MIVDstOrVData.getReg(), TRI);
  Opcode = IsHi ? Info->HiOp : Info->LoOp;

  int MCOpcode = TII->pseudoToMCOpcode(Opcode);
  assert(MCOpcode != -1 &&
         "Pseudo instruction doesn't have a target-specific version");
  OutMI.setOpcode(MCOpcode);

  // Lower the operands.
  for (int I = 0, E = MI->getNumExplicitOperands(); I < E; I++) {
    const MachineOperand &MO = MI->getOperand(I);
    MCOperand MCOp;
    if (I == VDstOrVDataIdx)
      MCOp = MCOperand::createReg(TRI.get32BitRegister(MIVDstOrVData.getReg()));
    else
      lowerOperand(MO, MCOp);
    OutMI.addOperand(MCOp);
  }

  if (AMDGPU::hasNamedOperand(MCOpcode, AMDGPU::OpName::vdst_in)) {
    MCOperand MCOp;
    lowerOperand(MIVDstOrVData, MCOp);
    OutMI.addOperand(MCOp);
  }
}

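// Lower a true16 V_FMA_MIX_(B)F16_t16 pseudo to the MIXLO/MIXHI form,
// depending on whether vdst selects the low or high half of the 32-bit
// register.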
void AMDGPUMCInstLower::lowerT16FmaMixFP16(const MachineInstr *MI,
                                           MCInst &OutMI) const {
  unsigned Opcode = MI->getOpcode();
  const auto *TII = static_cast<const SIInstrInfo *>(ST.getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  int VDstIdx = AMDGPU::getNamedOperandIdx(Opcode, llvm::AMDGPU::OpName::vdst);
  const MachineOperand &VDst = MI->getOperand(VDstIdx);
  bool IsHi = AMDGPU::isHi16Reg(VDst.getReg(), TRI);
  switch (Opcode) {
  case AMDGPU::V_FMA_MIX_F16_t16:
    Opcode = IsHi ? AMDGPU::V_FMA_MIXHI_F16 : AMDGPU::V_FMA_MIXLO_F16;
    break;
  case AMDGPU::V_FMA_MIX_BF16_t16:
    Opcode = IsHi ? AMDGPU::V_FMA_MIXHI_BF16 : AMDGPU::V_FMA_MIXLO_BF16;
    break;
  }
  int MCOpcode = TII->pseudoToMCOpcode(Opcode);
  assert(MCOpcode != -1 &&
         "Pseudo instruction doesn't have a target-specific version");
  OutMI.setOpcode(MCOpcode);

  // lower operands
  for (int I = 0, E = MI->getNumExplicitOperands(); I < E; I++) {
    const MachineOperand &MO = MI->getOperand(I);
    MCOperand MCOp;
    if (I == VDstIdx)
      MCOp = MCOperand::createReg(TRI.get32BitRegister(VDst.getReg()));
    else
      lowerOperand(MO, MCOp);
    OutMI.addOperand(MCOp);
  }
}

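// Lower MI to OutMI. Special-cases the call/return pseudos and the true16
// pseudos whose expansion depends on their operands, then applies the generic
// pseudo-to-MC opcode mapping to everything else.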
void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
  unsigned Opcode = MI->getOpcode();
  const auto *TII = static_cast<const SIInstrInfo *>(ST.getInstrInfo());

  // FIXME: Should be able to handle this with lowerPseudoInstExpansion. We
  // need to select it to the subtarget specific version, and there's no way to
  // do that with a single pseudo source operation.
  if (Opcode == AMDGPU::S_SETPC_B64_return)
    Opcode = AMDGPU::S_SETPC_B64;
  else if (Opcode == AMDGPU::SI_CALL) {
    // SI_CALL is just S_SWAPPC_B64 with an additional operand to track the
    // called function (which we need to remove here).
    OutMI.setOpcode(TII->pseudoToMCOpcode(AMDGPU::S_SWAPPC_B64));
    MCOperand Dest, Src;
    lowerOperand(MI->getOperand(0), Dest);
    lowerOperand(MI->getOperand(1), Src);
    OutMI.addOperand(Dest);
    OutMI.addOperand(Src);
    return;
  } else if (Opcode == AMDGPU::SI_TCRETURN ||
             Opcode == AMDGPU::SI_TCRETURN_GFX ||
             Opcode == AMDGPU::SI_TCRETURN_CHAIN) {
    // TODO: How to use branch immediate and avoid register+add?
    Opcode = AMDGPU::S_SETPC_B64;
  } else if (AMDGPU::getT16D16Helper(Opcode)) {
    lowerT16D16Helper(MI, OutMI);
    return;
  } else if (Opcode == AMDGPU::V_FMA_MIX_F16_t16 ||
             Opcode == AMDGPU::V_FMA_MIX_BF16_t16) {
    lowerT16FmaMixFP16(MI, OutMI);
    return;
  }

  int MCOpcode = TII->pseudoToMCOpcode(Opcode);
  if (MCOpcode == -1) {
    LLVMContext &C = MI->getMF()->getFunction().getContext();
    C.emitError("AMDGPUMCInstLower::lower - Pseudo instruction doesn't have "
                "a target-specific version: " + Twine(MI->getOpcode()));
  }

  OutMI.setOpcode(MCOpcode);

  for (const MachineOperand &MO : MI->explicit_operands()) {
    MCOperand MCOp;
    lowerOperand(MO, MCOp);
    OutMI.addOperand(MCOp);
  }

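  // Some MC opcodes expect a trailing 'fi' operand that the pseudo does not
  // carry; pad the MCInst with a default immediate of 0 so the operand list
  // matches the MC instruction description.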
  int FIIdx = AMDGPU::getNamedOperandIdx(MCOpcode, AMDGPU::OpName::fi);
  if (FIIdx >= (int)OutMI.getNumOperands())
    OutMI.addOperand(MCOperand::createImm(0));
}

bool AMDGPUAsmPrinter::lowerOperand(const MachineOperand &MO,
                                    MCOperand &MCOp) const {
  const GCNSubtarget &STI = MF->getSubtarget<GCNSubtarget>();
  AMDGPUMCInstLower MCInstLowering(OutContext, STI, *this);
  return MCInstLowering.lowerOperand(MO, MCOp);
}

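// Override of AsmPrinter::lowerConstant: fold LDS globals with known absolute
// addresses to plain 32-bit constants, lower address space casts, and defer
// to the base implementation for everything else.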
const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV,
                                              const Constant *BaseCV,
                                              uint64_t Offset) {

  // Intercept LDS variables with known addresses.
  if (const GlobalVariable *GV = dyn_cast<const GlobalVariable>(CV)) {
    if (std::optional<uint32_t> Address =
            AMDGPUMachineFunction::getLDSAbsoluteAddress(*GV)) {
      auto *IntTy = Type::getInt32Ty(CV->getContext());
      return AsmPrinter::lowerConstant(ConstantInt::get(IntTy, *Address),
                                       BaseCV, Offset);
    }
  }

  if (const MCExpr *E = lowerAddrSpaceCast(TM, CV, OutContext))
    return E;
  return AsmPrinter::lowerConstant(CV, BaseCV, Offset);
}

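// Emit an assembler comment listing the VGPRs that a block load/store will
// actually transfer, based on the register mask recorded for the block in the
// machine function info.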
static void emitVGPRBlockComment(const MachineInstr *MI, const SIInstrInfo *TII,
                                 const TargetRegisterInfo *TRI,
                                 const SIMachineFunctionInfo *MFI,
                                 MCStreamer &OS) {
  // The instruction will only transfer a subset of the registers in the block,
  // based on the mask that is stored in m0. We could search for the instruction
  // that sets m0, but most of the time we'll already have the mask stored in
  // the machine function info. Try to use that. This assumes that we only use
  // block loads/stores for CSR spills.
  Register RegBlock =
      TII->getNamedOperand(*MI, MI->mayLoad() ? AMDGPU::OpName::vdst
                                              : AMDGPU::OpName::vdata)
          ->getReg();
  Register FirstRegInBlock = TRI->getSubReg(RegBlock, AMDGPU::sub0);
  uint32_t Mask = MFI->getMaskForVGPRBlockOps(RegBlock);

  if (!Mask)
    return; // Nothing to report.

  SmallString<512> TransferredRegs;
  for (unsigned I = 0; I < sizeof(Mask) * 8; ++I) {
    if (Mask & (1 << I)) {
      (llvm::Twine(" ") + TRI->getRegAsmName(FirstRegInBlock + I))
          .toVector(TransferredRegs);
    }
  }

  OS.emitRawComment(" transferring at most " + TransferredRegs);
}

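// AsmPrinter hook: expand table-driven pseudos, verify the instruction, print
// the comment-only pseudos as assembler comments, and lower everything else
// to a real MCInst before emission.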
void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
  // FIXME: Enable feature predicate checks once all the tests pass.
  // AMDGPU_MC::verifyInstructionPredicates(MI->getOpcode(),
  //                                        getSubtargetInfo().getFeatureBits());

  if (MCInst OutInst; lowerPseudoInstExpansion(MI, OutInst)) {
    EmitToStreamer(*OutStreamer, OutInst);
    return;
  }

  const GCNSubtarget &STI = MF->getSubtarget<GCNSubtarget>();
  AMDGPUMCInstLower MCInstLowering(OutContext, STI, *this);

  StringRef Err;
  if (!STI.getInstrInfo()->verifyInstruction(*MI, Err)) {
    LLVMContext &C = MI->getMF()->getFunction().getContext();
    C.emitError("Illegal instruction detected: " + Err);
    MI->print(errs());
  }

  if (MI->isBundle()) {
    const MachineBasicBlock *MBB = MI->getParent();
    MachineBasicBlock::const_instr_iterator I = ++MI->getIterator();
    while (I != MBB->instr_end() && I->isInsideBundle()) {
      emitInstruction(&*I);
      ++I;
    }
  } else {
    // We don't want these pseudo instructions encoded. They are
    // placeholder terminator instructions and should only be printed as
    // comments.
    if (MI->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) {
      if (isVerbose())
        OutStreamer->emitRawComment(" return to shader part epilog");
      return;
    }

    if (MI->getOpcode() == AMDGPU::WAVE_BARRIER) {
      if (isVerbose())
        OutStreamer->emitRawComment(" wave barrier");
      return;
    }

    if (MI->getOpcode() == AMDGPU::SCHED_BARRIER) {
      if (isVerbose()) {
        std::string HexString;
        raw_string_ostream HexStream(HexString);
        HexStream << format_hex(MI->getOperand(0).getImm(), 10, true);
        OutStreamer->emitRawComment(" sched_barrier mask(" + HexString + ")");
      }
      return;
    }

    if (MI->getOpcode() == AMDGPU::SCHED_GROUP_BARRIER) {
      if (isVerbose()) {
        std::string HexString;
        raw_string_ostream HexStream(HexString);
        HexStream << format_hex(MI->getOperand(0).getImm(), 10, true);
        OutStreamer->emitRawComment(
            " sched_group_barrier mask(" + HexString + ") size(" +
            Twine(MI->getOperand(1).getImm()) + ") SyncID(" +
            Twine(MI->getOperand(2).getImm()) + ")");
      }
      return;
    }

    if (MI->getOpcode() == AMDGPU::IGLP_OPT) {
      if (isVerbose()) {
        std::string HexString;
        raw_string_ostream HexStream(HexString);
        HexStream << format_hex(MI->getOperand(0).getImm(), 10, true);
        OutStreamer->emitRawComment(" iglp_opt mask(" + HexString + ")");
      }
      return;
    }

    if (MI->getOpcode() == AMDGPU::SI_MASKED_UNREACHABLE) {
      if (isVerbose())
        OutStreamer->emitRawComment(" divergent unreachable");
      return;
    }

    if (MI->isMetaInstruction()) {
      if (isVerbose())
        OutStreamer->emitRawComment(" meta instruction");
      return;
    }

    unsigned Opc = MI->getOpcode();
    if (LLVM_UNLIKELY(Opc == TargetOpcode::STATEPOINT ||
                      Opc == TargetOpcode::STACKMAP ||
                      Opc == TargetOpcode::PATCHPOINT)) {
      LLVMContext &Ctx = MI->getMF()->getFunction().getContext();
      Ctx.emitError("unhandled statepoint-like instruction");
      OutStreamer->emitRawComment("unsupported statepoint/stackmap/patchpoint");
      return;
    }

    if (isVerbose())
      if (STI.getInstrInfo()->isBlockLoadStore(MI->getOpcode()))
        emitVGPRBlockComment(MI, STI.getInstrInfo(), STI.getRegisterInfo(),
                             MF->getInfo<SIMachineFunctionInfo>(),
                             *OutStreamer);

    if (isVerbose() && MI->getOpcode() == AMDGPU::S_SET_VGPR_MSB) {
      unsigned V = MI->getOperand(0).getImm() & 0xff;
      OutStreamer->AddComment(
          " msbs: dst=" + Twine(V >> 6) + " src0=" + Twine(V & 3) +
          " src1=" + Twine((V >> 2) & 3) + " src2=" + Twine((V >> 4) & 3));
    }

    MCInst TmpInst;
    MCInstLowering.lower(MI, TmpInst);
    EmitToStreamer(*OutStreamer, TmpInst);

#ifdef EXPENSIVE_CHECKS
    // Check getInstSizeInBytes on explicitly specified CPUs (it cannot
    // work correctly for the generic CPU).
    //
    // The isPseudo check really shouldn't be here, but unfortunately there are
    // some negative lit tests that depend on being able to continue through
    // here even when pseudo instructions haven't been lowered.
    //
    // We also overestimate branch sizes with the offset bug.
    if (!MI->isPseudo() && STI.isCPUStringValid(STI.getCPU()) &&
        (!STI.hasOffset3fBug() || !MI->isBranch())) {
      SmallVector<MCFixup, 4> Fixups;
      SmallVector<char, 16> CodeBytes;

      std::unique_ptr<MCCodeEmitter> InstEmitter(createAMDGPUMCCodeEmitter(
          *STI.getInstrInfo(), OutContext));
      InstEmitter->encodeInstruction(TmpInst, CodeBytes, Fixups, STI);

      assert(CodeBytes.size() == STI.getInstrInfo()->getInstSizeInBytes(*MI));
    }
#endif

    if (DumpCodeInstEmitter) {
      // Disassemble instruction/operands to text.
      DisasmLines.resize(DisasmLines.size() + 1);
      std::string &DisasmLine = DisasmLines.back();
      raw_string_ostream DisasmStream(DisasmLine);

      AMDGPUInstPrinter InstPrinter(*TM.getMCAsmInfo(), *STI.getInstrInfo(),
                                    *STI.getRegisterInfo());
      InstPrinter.printInst(&TmpInst, 0, StringRef(), STI, DisasmStream);

      // Disassemble instruction/operands to hex representation.
      SmallVector<MCFixup, 4> Fixups;
      SmallVector<char, 16> CodeBytes;

      DumpCodeInstEmitter->encodeInstruction(
          TmpInst, CodeBytes, Fixups, MF->getSubtarget<MCSubtargetInfo>());
      HexLines.resize(HexLines.size() + 1);
      std::string &HexLine = HexLines.back();
      raw_string_ostream HexStream(HexLine);

      for (size_t i = 0; i < CodeBytes.size(); i += 4) {
        unsigned int CodeDWord =
            support::endian::read32le(CodeBytes.data() + i);
        HexStream << format("%s%08X", (i > 0 ? " " : ""), CodeDWord);
      }

      DisasmLineMaxLen = std::max(DisasmLineMaxLen, DisasmLine.size());
    }
  }
}