//===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Code to lower AMDGPU MachineInstrs to their corresponding MCInst.
//
//===----------------------------------------------------------------------===//
//

#include "AMDGPUMCInstLower.h"
#include "AMDGPU.h"
#include "AMDGPUAsmPrinter.h"
#include "AMDGPUMachineFunction.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCExpr.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include <algorithm>

using namespace llvm;

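// TableGen'erated mapping from pseudo instructions to their MC expansions
// (provides lowerPseudoInstExpansion, used in emitInstruction below).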
#include "AMDGPUGenMCPseudoLowering.inc"

AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx,
                                     const TargetSubtargetInfo &st,
                                     const AsmPrinter &ap):
  Ctx(ctx), ST(st), AP(ap) { }

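// Map a MachineOperand's target flags (SIInstrInfo::MO_*) to the equivalent
// MC relocation specifier; flags with no counterpart lower to S_None.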
static AMDGPUMCExpr::Specifier getSpecifier(unsigned MOFlags) {
  switch (MOFlags) {
  default:
    return AMDGPUMCExpr::S_None;
  case SIInstrInfo::MO_GOTPCREL:
  case SIInstrInfo::MO_GOTPCREL64:
    return AMDGPUMCExpr::S_GOTPCREL;
  case SIInstrInfo::MO_GOTPCREL32_LO:
    return AMDGPUMCExpr::S_GOTPCREL32_LO;
  case SIInstrInfo::MO_GOTPCREL32_HI:
    return AMDGPUMCExpr::S_GOTPCREL32_HI;
  case SIInstrInfo::MO_REL32_LO:
    return AMDGPUMCExpr::S_REL32_LO;
  case SIInstrInfo::MO_REL32_HI:
    return AMDGPUMCExpr::S_REL32_HI;
  case SIInstrInfo::MO_REL64:
    return AMDGPUMCExpr::S_REL64;
  case SIInstrInfo::MO_ABS32_LO:
    return AMDGPUMCExpr::S_ABS32_LO;
  case SIInstrInfo::MO_ABS32_HI:
    return AMDGPUMCExpr::S_ABS32_HI;
  case SIInstrInfo::MO_ABS64:
    return AMDGPUMCExpr::S_ABS64;
  }
}

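// Lower a single MachineOperand to an MCOperand. Returns false for register
// masks, which have no MC-level equivalent.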
bool AMDGPUMCInstLower::lowerOperand(const MachineOperand &MO,
                                     MCOperand &MCOp) const {
  switch (MO.getType()) {
  default:
    break;
  case MachineOperand::MO_Immediate:
    MCOp = MCOperand::createImm(MO.getImm());
    return true;
  case MachineOperand::MO_Register:
    MCOp = MCOperand::createReg(AMDGPU::getMCReg(MO.getReg(), ST));
    return true;
  case MachineOperand::MO_MachineBasicBlock:
    MCOp = MCOperand::createExpr(
        MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx));
    return true;
  case MachineOperand::MO_GlobalAddress: {
    const GlobalValue *GV = MO.getGlobal();
    SmallString<128> SymbolName;
    AP.getNameWithPrefix(SymbolName, GV);
    MCSymbol *Sym = Ctx.getOrCreateSymbol(SymbolName);
    const MCExpr *Expr =
        MCSymbolRefExpr::create(Sym, getSpecifier(MO.getTargetFlags()), Ctx);
    int64_t Offset = MO.getOffset();
    if (Offset != 0) {
      Expr = MCBinaryExpr::createAdd(Expr,
                                     MCConstantExpr::create(Offset, Ctx), Ctx);
    }
    MCOp = MCOperand::createExpr(Expr);
    return true;
  }
  case MachineOperand::MO_ExternalSymbol: {
    MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(MO.getSymbolName()));
    const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
    MCOp = MCOperand::createExpr(Expr);
    return true;
  }
  case MachineOperand::MO_RegisterMask:
    // Regmasks are like implicit defs.
    return false;
  case MachineOperand::MO_MCSymbol:
    if (MO.getTargetFlags() == SIInstrInfo::MO_FAR_BRANCH_OFFSET) {
      MCSymbol *Sym = MO.getMCSymbol();
      MCOp = MCOperand::createExpr(Sym->getVariableValue());
      return true;
    }
    break;
  }
  llvm_unreachable("unknown operand type");
}

// Lower a true16 D16 pseudo instruction to a d16_lo/d16_hi MCInst based on
// the .l/.h half selected by its Dst/Data operand.
void AMDGPUMCInstLower::lowerT16D16Helper(const MachineInstr *MI,
                                          MCInst &OutMI) const {
  unsigned Opcode = MI->getOpcode();
  const auto *TII = static_cast<const SIInstrInfo *>(ST.getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  const auto *Info = AMDGPU::getT16D16Helper(Opcode);

  llvm::AMDGPU::OpName OpName;
  if (TII->isDS(Opcode)) {
    if (MI->mayLoad())
      OpName = llvm::AMDGPU::OpName::vdst;
    else if (MI->mayStore())
      OpName = llvm::AMDGPU::OpName::data0;
    else
      llvm_unreachable("LDS load or store expected");
  } else {
    OpName = AMDGPU::hasNamedOperand(Opcode, llvm::AMDGPU::OpName::vdata)
                 ? llvm::AMDGPU::OpName::vdata
                 : llvm::AMDGPU::OpName::vdst;
  }

  // Select Dst/Data.
  int VDstOrVDataIdx = AMDGPU::getNamedOperandIdx(Opcode, OpName);
  const MachineOperand &MIVDstOrVData = MI->getOperand(VDstOrVDataIdx);

  // Select the hi or lo MCInst.
  bool IsHi = AMDGPU::isHi16Reg(MIVDstOrVData.getReg(), TRI);
  Opcode = IsHi ? Info->HiOp : Info->LoOp;

  int MCOpcode = TII->pseudoToMCOpcode(Opcode);
  assert(MCOpcode != -1 &&
         "Pseudo instruction doesn't have a target-specific version");
  OutMI.setOpcode(MCOpcode);

  // Lower operands, rewriting the 16-bit Dst/Data register to its 32-bit
  // counterpart.
  for (int I = 0, E = MI->getNumExplicitOperands(); I < E; I++) {
    const MachineOperand &MO = MI->getOperand(I);
    MCOperand MCOp;
    if (I == VDstOrVDataIdx)
      MCOp = MCOperand::createReg(TRI.get32BitRegister(MIVDstOrVData.getReg()));
    else
      lowerOperand(MO, MCOp);
    OutMI.addOperand(MCOp);
  }

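  // If the selected d16 opcode reads its previous destination through a
  // vdst_in operand, feed it the original register so the untouched half is
  // preserved.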
  if (AMDGPU::hasNamedOperand(MCOpcode, AMDGPU::OpName::vdst_in)) {
    MCOperand MCOp;
    lowerOperand(MIVDstOrVData, MCOp);
    OutMI.addOperand(MCOp);
  }
}

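// Lower a true16 V_FMA_MIX pseudo to the MIXLO/MIXHI variant selected by the
// .l/.h half of its 16-bit destination register.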
void AMDGPUMCInstLower::lowerT16FmaMixFP16(const MachineInstr *MI,
                                           MCInst &OutMI) const {
  unsigned Opcode = MI->getOpcode();
  const auto *TII = static_cast<const SIInstrInfo *>(ST.getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  int VDstIdx = AMDGPU::getNamedOperandIdx(Opcode, llvm::AMDGPU::OpName::vdst);
  const MachineOperand &VDst = MI->getOperand(VDstIdx);
  bool IsHi = AMDGPU::isHi16Reg(VDst.getReg(), TRI);
  switch (Opcode) {
  case AMDGPU::V_FMA_MIX_F16_t16:
    Opcode = IsHi ? AMDGPU::V_FMA_MIXHI_F16 : AMDGPU::V_FMA_MIXLO_F16;
    break;
  case AMDGPU::V_FMA_MIX_BF16_t16:
    Opcode = IsHi ? AMDGPU::V_FMA_MIXHI_BF16 : AMDGPU::V_FMA_MIXLO_BF16;
    break;
  }
  int MCOpcode = TII->pseudoToMCOpcode(Opcode);
  assert(MCOpcode != -1 &&
         "Pseudo instruction doesn't have a target-specific version");
  OutMI.setOpcode(MCOpcode);

  // Lower operands, rewriting the 16-bit destination to its 32-bit register.
  for (int I = 0, E = MI->getNumExplicitOperands(); I < E; I++) {
    const MachineOperand &MO = MI->getOperand(I);
    MCOperand MCOp;
    if (I == VDstIdx)
      MCOp = MCOperand::createReg(TRI.get32BitRegister(VDst.getReg()));
    else
      lowerOperand(MO, MCOp);
    OutMI.addOperand(MCOp);
  }
}

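// Main MachineInstr -> MCInst lowering; pseudos that need subtarget- or
// half-register-specific handling are diverted to the helpers above.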
void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
  unsigned Opcode = MI->getOpcode();
  const auto *TII = static_cast<const SIInstrInfo *>(ST.getInstrInfo());

  // FIXME: Should be able to handle this with lowerPseudoInstExpansion. We
  // need to select it to the subtarget specific version, and there's no way to
  // do that with a single pseudo source operation.
  if (Opcode == AMDGPU::S_SETPC_B64_return)
    Opcode = AMDGPU::S_SETPC_B64;
  else if (Opcode == AMDGPU::SI_CALL) {
    // SI_CALL is just S_SWAPPC_B64 with an additional operand to track the
    // called function (which we need to remove here).
    OutMI.setOpcode(TII->pseudoToMCOpcode(AMDGPU::S_SWAPPC_B64));
    MCOperand Dest, Src;
    lowerOperand(MI->getOperand(0), Dest);
    lowerOperand(MI->getOperand(1), Src);
    OutMI.addOperand(Dest);
    OutMI.addOperand(Src);
    return;
  } else if (Opcode == AMDGPU::SI_TCRETURN ||
             Opcode == AMDGPU::SI_TCRETURN_GFX ||
             Opcode == AMDGPU::SI_TCRETURN_CHAIN) {
    // TODO: How to use branch immediate and avoid register+add?
    Opcode = AMDGPU::S_SETPC_B64;
  } else if (AMDGPU::getT16D16Helper(Opcode)) {
    lowerT16D16Helper(MI, OutMI);
    return;
  } else if (Opcode == AMDGPU::V_FMA_MIX_F16_t16 ||
             Opcode == AMDGPU::V_FMA_MIX_BF16_t16) {
    lowerT16FmaMixFP16(MI, OutMI);
    return;
  }

  int MCOpcode = TII->pseudoToMCOpcode(Opcode);
  if (MCOpcode == -1) {
    LLVMContext &C = MI->getMF()->getFunction().getContext();
    C.emitError("AMDGPUMCInstLower::lower - Pseudo instruction doesn't have "
                "a target-specific version: " + Twine(MI->getOpcode()));
  }

  OutMI.setOpcode(MCOpcode);

  for (const MachineOperand &MO : MI->explicit_operands()) {
    MCOperand MCOp;
    lowerOperand(MO, MCOp);
    OutMI.addOperand(MCOp);
  }

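  // If the MC opcode has a trailing 'fi' (DPP fetch-inactive) modifier that
  // the pseudo did not carry, pad it with a default value of zero.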
  int FIIdx = AMDGPU::getNamedOperandIdx(MCOpcode, AMDGPU::OpName::fi);
  if (FIIdx >= (int)OutMI.getNumOperands())
    OutMI.addOperand(MCOperand::createImm(0));
}

bool AMDGPUAsmPrinter::lowerOperand(const MachineOperand &MO,
                                    MCOperand &MCOp) const {
  const GCNSubtarget &STI = MF->getSubtarget<GCNSubtarget>();
  AMDGPUMCInstLower MCInstLowering(OutContext, STI, *this);
  return MCInstLowering.lowerOperand(MO, MCOp);
}

const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV,
                                              const Constant *BaseCV,
                                              uint64_t Offset) {

  // Intercept LDS variables with known addresses.
  if (const GlobalVariable *GV = dyn_cast<const GlobalVariable>(CV)) {
    if (std::optional<uint32_t> Address =
            AMDGPUMachineFunction::getLDSAbsoluteAddress(*GV)) {
      auto *IntTy = Type::getInt32Ty(CV->getContext());
      return AsmPrinter::lowerConstant(ConstantInt::get(IntTy, *Address),
                                       BaseCV, Offset);
    }
  }

  if (const MCExpr *E = lowerAddrSpaceCast(TM, CV, OutContext))
    return E;
  return AsmPrinter::lowerConstant(CV, BaseCV, Offset);
}

static void emitVGPRBlockComment(const MachineInstr *MI, const SIInstrInfo *TII,
                                 const TargetRegisterInfo *TRI,
                                 const SIMachineFunctionInfo *MFI,
                                 MCStreamer &OS) {
  // The instruction will only transfer a subset of the registers in the block,
  // based on the mask that is stored in m0. We could search for the
  // instruction that sets m0, but most of the time we'll already have the mask
  // stored in the machine function info. Try to use that. This assumes that we
  // only use block loads/stores for CSR spills.
  Register RegBlock =
      TII->getNamedOperand(*MI, MI->mayLoad() ? AMDGPU::OpName::vdst
                                              : AMDGPU::OpName::vdata)
          ->getReg();
  Register FirstRegInBlock = TRI->getSubReg(RegBlock, AMDGPU::sub0);
  uint32_t Mask = MFI->getMaskForVGPRBlockOps(RegBlock);

  if (!Mask)
    return; // Nothing to report

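  // Build a space-separated list of the individual registers selected by the
  // mask.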
  SmallString<512> TransferredRegs;
  for (unsigned I = 0; I < sizeof(Mask) * 8; ++I) {
    if (Mask & (1 << I)) {
      (llvm::Twine(" ") + TRI->getRegAsmName(FirstRegInBlock + I))
          .toVector(TransferredRegs);
    }
  }

  OS.emitRawComment(" transferring at most " + TransferredRegs);
}

void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
  // FIXME: Enable feature predicate checks once all the tests pass.
  // AMDGPU_MC::verifyInstructionPredicates(MI->getOpcode(),
  //                                        getSubtargetInfo().getFeatureBits());

  if (MCInst OutInst; lowerPseudoInstExpansion(MI, OutInst)) {
    EmitToStreamer(*OutStreamer, OutInst);
    return;
  }

  const GCNSubtarget &STI = MF->getSubtarget<GCNSubtarget>();
  AMDGPUMCInstLower MCInstLowering(OutContext, STI, *this);

  StringRef Err;
  if (!STI.getInstrInfo()->verifyInstruction(*MI, Err)) {
    LLVMContext &C = MI->getMF()->getFunction().getContext();
    C.emitError("Illegal instruction detected: " + Err);
    MI->print(errs());
  }

  if (MI->isBundle()) {
    const MachineBasicBlock *MBB = MI->getParent();
    MachineBasicBlock::const_instr_iterator I = ++MI->getIterator();
    while (I != MBB->instr_end() && I->isInsideBundle()) {
      emitInstruction(&*I);
      ++I;
    }
  } else {
    // We don't want these pseudo instructions encoded. They are
    // placeholder terminator instructions and should only be printed as
    // comments.
    if (MI->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) {
      if (isVerbose())
        OutStreamer->emitRawComment(" return to shader part epilog");
      return;
    }

    if (MI->getOpcode() == AMDGPU::WAVE_BARRIER) {
      if (isVerbose())
        OutStreamer->emitRawComment(" wave barrier");
      return;
    }

    if (MI->getOpcode() == AMDGPU::SCHED_BARRIER) {
      if (isVerbose()) {
        std::string HexString;
        raw_string_ostream HexStream(HexString);
        HexStream << format_hex(MI->getOperand(0).getImm(), 10, true);
        OutStreamer->emitRawComment(" sched_barrier mask(" + HexString + ")");
      }
      return;
    }

    if (MI->getOpcode() == AMDGPU::SCHED_GROUP_BARRIER) {
      if (isVerbose()) {
        std::string HexString;
        raw_string_ostream HexStream(HexString);
        HexStream << format_hex(MI->getOperand(0).getImm(), 10, true);
        OutStreamer->emitRawComment(
            " sched_group_barrier mask(" + HexString + ") size(" +
            Twine(MI->getOperand(1).getImm()) + ") SyncID(" +
            Twine(MI->getOperand(2).getImm()) + ")");
      }
      return;
    }

    if (MI->getOpcode() == AMDGPU::IGLP_OPT) {
      if (isVerbose()) {
        std::string HexString;
        raw_string_ostream HexStream(HexString);
        HexStream << format_hex(MI->getOperand(0).getImm(), 10, true);
        OutStreamer->emitRawComment(" iglp_opt mask(" + HexString + ")");
      }
      return;
    }

    if (MI->getOpcode() == AMDGPU::SI_MASKED_UNREACHABLE) {
      if (isVerbose())
        OutStreamer->emitRawComment(" divergent unreachable");
      return;
    }

    if (MI->isMetaInstruction()) {
      if (isVerbose())
        OutStreamer->emitRawComment(" meta instruction");
      return;
    }

    unsigned Opc = MI->getOpcode();
    if (LLVM_UNLIKELY(Opc == TargetOpcode::STATEPOINT ||
                      Opc == TargetOpcode::STACKMAP ||
                      Opc == TargetOpcode::PATCHPOINT)) {
      LLVMContext &Ctx = MI->getMF()->getFunction().getContext();
      Ctx.emitError("unhandled statepoint-like instruction");
      OutStreamer->emitRawComment("unsupported statepoint/stackmap/patchpoint");
      return;
    }

    if (isVerbose())
      if (STI.getInstrInfo()->isBlockLoadStore(MI->getOpcode()))
        emitVGPRBlockComment(MI, STI.getInstrInfo(), STI.getRegisterInfo(),
                             MF->getInfo<SIMachineFunctionInfo>(),
                             *OutStreamer);

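    // S_SET_VGPR_MSB packs the VGPR-MSB selects for dst and src0..2 into one
    // 8-bit immediate; decode the two-bit fields for the assembly comment.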
    if (isVerbose() && MI->getOpcode() == AMDGPU::S_SET_VGPR_MSB) {
      unsigned V = MI->getOperand(0).getImm() & 0xff;
      OutStreamer->AddComment(
          " msbs: dst=" + Twine(V >> 6) + " src0=" + Twine(V & 3) +
          " src1=" + Twine((V >> 2) & 3) + " src2=" + Twine((V >> 4) & 3));
    }

    MCInst TmpInst;
    MCInstLowering.lower(MI, TmpInst);
    EmitToStreamer(*OutStreamer, TmpInst);

#ifdef EXPENSIVE_CHECKS
    // Check getInstSizeInBytes on explicitly specified CPUs (it cannot
    // work correctly for the generic CPU).
    //
    // The isPseudo check really shouldn't be here, but unfortunately there are
    // some negative lit tests that depend on being able to continue through
    // here even when pseudo instructions haven't been lowered.
    //
    // We also overestimate branch sizes with the offset bug.
    if (!MI->isPseudo() && STI.isCPUStringValid(STI.getCPU()) &&
        (!STI.hasOffset3fBug() || !MI->isBranch())) {
      SmallVector<MCFixup, 4> Fixups;
      SmallVector<char, 16> CodeBytes;

      std::unique_ptr<MCCodeEmitter> InstEmitter(createAMDGPUMCCodeEmitter(
          *STI.getInstrInfo(), OutContext));
      InstEmitter->encodeInstruction(TmpInst, CodeBytes, Fixups, STI);

      assert(CodeBytes.size() == STI.getInstrInfo()->getInstSizeInBytes(*MI));
    }
#endif

    if (DumpCodeInstEmitter) {
      // Disassemble instruction/operands to text.
      DisasmLines.resize(DisasmLines.size() + 1);
      std::string &DisasmLine = DisasmLines.back();
      raw_string_ostream DisasmStream(DisasmLine);

      AMDGPUInstPrinter InstPrinter(*TM.getMCAsmInfo(), *STI.getInstrInfo(),
                                    *STI.getRegisterInfo());
      InstPrinter.printInst(&TmpInst, 0, StringRef(), STI, DisasmStream);

      // Encode the instruction to a hex representation, one little-endian
      // 32-bit dword at a time.
      SmallVector<MCFixup, 4> Fixups;
      SmallVector<char, 16> CodeBytes;

      DumpCodeInstEmitter->encodeInstruction(
          TmpInst, CodeBytes, Fixups, MF->getSubtarget<MCSubtargetInfo>());
      HexLines.resize(HexLines.size() + 1);
      std::string &HexLine = HexLines.back();
      raw_string_ostream HexStream(HexLine);

      for (size_t i = 0; i < CodeBytes.size(); i += 4) {
        unsigned int CodeDWord =
            support::endian::read32le(CodeBytes.data() + i);
        HexStream << format("%s%08X", (i > 0 ? " " : ""), CodeDWord);
      }

      DisasmLineMaxLen = std::max(DisasmLineMaxLen, DisasmLine.size());
    }
  }
}