1//===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Code to lower AMDGPU MachineInstrs to their corresponding MCInst.
11//
12//===----------------------------------------------------------------------===//
13//
14
15#include "AMDGPUMCInstLower.h"
16#include "AMDGPU.h"
17#include "AMDGPUAsmPrinter.h"
18#include "AMDGPUMachineFunctionInfo.h"
19#include "MCTargetDesc/AMDGPUInstPrinter.h"
20#include "MCTargetDesc/AMDGPUMCExpr.h"
21#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
22#include "SIMachineFunctionInfo.h"
23#include "llvm/CodeGen/MachineBasicBlock.h"
24#include "llvm/CodeGen/MachineInstr.h"
25#include "llvm/IR/Constants.h"
26#include "llvm/IR/Function.h"
27#include "llvm/IR/GlobalVariable.h"
28#include "llvm/MC/MCCodeEmitter.h"
29#include "llvm/MC/MCContext.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInst.h"
32#include "llvm/MC/MCObjectStreamer.h"
33#include "llvm/MC/MCStreamer.h"
34#include "llvm/Support/Endian.h"
35#include "llvm/Support/ErrorHandling.h"
36#include "llvm/Support/Format.h"
37#include <algorithm>
38
39using namespace llvm;
40
41#include "AMDGPUGenMCPseudoLowering.inc"
42
43AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx,
44 const TargetSubtargetInfo &st,
45 const AsmPrinter &ap):
46 Ctx(ctx), ST(st), AP(ap) { }
47
48static AMDGPUMCExpr::Specifier getSpecifier(unsigned MOFlags) {
49 switch (MOFlags) {
50 default:
51 return AMDGPUMCExpr::S_None;
52 case SIInstrInfo::MO_GOTPCREL:
53 case SIInstrInfo::MO_GOTPCREL64:
54 return AMDGPUMCExpr::S_GOTPCREL;
55 case SIInstrInfo::MO_GOTPCREL32_LO:
56 return AMDGPUMCExpr::S_GOTPCREL32_LO;
57 case SIInstrInfo::MO_GOTPCREL32_HI:
58 return AMDGPUMCExpr::S_GOTPCREL32_HI;
59 case SIInstrInfo::MO_REL32_LO:
60 return AMDGPUMCExpr::S_REL32_LO;
61 case SIInstrInfo::MO_REL32_HI:
62 return AMDGPUMCExpr::S_REL32_HI;
63 case SIInstrInfo::MO_REL64:
64 return AMDGPUMCExpr::S_REL64;
65 case SIInstrInfo::MO_ABS32_LO:
66 return AMDGPUMCExpr::S_ABS32_LO;
67 case SIInstrInfo::MO_ABS32_HI:
68 return AMDGPUMCExpr::S_ABS32_HI;
69 case SIInstrInfo::MO_ABS64:
70 return AMDGPUMCExpr::S_ABS64;
71 }
72}
73
74bool AMDGPUMCInstLower::lowerOperand(const MachineOperand &MO,
75 MCOperand &MCOp) const {
76 switch (MO.getType()) {
77 default:
78 break;
79 case MachineOperand::MO_Immediate:
80 MCOp = MCOperand::createImm(Val: MO.getImm());
81 return true;
82 case MachineOperand::MO_Register:
83 MCOp = MCOperand::createReg(Reg: AMDGPU::getMCReg(Reg: MO.getReg(), STI: ST));
84 return true;
85 case MachineOperand::MO_MachineBasicBlock:
86 MCOp = MCOperand::createExpr(
87 Val: MCSymbolRefExpr::create(Symbol: MO.getMBB()->getSymbol(), Ctx));
88 return true;
89 case MachineOperand::MO_GlobalAddress: {
90 const GlobalValue *GV = MO.getGlobal();
91 SmallString<128> SymbolName;
92 AP.getNameWithPrefix(Name&: SymbolName, GV);
93 MCSymbol *Sym = Ctx.getOrCreateSymbol(Name: SymbolName);
94 const MCExpr *Expr =
95 MCSymbolRefExpr::create(Symbol: Sym, specifier: getSpecifier(MOFlags: MO.getTargetFlags()), Ctx);
96 int64_t Offset = MO.getOffset();
97 if (Offset != 0) {
98 Expr = MCBinaryExpr::createAdd(LHS: Expr,
99 RHS: MCConstantExpr::create(Value: Offset, Ctx), Ctx);
100 }
101 MCOp = MCOperand::createExpr(Val: Expr);
102 return true;
103 }
104 case MachineOperand::MO_ExternalSymbol: {
105 MCSymbol *Sym = Ctx.getOrCreateSymbol(Name: StringRef(MO.getSymbolName()));
106 const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Symbol: Sym, Ctx);
107 MCOp = MCOperand::createExpr(Val: Expr);
108 return true;
109 }
110 case MachineOperand::MO_BlockAddress: {
111 MCSymbol *Sym = AP.GetBlockAddressSymbol(BA: MO.getBlockAddress());
112 const MCSymbolRefExpr *Expr =
113 MCSymbolRefExpr::create(Symbol: Sym, specifier: getSpecifier(MOFlags: MO.getTargetFlags()), Ctx);
114 assert(MO.getOffset() == 0);
115 MCOp = MCOperand::createExpr(Val: Expr);
116 return true;
117 }
118 case MachineOperand::MO_RegisterMask:
119 // Regmasks are like implicit defs.
120 return false;
121 case MachineOperand::MO_MCSymbol:
122 if (MO.getTargetFlags() == SIInstrInfo::MO_FAR_BRANCH_OFFSET) {
123 MCSymbol *Sym = MO.getMCSymbol();
124 MCOp = MCOperand::createExpr(Val: Sym->getVariableValue());
125 return true;
126 }
127 break;
128 }
129 llvm_unreachable("unknown operand type");
130}
131
132// Lower true16 D16 Pseudo instruction to d16_lo/d16_hi MCInst based on
133// Dst/Data's .l/.h selection
134void AMDGPUMCInstLower::lowerT16D16Helper(const MachineInstr *MI,
135 MCInst &OutMI) const {
136 unsigned Opcode = MI->getOpcode();
137 const auto *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
138 const SIRegisterInfo &TRI = TII->getRegisterInfo();
139 const auto *Info = AMDGPU::getT16D16Helper(T16Op: Opcode);
140
141 llvm::AMDGPU::OpName OpName;
142 if (TII->isDS(Opcode)) {
143 if (MI->mayLoad())
144 OpName = llvm::AMDGPU::OpName::vdst;
145 else if (MI->mayStore())
146 OpName = llvm::AMDGPU::OpName::data0;
147 else
148 llvm_unreachable("LDS load or store expected");
149 } else {
150 OpName = AMDGPU::hasNamedOperand(Opcode, NamedIdx: llvm::AMDGPU::OpName::vdata)
151 ? llvm::AMDGPU::OpName::vdata
152 : llvm::AMDGPU::OpName::vdst;
153 }
154
155 // select Dst/Data
156 int VDstOrVDataIdx = AMDGPU::getNamedOperandIdx(Opcode, Name: OpName);
157 const MachineOperand &MIVDstOrVData = MI->getOperand(i: VDstOrVDataIdx);
158
159 // select hi/lo MCInst
160 bool IsHi = AMDGPU::isHi16Reg(Reg: MIVDstOrVData.getReg(), MRI: TRI);
161 Opcode = IsHi ? Info->HiOp : Info->LoOp;
162
163 int MCOpcode = TII->pseudoToMCOpcode(Opcode);
164 assert(MCOpcode != -1 &&
165 "Pseudo instruction doesn't have a target-specific version");
166 OutMI.setOpcode(MCOpcode);
167
168 // lower operands
169 for (int I = 0, E = MI->getNumExplicitOperands(); I < E; I++) {
170 const MachineOperand &MO = MI->getOperand(i: I);
171 MCOperand MCOp;
172 if (I == VDstOrVDataIdx)
173 MCOp = MCOperand::createReg(Reg: TRI.get32BitRegister(Reg: MIVDstOrVData.getReg()));
174 else
175 lowerOperand(MO, MCOp);
176 OutMI.addOperand(Op: MCOp);
177 }
178
179 if (AMDGPU::hasNamedOperand(Opcode: MCOpcode, NamedIdx: AMDGPU::OpName::vdst_in)) {
180 MCOperand MCOp;
181 lowerOperand(MO: MIVDstOrVData, MCOp);
182 OutMI.addOperand(Op: MCOp);
183 }
184}
185
186void AMDGPUMCInstLower::lowerT16FmaMixFP16(const MachineInstr *MI,
187 MCInst &OutMI) const {
188 unsigned Opcode = MI->getOpcode();
189 const auto *TII = static_cast<const SIInstrInfo *>(ST.getInstrInfo());
190 const SIRegisterInfo &TRI = TII->getRegisterInfo();
191
192 int VDstIdx = AMDGPU::getNamedOperandIdx(Opcode, Name: llvm::AMDGPU::OpName::vdst);
193 const MachineOperand &VDst = MI->getOperand(i: VDstIdx);
194 bool IsHi = AMDGPU::isHi16Reg(Reg: VDst.getReg(), MRI: TRI);
195 switch (Opcode) {
196 case AMDGPU::V_FMA_MIX_F16_t16:
197 Opcode = IsHi ? AMDGPU::V_FMA_MIXHI_F16 : AMDGPU::V_FMA_MIXLO_F16;
198 break;
199 case AMDGPU::V_FMA_MIX_BF16_t16:
200 Opcode = IsHi ? AMDGPU::V_FMA_MIXHI_BF16 : AMDGPU::V_FMA_MIXLO_BF16;
201 break;
202 }
203 int MCOpcode = TII->pseudoToMCOpcode(Opcode);
204 assert(MCOpcode != -1 &&
205 "Pseudo instruction doesn't have a target-specific version");
206 OutMI.setOpcode(MCOpcode);
207
208 // lower operands
209 for (int I = 0, E = MI->getNumExplicitOperands(); I < E; I++) {
210 const MachineOperand &MO = MI->getOperand(i: I);
211 MCOperand MCOp;
212 if (I == VDstIdx)
213 MCOp = MCOperand::createReg(Reg: TRI.get32BitRegister(Reg: VDst.getReg()));
214 else
215 lowerOperand(MO, MCOp);
216 OutMI.addOperand(Op: MCOp);
217 }
218}
219
220void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
221 unsigned Opcode = MI->getOpcode();
222 const auto *TII = static_cast<const SIInstrInfo *>(ST.getInstrInfo());
223
224 // FIXME: Should be able to handle this with lowerPseudoInstExpansion. We
225 // need to select it to the subtarget specific version, and there's no way to
226 // do that with a single pseudo source operation.
227 if (Opcode == AMDGPU::S_SETPC_B64_return)
228 Opcode = AMDGPU::S_SETPC_B64;
229 else if (Opcode == AMDGPU::SI_CALL) {
230 // SI_CALL is just S_SWAPPC_B64 with an additional operand to track the
231 // called function (which we need to remove here).
232 OutMI.setOpcode(TII->pseudoToMCOpcode(Opcode: AMDGPU::S_SWAPPC_B64));
233 MCOperand Dest, Src;
234 lowerOperand(MO: MI->getOperand(i: 0), MCOp&: Dest);
235 lowerOperand(MO: MI->getOperand(i: 1), MCOp&: Src);
236 OutMI.addOperand(Op: Dest);
237 OutMI.addOperand(Op: Src);
238 return;
239 } else if (Opcode == AMDGPU::SI_TCRETURN ||
240 Opcode == AMDGPU::SI_TCRETURN_GFX ||
241 Opcode == AMDGPU::SI_TCRETURN_CHAIN) {
242 // TODO: How to use branch immediate and avoid register+add?
243 Opcode = AMDGPU::S_SETPC_B64;
244 } else if (AMDGPU::getT16D16Helper(T16Op: Opcode)) {
245 lowerT16D16Helper(MI, OutMI);
246 return;
247 } else if (Opcode == AMDGPU::V_FMA_MIX_F16_t16 ||
248 Opcode == AMDGPU::V_FMA_MIX_BF16_t16) {
249 lowerT16FmaMixFP16(MI, OutMI);
250 return;
251 }
252
253 int MCOpcode = TII->pseudoToMCOpcode(Opcode);
254 if (MCOpcode == -1) {
255 LLVMContext &C = MI->getMF()->getFunction().getContext();
256 C.emitError(ErrorStr: "AMDGPUMCInstLower::lower - Pseudo instruction doesn't have "
257 "a target-specific version: " + Twine(MI->getOpcode()));
258 return;
259 }
260
261 OutMI.setOpcode(MCOpcode);
262
263 for (const MachineOperand &MO : MI->explicit_operands()) {
264 MCOperand MCOp;
265 lowerOperand(MO, MCOp);
266 OutMI.addOperand(Op: MCOp);
267 }
268
269 int FIIdx = AMDGPU::getNamedOperandIdx(Opcode: MCOpcode, Name: AMDGPU::OpName::fi);
270 if (FIIdx >= (int)OutMI.getNumOperands())
271 OutMI.addOperand(Op: MCOperand::createImm(Val: 0));
272}
273
274bool AMDGPUAsmPrinter::lowerOperand(const MachineOperand &MO,
275 MCOperand &MCOp) const {
276 const GCNSubtarget &STI = MF->getSubtarget<GCNSubtarget>();
277 AMDGPUMCInstLower MCInstLowering(OutContext, STI, *this);
278 return MCInstLowering.lowerOperand(MO, MCOp);
279}
280
281const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV,
282 const Constant *BaseCV,
283 uint64_t Offset) {
284
285 // Intercept LDS variables with known addresses
286 if (const GlobalVariable *GV = dyn_cast<const GlobalVariable>(Val: CV)) {
287 if (std::optional<uint32_t> Address =
288 AMDGPUMachineFunctionInfo::getLDSAbsoluteAddress(GV: *GV)) {
289 auto *IntTy = Type::getInt32Ty(C&: CV->getContext());
290 return AsmPrinter::lowerConstant(CV: ConstantInt::get(Ty: IntTy, V: *Address),
291 BaseCV, Offset);
292 }
293 }
294
295 if (const MCExpr *E = lowerAddrSpaceCast(CV, OutContext))
296 return E;
297 return AsmPrinter::lowerConstant(CV, BaseCV, Offset);
298}
299
300static void emitVGPRBlockComment(const MachineInstr *MI, const SIInstrInfo *TII,
301 const TargetRegisterInfo *TRI,
302 const SIMachineFunctionInfo *MFI,
303 MCStreamer &OS) {
304 // The instruction will only transfer a subset of the registers in the block,
305 // based on the mask that is stored in m0. We could search for the instruction
306 // that sets m0, but most of the time we'll already have the mask stored in
307 // the machine function info. Try to use that. This assumes that we only use
308 // block loads/stores for CSR spills.
309 Register RegBlock =
310 TII->getNamedOperand(MI: *MI, OperandName: MI->mayLoad() ? AMDGPU::OpName::vdst
311 : AMDGPU::OpName::vdata)
312 ->getReg();
313 Register FirstRegInBlock = TRI->getSubReg(Reg: RegBlock, Idx: AMDGPU::sub0);
314 uint32_t Mask = MFI->getMaskForVGPRBlockOps(RegisterBlock: RegBlock);
315
316 if (!Mask)
317 return; // Nothing to report
318
319 SmallString<512> TransferredRegs;
320 for (unsigned I = 0; I < sizeof(Mask) * 8; ++I) {
321 if (Mask & (1 << I)) {
322 (llvm::Twine(" ") + TRI->getRegAsmName(Reg: FirstRegInBlock + I))
323 .toVector(Out&: TransferredRegs);
324 }
325 }
326
327 OS.emitRawComment(T: " transferring at most " + TransferredRegs);
328}
329
330void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
331 if (MI->isCall())
332 collectCallEdge(MI: *MI);
333
334 // FIXME: Enable feature predicate checks once all the test pass.
335 // AMDGPU_MC::verifyInstructionPredicates(MI->getOpcode(),
336 // getSubtargetInfo().getFeatureBits());
337
338 if (MCInst OutInst; lowerPseudoInstExpansion(MI, Inst&: OutInst)) {
339 EmitToStreamer(S&: *OutStreamer, Inst: OutInst);
340 return;
341 }
342
343 const GCNSubtarget &STI = MF->getSubtarget<GCNSubtarget>();
344 AMDGPUMCInstLower MCInstLowering(OutContext, STI, *this);
345
346 StringRef Err;
347 if (!STI.getInstrInfo()->verifyInstruction(MI: *MI, ErrInfo&: Err)) {
348 LLVMContext &C = MI->getMF()->getFunction().getContext();
349 C.emitError(ErrorStr: "Illegal instruction detected: " + Err);
350 MI->print(OS&: errs());
351 }
352
353 if (MI->isBundle()) {
354 const MachineBasicBlock *MBB = MI->getParent();
355 MachineBasicBlock::const_instr_iterator I = ++MI->getIterator();
356 while (I != MBB->instr_end() && I->isInsideBundle()) {
357 emitInstruction(MI: &*I);
358 ++I;
359 }
360 } else {
361 // We don't want these pseudo instructions encoded. They are
362 // placeholder instructions and should only be printed as
363 // comments.
364 if (MI->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) {
365 if (isVerbose())
366 OutStreamer->emitRawComment(T: " return to shader part epilog");
367 return;
368 }
369
370 if (MI->getOpcode() == AMDGPU::WAVE_BARRIER) {
371 if (isVerbose())
372 OutStreamer->emitRawComment(T: " wave barrier");
373 return;
374 }
375
376 if (MI->getOpcode() == AMDGPU::ASYNCMARK) {
377 if (isVerbose())
378 OutStreamer->emitRawComment(T: " asyncmark");
379 return;
380 }
381
382 if (MI->getOpcode() == AMDGPU::WAIT_ASYNCMARK) {
383 if (isVerbose()) {
384 OutStreamer->emitRawComment(T: " wait_asyncmark(" +
385 Twine(MI->getOperand(i: 0).getImm()) + ")");
386 }
387 return;
388 }
389
390 if (MI->getOpcode() == AMDGPU::SCHED_BARRIER) {
391 if (isVerbose()) {
392 std::string HexString;
393 raw_string_ostream HexStream(HexString);
394 HexStream << format_hex(N: MI->getOperand(i: 0).getImm(), Width: 10, Upper: true);
395 OutStreamer->emitRawComment(T: " sched_barrier mask(" + HexString + ")");
396 }
397 return;
398 }
399
400 if (MI->getOpcode() == AMDGPU::SCHED_GROUP_BARRIER) {
401 if (isVerbose()) {
402 std::string HexString;
403 raw_string_ostream HexStream(HexString);
404 HexStream << format_hex(N: MI->getOperand(i: 0).getImm(), Width: 10, Upper: true);
405 OutStreamer->emitRawComment(
406 T: " sched_group_barrier mask(" + HexString + ") size(" +
407 Twine(MI->getOperand(i: 1).getImm()) + ") SyncID(" +
408 Twine(MI->getOperand(i: 2).getImm()) + ")");
409 }
410 return;
411 }
412
413 if (MI->getOpcode() == AMDGPU::IGLP_OPT) {
414 if (isVerbose()) {
415 std::string HexString;
416 raw_string_ostream HexStream(HexString);
417 HexStream << format_hex(N: MI->getOperand(i: 0).getImm(), Width: 10, Upper: true);
418 OutStreamer->emitRawComment(T: " iglp_opt mask(" + HexString + ")");
419 }
420 return;
421 }
422
423 if (MI->getOpcode() == AMDGPU::SI_MASKED_UNREACHABLE) {
424 if (isVerbose())
425 OutStreamer->emitRawComment(T: " divergent unreachable");
426 return;
427 }
428
429 if (MI->isMetaInstruction()) {
430 if (isVerbose())
431 OutStreamer->emitRawComment(T: " meta instruction");
432 return;
433 }
434
435 unsigned Opc = MI->getOpcode();
436 if (LLVM_UNLIKELY(Opc == TargetOpcode::STATEPOINT ||
437 Opc == TargetOpcode::STACKMAP ||
438 Opc == TargetOpcode::PATCHPOINT)) {
439 LLVMContext &Ctx = MI->getMF()->getFunction().getContext();
440 Ctx.emitError(ErrorStr: "unhandled statepoint-like instruction");
441 OutStreamer->emitRawComment(T: "unsupported statepoint/stackmap/patchpoint");
442 return;
443 }
444
445 if (isVerbose())
446 if (STI.getInstrInfo()->isBlockLoadStore(Opcode: MI->getOpcode()))
447 emitVGPRBlockComment(MI, TII: STI.getInstrInfo(), TRI: STI.getRegisterInfo(),
448 MFI: MF->getInfo<SIMachineFunctionInfo>(),
449 OS&: *OutStreamer);
450
451 if (isVerbose() && (MI->getOpcode() == AMDGPU::S_SET_VGPR_MSB ||
452 (MI->getOpcode() == AMDGPU::S_SETREG_IMM32_B32 &&
453 STI.has1024AddressableVGPRs()))) {
454 std::optional<unsigned> V;
455 if (MI->getOpcode() == AMDGPU::S_SETREG_IMM32_B32)
456 V = AMDGPU::convertSetRegImmToVgprMSBs(MI: *MI,
457 HasSetregVGPRMSBFixup: STI.hasSetregVGPRMSBFixup());
458 else
459 V = MI->getOperand(i: 0).getImm() & 0xff;
460 if (V.has_value())
461 OutStreamer->AddComment(
462 T: " msbs: dst=" + Twine(*V >> 6) + " src0=" + Twine(*V & 3) +
463 " src1=" + Twine((*V >> 2) & 3) + " src2=" + Twine((*V >> 4) & 3));
464 }
465
466 MCInst TmpInst;
467 MCInstLowering.lower(MI, OutMI&: TmpInst);
468 EmitToStreamer(S&: *OutStreamer, Inst: TmpInst);
469
470 if (DumpCodeInstEmitter) {
471 // Disassemble instruction/operands to text
472 DisasmLines.resize(new_size: DisasmLines.size() + 1);
473 std::string &DisasmLine = DisasmLines.back();
474 raw_string_ostream DisasmStream(DisasmLine);
475
476 AMDGPUInstPrinter InstPrinter(TM.getMCAsmInfo(), *STI.getInstrInfo(),
477 *STI.getRegisterInfo());
478 InstPrinter.printInst(MI: &TmpInst, Address: 0, Annot: StringRef(), STI, O&: DisasmStream);
479
480 // Disassemble instruction/operands to hex representation.
481 SmallVector<MCFixup, 4> Fixups;
482 SmallVector<char, 16> CodeBytes;
483
484 DumpCodeInstEmitter->encodeInstruction(
485 Inst: TmpInst, CB&: CodeBytes, Fixups, STI: MF->getSubtarget<MCSubtargetInfo>());
486 HexLines.resize(new_size: HexLines.size() + 1);
487 std::string &HexLine = HexLines.back();
488 raw_string_ostream HexStream(HexLine);
489
490 for (size_t i = 0; i < CodeBytes.size(); i += 4) {
491 unsigned int CodeDWord =
492 support::endian::read32le(P: CodeBytes.data() + i);
493 HexStream << format(Fmt: "%s%08X", Vals: (i > 0 ? " " : ""), Vals: CodeDWord);
494 }
495
496 DisasmLineMaxLen = std::max(a: DisasmLineMaxLen, b: DisasmLine.size());
497 }
498 }
499}
500