//===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Code to lower AMDGPU MachineInstrs to their corresponding MCInst.
//
//===----------------------------------------------------------------------===//
//

#include "AMDGPUMCInstLower.h"
#include "AMDGPU.h"
#include "AMDGPUAsmPrinter.h"
#include "AMDGPUMachineFunction.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCExpr.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include <algorithm>

using namespace llvm;

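// Generated by TableGen from the target's PseudoInstExpansion patterns; this
// provides the lowerPseudoInstExpansion() helper used in emitInstruction()
// below.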
#include "AMDGPUGenMCPseudoLowering.inc"

AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx,
                                     const TargetSubtargetInfo &st,
                                     const AsmPrinter &ap):
  Ctx(ctx), ST(st), AP(ap) { }

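// Map a MachineOperand target flag to the corresponding MC expression
// specifier, i.e. the relocation variant to use for the symbol reference.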
static AMDGPUMCExpr::Specifier getSpecifier(unsigned MOFlags) {
  switch (MOFlags) {
  default:
    return AMDGPUMCExpr::S_None;
  case SIInstrInfo::MO_GOTPCREL:
    return AMDGPUMCExpr::S_GOTPCREL;
  case SIInstrInfo::MO_GOTPCREL32_LO:
    return AMDGPUMCExpr::S_GOTPCREL32_LO;
  case SIInstrInfo::MO_GOTPCREL32_HI:
    return AMDGPUMCExpr::S_GOTPCREL32_HI;
  case SIInstrInfo::MO_REL32_LO:
    return AMDGPUMCExpr::S_REL32_LO;
  case SIInstrInfo::MO_REL32_HI:
    return AMDGPUMCExpr::S_REL32_HI;
  case SIInstrInfo::MO_ABS32_LO:
    return AMDGPUMCExpr::S_ABS32_LO;
  case SIInstrInfo::MO_ABS32_HI:
    return AMDGPUMCExpr::S_ABS32_HI;
  }
}

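// Lower a single MachineOperand to an MCOperand. Returns true on success;
// register masks have no MC equivalent, so they return false and callers are
// expected to skip them.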
bool AMDGPUMCInstLower::lowerOperand(const MachineOperand &MO,
                                     MCOperand &MCOp) const {
  switch (MO.getType()) {
  default:
    break;
  case MachineOperand::MO_Immediate:
    MCOp = MCOperand::createImm(MO.getImm());
    return true;
  case MachineOperand::MO_Register:
    MCOp = MCOperand::createReg(AMDGPU::getMCReg(MO.getReg(), ST));
    return true;
  case MachineOperand::MO_MachineBasicBlock:
    MCOp = MCOperand::createExpr(
        MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx));
    return true;
  case MachineOperand::MO_GlobalAddress: {
    const GlobalValue *GV = MO.getGlobal();
    SmallString<128> SymbolName;
    AP.getNameWithPrefix(SymbolName, GV);
    MCSymbol *Sym = Ctx.getOrCreateSymbol(SymbolName);
    const MCExpr *Expr =
        MCSymbolRefExpr::create(Sym, getSpecifier(MO.getTargetFlags()), Ctx);
    int64_t Offset = MO.getOffset();
    if (Offset != 0) {
      Expr = MCBinaryExpr::createAdd(Expr,
                                     MCConstantExpr::create(Offset, Ctx), Ctx);
    }
    MCOp = MCOperand::createExpr(Expr);
    return true;
  }
  case MachineOperand::MO_ExternalSymbol: {
    MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(MO.getSymbolName()));
    const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
    MCOp = MCOperand::createExpr(Expr);
    return true;
  }
  case MachineOperand::MO_RegisterMask:
    // Regmasks are like implicit defs.
    return false;
  case MachineOperand::MO_MCSymbol:
    if (MO.getTargetFlags() == SIInstrInfo::MO_FAR_BRANCH_OFFSET) {
      MCSymbol *Sym = MO.getMCSymbol();
      MCOp = MCOperand::createExpr(Sym->getVariableValue());
      return true;
    }
    break;
  }
  llvm_unreachable("unknown operand type");
}

// Lower a true16 D16 pseudo instruction to the d16_lo/d16_hi MCInst, based on
// whether the Dst/Data operand selects the .l or .h half of the register.
void AMDGPUMCInstLower::lowerT16D16Helper(const MachineInstr *MI,
                                          MCInst &OutMI) const {
  unsigned Opcode = MI->getOpcode();
  const auto *TII = static_cast<const SIInstrInfo *>(ST.getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  const auto *Info = AMDGPU::getT16D16Helper(Opcode);

  llvm::AMDGPU::OpName OpName;
  if (TII->isDS(Opcode)) {
    if (MI->mayLoad())
      OpName = llvm::AMDGPU::OpName::vdst;
    else if (MI->mayStore())
      OpName = llvm::AMDGPU::OpName::data0;
    else
      llvm_unreachable("LDS load or store expected");
  } else {
    OpName = AMDGPU::hasNamedOperand(Opcode, llvm::AMDGPU::OpName::vdata)
                 ? llvm::AMDGPU::OpName::vdata
                 : llvm::AMDGPU::OpName::vdst;
  }

  // Select the Dst/Data operand.
  int VDstOrVDataIdx = AMDGPU::getNamedOperandIdx(Opcode, OpName);
  const MachineOperand &MIVDstOrVData = MI->getOperand(VDstOrVDataIdx);

  // Select the hi/lo MCInst variant.
  bool IsHi = AMDGPU::isHi16Reg(MIVDstOrVData.getReg(), TRI);
  Opcode = IsHi ? Info->HiOp : Info->LoOp;

  int MCOpcode = TII->pseudoToMCOpcode(Opcode);
  assert(MCOpcode != -1 &&
         "Pseudo instruction doesn't have a target-specific version");
  OutMI.setOpcode(MCOpcode);

  // Lower the operands, replacing the 16-bit Dst/Data register with the
  // 32-bit register that contains it.
  for (int I = 0, E = MI->getNumExplicitOperands(); I < E; I++) {
    const MachineOperand &MO = MI->getOperand(I);
    MCOperand MCOp;
    if (I == VDstOrVDataIdx)
      MCOp = MCOperand::createReg(TRI.get32BitRegister(MIVDstOrVData.getReg()));
    else
      lowerOperand(MO, MCOp);
    OutMI.addOperand(MCOp);
  }

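  // D16 instructions write only half of the destination register; the tied
  // vdst_in operand carries the pass-through value for the untouched half.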
  if (AMDGPU::hasNamedOperand(MCOpcode, AMDGPU::OpName::vdst_in)) {
    MCOperand MCOp;
    lowerOperand(MIVDstOrVData, MCOp);
    OutMI.addOperand(MCOp);
  }
}

void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
  unsigned Opcode = MI->getOpcode();
  const auto *TII = static_cast<const SIInstrInfo *>(ST.getInstrInfo());

  // FIXME: Should be able to handle this with lowerPseudoInstExpansion. We
  // need to select it to the subtarget specific version, and there's no way to
  // do that with a single pseudo source operation.
  if (Opcode == AMDGPU::S_SETPC_B64_return)
    Opcode = AMDGPU::S_SETPC_B64;
  else if (Opcode == AMDGPU::SI_CALL) {
    // SI_CALL is just S_SWAPPC_B64 with an additional operand to track the
    // called function (which we need to remove here).
    OutMI.setOpcode(TII->pseudoToMCOpcode(AMDGPU::S_SWAPPC_B64));
    MCOperand Dest, Src;
    lowerOperand(MI->getOperand(0), Dest);
    lowerOperand(MI->getOperand(1), Src);
    OutMI.addOperand(Dest);
    OutMI.addOperand(Src);
    return;
  } else if (Opcode == AMDGPU::SI_TCRETURN ||
             Opcode == AMDGPU::SI_TCRETURN_GFX) {
    // TODO: How to use branch immediate and avoid register+add?
    Opcode = AMDGPU::S_SETPC_B64;
  } else if (AMDGPU::getT16D16Helper(Opcode)) {
    lowerT16D16Helper(MI, OutMI);
    return;
  }

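  // Translate the pseudo opcode to its subtarget-specific MC opcode; emit a
  // diagnostic instead of crashing if no such encoding exists.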
  int MCOpcode = TII->pseudoToMCOpcode(Opcode);
  if (MCOpcode == -1) {
    LLVMContext &C = MI->getParent()->getParent()->getFunction().getContext();
    C.emitError("AMDGPUMCInstLower::lower - Pseudo instruction doesn't have "
                "a target-specific version: " + Twine(MI->getOpcode()));
  }

  OutMI.setOpcode(MCOpcode);

  for (const MachineOperand &MO : MI->explicit_operands()) {
    MCOperand MCOp;
    lowerOperand(MO, MCOp);
    OutMI.addOperand(MCOp);
  }

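  // If the MC instruction has a trailing 'fi' operand that the pseudo lacks,
  // pad it with a default of 0 so the operand counts match.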
  int FIIdx = AMDGPU::getNamedOperandIdx(MCOpcode, AMDGPU::OpName::fi);
  if (FIIdx >= (int)OutMI.getNumOperands())
    OutMI.addOperand(MCOperand::createImm(0));
}

bool AMDGPUAsmPrinter::lowerOperand(const MachineOperand &MO,
                                    MCOperand &MCOp) const {
  const GCNSubtarget &STI = MF->getSubtarget<GCNSubtarget>();
  AMDGPUMCInstLower MCInstLowering(OutContext, STI, *this);
  return MCInstLowering.lowerOperand(MO, MCOp);
}

const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV,
                                              const Constant *BaseCV,
                                              uint64_t Offset) {

  // Intercept LDS variables with known absolute addresses and lower them as
  // plain 32-bit integer constants instead of symbol references.
  if (const GlobalVariable *GV = dyn_cast<const GlobalVariable>(CV)) {
    if (std::optional<uint32_t> Address =
            AMDGPUMachineFunction::getLDSAbsoluteAddress(*GV)) {
      auto *IntTy = Type::getInt32Ty(CV->getContext());
      return AsmPrinter::lowerConstant(ConstantInt::get(IntTy, *Address),
                                       BaseCV, Offset);
    }
  }

  if (const MCExpr *E = lowerAddrSpaceCast(TM, CV, OutContext))
    return E;
  return AsmPrinter::lowerConstant(CV, BaseCV, Offset);
}

static void emitVGPRBlockComment(const MachineInstr *MI, const SIInstrInfo *TII,
                                 const TargetRegisterInfo *TRI,
                                 const SIMachineFunctionInfo *MFI,
                                 MCStreamer &OS) {
  // The instruction will only transfer a subset of the registers in the block,
  // based on the mask that is stored in m0. We could search for the instruction
  // that sets m0, but most of the time we'll already have the mask stored in
  // the machine function info. Try to use that. This assumes that we only use
  // block loads/stores for CSR spills.
  Register RegBlock =
      TII->getNamedOperand(*MI, MI->mayLoad() ? AMDGPU::OpName::vdst
                                              : AMDGPU::OpName::vdata)
          ->getReg();
  Register FirstRegInBlock = TRI->getSubReg(RegBlock, AMDGPU::sub0);
  uint32_t Mask = MFI->getMaskForVGPRBlockOps(RegBlock);

  if (!Mask)
    return; // Nothing to report

  SmallString<512> TransferredRegs;
  for (unsigned I = 0; I < sizeof(Mask) * 8; ++I) {
    if (Mask & (1 << I)) {
      (llvm::Twine(" ") + TRI->getRegAsmName(FirstRegInBlock + I))
          .toVector(TransferredRegs);
    }
  }

  OS.emitRawComment(" transferring at most " + TransferredRegs);
}

void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
  // FIXME: Enable feature predicate checks once all the tests pass.
  // AMDGPU_MC::verifyInstructionPredicates(MI->getOpcode(),
  // getSubtargetInfo().getFeatureBits());

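  // First try the TableGen'erated pseudo expansions from
  // AMDGPUGenMCPseudoLowering.inc.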
  if (MCInst OutInst; lowerPseudoInstExpansion(MI, OutInst)) {
    EmitToStreamer(*OutStreamer, OutInst);
    return;
  }

  const GCNSubtarget &STI = MF->getSubtarget<GCNSubtarget>();
  AMDGPUMCInstLower MCInstLowering(OutContext, STI, *this);

  StringRef Err;
  if (!STI.getInstrInfo()->verifyInstruction(*MI, Err)) {
    LLVMContext &C = MI->getParent()->getParent()->getFunction().getContext();
    C.emitError("Illegal instruction detected: " + Err);
    MI->print(errs());
  }

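  // A bundle has no encoding of its own; emit each bundled instruction
  // individually.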
  if (MI->isBundle()) {
    const MachineBasicBlock *MBB = MI->getParent();
    MachineBasicBlock::const_instr_iterator I = ++MI->getIterator();
    while (I != MBB->instr_end() && I->isInsideBundle()) {
      emitInstruction(&*I);
      ++I;
    }
  } else {
    // We don't want these pseudo instructions encoded. They are
    // placeholder terminator instructions and should only be printed as
    // comments.
    if (MI->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) {
      if (isVerbose())
        OutStreamer->emitRawComment(" return to shader part epilog");
      return;
    }

    if (MI->getOpcode() == AMDGPU::WAVE_BARRIER) {
      if (isVerbose())
        OutStreamer->emitRawComment(" wave barrier");
      return;
    }

    if (MI->getOpcode() == AMDGPU::SCHED_BARRIER) {
      if (isVerbose()) {
        std::string HexString;
        raw_string_ostream HexStream(HexString);
        HexStream << format_hex(MI->getOperand(0).getImm(), 10, true);
        OutStreamer->emitRawComment(" sched_barrier mask(" + HexString + ")");
      }
      return;
    }

    if (MI->getOpcode() == AMDGPU::SCHED_GROUP_BARRIER) {
      if (isVerbose()) {
        std::string HexString;
        raw_string_ostream HexStream(HexString);
        HexStream << format_hex(MI->getOperand(0).getImm(), 10, true);
        OutStreamer->emitRawComment(
            " sched_group_barrier mask(" + HexString + ") size(" +
            Twine(MI->getOperand(1).getImm()) + ") SyncID(" +
            Twine(MI->getOperand(2).getImm()) + ")");
      }
      return;
    }

    if (MI->getOpcode() == AMDGPU::IGLP_OPT) {
      if (isVerbose()) {
        std::string HexString;
        raw_string_ostream HexStream(HexString);
        HexStream << format_hex(MI->getOperand(0).getImm(), 10, true);
        OutStreamer->emitRawComment(" iglp_opt mask(" + HexString + ")");
      }
      return;
    }

    if (MI->getOpcode() == AMDGPU::SI_MASKED_UNREACHABLE) {
      if (isVerbose())
        OutStreamer->emitRawComment(" divergent unreachable");
      return;
    }

    if (MI->isMetaInstruction()) {
      if (isVerbose())
        OutStreamer->emitRawComment(" meta instruction");
      return;
    }

    if (isVerbose())
      if (STI.getInstrInfo()->isBlockLoadStore(MI->getOpcode()))
        emitVGPRBlockComment(MI, STI.getInstrInfo(), STI.getRegisterInfo(),
                             MF->getInfo<SIMachineFunctionInfo>(),
                             *OutStreamer);

    MCInst TmpInst;
    MCInstLowering.lower(MI, TmpInst);
    EmitToStreamer(*OutStreamer, TmpInst);

#ifdef EXPENSIVE_CHECKS
    // Check getInstSizeInBytes on explicitly specified CPUs (it cannot
    // work correctly for the generic CPU).
    //
    // The isPseudo check really shouldn't be here, but unfortunately there are
    // some negative lit tests that depend on being able to continue through
    // here even when pseudo instructions haven't been lowered.
    //
    // We also overestimate branch sizes with the offset bug.
    if (!MI->isPseudo() && STI.isCPUStringValid(STI.getCPU()) &&
        (!STI.hasOffset3fBug() || !MI->isBranch())) {
      SmallVector<MCFixup, 4> Fixups;
      SmallVector<char, 16> CodeBytes;

      std::unique_ptr<MCCodeEmitter> InstEmitter(createAMDGPUMCCodeEmitter(
          *STI.getInstrInfo(), OutContext));
      InstEmitter->encodeInstruction(TmpInst, CodeBytes, Fixups, STI);

      assert(CodeBytes.size() == STI.getInstrInfo()->getInstSizeInBytes(*MI));
    }
#endif

    if (DumpCodeInstEmitter) {
      // Disassemble instruction/operands to text.
      DisasmLines.resize(DisasmLines.size() + 1);
      std::string &DisasmLine = DisasmLines.back();
      raw_string_ostream DisasmStream(DisasmLine);

      AMDGPUInstPrinter InstPrinter(*TM.getMCAsmInfo(), *STI.getInstrInfo(),
                                    *STI.getRegisterInfo());
      InstPrinter.printInst(&TmpInst, 0, StringRef(), STI, DisasmStream);

      // Disassemble instruction/operands to hex representation.
      SmallVector<MCFixup, 4> Fixups;
      SmallVector<char, 16> CodeBytes;

      DumpCodeInstEmitter->encodeInstruction(
          TmpInst, CodeBytes, Fixups, MF->getSubtarget<MCSubtargetInfo>());
      HexLines.resize(HexLines.size() + 1);
      std::string &HexLine = HexLines.back();
      raw_string_ostream HexStream(HexLine);

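      // The encoding bytes are little-endian; print them one 32-bit dword at
      // a time.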
      for (size_t i = 0; i < CodeBytes.size(); i += 4) {
        unsigned int CodeDWord =
            support::endian::read32le(CodeBytes.data() + i);
        HexStream << format("%s%08X", (i > 0 ? " " : ""), CodeDWord);
      }

      DisasmLineMaxLen = std::max(DisasmLineMaxLen, DisasmLine.size());
    }
  }
}