//===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Code to lower AMDGPU MachineInstrs to their corresponding MCInst.
//
//===----------------------------------------------------------------------===//
//

#include "AMDGPUMCInstLower.h"
#include "AMDGPU.h"
#include "AMDGPUAsmPrinter.h"
#include "AMDGPUMachineFunction.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCExpr.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include <algorithm>

using namespace llvm;

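// TableGen'erated pseudo-instruction expansions; this provides the
// lowerPseudoInstExpansion() helper used by emitInstruction() below.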
#include "AMDGPUGenMCPseudoLowering.inc"

AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx,
                                     const TargetSubtargetInfo &st,
                                     const AsmPrinter &ap)
    : Ctx(ctx), ST(st), AP(ap) {}

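// Map a machine operand's target flag (SIInstrInfo::MO_*) to the
// corresponding AMDGPU MCExpr relocation specifier.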
static AMDGPUMCExpr::Specifier getSpecifier(unsigned MOFlags) {
  switch (MOFlags) {
  default:
    return AMDGPUMCExpr::S_None;
  case SIInstrInfo::MO_GOTPCREL:
    return AMDGPUMCExpr::S_GOTPCREL;
  case SIInstrInfo::MO_GOTPCREL32_LO:
    return AMDGPUMCExpr::S_GOTPCREL32_LO;
  case SIInstrInfo::MO_GOTPCREL32_HI:
    return AMDGPUMCExpr::S_GOTPCREL32_HI;
  case SIInstrInfo::MO_REL32_LO:
    return AMDGPUMCExpr::S_REL32_LO;
  case SIInstrInfo::MO_REL32_HI:
    return AMDGPUMCExpr::S_REL32_HI;
  case SIInstrInfo::MO_ABS32_LO:
    return AMDGPUMCExpr::S_ABS32_LO;
  case SIInstrInfo::MO_ABS32_HI:
    return AMDGPUMCExpr::S_ABS32_HI;
  }
}

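// Lower a single MachineOperand to an MCOperand. Returns true on success;
// register masks have no MC representation and return false.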
bool AMDGPUMCInstLower::lowerOperand(const MachineOperand &MO,
                                     MCOperand &MCOp) const {
  switch (MO.getType()) {
  default:
    break;
  case MachineOperand::MO_Immediate:
    MCOp = MCOperand::createImm(MO.getImm());
    return true;
  case MachineOperand::MO_Register:
    MCOp = MCOperand::createReg(AMDGPU::getMCReg(MO.getReg(), ST));
    return true;
  case MachineOperand::MO_MachineBasicBlock:
    MCOp = MCOperand::createExpr(
        MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx));
    return true;
  case MachineOperand::MO_GlobalAddress: {
    const GlobalValue *GV = MO.getGlobal();
    SmallString<128> SymbolName;
    AP.getNameWithPrefix(SymbolName, GV);
    MCSymbol *Sym = Ctx.getOrCreateSymbol(SymbolName);
    const MCExpr *Expr =
        MCSymbolRefExpr::create(Sym, getSpecifier(MO.getTargetFlags()), Ctx);
    int64_t Offset = MO.getOffset();
    if (Offset != 0) {
      Expr = MCBinaryExpr::createAdd(Expr,
                                     MCConstantExpr::create(Offset, Ctx), Ctx);
    }
    MCOp = MCOperand::createExpr(Expr);
    return true;
  }
  case MachineOperand::MO_ExternalSymbol: {
    MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(MO.getSymbolName()));
    const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
    MCOp = MCOperand::createExpr(Expr);
    return true;
  }
  case MachineOperand::MO_RegisterMask:
    // Regmasks are like implicit defs.
    return false;
  case MachineOperand::MO_MCSymbol:
    if (MO.getTargetFlags() == SIInstrInfo::MO_FAR_BRANCH_OFFSET) {
      MCSymbol *Sym = MO.getMCSymbol();
      MCOp = MCOperand::createExpr(Sym->getVariableValue());
      return true;
    }
    break;
  }
  llvm_unreachable("unknown operand type");
}

// Lower true16 D16 Pseudo instruction to d16_lo/d16_hi MCInst based on
// Dst/Data's .l/.h selection.
void AMDGPUMCInstLower::lowerT16D16Helper(const MachineInstr *MI,
                                          MCInst &OutMI) const {
  unsigned Opcode = MI->getOpcode();
  const auto *TII = static_cast<const SIInstrInfo *>(ST.getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  const auto *Info = AMDGPU::getT16D16Helper(Opcode);

  llvm::AMDGPU::OpName OpName;
  if (TII->isDS(Opcode)) {
    if (MI->mayLoad())
      OpName = llvm::AMDGPU::OpName::vdst;
    else if (MI->mayStore())
      OpName = llvm::AMDGPU::OpName::data0;
    else
      llvm_unreachable("LDS load or store expected");
  } else {
    OpName = AMDGPU::hasNamedOperand(Opcode, llvm::AMDGPU::OpName::vdata)
                 ? llvm::AMDGPU::OpName::vdata
                 : llvm::AMDGPU::OpName::vdst;
  }

  // Select Dst/Data.
  int VDstOrVDataIdx = AMDGPU::getNamedOperandIdx(Opcode, OpName);
  const MachineOperand &MIVDstOrVData = MI->getOperand(VDstOrVDataIdx);

  // Select the hi/lo MCInst.
  bool IsHi = AMDGPU::isHi16Reg(MIVDstOrVData.getReg(), TRI);
  Opcode = IsHi ? Info->HiOp : Info->LoOp;

  int MCOpcode = TII->pseudoToMCOpcode(Opcode);
  assert(MCOpcode != -1 &&
         "Pseudo instruction doesn't have a target-specific version");
  OutMI.setOpcode(MCOpcode);

  // Lower operands.
  for (int I = 0, E = MI->getNumExplicitOperands(); I < E; I++) {
    const MachineOperand &MO = MI->getOperand(I);
    MCOperand MCOp;
    if (I == VDstOrVDataIdx)
      MCOp = MCOperand::createReg(TRI.get32BitRegister(MIVDstOrVData.getReg()));
    else
      lowerOperand(MO, MCOp);
    OutMI.addOperand(MCOp);
  }

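  // If the target instruction takes a tied vdst_in source (the old value of
  // the destination, presumably needed so the unwritten half of the 32-bit
  // register is preserved), forward the original register to it.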
  if (AMDGPU::hasNamedOperand(MCOpcode, AMDGPU::OpName::vdst_in)) {
    MCOperand MCOp;
    lowerOperand(MIVDstOrVData, MCOp);
    OutMI.addOperand(MCOp);
  }
}

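// Lower a MachineInstr to an MCInst: handle the special-case pseudos first,
// then translate the pseudo opcode to its subtarget-specific MC opcode and
// lower each explicit operand.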
void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
  unsigned Opcode = MI->getOpcode();
  const auto *TII = static_cast<const SIInstrInfo *>(ST.getInstrInfo());

  // FIXME: Should be able to handle this with lowerPseudoInstExpansion. We
  // need to select it to the subtarget specific version, and there's no way to
  // do that with a single pseudo source operation.
  if (Opcode == AMDGPU::S_SETPC_B64_return)
    Opcode = AMDGPU::S_SETPC_B64;
  else if (Opcode == AMDGPU::SI_CALL) {
    // SI_CALL is just S_SWAPPC_B64 with an additional operand to track the
    // called function (which we need to remove here).
    OutMI.setOpcode(TII->pseudoToMCOpcode(AMDGPU::S_SWAPPC_B64));
    MCOperand Dest, Src;
    lowerOperand(MI->getOperand(0), Dest);
    lowerOperand(MI->getOperand(1), Src);
    OutMI.addOperand(Dest);
    OutMI.addOperand(Src);
    return;
  } else if (Opcode == AMDGPU::SI_TCRETURN ||
             Opcode == AMDGPU::SI_TCRETURN_GFX) {
    // TODO: How to use branch immediate and avoid register+add?
    Opcode = AMDGPU::S_SETPC_B64;
  } else if (AMDGPU::getT16D16Helper(Opcode)) {
    lowerT16D16Helper(MI, OutMI);
    return;
  }

  int MCOpcode = TII->pseudoToMCOpcode(Opcode);
  if (MCOpcode == -1) {
    LLVMContext &C = MI->getParent()->getParent()->getFunction().getContext();
    C.emitError("AMDGPUMCInstLower::lower - Pseudo instruction doesn't have "
                "a target-specific version: " + Twine(MI->getOpcode()));
  }

  OutMI.setOpcode(MCOpcode);

  for (const MachineOperand &MO : MI->explicit_operands()) {
    MCOperand MCOp;
    lowerOperand(MO, MCOp);
    OutMI.addOperand(MCOp);
  }

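  // Some MC opcodes carry a trailing 'fi' operand (likely the DPP "fetch
  // inactive" modifier) that the pseudo lacks; if it is missing, pad the
  // operand list with a default immediate of 0.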
  int FIIdx = AMDGPU::getNamedOperandIdx(MCOpcode, AMDGPU::OpName::fi);
  if (FIIdx >= (int)OutMI.getNumOperands())
    OutMI.addOperand(MCOperand::createImm(0));
}

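// Thin wrapper so generic AsmPrinter code can lower individual operands
// through the AMDGPU-specific lowering.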
bool AMDGPUAsmPrinter::lowerOperand(const MachineOperand &MO,
                                    MCOperand &MCOp) const {
  const GCNSubtarget &STI = MF->getSubtarget<GCNSubtarget>();
  AMDGPUMCInstLower MCInstLowering(OutContext, STI, *this);
  return MCInstLowering.lowerOperand(MO, MCOp);
}

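// Lower a constant for emission into the output stream. LDS globals with a
// known absolute address are lowered to that 32-bit address rather than to a
// symbol reference.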
const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV,
                                              const Constant *BaseCV,
                                              uint64_t Offset) {

  // Intercept LDS variables with known addresses.
  if (const GlobalVariable *GV = dyn_cast<const GlobalVariable>(CV)) {
    if (std::optional<uint32_t> Address =
            AMDGPUMachineFunction::getLDSAbsoluteAddress(*GV)) {
      auto *IntTy = Type::getInt32Ty(CV->getContext());
      return AsmPrinter::lowerConstant(ConstantInt::get(IntTy, *Address),
                                       BaseCV, Offset);
    }
  }

  if (const MCExpr *E = lowerAddrSpaceCast(TM, CV, OutContext))
    return E;
  return AsmPrinter::lowerConstant(CV, BaseCV, Offset);
}

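// Emit a comment listing the VGPRs that a block load/store instruction may
// transfer, based on the mask recorded in the machine function info.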
static void emitVGPRBlockComment(const MachineInstr *MI,
                                 const SIInstrInfo *TII,
                                 const TargetRegisterInfo *TRI,
                                 const SIMachineFunctionInfo *MFI,
                                 MCStreamer &OS) {
  // The instruction will only transfer a subset of the registers in the block,
  // based on the mask that is stored in m0. We could search for the instruction
  // that sets m0, but most of the time we'll already have the mask stored in
  // the machine function info. Try to use that. This assumes that we only use
  // block loads/stores for CSR spills.
  Register RegBlock =
      TII->getNamedOperand(*MI, MI->mayLoad() ? AMDGPU::OpName::vdst
                                              : AMDGPU::OpName::vdata)
          ->getReg();
  Register FirstRegInBlock = TRI->getSubReg(RegBlock, AMDGPU::sub0);
  uint32_t Mask = MFI->getMaskForVGPRBlockOps(RegBlock);

  if (!Mask)
    return; // Nothing to report

  SmallString<512> TransferredRegs;
  for (unsigned I = 0; I < sizeof(Mask) * 8; ++I) {
    if (Mask & (1 << I)) {
      (llvm::Twine(" ") + TRI->getRegAsmName(FirstRegInBlock + I))
          .toVector(TransferredRegs);
    }
  }

  OS.emitRawComment(" transferring at most " + TransferredRegs);
}

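// Lower each MachineInstr and emit it to the streamer. Placeholder pseudos
// (wave barriers, scheduling barriers, etc.) are emitted only as comments.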
void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
  // FIXME: Enable feature predicate checks once all the tests pass.
  // AMDGPU_MC::verifyInstructionPredicates(MI->getOpcode(),
  //                                        getSubtargetInfo().getFeatureBits());

  if (MCInst OutInst; lowerPseudoInstExpansion(MI, OutInst)) {
    EmitToStreamer(*OutStreamer, OutInst);
    return;
  }

  const GCNSubtarget &STI = MF->getSubtarget<GCNSubtarget>();
  AMDGPUMCInstLower MCInstLowering(OutContext, STI, *this);

  StringRef Err;
  if (!STI.getInstrInfo()->verifyInstruction(*MI, Err)) {
    LLVMContext &C = MI->getParent()->getParent()->getFunction().getContext();
    C.emitError("Illegal instruction detected: " + Err);
    MI->print(errs());
  }

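  // Bundles have no encoding of their own; emit each bundled instruction
  // individually.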
  if (MI->isBundle()) {
    const MachineBasicBlock *MBB = MI->getParent();
    MachineBasicBlock::const_instr_iterator I = ++MI->getIterator();
    while (I != MBB->instr_end() && I->isInsideBundle()) {
      emitInstruction(&*I);
      ++I;
    }
  } else {
    // We don't want these pseudo instructions encoded. They are
    // placeholder terminator instructions and should only be printed as
    // comments.
    if (MI->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) {
      if (isVerbose())
        OutStreamer->emitRawComment(" return to shader part epilog");
      return;
    }

    if (MI->getOpcode() == AMDGPU::WAVE_BARRIER) {
      if (isVerbose())
        OutStreamer->emitRawComment(" wave barrier");
      return;
    }

    if (MI->getOpcode() == AMDGPU::SCHED_BARRIER) {
      if (isVerbose()) {
        std::string HexString;
        raw_string_ostream HexStream(HexString);
        HexStream << format_hex(MI->getOperand(0).getImm(), 10, true);
        OutStreamer->emitRawComment(" sched_barrier mask(" + HexString + ")");
      }
      return;
    }

    if (MI->getOpcode() == AMDGPU::SCHED_GROUP_BARRIER) {
      if (isVerbose()) {
        std::string HexString;
        raw_string_ostream HexStream(HexString);
        HexStream << format_hex(MI->getOperand(0).getImm(), 10, true);
        OutStreamer->emitRawComment(
            " sched_group_barrier mask(" + HexString + ") size(" +
            Twine(MI->getOperand(1).getImm()) + ") SyncID(" +
            Twine(MI->getOperand(2).getImm()) + ")");
      }
      return;
    }

    if (MI->getOpcode() == AMDGPU::IGLP_OPT) {
      if (isVerbose()) {
        std::string HexString;
        raw_string_ostream HexStream(HexString);
        HexStream << format_hex(MI->getOperand(0).getImm(), 10, true);
        OutStreamer->emitRawComment(" iglp_opt mask(" + HexString + ")");
      }
      return;
    }

    if (MI->getOpcode() == AMDGPU::SI_MASKED_UNREACHABLE) {
      if (isVerbose())
        OutStreamer->emitRawComment(" divergent unreachable");
      return;
    }

    if (MI->isMetaInstruction()) {
      if (isVerbose())
        OutStreamer->emitRawComment(" meta instruction");
      return;
    }

    if (isVerbose())
      if (STI.getInstrInfo()->isBlockLoadStore(MI->getOpcode()))
        emitVGPRBlockComment(MI, STI.getInstrInfo(), STI.getRegisterInfo(),
                             MF->getInfo<SIMachineFunctionInfo>(),
                             *OutStreamer);

    MCInst TmpInst;
    MCInstLowering.lower(MI, TmpInst);
    EmitToStreamer(*OutStreamer, TmpInst);

#ifdef EXPENSIVE_CHECKS
    // Check getInstSizeInBytes on explicitly specified CPUs (it cannot
    // work correctly for the generic CPU).
    //
    // The isPseudo check really shouldn't be here, but unfortunately there are
    // some negative lit tests that depend on being able to continue through
    // here even when pseudo instructions haven't been lowered.
    //
    // We also overestimate branch sizes with the offset bug.
    if (!MI->isPseudo() && STI.isCPUStringValid(STI.getCPU()) &&
        (!STI.hasOffset3fBug() || !MI->isBranch())) {
      SmallVector<MCFixup, 4> Fixups;
      SmallVector<char, 16> CodeBytes;

      std::unique_ptr<MCCodeEmitter> InstEmitter(createAMDGPUMCCodeEmitter(
          *STI.getInstrInfo(), OutContext));
      InstEmitter->encodeInstruction(TmpInst, CodeBytes, Fixups, STI);

      assert(CodeBytes.size() == STI.getInstrInfo()->getInstSizeInBytes(*MI));
    }
#endif

    if (DumpCodeInstEmitter) {
      // Disassemble instruction/operands to text.
      DisasmLines.resize(DisasmLines.size() + 1);
      std::string &DisasmLine = DisasmLines.back();
      raw_string_ostream DisasmStream(DisasmLine);

      AMDGPUInstPrinter InstPrinter(*TM.getMCAsmInfo(), *STI.getInstrInfo(),
                                    *STI.getRegisterInfo());
      InstPrinter.printInst(&TmpInst, 0, StringRef(), STI, DisasmStream);

      // Disassemble instruction/operands to hex representation.
      SmallVector<MCFixup, 4> Fixups;
      SmallVector<char, 16> CodeBytes;

      DumpCodeInstEmitter->encodeInstruction(
          TmpInst, CodeBytes, Fixups, MF->getSubtarget<MCSubtargetInfo>());
      HexLines.resize(HexLines.size() + 1);
      std::string &HexLine = HexLines.back();
      raw_string_ostream HexStream(HexLine);

      for (size_t i = 0; i < CodeBytes.size(); i += 4) {
        unsigned int CodeDWord =
            support::endian::read32le(CodeBytes.data() + i);
        HexStream << format("%s%08X", (i > 0 ? " " : ""), CodeDWord);
      }

      DisasmLineMaxLen = std::max(DisasmLineMaxLen, DisasmLine.size());
    }
  }
}