//===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Code to lower AMDGPU MachineInstrs to their corresponding MCInst.
//
//===----------------------------------------------------------------------===//
//

#include "AMDGPUMCInstLower.h"
#include "AMDGPU.h"
#include "AMDGPUAsmPrinter.h"
#include "AMDGPUMachineFunction.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include <algorithm>
#include <optional>

using namespace llvm;

#include "AMDGPUGenMCPseudoLowering.inc"

40 | |
41 | AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, |
42 | const TargetSubtargetInfo &st, |
43 | const AsmPrinter &ap): |
44 | Ctx(ctx), ST(st), AP(ap) { } |
45 | |
46 | static MCSymbolRefExpr::VariantKind getVariantKind(unsigned MOFlags) { |
47 | switch (MOFlags) { |
48 | default: |
49 | return MCSymbolRefExpr::VK_None; |
50 | case SIInstrInfo::MO_GOTPCREL: |
51 | return MCSymbolRefExpr::VK_GOTPCREL; |
52 | case SIInstrInfo::MO_GOTPCREL32_LO: |
53 | return MCSymbolRefExpr::VK_AMDGPU_GOTPCREL32_LO; |
54 | case SIInstrInfo::MO_GOTPCREL32_HI: |
55 | return MCSymbolRefExpr::VK_AMDGPU_GOTPCREL32_HI; |
56 | case SIInstrInfo::MO_REL32_LO: |
57 | return MCSymbolRefExpr::VK_AMDGPU_REL32_LO; |
58 | case SIInstrInfo::MO_REL32_HI: |
59 | return MCSymbolRefExpr::VK_AMDGPU_REL32_HI; |
60 | case SIInstrInfo::MO_ABS32_LO: |
61 | return MCSymbolRefExpr::VK_AMDGPU_ABS32_LO; |
62 | case SIInstrInfo::MO_ABS32_HI: |
63 | return MCSymbolRefExpr::VK_AMDGPU_ABS32_HI; |
64 | } |
65 | } |
66 | |
67 | bool AMDGPUMCInstLower::lowerOperand(const MachineOperand &MO, |
68 | MCOperand &MCOp) const { |
69 | switch (MO.getType()) { |
70 | default: |
71 | break; |
72 | case MachineOperand::MO_Immediate: |
73 | MCOp = MCOperand::createImm(Val: MO.getImm()); |
74 | return true; |
75 | case MachineOperand::MO_Register: |
76 | MCOp = MCOperand::createReg(Reg: AMDGPU::getMCReg(Reg: MO.getReg(), STI: ST)); |
77 | return true; |
78 | case MachineOperand::MO_MachineBasicBlock: |
79 | MCOp = MCOperand::createExpr( |
80 | Val: MCSymbolRefExpr::create(Symbol: MO.getMBB()->getSymbol(), Ctx)); |
81 | return true; |
82 | case MachineOperand::MO_GlobalAddress: { |
83 | const GlobalValue *GV = MO.getGlobal(); |
84 | SmallString<128> SymbolName; |
85 | AP.getNameWithPrefix(Name&: SymbolName, GV); |
86 | MCSymbol *Sym = Ctx.getOrCreateSymbol(Name: SymbolName); |
87 | const MCExpr *Expr = |
88 | MCSymbolRefExpr::create(Symbol: Sym, Kind: getVariantKind(MOFlags: MO.getTargetFlags()),Ctx); |
89 | int64_t Offset = MO.getOffset(); |
90 | if (Offset != 0) { |
91 | Expr = MCBinaryExpr::createAdd(LHS: Expr, |
92 | RHS: MCConstantExpr::create(Value: Offset, Ctx), Ctx); |
93 | } |
94 | MCOp = MCOperand::createExpr(Val: Expr); |
95 | return true; |
96 | } |
97 | case MachineOperand::MO_ExternalSymbol: { |
98 | MCSymbol *Sym = Ctx.getOrCreateSymbol(Name: StringRef(MO.getSymbolName())); |
99 | const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Symbol: Sym, Ctx); |
100 | MCOp = MCOperand::createExpr(Val: Expr); |
101 | return true; |
102 | } |
103 | case MachineOperand::MO_RegisterMask: |
104 | // Regmasks are like implicit defs. |
105 | return false; |
106 | case MachineOperand::MO_MCSymbol: |
107 | if (MO.getTargetFlags() == SIInstrInfo::MO_FAR_BRANCH_OFFSET) { |
108 | MCSymbol *Sym = MO.getMCSymbol(); |
109 | MCOp = MCOperand::createExpr(Val: Sym->getVariableValue()); |
110 | return true; |
111 | } |
112 | break; |
113 | } |
114 | llvm_unreachable("unknown operand type" ); |
115 | } |
116 | |
117 | void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { |
118 | unsigned Opcode = MI->getOpcode(); |
119 | const auto *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo()); |
120 | |
121 | // FIXME: Should be able to handle this with emitPseudoExpansionLowering. We |
122 | // need to select it to the subtarget specific version, and there's no way to |
123 | // do that with a single pseudo source operation. |
124 | if (Opcode == AMDGPU::S_SETPC_B64_return) |
125 | Opcode = AMDGPU::S_SETPC_B64; |
126 | else if (Opcode == AMDGPU::SI_CALL) { |
127 | // SI_CALL is just S_SWAPPC_B64 with an additional operand to track the |
128 | // called function (which we need to remove here). |
129 | OutMI.setOpcode(TII->pseudoToMCOpcode(Opcode: AMDGPU::S_SWAPPC_B64)); |
130 | MCOperand Dest, Src; |
131 | lowerOperand(MO: MI->getOperand(i: 0), MCOp&: Dest); |
132 | lowerOperand(MO: MI->getOperand(i: 1), MCOp&: Src); |
133 | OutMI.addOperand(Op: Dest); |
134 | OutMI.addOperand(Op: Src); |
135 | return; |
136 | } else if (Opcode == AMDGPU::SI_TCRETURN || |
137 | Opcode == AMDGPU::SI_TCRETURN_GFX) { |
138 | // TODO: How to use branch immediate and avoid register+add? |
139 | Opcode = AMDGPU::S_SETPC_B64; |
140 | } |
141 | |
142 | int MCOpcode = TII->pseudoToMCOpcode(Opcode); |
143 | if (MCOpcode == -1) { |
144 | LLVMContext &C = MI->getParent()->getParent()->getFunction().getContext(); |
145 | C.emitError(ErrorStr: "AMDGPUMCInstLower::lower - Pseudo instruction doesn't have " |
146 | "a target-specific version: " + Twine(MI->getOpcode())); |
147 | } |
148 | |
149 | OutMI.setOpcode(MCOpcode); |
150 | |
151 | for (const MachineOperand &MO : MI->explicit_operands()) { |
152 | MCOperand MCOp; |
153 | lowerOperand(MO, MCOp); |
154 | OutMI.addOperand(Op: MCOp); |
155 | } |
156 | |
157 | int FIIdx = AMDGPU::getNamedOperandIdx(Opcode: MCOpcode, NamedIdx: AMDGPU::OpName::fi); |
158 | if (FIIdx >= (int)OutMI.getNumOperands()) |
159 | OutMI.addOperand(Op: MCOperand::createImm(Val: 0)); |
160 | } |
161 | |
162 | bool AMDGPUAsmPrinter::lowerOperand(const MachineOperand &MO, |
163 | MCOperand &MCOp) const { |
164 | const GCNSubtarget &STI = MF->getSubtarget<GCNSubtarget>(); |
165 | AMDGPUMCInstLower MCInstLowering(OutContext, STI, *this); |
166 | return MCInstLowering.lowerOperand(MO, MCOp); |
167 | } |
168 | |
169 | const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV) { |
170 | |
171 | // Intercept LDS variables with known addresses |
172 | if (const GlobalVariable *GV = dyn_cast<const GlobalVariable>(Val: CV)) { |
173 | if (std::optional<uint32_t> Address = |
174 | AMDGPUMachineFunction::getLDSAbsoluteAddress(GV: *GV)) { |
175 | auto *IntTy = Type::getInt32Ty(C&: CV->getContext()); |
176 | return AsmPrinter::lowerConstant(CV: ConstantInt::get(Ty: IntTy, V: *Address)); |
177 | } |
178 | } |
179 | |
180 | if (const MCExpr *E = lowerAddrSpaceCast(TM, CV, OutContext)) |
181 | return E; |
182 | return AsmPrinter::lowerConstant(CV); |
183 | } |
184 | |
185 | void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) { |
186 | // FIXME: Enable feature predicate checks once all the test pass. |
187 | // AMDGPU_MC::verifyInstructionPredicates(MI->getOpcode(), |
188 | // getSubtargetInfo().getFeatureBits()); |
189 | |
190 | if (emitPseudoExpansionLowering(OutStreamer&: *OutStreamer, MI)) |
191 | return; |
192 | |
193 | const GCNSubtarget &STI = MF->getSubtarget<GCNSubtarget>(); |
194 | AMDGPUMCInstLower MCInstLowering(OutContext, STI, *this); |
195 | |
196 | StringRef Err; |
197 | if (!STI.getInstrInfo()->verifyInstruction(MI: *MI, ErrInfo&: Err)) { |
198 | LLVMContext &C = MI->getParent()->getParent()->getFunction().getContext(); |
199 | C.emitError(ErrorStr: "Illegal instruction detected: " + Err); |
200 | MI->print(OS&: errs()); |
201 | } |
202 | |
203 | if (MI->isBundle()) { |
204 | const MachineBasicBlock *MBB = MI->getParent(); |
205 | MachineBasicBlock::const_instr_iterator I = ++MI->getIterator(); |
206 | while (I != MBB->instr_end() && I->isInsideBundle()) { |
207 | emitInstruction(MI: &*I); |
208 | ++I; |
209 | } |
210 | } else { |
211 | // We don't want these pseudo instructions encoded. They are |
212 | // placeholder terminator instructions and should only be printed as |
213 | // comments. |
214 | if (MI->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) { |
215 | if (isVerbose()) |
216 | OutStreamer->emitRawComment(T: " return to shader part epilog" ); |
217 | return; |
218 | } |
219 | |
220 | if (MI->getOpcode() == AMDGPU::WAVE_BARRIER) { |
221 | if (isVerbose()) |
222 | OutStreamer->emitRawComment(T: " wave barrier" ); |
223 | return; |
224 | } |
225 | |
226 | if (MI->getOpcode() == AMDGPU::SCHED_BARRIER) { |
227 | if (isVerbose()) { |
228 | std::string HexString; |
229 | raw_string_ostream HexStream(HexString); |
230 | HexStream << format_hex(N: MI->getOperand(i: 0).getImm(), Width: 10, Upper: true); |
231 | OutStreamer->emitRawComment(T: " sched_barrier mask(" + HexString + ")" ); |
232 | } |
233 | return; |
234 | } |
235 | |
236 | if (MI->getOpcode() == AMDGPU::SCHED_GROUP_BARRIER) { |
237 | if (isVerbose()) { |
238 | std::string HexString; |
239 | raw_string_ostream HexStream(HexString); |
240 | HexStream << format_hex(N: MI->getOperand(i: 0).getImm(), Width: 10, Upper: true); |
241 | OutStreamer->emitRawComment( |
242 | T: " sched_group_barrier mask(" + HexString + ") size(" + |
243 | Twine(MI->getOperand(i: 1).getImm()) + ") SyncID(" + |
244 | Twine(MI->getOperand(i: 2).getImm()) + ")" ); |
245 | } |
246 | return; |
247 | } |
248 | |
249 | if (MI->getOpcode() == AMDGPU::IGLP_OPT) { |
250 | if (isVerbose()) { |
251 | std::string HexString; |
252 | raw_string_ostream HexStream(HexString); |
253 | HexStream << format_hex(N: MI->getOperand(i: 0).getImm(), Width: 10, Upper: true); |
254 | OutStreamer->emitRawComment(T: " iglp_opt mask(" + HexString + ")" ); |
255 | } |
256 | return; |
257 | } |
258 | |
259 | if (MI->getOpcode() == AMDGPU::SI_MASKED_UNREACHABLE) { |
260 | if (isVerbose()) |
261 | OutStreamer->emitRawComment(T: " divergent unreachable" ); |
262 | return; |
263 | } |
264 | |
265 | if (MI->isMetaInstruction()) { |
266 | if (isVerbose()) |
267 | OutStreamer->emitRawComment(T: " meta instruction" ); |
268 | return; |
269 | } |
270 | |
271 | MCInst TmpInst; |
272 | MCInstLowering.lower(MI, OutMI&: TmpInst); |
273 | EmitToStreamer(S&: *OutStreamer, Inst: TmpInst); |
274 | |
275 | #ifdef EXPENSIVE_CHECKS |
276 | // Check getInstSizeInBytes on explicitly specified CPUs (it cannot |
277 | // work correctly for the generic CPU). |
278 | // |
279 | // The isPseudo check really shouldn't be here, but unfortunately there are |
280 | // some negative lit tests that depend on being able to continue through |
281 | // here even when pseudo instructions haven't been lowered. |
282 | // |
283 | // We also overestimate branch sizes with the offset bug. |
284 | if (!MI->isPseudo() && STI.isCPUStringValid(STI.getCPU()) && |
285 | (!STI.hasOffset3fBug() || !MI->isBranch())) { |
286 | SmallVector<MCFixup, 4> Fixups; |
287 | SmallVector<char, 16> CodeBytes; |
288 | |
289 | std::unique_ptr<MCCodeEmitter> InstEmitter(createAMDGPUMCCodeEmitter( |
290 | *STI.getInstrInfo(), OutContext)); |
291 | InstEmitter->encodeInstruction(TmpInst, CodeBytes, Fixups, STI); |
292 | |
293 | assert(CodeBytes.size() == STI.getInstrInfo()->getInstSizeInBytes(*MI)); |
294 | } |
295 | #endif |
296 | |
297 | if (DumpCodeInstEmitter) { |
298 | // Disassemble instruction/operands to text |
299 | DisasmLines.resize(new_size: DisasmLines.size() + 1); |
300 | std::string &DisasmLine = DisasmLines.back(); |
301 | raw_string_ostream DisasmStream(DisasmLine); |
302 | |
303 | AMDGPUInstPrinter InstPrinter(*TM.getMCAsmInfo(), *STI.getInstrInfo(), |
304 | *STI.getRegisterInfo()); |
305 | InstPrinter.printInst(MI: &TmpInst, Address: 0, Annot: StringRef(), STI, O&: DisasmStream); |
306 | |
307 | // Disassemble instruction/operands to hex representation. |
308 | SmallVector<MCFixup, 4> Fixups; |
309 | SmallVector<char, 16> CodeBytes; |
310 | |
311 | DumpCodeInstEmitter->encodeInstruction( |
312 | Inst: TmpInst, CB&: CodeBytes, Fixups, STI: MF->getSubtarget<MCSubtargetInfo>()); |
313 | HexLines.resize(new_size: HexLines.size() + 1); |
314 | std::string &HexLine = HexLines.back(); |
315 | raw_string_ostream HexStream(HexLine); |
316 | |
317 | for (size_t i = 0; i < CodeBytes.size(); i += 4) { |
318 | unsigned int CodeDWord = *(unsigned int *)&CodeBytes[i]; |
319 | HexStream << format(Fmt: "%s%08X" , Vals: (i > 0 ? " " : "" ), Vals: CodeDWord); |
320 | } |
321 | |
322 | DisasmStream.flush(); |
323 | DisasmLineMaxLen = std::max(a: DisasmLineMaxLen, b: DisasmLine.size()); |
324 | } |
325 | } |
326 | } |
327 | |