1//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//===----------------------------------------------------------------------===//
10//
11/// \file
12///
/// This file contains the definition of the AMDGPU ISA disassembler.
14//
15//===----------------------------------------------------------------------===//
16
17// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
18
19#include "Disassembler/AMDGPUDisassembler.h"
20#include "MCTargetDesc/AMDGPUMCExpr.h"
21#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
22#include "SIDefines.h"
23#include "SIRegisterInfo.h"
24#include "TargetInfo/AMDGPUTargetInfo.h"
25#include "Utils/AMDGPUAsmUtils.h"
26#include "Utils/AMDGPUBaseInfo.h"
27#include "llvm-c/DisassemblerTypes.h"
28#include "llvm/BinaryFormat/ELF.h"
29#include "llvm/MC/MCAsmInfo.h"
30#include "llvm/MC/MCContext.h"
31#include "llvm/MC/MCDecoder.h"
32#include "llvm/MC/MCDecoderOps.h"
33#include "llvm/MC/MCExpr.h"
34#include "llvm/MC/MCInstrDesc.h"
35#include "llvm/MC/MCRegisterInfo.h"
36#include "llvm/MC/MCSubtargetInfo.h"
37#include "llvm/MC/TargetRegistry.h"
38#include "llvm/Support/AMDHSAKernelDescriptor.h"
39#include "llvm/Support/Compiler.h"
40
41using namespace llvm;
42using namespace llvm::MCD;
43
44#define DEBUG_TYPE "amdgpu-disassembler"
45
// Largest SGPR encoding value for the current subtarget. The expansion calls
// isGFX10Plus(), so the macro is only usable where that name resolves.
#define SGPR_MAX                                                               \
  (!isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_SI                             \
                  : AMDGPU::EncValues::SGPR_MAX_GFX10)
49
50using DecodeStatus = llvm::MCDisassembler::DecodeStatus;
51
52static int64_t getInlineImmValF16(unsigned Imm);
53static int64_t getInlineImmValBF16(unsigned Imm);
54static int64_t getInlineImmVal32(unsigned Imm);
55static int64_t getInlineImmVal64(unsigned Imm);
56
57AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
58 MCContext &Ctx, MCInstrInfo const *MCII)
59 : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
60 MAI(Ctx.getAsmInfo()),
61 HwModeRegClass(STI.getHwMode(type: MCSubtargetInfo::HwMode_RegInfo)),
62 TargetMaxInstBytes(MAI.getMaxInstLength(STI: &STI)),
63 CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
64 // ToDo: AMDGPUDisassembler supports only VI ISA.
65 if (!STI.hasFeature(Feature: AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
66 reportFatalUsageError(reason: "disassembly not yet supported for subtarget");
67
68 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
69 createConstantSymbolExpr(Id: Symbol, Val: Code);
70
71 UCVersionW64Expr = createConstantSymbolExpr(Id: "UC_VERSION_W64_BIT", Val: 0x2000);
72 UCVersionW32Expr = createConstantSymbolExpr(Id: "UC_VERSION_W32_BIT", Val: 0x4000);
73 UCVersionMDPExpr = createConstantSymbolExpr(Id: "UC_VERSION_MDP_BIT", Val: 0x8000);
74}
75
76void AMDGPUDisassembler::setABIVersion(unsigned Version) {
77 CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(ABIVersion: Version);
78}
79
80void AMDGPUDisassembler::emitTargetIDIfSupported(raw_ostream &OS,
81 unsigned EFlags) const {
82 OS << "\t.amdgcn_target \""
83 << STI.getTargetTriple().normalize(Form: Triple::CanonicalForm::FOUR_IDENT)
84 << '-';
85
86 // Get CPU name from ELF e_flags MACH field
87 unsigned MACH = EFlags & ELF::EF_AMDGPU_MACH;
88
89#define X(NUM, ENUM, NAME) \
90 case ELF::ENUM: \
91 OS << NAME; \
92 break;
93 switch (MACH) {
94 AMDGPU_MACH_LIST(X)
95 default:
96 OS << "unknown";
97 break;
98 }
99#undef X
100
101 // Add xnack and sramecc from ELF flags (v4 format)
102 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV4) {
103 unsigned SrameccSetting = EFlags & ELF::EF_AMDGPU_FEATURE_SRAMECC_V4;
104 switch (SrameccSetting) {
105 case ELF::EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
106 case ELF::EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
107 break;
108 case ELF::EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
109 OS << ":sramecc-";
110 break;
111 case ELF::EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
112 OS << ":sramecc+";
113 break;
114 }
115
116 unsigned XnackSetting = EFlags & ELF::EF_AMDGPU_FEATURE_XNACK_V4;
117 switch (XnackSetting) {
118 case ELF::EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
119 case ELF::EF_AMDGPU_FEATURE_XNACK_ANY_V4:
120 break;
121 case ELF::EF_AMDGPU_FEATURE_XNACK_OFF_V4:
122 OS << ":xnack-";
123 break;
124 case ELF::EF_AMDGPU_FEATURE_XNACK_ON_V4:
125 OS << ":xnack+";
126 break;
127 }
128 }
129
130 OS << "\"\n";
131}
132
133inline static MCDisassembler::DecodeStatus
134addOperand(MCInst &Inst, const MCOperand& Opnd) {
135 Inst.addOperand(Op: Opnd);
136 return Opnd.isValid() ?
137 MCDisassembler::Success :
138 MCDisassembler::Fail;
139}
140
141static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
142 AMDGPU::OpName Name) {
143 int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name);
144 if (OpIdx != -1) {
145 auto *I = MI.begin();
146 std::advance(i&: I, n: OpIdx);
147 MI.insert(I, Op);
148 }
149 return OpIdx;
150}
151
152static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
153 uint64_t Addr,
154 const MCDisassembler *Decoder) {
155 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
156
157 // Our branches take a simm16.
158 int64_t Offset = SignExtend64<16>(x: Imm) * 4 + 4 + Addr;
159
160 if (DAsm->tryAddingSymbolicOperand(Inst, Value: Offset, Address: Addr, IsBranch: true, Offset: 2, OpSize: 2, InstSize: 0))
161 return MCDisassembler::Success;
162 return addOperand(Inst, Opnd: MCOperand::createImm(Val: Imm));
163}
164
165static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
166 const MCDisassembler *Decoder) {
167 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
168 int64_t Offset;
169 if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
170 Offset = SignExtend64<24>(x: Imm);
171 } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
172 Offset = Imm & 0xFFFFF;
173 } else { // GFX9+ supports 21-bit signed offsets.
174 Offset = SignExtend64<21>(x: Imm);
175 }
176 return addOperand(Inst, Opnd: MCOperand::createImm(Val: Offset));
177}
178
179static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
180 const MCDisassembler *Decoder) {
181 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
182 return addOperand(Inst, Opnd: DAsm->decodeBoolReg(Inst, Val));
183}
184
185static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
186 uint64_t Addr,
187 const MCDisassembler *Decoder) {
188 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
189 return addOperand(Inst, Opnd: DAsm->decodeSplitBarrier(Inst, Val));
190}
191
192static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
193 const MCDisassembler *Decoder) {
194 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
195 return addOperand(Inst, Opnd: DAsm->decodeDpp8FI(Val));
196}
197
// Defines a static decoder callback named StaticDecoderName that passes the
// encoded value to the AMDGPUDisassembler member DecoderName and appends the
// operand it returns to the instruction.
#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);       \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }
205
// Decoder for registers, decode directly using RegClassID. Imm (8-bit) is the
// register number. Used by VGPR-only and AGPR-only operands.
//
// Expands to a static function Decode<RegClass>RegisterClass. The range check
// uses isUInt<8>, like the other decoders in this file, rather than comparing
// the unsigned Imm against a signed shift result.
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(isUInt<8>(Imm) && "8-bit encoding");                                \
    const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);       \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }
217
// Defines a static decoder callback Name that asserts Imm fits in EncSize bits
// and then decodes EncImm through AMDGPUDisassembler::decodeSrcOp() with the
// given operand width. EncImm is an expression evaluated inside the generated
// function, so it may refer to the callback's Imm parameter.
//
// The range check uses isUInt<EncSize>, like the other decoders in this file,
// rather than comparing the unsigned Imm against a signed shift result.
#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm)                           \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(isUInt<EncSize>(Imm) && #EncSize "-bit encoding");                  \
    const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);       \
    return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm));         \
  }
225
226static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
227 unsigned OpWidth, unsigned Imm, unsigned EncImm,
228 const MCDisassembler *Decoder) {
229 assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
230 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
231 return addOperand(Inst, Opnd: DAsm->decodeSrcOp(Inst, Width: OpWidth, Val: EncImm));
232}
233
// Decoders for SGPR-only register operands. Imm is the register number and
// the register class is resolved through decodeSrcOp. Two variants exist,
// differing only in the asserted encoding size: 8 bits and 7 bits.
#define DECODE_OPERAND_SREG_8(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm)

#define DECODE_OPERAND_SREG_7(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm)
241
242// Decoder for registers. Imm(10-bit): Imm{7-0} is number of register,
243// Imm{9} is acc(agpr or vgpr) Imm{8} should be 0 (see VOP3Pe_SMFMAC).
244// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
245// Used by AV_ register classes (AGPR or VGPR only register operands).
246template <unsigned OpWidth>
247static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
248 const MCDisassembler *Decoder) {
249 return decodeSrcOp(Inst, EncSize: 10, OpWidth, Imm, EncImm: Imm | AMDGPU::EncValues::IS_VGPR,
250 Decoder);
251}
252
253// Decoder for Src(9-bit encoding) registers only.
254template <unsigned OpWidth>
255static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
256 uint64_t /* Addr */,
257 const MCDisassembler *Decoder) {
258 return decodeSrcOp(Inst, EncSize: 9, OpWidth, Imm, EncImm: Imm, Decoder);
259}
260
261// Decoder for Src(9-bit encoding) AGPR, register number encoded in 9bits, set
262// Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp, registers
263// only.
264template <unsigned OpWidth>
265static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
266 const MCDisassembler *Decoder) {
267 return decodeSrcOp(Inst, EncSize: 9, OpWidth, Imm, EncImm: Imm | 512, Decoder);
268}
269
270// Decoder for 'enum10' from decodeSrcOp, Imm{0-8} is 9-bit Src encoding
271// Imm{9} is acc, registers only.
272template <unsigned OpWidth>
273static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
274 uint64_t /* Addr */,
275 const MCDisassembler *Decoder) {
276 return decodeSrcOp(Inst, EncSize: 10, OpWidth, Imm, EncImm: Imm, Decoder);
277}
278
279// Decoder for RegisterOperands using 9-bit Src encoding. Operand can be
280// register from RegClass or immediate. Registers that don't belong to RegClass
281// will be decoded and InstPrinter will report warning. Immediate will be
282// decoded into constant matching the OperandType (important for floating point
283// types).
284template <unsigned OpWidth>
285static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
286 uint64_t /* Addr */,
287 const MCDisassembler *Decoder) {
288 return decodeSrcOp(Inst, EncSize: 9, OpWidth, Imm, EncImm: Imm, Decoder);
289}
290
291// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
292// and decode using 'enum10' from decodeSrcOp.
293template <unsigned OpWidth>
294static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
295 uint64_t /* Addr */,
296 const MCDisassembler *Decoder) {
297 return decodeSrcOp(Inst, EncSize: 9, OpWidth, Imm, EncImm: Imm | 512, Decoder);
298}
299
300// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
301// when RegisterClass is used as an operand. Most often used for destination
302// operands.
303
304DECODE_OPERAND_REG_8(VGPR_32)
305DECODE_OPERAND_REG_8(VGPR_32_Lo128)
306DECODE_OPERAND_REG_8(VReg_64)
307DECODE_OPERAND_REG_8(VReg_96)
308DECODE_OPERAND_REG_8(VReg_128)
309DECODE_OPERAND_REG_8(VReg_192)
310DECODE_OPERAND_REG_8(VReg_256)
311DECODE_OPERAND_REG_8(VReg_288)
312DECODE_OPERAND_REG_8(VReg_320)
313DECODE_OPERAND_REG_8(VReg_352)
314DECODE_OPERAND_REG_8(VReg_384)
315DECODE_OPERAND_REG_8(VReg_512)
316DECODE_OPERAND_REG_8(VReg_1024)
317
318DECODE_OPERAND_SREG_7(SReg_32, 32)
319DECODE_OPERAND_SREG_7(SReg_32_XM0, 32)
320DECODE_OPERAND_SREG_7(SReg_32_XEXEC, 32)
321DECODE_OPERAND_SREG_7(SReg_32_XM0_XEXEC, 32)
322DECODE_OPERAND_SREG_7(SReg_32_XEXEC_HI, 32)
323DECODE_OPERAND_SREG_7(SReg_64_XEXEC, 64)
324DECODE_OPERAND_SREG_7(SReg_64_XEXEC_XNULL, 64)
325DECODE_OPERAND_SREG_7(SReg_96, 96)
326DECODE_OPERAND_SREG_7(SReg_128, 128)
327DECODE_OPERAND_SREG_7(SReg_128_XNULL, 128)
328DECODE_OPERAND_SREG_7(SReg_256, 256)
329DECODE_OPERAND_SREG_7(SReg_256_XNULL, 256)
330DECODE_OPERAND_SREG_7(SReg_512, 512)
331
332DECODE_OPERAND_SREG_8(SReg_64, 64)
333
334DECODE_OPERAND_REG_8(AGPR_32)
335DECODE_OPERAND_REG_8(AReg_64)
336DECODE_OPERAND_REG_8(AReg_128)
337DECODE_OPERAND_REG_8(AReg_256)
338DECODE_OPERAND_REG_8(AReg_512)
339DECODE_OPERAND_REG_8(AReg_1024)
340
341static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
342 uint64_t /*Addr*/,
343 const MCDisassembler *Decoder) {
344 assert(isUInt<10>(Imm) && "10-bit encoding expected");
345 assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");
346
347 bool IsHi = Imm & (1 << 9);
348 unsigned RegIdx = Imm & 0xff;
349 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
350 return addOperand(Inst, Opnd: DAsm->createVGPR16Operand(RegIdx, IsHi));
351}
352
353static DecodeStatus
354DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
355 const MCDisassembler *Decoder) {
356 assert(isUInt<8>(Imm) && "8-bit encoding expected");
357
358 bool IsHi = Imm & (1 << 7);
359 unsigned RegIdx = Imm & 0x7f;
360 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
361 return addOperand(Inst, Opnd: DAsm->createVGPR16Operand(RegIdx, IsHi));
362}
363
364template <unsigned OpWidth>
365static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
366 uint64_t /*Addr*/,
367 const MCDisassembler *Decoder) {
368 assert(isUInt<9>(Imm) && "9-bit encoding expected");
369
370 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
371 if (Imm & AMDGPU::EncValues::IS_VGPR) {
372 bool IsHi = Imm & (1 << 7);
373 unsigned RegIdx = Imm & 0x7f;
374 return addOperand(Inst, Opnd: DAsm->createVGPR16Operand(RegIdx, IsHi));
375 }
376 return addOperand(Inst, Opnd: DAsm->decodeNonVGPRSrcOp(Inst, Width: OpWidth, Val: Imm & 0xFF));
377}
378
379template <unsigned OpWidth>
380static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
381 uint64_t /*Addr*/,
382 const MCDisassembler *Decoder) {
383 assert(isUInt<10>(Imm) && "10-bit encoding expected");
384
385 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
386 if (Imm & AMDGPU::EncValues::IS_VGPR) {
387 bool IsHi = Imm & (1 << 9);
388 unsigned RegIdx = Imm & 0xff;
389 return addOperand(Inst, Opnd: DAsm->createVGPR16Operand(RegIdx, IsHi));
390 }
391 return addOperand(Inst, Opnd: DAsm->decodeNonVGPRSrcOp(Inst, Width: OpWidth, Val: Imm & 0xFF));
392}
393
394static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm,
395 uint64_t /*Addr*/,
396 const MCDisassembler *Decoder) {
397 assert(isUInt<10>(Imm) && "10-bit encoding expected");
398 assert(Imm & AMDGPU::EncValues::IS_VGPR && "VGPR expected");
399
400 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
401
402 bool IsHi = Imm & (1 << 9);
403 unsigned RegIdx = Imm & 0xff;
404 return addOperand(Inst, Opnd: DAsm->createVGPR16Operand(RegIdx, IsHi));
405}
406
407static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
408 uint64_t Addr,
409 const MCDisassembler *Decoder) {
410 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
411 return addOperand(Inst, Opnd: DAsm->decodeMandatoryLiteralConstant(Imm));
412}
413
414static DecodeStatus decodeOperand_KImmFP64(MCInst &Inst, uint64_t Imm,
415 uint64_t Addr,
416 const MCDisassembler *Decoder) {
417 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
418 return addOperand(Inst, Opnd: DAsm->decodeMandatoryLiteral64Constant(Imm));
419}
420
421static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
422 uint64_t Addr, const void *Decoder) {
423 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
424 return addOperand(Inst, Opnd: DAsm->decodeVOPDDstYOp(Inst, Val));
425}
426
427static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw,
428 const MCDisassembler *Decoder) {
429 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
430 return addOperand(Inst, Opnd: DAsm->decodeSrcOp(Inst, Width: Opw, Val: Imm | 256));
431}
432
433template <unsigned Opw>
434static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
435 uint64_t /* Addr */,
436 const MCDisassembler *Decoder) {
437 return decodeAVLdSt(Inst, Imm, Opw, Decoder);
438}
439
440static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
441 uint64_t Addr,
442 const MCDisassembler *Decoder) {
443 assert(Imm < (1 << 9) && "9-bit encoding");
444 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
445 return addOperand(Inst, Opnd: DAsm->decodeSrcOp(Inst, Width: 64, Val: Imm));
446}
447
448#define DECODE_SDWA(DecName) \
449DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
450
451DECODE_SDWA(Src32)
452DECODE_SDWA(Src16)
453DECODE_SDWA(VopcDst)
454
455static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
456 uint64_t /* Addr */,
457 const MCDisassembler *Decoder) {
458 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
459 return addOperand(Inst, Opnd: DAsm->decodeVersionImm(Imm));
460}
461
462#include "AMDGPUGenDisassemblerTables.inc"
463
464namespace {
465// Define bitwidths for various types used to instantiate the decoder.
466template <> constexpr uint32_t InsnBitWidth<uint32_t> = 32;
467template <> constexpr uint32_t InsnBitWidth<uint64_t> = 64;
468template <> constexpr uint32_t InsnBitWidth<std::bitset<96>> = 96;
469template <> constexpr uint32_t InsnBitWidth<std::bitset<128>> = 128;
470} // namespace
471
472//===----------------------------------------------------------------------===//
473//
474//===----------------------------------------------------------------------===//
475
476template <typename InsnType>
477DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table, MCInst &MI,
478 InsnType Inst, uint64_t Address,
479 raw_ostream &Comments) const {
480 assert(MI.getOpcode() == 0);
481 assert(MI.getNumOperands() == 0);
482 MCInst TmpInst;
483 HasLiteral = false;
484 const auto SavedBytes = Bytes;
485
486 SmallString<64> LocalComments;
487 raw_svector_ostream LocalCommentStream(LocalComments);
488 CommentStream = &LocalCommentStream;
489
490 DecodeStatus Res =
491 decodeInstruction(Table, TmpInst, Inst, Address, this, STI);
492
493 CommentStream = nullptr;
494
495 if (Res != MCDisassembler::Fail) {
496 MI = TmpInst;
497 Comments << LocalComments;
498 return MCDisassembler::Success;
499 }
500 Bytes = SavedBytes;
501 return MCDisassembler::Fail;
502}
503
504template <typename InsnType>
505DecodeStatus
506AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table1, const uint8_t *Table2,
507 MCInst &MI, InsnType Inst, uint64_t Address,
508 raw_ostream &Comments) const {
509 for (const uint8_t *T : {Table1, Table2}) {
510 if (DecodeStatus Res = tryDecodeInst(T, MI, Inst, Address, Comments))
511 return Res;
512 }
513 return MCDisassembler::Fail;
514}
515
516template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
517 assert(Bytes.size() >= sizeof(T));
518 const auto Res =
519 support::endian::read<T, llvm::endianness::little>(Bytes.data());
520 Bytes = Bytes.slice(N: sizeof(T));
521 return Res;
522}
523
524static inline std::bitset<96> eat12Bytes(ArrayRef<uint8_t> &Bytes) {
525 using namespace llvm::support::endian;
526 assert(Bytes.size() >= 12);
527 std::bitset<96> Lo(read<uint64_t, endianness::little>(P: Bytes.data()));
528 Bytes = Bytes.slice(N: 8);
529 std::bitset<96> Hi(read<uint32_t, endianness::little>(P: Bytes.data()));
530 Bytes = Bytes.slice(N: 4);
531 return (Hi << 64) | Lo;
532}
533
534static inline std::bitset<128> eat16Bytes(ArrayRef<uint8_t> &Bytes) {
535 using namespace llvm::support::endian;
536 assert(Bytes.size() >= 16);
537 std::bitset<128> Lo(read<uint64_t, endianness::little>(P: Bytes.data()));
538 Bytes = Bytes.slice(N: 8);
539 std::bitset<128> Hi(read<uint64_t, endianness::little>(P: Bytes.data()));
540 Bytes = Bytes.slice(N: 8);
541 return (Hi << 64) | Lo;
542}
543
544void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
545 const MCInstrInfo &MCII) const {
546 const MCInstrDesc &Desc = MCII.get(Opcode: MI.getOpcode());
547 for (auto [OpNo, OpDesc] : enumerate(First: Desc.operands())) {
548 if (OpNo >= MI.getNumOperands())
549 continue;
550
551 // TODO: Fix V_DUAL_FMAMK_F32_X_FMAAK_F32_gfx12 vsrc operands,
552 // defined to take VGPR_32, but in reality allowing inline constants.
553 bool IsSrc = AMDGPU::OPERAND_SRC_FIRST <= OpDesc.OperandType &&
554 OpDesc.OperandType <= AMDGPU::OPERAND_SRC_LAST;
555 if (!IsSrc && OpDesc.OperandType != MCOI::OPERAND_REGISTER)
556 continue;
557
558 MCOperand &Op = MI.getOperand(i: OpNo);
559 if (!Op.isImm())
560 continue;
561 int64_t Imm = Op.getImm();
562 if (AMDGPU::EncValues::INLINE_INTEGER_C_MIN <= Imm &&
563 Imm <= AMDGPU::EncValues::INLINE_INTEGER_C_MAX) {
564 Op = decodeIntImmed(Imm);
565 continue;
566 }
567
568 if (Imm == AMDGPU::EncValues::LITERAL_CONST) {
569 Op = decodeLiteralConstant(Desc, OpDesc);
570 continue;
571 }
572
573 if (AMDGPU::EncValues::INLINE_FLOATING_C_MIN <= Imm &&
574 Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX) {
575 switch (OpDesc.OperandType) {
576 case AMDGPU::OPERAND_REG_IMM_BF16:
577 case AMDGPU::OPERAND_REG_IMM_V2BF16:
578 case AMDGPU::OPERAND_REG_INLINE_C_BF16:
579 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
580 Imm = getInlineImmValBF16(Imm);
581 break;
582 case AMDGPU::OPERAND_REG_IMM_FP16:
583 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
584 Imm = getInlineImmValF16(Imm);
585 break;
586 case AMDGPU::OPERAND_REG_IMM_V2FP16:
587 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
588 Imm = getInlineImmValF16(Imm);
589 break;
590 case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT: {
591 // V_PK_FMAC_F16 on GFX11+ duplicates the f16 inline constant to both
592 // halves, so we need to produce the duplicated value for correct
593 // round-trip.
594 if (isGFX11Plus()) {
595 int64_t F16Val = getInlineImmValF16(Imm);
596 Imm = (F16Val << 16) | (F16Val & 0xFFFF);
597 } else {
598 Imm = getInlineImmValF16(Imm);
599 }
600 break;
601 }
602 case AMDGPU::OPERAND_REG_IMM_FP64:
603 case AMDGPU::OPERAND_REG_IMM_INT64:
604 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
605 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
606 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
607 case AMDGPU::OPERAND_REG_IMM_V2FP64:
608 case AMDGPU::OPERAND_REG_IMM_V2INT64:
609 Imm = getInlineImmVal64(Imm);
610 break;
611 default:
612 Imm = getInlineImmVal32(Imm);
613 }
614 Op.setImm(Imm);
615 }
616 }
617}
618
619DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
620 ArrayRef<uint8_t> Bytes_,
621 uint64_t Address,
622 raw_ostream &CS) const {
623 unsigned MaxInstBytesNum = std::min(a: (size_t)TargetMaxInstBytes, b: Bytes_.size());
624 Bytes = Bytes_.slice(N: 0, M: MaxInstBytesNum);
625
626 // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
627 // there are fewer bytes left). This will be overridden on success.
628 Size = std::min(a: (size_t)4, b: Bytes_.size());
629
630 do {
631 // ToDo: better to switch encoding length using some bit predicate
632 // but it is unknown yet, so try all we can
633
634 // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
635 // encodings
636 if (isGFX1250Plus() && Bytes.size() >= 16) {
637 std::bitset<128> DecW = eat16Bytes(Bytes);
638 if (tryDecodeInst(Table: DecoderTableGFX1250128, MI, Inst: DecW, Address, Comments&: CS))
639 break;
640 Bytes = Bytes_.slice(N: 0, M: MaxInstBytesNum);
641 }
642
643 if (isGFX11Plus() && Bytes.size() >= 12) {
644 std::bitset<96> DecW = eat12Bytes(Bytes);
645
646 if (isGFX1170() &&
647 tryDecodeInst(Table1: DecoderTableGFX117096, Table2: DecoderTableGFX1170_FAKE1696, MI,
648 Inst: DecW, Address, Comments&: CS))
649 break;
650
651 if (isGFX11() &&
652 tryDecodeInst(Table1: DecoderTableGFX1196, Table2: DecoderTableGFX11_FAKE1696, MI,
653 Inst: DecW, Address, Comments&: CS))
654 break;
655
656 if (isGFX1250() &&
657 tryDecodeInst(Table1: DecoderTableGFX125096, Table2: DecoderTableGFX1250_FAKE1696, MI,
658 Inst: DecW, Address, Comments&: CS))
659 break;
660
661 if (isGFX12() &&
662 tryDecodeInst(Table1: DecoderTableGFX1296, Table2: DecoderTableGFX12_FAKE1696, MI,
663 Inst: DecW, Address, Comments&: CS))
664 break;
665
666 if (isGFX12() &&
667 tryDecodeInst(Table: DecoderTableGFX12W6496, MI, Inst: DecW, Address, Comments&: CS))
668 break;
669
670 if (isGFX13() &&
671 tryDecodeInst(Table1: DecoderTableGFX1396, Table2: DecoderTableGFX13_FAKE1696, MI,
672 Inst: DecW, Address, Comments&: CS))
673 break;
674
675 if (STI.hasFeature(Feature: AMDGPU::Feature64BitLiterals)) {
676 // Return 8 bytes for a potential literal.
677 Bytes = Bytes_.slice(N: 4, M: MaxInstBytesNum - 4);
678
679 if (isGFX1250() &&
680 tryDecodeInst(Table: DecoderTableGFX125096, MI, Inst: DecW, Address, Comments&: CS))
681 break;
682 }
683
684 // Reinitialize Bytes
685 Bytes = Bytes_.slice(N: 0, M: MaxInstBytesNum);
686
687 } else if (Bytes.size() >= 16 &&
688 STI.hasFeature(Feature: AMDGPU::FeatureGFX950Insts)) {
689 std::bitset<128> DecW = eat16Bytes(Bytes);
690 if (tryDecodeInst(Table: DecoderTableGFX940128, MI, Inst: DecW, Address, Comments&: CS))
691 break;
692
693 // Reinitialize Bytes
694 Bytes = Bytes_.slice(N: 0, M: MaxInstBytesNum);
695 }
696
697 if (Bytes.size() >= 8) {
698 const uint64_t QW = eatBytes<uint64_t>(Bytes);
699
700 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX10_BEncoding) &&
701 tryDecodeInst(Table: DecoderTableGFX10_B64, MI, Inst: QW, Address, Comments&: CS))
702 break;
703
704 if (STI.hasFeature(Feature: AMDGPU::FeatureUnpackedD16VMem) &&
705 tryDecodeInst(Table: DecoderTableGFX80_UNPACKED64, MI, Inst: QW, Address, Comments&: CS))
706 break;
707
708 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX950Insts) &&
709 tryDecodeInst(Table: DecoderTableGFX95064, MI, Inst: QW, Address, Comments&: CS))
710 break;
711
712 // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
713 // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
714 // table first so we print the correct name.
715 if (STI.hasFeature(Feature: AMDGPU::FeatureFmaMixInsts) &&
716 tryDecodeInst(Table: DecoderTableGFX9_DL64, MI, Inst: QW, Address, Comments&: CS))
717 break;
718
719 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX940Insts) &&
720 tryDecodeInst(Table: DecoderTableGFX94064, MI, Inst: QW, Address, Comments&: CS))
721 break;
722
723 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX90AInsts) &&
724 tryDecodeInst(Table: DecoderTableGFX90A64, MI, Inst: QW, Address, Comments&: CS))
725 break;
726
727 if ((isVI() || isGFX9()) &&
728 tryDecodeInst(Table: DecoderTableGFX864, MI, Inst: QW, Address, Comments&: CS))
729 break;
730
731 if (isGFX9() && tryDecodeInst(Table: DecoderTableGFX964, MI, Inst: QW, Address, Comments&: CS))
732 break;
733
734 if (isGFX10() && tryDecodeInst(Table: DecoderTableGFX1064, MI, Inst: QW, Address, Comments&: CS))
735 break;
736
737 if (isGFX1250() &&
738 tryDecodeInst(Table1: DecoderTableGFX125064, Table2: DecoderTableGFX1250_FAKE1664, MI,
739 Inst: QW, Address, Comments&: CS))
740 break;
741
742 if (isGFX12() &&
743 tryDecodeInst(Table1: DecoderTableGFX1264, Table2: DecoderTableGFX12_FAKE1664, MI, Inst: QW,
744 Address, Comments&: CS))
745 break;
746
747 if (isGFX1170() &&
748 tryDecodeInst(Table1: DecoderTableGFX117064, Table2: DecoderTableGFX1170_FAKE1664, MI,
749 Inst: QW, Address, Comments&: CS))
750 break;
751
752 if (isGFX11() &&
753 tryDecodeInst(Table1: DecoderTableGFX1164, Table2: DecoderTableGFX11_FAKE1664, MI, Inst: QW,
754 Address, Comments&: CS))
755 break;
756
757 if (isGFX1170() &&
758 tryDecodeInst(Table: DecoderTableGFX1170W6464, MI, Inst: QW, Address, Comments&: CS))
759 break;
760
761 if (isGFX11() &&
762 tryDecodeInst(Table: DecoderTableGFX11W6464, MI, Inst: QW, Address, Comments&: CS))
763 break;
764
765 if (isGFX12() &&
766 tryDecodeInst(Table: DecoderTableGFX12W6464, MI, Inst: QW, Address, Comments&: CS))
767 break;
768
769 if (isGFX13() &&
770 tryDecodeInst(Table1: DecoderTableGFX1364, Table2: DecoderTableGFX13_FAKE1664, MI, Inst: QW,
771 Address, Comments&: CS))
772 break;
773
774 // Reinitialize Bytes
775 Bytes = Bytes_.slice(N: 0, M: MaxInstBytesNum);
776 }
777
778 // Try decode 32-bit instruction
779 if (Bytes.size() >= 4) {
780 const uint32_t DW = eatBytes<uint32_t>(Bytes);
781
782 if ((isVI() || isGFX9()) &&
783 tryDecodeInst(Table: DecoderTableGFX832, MI, Inst: DW, Address, Comments&: CS))
784 break;
785
786 if (tryDecodeInst(Table: DecoderTableAMDGPU32, MI, Inst: DW, Address, Comments&: CS))
787 break;
788
789 if (isGFX9() && tryDecodeInst(Table: DecoderTableGFX932, MI, Inst: DW, Address, Comments&: CS))
790 break;
791
792 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX950Insts) &&
793 tryDecodeInst(Table: DecoderTableGFX95032, MI, Inst: DW, Address, Comments&: CS))
794 break;
795
796 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX90AInsts) &&
797 tryDecodeInst(Table: DecoderTableGFX90A32, MI, Inst: DW, Address, Comments&: CS))
798 break;
799
800 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX10_BEncoding) &&
801 tryDecodeInst(Table: DecoderTableGFX10_B32, MI, Inst: DW, Address, Comments&: CS))
802 break;
803
804 if (isGFX10() && tryDecodeInst(Table: DecoderTableGFX1032, MI, Inst: DW, Address, Comments&: CS))
805 break;
806
807 if (isGFX1170() &&
808 tryDecodeInst(Table1: DecoderTableGFX117032, Table2: DecoderTableGFX1170_FAKE1632, MI,
809 Inst: DW, Address, Comments&: CS))
810 break;
811
812 if (isGFX11() &&
813 tryDecodeInst(Table1: DecoderTableGFX1132, Table2: DecoderTableGFX11_FAKE1632, MI, Inst: DW,
814 Address, Comments&: CS))
815 break;
816
817 if (isGFX1250() &&
818 tryDecodeInst(Table1: DecoderTableGFX125032, Table2: DecoderTableGFX1250_FAKE1632, MI,
819 Inst: DW, Address, Comments&: CS))
820 break;
821
822 if (isGFX12() &&
823 tryDecodeInst(Table1: DecoderTableGFX1232, Table2: DecoderTableGFX12_FAKE1632, MI, Inst: DW,
824 Address, Comments&: CS))
825 break;
826
827 if (isGFX13() &&
828 tryDecodeInst(Table1: DecoderTableGFX1332, Table2: DecoderTableGFX13_FAKE1632, MI, Inst: DW,
829 Address, Comments&: CS))
830 break;
831 }
832
833 return MCDisassembler::Fail;
834 } while (false);
835
836 DecodeStatus Status = MCDisassembler::Success;
837
838 decodeImmOperands(MI, MCII: *MCII);
839
840 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
841 if (isMacDPP(MI))
842 convertMacDPPInst(MI);
843
844 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
845 convertVOP3PDPPInst(MI);
846 else if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
847 convertVOPCDPPInst(MI); // Special VOP3 case
848 else if (AMDGPU::isVOPC64DPP(Opc: MI.getOpcode()))
849 convertVOPC64DPPInst(MI); // Special VOP3 case
850 else if (AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::dpp8) !=
851 -1)
852 convertDPP8Inst(MI);
853 else if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
854 convertVOP3DPPInst(MI); // Regular VOP3 case
855 }
856
857 convertTrue16OpSel(MI);
858
859 if (AMDGPU::isMAC(Opc: MI.getOpcode())) {
860 // Insert dummy unused src2_modifiers.
861 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
862 Name: AMDGPU::OpName::src2_modifiers);
863 }
864
865 if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
866 MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
867 // Insert dummy unused src2_modifiers.
868 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
869 Name: AMDGPU::OpName::src2_modifiers);
870 }
871
872 if ((MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
873 !AMDGPU::hasGDS(STI)) {
874 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::gds);
875 }
876
877 if (MCII->get(Opcode: MI.getOpcode()).TSFlags &
878 (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD)) {
879 int CPolPos = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
880 Name: AMDGPU::OpName::cpol);
881 if (CPolPos != -1) {
882 unsigned CPol =
883 (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
884 AMDGPU::CPol::GLC : 0;
885 if (MI.getNumOperands() <= (unsigned)CPolPos) {
886 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: CPol),
887 Name: AMDGPU::OpName::cpol);
888 } else if (CPol) {
889 MI.getOperand(i: CPolPos).setImm(MI.getOperand(i: CPolPos).getImm() | CPol);
890 }
891 }
892 }
893
894 if ((MCII->get(Opcode: MI.getOpcode()).TSFlags &
895 (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
896 (STI.hasFeature(Feature: AMDGPU::FeatureGFX90AInsts))) {
897 // GFX90A lost TFE, its place is occupied by ACC.
898 int TFEOpIdx =
899 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::tfe);
900 if (TFEOpIdx != -1) {
901 auto *TFEIter = MI.begin();
902 std::advance(i&: TFEIter, n: TFEOpIdx);
903 MI.insert(I: TFEIter, Op: MCOperand::createImm(Val: 0));
904 }
905 }
906
907 // Validate buffer instruction offsets for GFX12+ - must not be a negative.
908 if (isGFX12Plus() && isBufferInstruction(MI)) {
909 int OffsetIdx =
910 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::offset);
911 if (OffsetIdx != -1) {
912 uint32_t Imm = MI.getOperand(i: OffsetIdx).getImm();
913 int64_t SignedOffset = SignExtend64<24>(x: Imm);
914 if (SignedOffset < 0)
915 return MCDisassembler::Fail;
916 }
917 }
918
919 if (MCII->get(Opcode: MI.getOpcode()).TSFlags &
920 (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) {
921 int SWZOpIdx =
922 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::swz);
923 if (SWZOpIdx != -1) {
924 auto *SWZIter = MI.begin();
925 std::advance(i&: SWZIter, n: SWZOpIdx);
926 MI.insert(I: SWZIter, Op: MCOperand::createImm(Val: 0));
927 }
928 }
929
930 const MCInstrDesc &Desc = MCII->get(Opcode: MI.getOpcode());
931 if (Desc.TSFlags & SIInstrFlags::MIMG) {
932 int VAddr0Idx =
933 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::vaddr0);
934 int RsrcIdx =
935 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::srsrc);
936 unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
937 if (VAddr0Idx >= 0 && NSAArgs > 0) {
938 unsigned NSAWords = (NSAArgs + 3) / 4;
939 if (Bytes.size() < 4 * NSAWords)
940 return MCDisassembler::Fail;
941 for (unsigned i = 0; i < NSAArgs; ++i) {
942 const unsigned VAddrIdx = VAddr0Idx + 1 + i;
943 auto VAddrRCID =
944 MCII->getOpRegClassID(OpInfo: Desc.operands()[VAddrIdx], HwModeId: HwModeRegClass);
945 MI.insert(I: MI.begin() + VAddrIdx, Op: createRegOperand(RegClassID: VAddrRCID, Val: Bytes[i]));
946 }
947 Bytes = Bytes.slice(N: 4 * NSAWords);
948 }
949
950 convertMIMGInst(MI);
951 }
952
953 if (MCII->get(Opcode: MI.getOpcode()).TSFlags &
954 (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))
955 convertMIMGInst(MI);
956
957 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
958 convertEXPInst(MI);
959
960 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
961 convertVINTERPInst(MI);
962
963 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
964 convertSDWAInst(MI);
965
966 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::IsMAI)
967 convertMAIInst(MI);
968
969 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::IsWMMA)
970 convertWMMAInst(MI);
971
972 int VDstIn_Idx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
973 Name: AMDGPU::OpName::vdst_in);
974 if (VDstIn_Idx != -1) {
975 int Tied = MCII->get(Opcode: MI.getOpcode()).getOperandConstraint(OpNum: VDstIn_Idx,
976 Constraint: MCOI::OperandConstraint::TIED_TO);
977 if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
978 !MI.getOperand(i: VDstIn_Idx).isReg() ||
979 MI.getOperand(i: VDstIn_Idx).getReg() != MI.getOperand(i: Tied).getReg())) {
980 if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
981 MI.erase(I: &MI.getOperand(i: VDstIn_Idx));
982 insertNamedMCOperand(MI,
983 Op: MCOperand::createReg(Reg: MI.getOperand(i: Tied).getReg()),
984 Name: AMDGPU::OpName::vdst_in);
985 }
986 }
987
988 bool IsSOPK = MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
989 if (AMDGPU::hasNamedOperand(Opcode: MI.getOpcode(), NamedIdx: AMDGPU::OpName::imm) && !IsSOPK)
990 convertFMAanyK(MI);
991
992 // Some VOPC instructions, e.g., v_cmpx_f_f64, use VOP3 encoding and
993 // have EXEC as implicit destination. Issue a warning if encoding for
994 // vdst is not EXEC.
995 if ((MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::VOP3) &&
996 MCII->get(Opcode: MI.getOpcode()).getNumDefs() == 0 &&
997 MCII->get(Opcode: MI.getOpcode()).hasImplicitDefOfPhysReg(Reg: AMDGPU::EXEC)) {
998 auto ExecEncoding = MRI.getEncodingValue(Reg: AMDGPU::EXEC_LO);
999 if (Bytes_[0] != ExecEncoding)
1000 Status = MCDisassembler::SoftFail;
1001 }
1002
1003 Size = MaxInstBytesNum - Bytes.size();
1004 return Status;
1005}
1006
1007void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
1008 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX11Insts)) {
1009 // The MCInst still has these fields even though they are no longer encoded
1010 // in the GFX11 instruction.
1011 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::vm);
1012 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::compr);
1013 }
1014}
1015
1016void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
1017 convertTrue16OpSel(MI);
1018 if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
1019 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
1020 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
1021 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
1022 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx13 ||
1023 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx13 ||
1024 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
1025 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
1026 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
1027 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
1028 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx13 ||
1029 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx13 ||
1030 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
1031 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
1032 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
1033 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
1034 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx13 ||
1035 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx13 ||
1036 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
1037 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
1038 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
1039 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12 ||
1040 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx13 ||
1041 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx13) {
1042 // The MCInst has this field that is not directly encoded in the
1043 // instruction.
1044 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::op_sel);
1045 }
1046}
1047
1048void AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
1049 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX9) ||
1050 STI.hasFeature(Feature: AMDGPU::FeatureGFX10)) {
1051 if (AMDGPU::hasNamedOperand(Opcode: MI.getOpcode(), NamedIdx: AMDGPU::OpName::sdst))
1052 // VOPC - insert clamp
1053 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::clamp);
1054 } else if (STI.hasFeature(Feature: AMDGPU::FeatureVolcanicIslands)) {
1055 int SDst = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::sdst);
1056 if (SDst != -1) {
1057 // VOPC - insert VCC register as sdst
1058 insertNamedMCOperand(MI, Op: createRegOperand(Reg: AMDGPU::VCC),
1059 Name: AMDGPU::OpName::sdst);
1060 } else {
1061 // VOP1/2 - insert omod if present in instruction
1062 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::omod);
1063 }
1064 }
1065}
1066
1067/// Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the
1068/// appropriate subregister for the used format width.
1069static void adjustMFMA_F8F6F4OpRegClass(const MCRegisterInfo &MRI,
1070 MCOperand &MO, uint8_t NumRegs) {
1071 switch (NumRegs) {
1072 case 4:
1073 return MO.setReg(MRI.getSubReg(Reg: MO.getReg(), Idx: AMDGPU::sub0_sub1_sub2_sub3));
1074 case 6:
1075 return MO.setReg(
1076 MRI.getSubReg(Reg: MO.getReg(), Idx: AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));
1077 case 8:
1078 if (MCRegister NewReg = MRI.getSubReg(
1079 Reg: MO.getReg(), Idx: AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7)) {
1080 MO.setReg(NewReg);
1081 }
1082 return;
1083 case 12: {
1084 // There is no 384-bit subreg index defined.
1085 MCRegister BaseReg = MRI.getSubReg(Reg: MO.getReg(), Idx: AMDGPU::sub0);
1086 MCRegister NewReg = MRI.getMatchingSuperReg(
1087 Reg: BaseReg, SubIdx: AMDGPU::sub0, RC: &MRI.getRegClass(i: AMDGPU::VReg_384RegClassID));
1088 return MO.setReg(NewReg);
1089 }
1090 case 16:
1091 // No-op in cases where one operand is still f8/bf8.
1092 return;
1093 default:
1094 llvm_unreachable("Unexpected size for mfma/wmma f8f6f4 operand");
1095 }
1096}
1097
1098/// f8f6f4 instructions have different pseudos depending on the used formats. In
1099/// the disassembler table, we only have the variants with the largest register
1100/// classes which assume using an fp8/bf8 format for both operands. The actual
1101/// register class depends on the format in blgp and cbsz operands. Adjust the
1102/// register classes depending on the used format.
1103void AMDGPUDisassembler::convertMAIInst(MCInst &MI) const {
1104 int BlgpIdx =
1105 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::blgp);
1106 if (BlgpIdx == -1)
1107 return;
1108
1109 int CbszIdx =
1110 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::cbsz);
1111
1112 unsigned CBSZ = MI.getOperand(i: CbszIdx).getImm();
1113 unsigned BLGP = MI.getOperand(i: BlgpIdx).getImm();
1114
1115 const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
1116 AMDGPU::getMFMA_F8F6F4_WithFormatArgs(CBSZ, BLGP, F8F8Opcode: MI.getOpcode());
1117 if (!AdjustedRegClassOpcode ||
1118 AdjustedRegClassOpcode->Opcode == MI.getOpcode())
1119 return;
1120
1121 MI.setOpcode(AdjustedRegClassOpcode->Opcode);
1122 int Src0Idx =
1123 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::src0);
1124 int Src1Idx =
1125 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::src1);
1126 adjustMFMA_F8F6F4OpRegClass(MRI, MO&: MI.getOperand(i: Src0Idx),
1127 NumRegs: AdjustedRegClassOpcode->NumRegsSrcA);
1128 adjustMFMA_F8F6F4OpRegClass(MRI, MO&: MI.getOperand(i: Src1Idx),
1129 NumRegs: AdjustedRegClassOpcode->NumRegsSrcB);
1130}
1131
1132void AMDGPUDisassembler::convertWMMAInst(MCInst &MI) const {
1133 int FmtAIdx =
1134 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::matrix_a_fmt);
1135 if (FmtAIdx == -1)
1136 return;
1137
1138 int FmtBIdx =
1139 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::matrix_b_fmt);
1140
1141 unsigned FmtA = MI.getOperand(i: FmtAIdx).getImm();
1142 unsigned FmtB = MI.getOperand(i: FmtBIdx).getImm();
1143
1144 const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
1145 AMDGPU::getWMMA_F8F6F4_WithFormatArgs(FmtA, FmtB, F8F8Opcode: MI.getOpcode());
1146 if (!AdjustedRegClassOpcode ||
1147 AdjustedRegClassOpcode->Opcode == MI.getOpcode())
1148 return;
1149
1150 MI.setOpcode(AdjustedRegClassOpcode->Opcode);
1151 int Src0Idx =
1152 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::src0);
1153 int Src1Idx =
1154 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::src1);
1155 adjustMFMA_F8F6F4OpRegClass(MRI, MO&: MI.getOperand(i: Src0Idx),
1156 NumRegs: AdjustedRegClassOpcode->NumRegsSrcA);
1157 adjustMFMA_F8F6F4OpRegClass(MRI, MO&: MI.getOperand(i: Src1Idx),
1158 NumRegs: AdjustedRegClassOpcode->NumRegsSrcB);
1159}
1160
/// Bit masks gathered from the srcN_modifiers operands of a VOP3/VOP3P
/// instruction. In each field, bit J corresponds to source operand J; OpSel
/// additionally uses bit 3 for the destination select.
struct VOPModifiers {
  unsigned OpSel = 0;   // op_sel bits (bit 3: destination op_sel)
  unsigned OpSelHi = 0; // op_sel_hi bits
  unsigned NegLo = 0;   // neg_lo bits
  unsigned NegHi = 0;   // neg_hi bits
};
1167
1168// Reconstruct values of VOP3/VOP3P operands such as op_sel.
1169// Note that these values do not affect disassembler output,
1170// so this is only necessary for consistency with src_modifiers.
1171static VOPModifiers collectVOPModifiers(const MCInst &MI,
1172 bool IsVOP3P = false) {
1173 VOPModifiers Modifiers;
1174 unsigned Opc = MI.getOpcode();
1175 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
1176 AMDGPU::OpName::src1_modifiers,
1177 AMDGPU::OpName::src2_modifiers};
1178 for (int J = 0; J < 3; ++J) {
1179 int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: ModOps[J]);
1180 if (OpIdx == -1)
1181 continue;
1182
1183 unsigned Val = MI.getOperand(i: OpIdx).getImm();
1184
1185 Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
1186 if (IsVOP3P) {
1187 Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
1188 Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
1189 Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
1190 } else if (J == 0) {
1191 Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
1192 }
1193 }
1194
1195 return Modifiers;
1196}
1197
1198// Instructions decode the op_sel/suffix bits into the src_modifier
1199// operands. Copy those bits into the src operands for true16 VGPRs.
1200void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const {
1201 const unsigned Opc = MI.getOpcode();
1202 const MCRegisterClass &ConversionRC =
1203 MRI.getRegClass(i: AMDGPU::VGPR_16RegClassID);
1204 constexpr std::array<std::tuple<AMDGPU::OpName, AMDGPU::OpName, unsigned>, 4>
1205 OpAndOpMods = {._M_elems: {{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
1206 SISrcMods::OP_SEL_0},
1207 {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
1208 SISrcMods::OP_SEL_0},
1209 {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
1210 SISrcMods::OP_SEL_0},
1211 {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
1212 SISrcMods::DST_OP_SEL}}};
1213 for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
1214 int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: OpName);
1215 int OpModsIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: OpModsName);
1216 if (OpIdx == -1 || OpModsIdx == -1)
1217 continue;
1218 MCOperand &Op = MI.getOperand(i: OpIdx);
1219 if (!Op.isReg())
1220 continue;
1221 if (!ConversionRC.contains(Reg: Op.getReg()))
1222 continue;
1223 unsigned OpEnc = MRI.getEncodingValue(Reg: Op.getReg());
1224 const MCOperand &OpMods = MI.getOperand(i: OpModsIdx);
1225 unsigned ModVal = OpMods.getImm();
1226 if (ModVal & OpSelMask) { // isHi
1227 unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
1228 Op.setReg(ConversionRC.getRegister(i: RegIdx * 2 + 1));
1229 }
1230 }
1231}
1232
1233// MAC opcodes have special old and src2 operands.
1234// src2 is tied to dst, while old is not tied (but assumed to be).
1235bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
1236 constexpr int DST_IDX = 0;
1237 auto Opcode = MI.getOpcode();
1238 const auto &Desc = MCII->get(Opcode);
1239 auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::old);
1240
1241 if (OldIdx != -1 && Desc.getOperandConstraint(
1242 OpNum: OldIdx, Constraint: MCOI::OperandConstraint::TIED_TO) == -1) {
1243 assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
1244 assert(Desc.getOperandConstraint(
1245 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
1246 MCOI::OperandConstraint::TIED_TO) == DST_IDX);
1247 (void)DST_IDX;
1248 return true;
1249 }
1250
1251 return false;
1252}
1253
1254// Create dummy old operand and insert dummy unused src2_modifiers
1255void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
1256 assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
1257 insertNamedMCOperand(MI, Op: MCOperand::createReg(Reg: 0), Name: AMDGPU::OpName::old);
1258 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
1259 Name: AMDGPU::OpName::src2_modifiers);
1260}
1261
1262void AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
1263 unsigned Opc = MI.getOpcode();
1264
1265 int VDstInIdx =
1266 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::vdst_in);
1267 if (VDstInIdx != -1)
1268 insertNamedMCOperand(MI, Op: MI.getOperand(i: 0), Name: AMDGPU::OpName::vdst_in);
1269
1270 unsigned DescNumOps = MCII->get(Opcode: Opc).getNumOperands();
1271 if (MI.getNumOperands() < DescNumOps &&
1272 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel)) {
1273 convertTrue16OpSel(MI);
1274 auto Mods = collectVOPModifiers(MI);
1275 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.OpSel),
1276 Name: AMDGPU::OpName::op_sel);
1277 } else {
1278 // Insert dummy unused src modifiers.
1279 if (MI.getNumOperands() < DescNumOps &&
1280 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::src0_modifiers))
1281 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
1282 Name: AMDGPU::OpName::src0_modifiers);
1283
1284 if (MI.getNumOperands() < DescNumOps &&
1285 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::src1_modifiers))
1286 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
1287 Name: AMDGPU::OpName::src1_modifiers);
1288 }
1289}
1290
1291void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
1292 convertTrue16OpSel(MI);
1293
1294 int VDstInIdx =
1295 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::vdst_in);
1296 if (VDstInIdx != -1)
1297 insertNamedMCOperand(MI, Op: MI.getOperand(i: 0), Name: AMDGPU::OpName::vdst_in);
1298
1299 unsigned Opc = MI.getOpcode();
1300 unsigned DescNumOps = MCII->get(Opcode: Opc).getNumOperands();
1301 if (MI.getNumOperands() < DescNumOps &&
1302 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel)) {
1303 auto Mods = collectVOPModifiers(MI);
1304 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.OpSel),
1305 Name: AMDGPU::OpName::op_sel);
1306 }
1307}
1308
1309// Given a wide tuple \p Reg check if it will overflow 256 registers.
1310// \returns \p Reg on success or NoRegister otherwise.
1311static MCRegister CheckVGPROverflow(MCRegister Reg, const MCRegisterClass &RC,
1312 const MCRegisterInfo &MRI) {
1313 unsigned NumRegs = RC.getSizeInBits() / 32;
1314 MCRegister Sub0 = MRI.getSubReg(Reg, Idx: AMDGPU::sub0);
1315 if (!Sub0)
1316 return Reg;
1317
1318 MCRegister BaseReg;
1319 if (MRI.getRegClass(i: AMDGPU::VGPR_32RegClassID).contains(Reg: Sub0))
1320 BaseReg = AMDGPU::VGPR0;
1321 else if (MRI.getRegClass(i: AMDGPU::AGPR_32RegClassID).contains(Reg: Sub0))
1322 BaseReg = AMDGPU::AGPR0;
1323
1324 assert(BaseReg && "Only vector registers expected");
1325
1326 return (Sub0 - BaseReg + NumRegs <= 256) ? Reg : MCRegister();
1327}
1328
1329// Note that before gfx10, the MIMG encoding provided no information about
1330// VADDR size. Consequently, decoded instructions always show address as if it
1331// has 1 dword, which could be not really so.
1332void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
1333 auto TSFlags = MCII->get(Opcode: MI.getOpcode()).TSFlags;
1334
1335 int VDstIdx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
1336 Name: AMDGPU::OpName::vdst);
1337
1338 int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
1339 Name: AMDGPU::OpName::vdata);
1340 int VAddr0Idx =
1341 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::vaddr0);
1342 AMDGPU::OpName RsrcOpName = (TSFlags & SIInstrFlags::MIMG)
1343 ? AMDGPU::OpName::srsrc
1344 : AMDGPU::OpName::rsrc;
1345 int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: RsrcOpName);
1346 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
1347 Name: AMDGPU::OpName::dmask);
1348
1349 int TFEIdx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
1350 Name: AMDGPU::OpName::tfe);
1351 int D16Idx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
1352 Name: AMDGPU::OpName::d16);
1353
1354 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc: MI.getOpcode());
1355 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
1356 AMDGPU::getMIMGBaseOpcodeInfo(BaseOpcode: Info->BaseOpcode);
1357
1358 assert(VDataIdx != -1);
1359 if (BaseOpcode->BVH) {
1360 // Add A16 operand for intersect_ray instructions
1361 addOperand(Inst&: MI, Opnd: MCOperand::createImm(Val: BaseOpcode->A16));
1362 return;
1363 }
1364
1365 bool IsAtomic = (VDstIdx != -1);
1366 bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
1367 bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
1368 bool IsNSA = false;
1369 bool IsPartialNSA = false;
1370 unsigned AddrSize = Info->VAddrDwords;
1371
1372 if (isGFX10Plus()) {
1373 unsigned DimIdx =
1374 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::dim);
1375 int A16Idx =
1376 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::a16);
1377 const AMDGPU::MIMGDimInfo *Dim =
1378 AMDGPU::getMIMGDimInfoByEncoding(DimEnc: MI.getOperand(i: DimIdx).getImm());
1379 const bool IsA16 = (A16Idx != -1 && MI.getOperand(i: A16Idx).getImm());
1380
1381 AddrSize =
1382 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, IsG16Supported: AMDGPU::hasG16(STI));
1383
1384 // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
1385 // VIMAGE insts other than BVH never use vaddr4.
1386 IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
1387 Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
1388 Info->MIMGEncoding == AMDGPU::MIMGEncGfx12 ||
1389 Info->MIMGEncoding == AMDGPU::MIMGEncGfx13;
1390 if (!IsNSA) {
1391 if (!IsVSample && AddrSize > 12)
1392 AddrSize = 16;
1393 } else {
1394 if (AddrSize > Info->VAddrDwords) {
1395 if (!STI.hasFeature(Feature: AMDGPU::FeaturePartialNSAEncoding)) {
1396 // The NSA encoding does not contain enough operands for the
1397 // combination of base opcode / dimension. Should this be an error?
1398 return;
1399 }
1400 IsPartialNSA = true;
1401 }
1402 }
1403 }
1404
1405 unsigned DMask = MI.getOperand(i: DMaskIdx).getImm() & 0xf;
1406 unsigned DstSize = IsGather4 ? 4 : std::max(a: llvm::popcount(Value: DMask), b: 1);
1407
1408 bool D16 = D16Idx >= 0 && MI.getOperand(i: D16Idx).getImm();
1409 if (D16 && AMDGPU::hasPackedD16(STI)) {
1410 DstSize = (DstSize + 1) / 2;
1411 }
1412
1413 if (TFEIdx != -1 && MI.getOperand(i: TFEIdx).getImm())
1414 DstSize += 1;
1415
1416 if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
1417 return;
1418
1419 int NewOpcode =
1420 AMDGPU::getMIMGOpcode(BaseOpcode: Info->BaseOpcode, MIMGEncoding: Info->MIMGEncoding, VDataDwords: DstSize, VAddrDwords: AddrSize);
1421 if (NewOpcode == -1)
1422 return;
1423
1424 // Widen the register to the correct number of enabled channels.
1425 MCRegister NewVdata;
1426 if (DstSize != Info->VDataDwords) {
1427 auto DataRCID = MCII->getOpRegClassID(
1428 OpInfo: MCII->get(Opcode: NewOpcode).operands()[VDataIdx], HwModeId: HwModeRegClass);
1429
1430 // Get first subregister of VData
1431 MCRegister Vdata0 = MI.getOperand(i: VDataIdx).getReg();
1432 MCRegister VdataSub0 = MRI.getSubReg(Reg: Vdata0, Idx: AMDGPU::sub0);
1433 Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
1434
1435 const MCRegisterClass &NewRC = MRI.getRegClass(i: DataRCID);
1436 NewVdata = MRI.getMatchingSuperReg(Reg: Vdata0, SubIdx: AMDGPU::sub0, RC: &NewRC);
1437 NewVdata = CheckVGPROverflow(Reg: NewVdata, RC: NewRC, MRI);
1438 if (!NewVdata) {
1439 // It's possible to encode this such that the low register + enabled
1440 // components exceeds the register count.
1441 return;
1442 }
1443 }
1444
1445 // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
1446 // If using partial NSA on GFX11+ widen last address register.
1447 int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
1448 MCRegister NewVAddrSA;
1449 if (STI.hasFeature(Feature: AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
1450 AddrSize != Info->VAddrDwords) {
1451 MCRegister VAddrSA = MI.getOperand(i: VAddrSAIdx).getReg();
1452 MCRegister VAddrSubSA = MRI.getSubReg(Reg: VAddrSA, Idx: AMDGPU::sub0);
1453 VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;
1454
1455 auto AddrRCID = MCII->getOpRegClassID(
1456 OpInfo: MCII->get(Opcode: NewOpcode).operands()[VAddrSAIdx], HwModeId: HwModeRegClass);
1457
1458 const MCRegisterClass &NewRC = MRI.getRegClass(i: AddrRCID);
1459 NewVAddrSA = MRI.getMatchingSuperReg(Reg: VAddrSA, SubIdx: AMDGPU::sub0, RC: &NewRC);
1460 NewVAddrSA = CheckVGPROverflow(Reg: NewVAddrSA, RC: NewRC, MRI);
1461 if (!NewVAddrSA)
1462 return;
1463 }
1464
1465 MI.setOpcode(NewOpcode);
1466
1467 if (NewVdata != AMDGPU::NoRegister) {
1468 MI.getOperand(i: VDataIdx) = MCOperand::createReg(Reg: NewVdata);
1469
1470 if (IsAtomic) {
1471 // Atomic operations have an additional operand (a copy of data)
1472 MI.getOperand(i: VDstIdx) = MCOperand::createReg(Reg: NewVdata);
1473 }
1474 }
1475
1476 if (NewVAddrSA) {
1477 MI.getOperand(i: VAddrSAIdx) = MCOperand::createReg(Reg: NewVAddrSA);
1478 } else if (IsNSA) {
1479 assert(AddrSize <= Info->VAddrDwords);
1480 MI.erase(First: MI.begin() + VAddr0Idx + AddrSize,
1481 Last: MI.begin() + VAddr0Idx + Info->VAddrDwords);
1482 }
1483}
1484
1485// Opsel and neg bits are used in src_modifiers and standalone operands. Autogen
1486// decoder only adds to src_modifiers, so manually add the bits to the other
1487// operands.
1488void AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
1489 unsigned Opc = MI.getOpcode();
1490 unsigned DescNumOps = MCII->get(Opcode: Opc).getNumOperands();
1491 auto Mods = collectVOPModifiers(MI, IsVOP3P: true);
1492
1493 if (MI.getNumOperands() < DescNumOps &&
1494 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::vdst_in))
1495 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::vdst_in);
1496
1497 if (MI.getNumOperands() < DescNumOps &&
1498 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel))
1499 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.OpSel),
1500 Name: AMDGPU::OpName::op_sel);
1501 if (MI.getNumOperands() < DescNumOps &&
1502 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel_hi))
1503 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.OpSelHi),
1504 Name: AMDGPU::OpName::op_sel_hi);
1505 if (MI.getNumOperands() < DescNumOps &&
1506 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::neg_lo))
1507 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.NegLo),
1508 Name: AMDGPU::OpName::neg_lo);
1509 if (MI.getNumOperands() < DescNumOps &&
1510 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::neg_hi))
1511 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.NegHi),
1512 Name: AMDGPU::OpName::neg_hi);
1513}
1514
1515// Create dummy old operand and insert optional operands
1516void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
1517 unsigned Opc = MI.getOpcode();
1518 unsigned DescNumOps = MCII->get(Opcode: Opc).getNumOperands();
1519
1520 if (MI.getNumOperands() < DescNumOps &&
1521 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::old))
1522 insertNamedMCOperand(MI, Op: MCOperand::createReg(Reg: 0), Name: AMDGPU::OpName::old);
1523
1524 if (MI.getNumOperands() < DescNumOps &&
1525 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::src0_modifiers))
1526 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
1527 Name: AMDGPU::OpName::src0_modifiers);
1528
1529 if (MI.getNumOperands() < DescNumOps &&
1530 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::src1_modifiers))
1531 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
1532 Name: AMDGPU::OpName::src1_modifiers);
1533}
1534
1535void AMDGPUDisassembler::convertVOPC64DPPInst(MCInst &MI) const {
1536 unsigned Opc = MI.getOpcode();
1537 unsigned DescNumOps = MCII->get(Opcode: Opc).getNumOperands();
1538
1539 convertTrue16OpSel(MI);
1540
1541 if (MI.getNumOperands() < DescNumOps &&
1542 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel)) {
1543 VOPModifiers Mods = collectVOPModifiers(MI);
1544 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.OpSel),
1545 Name: AMDGPU::OpName::op_sel);
1546 }
1547}
1548
1549void AMDGPUDisassembler::convertFMAanyK(MCInst &MI) const {
1550 assert(HasLiteral && "Should have decoded a literal");
1551 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Literal), Name: AMDGPU::OpName::immX);
1552}
1553
1554const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
1555 return getContext().getRegisterInfo()->
1556 getRegClassName(Class: &AMDGPUMCRegisterClasses[RegClassID]);
1557}
1558
1559inline
1560MCOperand AMDGPUDisassembler::errOperand(unsigned V,
1561 const Twine& ErrMsg) const {
1562 *CommentStream << "Error: " + ErrMsg;
1563
1564 // ToDo: add support for error operands to MCInst.h
1565 // return MCOperand::createError(V);
1566 return MCOperand();
1567}
1568
1569inline MCOperand AMDGPUDisassembler::createRegOperand(MCRegister Reg) const {
1570 return MCOperand::createReg(Reg: AMDGPU::getMCReg(Reg, STI));
1571}
1572
1573inline
1574MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
1575 unsigned Val) const {
1576 const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
1577 if (Val >= RegCl.getNumRegs())
1578 return errOperand(V: Val, ErrMsg: Twine(getRegClassName(RegClassID)) +
1579 ": unknown register " + Twine(Val));
1580 return createRegOperand(Reg: RegCl.getRegister(i: Val));
1581}
1582
1583inline
1584MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
1585 unsigned Val) const {
1586 // ToDo: SI/CI have 104 SGPRs, VI - 102
1587 // Valery: here we accepting as much as we can, let assembler sort it out
1588 int shift = 0;
1589 switch (SRegClassID) {
1590 case AMDGPU::SGPR_32RegClassID:
1591 case AMDGPU::TTMP_32RegClassID:
1592 break;
1593 case AMDGPU::SGPR_64RegClassID:
1594 case AMDGPU::TTMP_64RegClassID:
1595 shift = 1;
1596 break;
1597 case AMDGPU::SGPR_96RegClassID:
1598 case AMDGPU::TTMP_96RegClassID:
1599 case AMDGPU::SGPR_128RegClassID:
1600 case AMDGPU::TTMP_128RegClassID:
1601 // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
1602 // this bundle?
1603 case AMDGPU::SGPR_256RegClassID:
1604 case AMDGPU::TTMP_256RegClassID:
1605 // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
1606 // this bundle?
1607 case AMDGPU::SGPR_288RegClassID:
1608 case AMDGPU::TTMP_288RegClassID:
1609 case AMDGPU::SGPR_320RegClassID:
1610 case AMDGPU::TTMP_320RegClassID:
1611 case AMDGPU::SGPR_352RegClassID:
1612 case AMDGPU::TTMP_352RegClassID:
1613 case AMDGPU::SGPR_384RegClassID:
1614 case AMDGPU::TTMP_384RegClassID:
1615 case AMDGPU::SGPR_512RegClassID:
1616 case AMDGPU::TTMP_512RegClassID:
1617 shift = 2;
1618 break;
1619 // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
1620 // this bundle?
1621 default:
1622 llvm_unreachable("unhandled register class");
1623 }
1624
1625 if (Val % (1 << shift)) {
1626 *CommentStream << "Warning: " << getRegClassName(RegClassID: SRegClassID)
1627 << ": scalar reg isn't aligned " << Val;
1628 }
1629
1630 return createRegOperand(RegClassID: SRegClassID, Val: Val >> shift);
1631}
1632
1633MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
1634 bool IsHi) const {
1635 unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
1636 return createRegOperand(RegClassID: AMDGPU::VGPR_16RegClassID, Val: RegIdxInVGPR16);
1637}
1638
1639// Decode Literals for insts which always have a literal in the encoding
1640MCOperand
1641AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
1642 if (HasLiteral) {
1643 assert(
1644 AMDGPU::hasVOPD(STI) &&
1645 "Should only decode multiple kimm with VOPD, check VSrc operand types");
1646 if (Literal != Val)
1647 return errOperand(V: Val, ErrMsg: "More than one unique literal is illegal");
1648 }
1649 HasLiteral = true;
1650 Literal = Val;
1651 return MCOperand::createImm(Val: Literal);
1652}
1653
1654MCOperand
1655AMDGPUDisassembler::decodeMandatoryLiteral64Constant(uint64_t Val) const {
1656 if (HasLiteral) {
1657 if (Literal != Val)
1658 return errOperand(V: Val, ErrMsg: "More than one unique literal is illegal");
1659 }
1660 HasLiteral = true;
1661 Literal = Val;
1662
1663 bool UseLit64 = Hi_32(Value: Literal) == 0;
1664 return UseLit64 ? MCOperand::createExpr(Val: AMDGPUMCExpr::createLit(
1665 Lit: LitModifier::Lit64, Value: Literal, Ctx&: getContext()))
1666 : MCOperand::createImm(Val: Literal);
1667}
1668
1669MCOperand
1670AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc,
1671 const MCOperandInfo &OpDesc) const {
1672 // For now all literal constants are supposed to be unsigned integer
1673 // ToDo: deal with signed/unsigned 64-bit integer constants
1674 // ToDo: deal with float/double constants
1675 if (!HasLiteral) {
1676 if (Bytes.size() < 4) {
1677 return errOperand(V: 0, ErrMsg: "cannot read literal, inst bytes left " +
1678 Twine(Bytes.size()));
1679 }
1680 HasLiteral = true;
1681 Literal = eatBytes<uint32_t>(Bytes);
1682 }
1683
1684 // For disassembling always assume all inline constants are available.
1685 bool HasInv2Pi = true;
1686
1687 // Invalid instruction codes may contain literals for inline-only
1688 // operands, so we support them here as well.
1689 int64_t Val = Literal;
1690 bool UseLit = false;
1691 switch (OpDesc.OperandType) {
1692 default:
1693 llvm_unreachable("Unexpected operand type!");
1694 case AMDGPU::OPERAND_REG_IMM_BF16:
1695 case AMDGPU::OPERAND_REG_INLINE_C_BF16:
1696 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
1697 UseLit = AMDGPU::isInlinableLiteralBF16(Literal: Val, HasInv2Pi);
1698 break;
1699 case AMDGPU::OPERAND_REG_IMM_V2BF16:
1700 UseLit = AMDGPU::isInlinableLiteralV2BF16(Literal: Val);
1701 break;
1702 case AMDGPU::OPERAND_REG_IMM_FP16:
1703 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1704 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1705 UseLit = AMDGPU::isInlinableLiteralFP16(Literal: Val, HasInv2Pi);
1706 break;
1707 case AMDGPU::OPERAND_REG_IMM_V2FP16:
1708 UseLit = AMDGPU::isInlinableLiteralV2F16(Literal: Val);
1709 break;
1710 case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
1711 UseLit = AMDGPU::isPKFMACF16InlineConstant(Literal: Val, IsGFX11Plus: isGFX11Plus());
1712 break;
1713 case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
1714 break;
1715 case AMDGPU::OPERAND_REG_IMM_INT16:
1716 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1717 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1718 UseLit = AMDGPU::isInlinableLiteralI16(Literal: Val, HasInv2Pi);
1719 break;
1720 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1721 UseLit = AMDGPU::isInlinableLiteralV2I16(Literal: Val);
1722 break;
1723 case AMDGPU::OPERAND_REG_IMM_FP32:
1724 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1725 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1726 case AMDGPU::OPERAND_REG_IMM_INT32:
1727 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1728 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1729 case AMDGPU::OPERAND_REG_IMM_V2FP32:
1730 case AMDGPU::OPERAND_REG_IMM_V2INT32:
1731 case AMDGPU::OPERAND_KIMM32:
1732 UseLit = AMDGPU::isInlinableLiteral32(Literal: Val, HasInv2Pi);
1733 break;
1734 case AMDGPU::OPERAND_REG_IMM_FP64:
1735 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1736 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1737 case AMDGPU::OPERAND_REG_IMM_V2FP64:
1738 UseLit = AMDGPU::isInlinableLiteral64(Literal: Val << 32, HasInv2Pi);
1739 if (!UseLit)
1740 Val <<= 32;
1741 break;
1742 case AMDGPU::OPERAND_REG_IMM_INT64:
1743 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1744 case AMDGPU::OPERAND_REG_IMM_V2INT64:
1745 UseLit = AMDGPU::isInlinableLiteral64(Literal: Val, HasInv2Pi);
1746 break;
1747 case MCOI::OPERAND_REGISTER:
1748 // TODO: Disassembling V_DUAL_FMAMK_F32_X_FMAMK_F32_gfx11 hits
1749 // decoding a literal in a position of a register operand. Give
1750 // it special handling in the caller, decodeImmOperands(), instead
1751 // of quietly allowing it here.
1752 break;
1753 }
1754
1755 return UseLit ? MCOperand::createExpr(Val: AMDGPUMCExpr::createLit(
1756 Lit: LitModifier::Lit, Value: Val, Ctx&: getContext()))
1757 : MCOperand::createImm(Val);
1758}
1759
1760MCOperand AMDGPUDisassembler::decodeLiteral64Constant() const {
1761 assert(STI.hasFeature(AMDGPU::Feature64BitLiterals));
1762
1763 if (!HasLiteral) {
1764 if (Bytes.size() < 8) {
1765 return errOperand(V: 0, ErrMsg: "cannot read literal64, inst bytes left " +
1766 Twine(Bytes.size()));
1767 }
1768 HasLiteral = true;
1769 Literal = eatBytes<uint64_t>(Bytes);
1770 }
1771
1772 bool UseLit64 = Hi_32(Value: Literal) == 0;
1773
1774 UseLit64 |= AMDGPU::isInlinableLiteral64(
1775 Literal, HasInv2Pi: STI.hasFeature(Feature: AMDGPU::FeatureInv2PiInlineImm));
1776
1777 return UseLit64 ? MCOperand::createExpr(Val: AMDGPUMCExpr::createLit(
1778 Lit: LitModifier::Lit64, Value: Literal, Ctx&: getContext()))
1779 : MCOperand::createImm(Val: Literal);
1780}
1781
1782MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
1783 using namespace AMDGPU::EncValues;
1784
1785 assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
1786 return MCOperand::createImm(Val: (Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
1787 (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
1788 (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
1789 // Cast prevents negative overflow.
1790}
1791
1792static int64_t getInlineImmVal32(unsigned Imm) {
1793 switch (Imm) {
1794 case 240:
1795 return llvm::bit_cast<uint32_t>(from: 0.5f);
1796 case 241:
1797 return llvm::bit_cast<uint32_t>(from: -0.5f);
1798 case 242:
1799 return llvm::bit_cast<uint32_t>(from: 1.0f);
1800 case 243:
1801 return llvm::bit_cast<uint32_t>(from: -1.0f);
1802 case 244:
1803 return llvm::bit_cast<uint32_t>(from: 2.0f);
1804 case 245:
1805 return llvm::bit_cast<uint32_t>(from: -2.0f);
1806 case 246:
1807 return llvm::bit_cast<uint32_t>(from: 4.0f);
1808 case 247:
1809 return llvm::bit_cast<uint32_t>(from: -4.0f);
1810 case 248: // 1 / (2 * PI)
1811 return 0x3e22f983;
1812 default:
1813 llvm_unreachable("invalid fp inline imm");
1814 }
1815}
1816
1817static int64_t getInlineImmVal64(unsigned Imm) {
1818 switch (Imm) {
1819 case 240:
1820 return llvm::bit_cast<uint64_t>(from: 0.5);
1821 case 241:
1822 return llvm::bit_cast<uint64_t>(from: -0.5);
1823 case 242:
1824 return llvm::bit_cast<uint64_t>(from: 1.0);
1825 case 243:
1826 return llvm::bit_cast<uint64_t>(from: -1.0);
1827 case 244:
1828 return llvm::bit_cast<uint64_t>(from: 2.0);
1829 case 245:
1830 return llvm::bit_cast<uint64_t>(from: -2.0);
1831 case 246:
1832 return llvm::bit_cast<uint64_t>(from: 4.0);
1833 case 247:
1834 return llvm::bit_cast<uint64_t>(from: -4.0);
1835 case 248: // 1 / (2 * PI)
1836 return 0x3fc45f306dc9c882;
1837 default:
1838 llvm_unreachable("invalid fp inline imm");
1839 }
1840}
1841
1842static int64_t getInlineImmValF16(unsigned Imm) {
1843 switch (Imm) {
1844 case 240:
1845 return 0x3800;
1846 case 241:
1847 return 0xB800;
1848 case 242:
1849 return 0x3C00;
1850 case 243:
1851 return 0xBC00;
1852 case 244:
1853 return 0x4000;
1854 case 245:
1855 return 0xC000;
1856 case 246:
1857 return 0x4400;
1858 case 247:
1859 return 0xC400;
1860 case 248: // 1 / (2 * PI)
1861 return 0x3118;
1862 default:
1863 llvm_unreachable("invalid fp inline imm");
1864 }
1865}
1866
1867static int64_t getInlineImmValBF16(unsigned Imm) {
1868 switch (Imm) {
1869 case 240:
1870 return 0x3F00;
1871 case 241:
1872 return 0xBF00;
1873 case 242:
1874 return 0x3F80;
1875 case 243:
1876 return 0xBF80;
1877 case 244:
1878 return 0x4000;
1879 case 245:
1880 return 0xC000;
1881 case 246:
1882 return 0x4080;
1883 case 247:
1884 return 0xC080;
1885 case 248: // 1 / (2 * PI)
1886 return 0x3E22;
1887 default:
1888 llvm_unreachable("invalid fp inline imm");
1889 }
1890}
1891
1892unsigned AMDGPUDisassembler::getVgprClassId(unsigned Width) const {
1893 using namespace AMDGPU;
1894
1895 switch (Width) {
1896 case 16:
1897 case 32:
1898 return VGPR_32RegClassID;
1899 case 64:
1900 return VReg_64RegClassID;
1901 case 96:
1902 return VReg_96RegClassID;
1903 case 128:
1904 return VReg_128RegClassID;
1905 case 160:
1906 return VReg_160RegClassID;
1907 case 192:
1908 return VReg_192RegClassID;
1909 case 256:
1910 return VReg_256RegClassID;
1911 case 288:
1912 return VReg_288RegClassID;
1913 case 320:
1914 return VReg_320RegClassID;
1915 case 352:
1916 return VReg_352RegClassID;
1917 case 384:
1918 return VReg_384RegClassID;
1919 case 512:
1920 return VReg_512RegClassID;
1921 case 1024:
1922 return VReg_1024RegClassID;
1923 }
1924 llvm_unreachable("Invalid register width!");
1925}
1926
1927unsigned AMDGPUDisassembler::getAgprClassId(unsigned Width) const {
1928 using namespace AMDGPU;
1929
1930 switch (Width) {
1931 case 16:
1932 case 32:
1933 return AGPR_32RegClassID;
1934 case 64:
1935 return AReg_64RegClassID;
1936 case 96:
1937 return AReg_96RegClassID;
1938 case 128:
1939 return AReg_128RegClassID;
1940 case 160:
1941 return AReg_160RegClassID;
1942 case 256:
1943 return AReg_256RegClassID;
1944 case 288:
1945 return AReg_288RegClassID;
1946 case 320:
1947 return AReg_320RegClassID;
1948 case 352:
1949 return AReg_352RegClassID;
1950 case 384:
1951 return AReg_384RegClassID;
1952 case 512:
1953 return AReg_512RegClassID;
1954 case 1024:
1955 return AReg_1024RegClassID;
1956 }
1957 llvm_unreachable("Invalid register width!");
1958}
1959
1960unsigned AMDGPUDisassembler::getSgprClassId(unsigned Width) const {
1961 using namespace AMDGPU;
1962
1963 switch (Width) {
1964 case 16:
1965 case 32:
1966 return SGPR_32RegClassID;
1967 case 64:
1968 return SGPR_64RegClassID;
1969 case 96:
1970 return SGPR_96RegClassID;
1971 case 128:
1972 return SGPR_128RegClassID;
1973 case 160:
1974 return SGPR_160RegClassID;
1975 case 256:
1976 return SGPR_256RegClassID;
1977 case 288:
1978 return SGPR_288RegClassID;
1979 case 320:
1980 return SGPR_320RegClassID;
1981 case 352:
1982 return SGPR_352RegClassID;
1983 case 384:
1984 return SGPR_384RegClassID;
1985 case 512:
1986 return SGPR_512RegClassID;
1987 }
1988 llvm_unreachable("Invalid register width!");
1989}
1990
1991unsigned AMDGPUDisassembler::getTtmpClassId(unsigned Width) const {
1992 using namespace AMDGPU;
1993
1994 switch (Width) {
1995 case 16:
1996 case 32:
1997 return TTMP_32RegClassID;
1998 case 64:
1999 return TTMP_64RegClassID;
2000 case 128:
2001 return TTMP_128RegClassID;
2002 case 256:
2003 return TTMP_256RegClassID;
2004 case 288:
2005 return TTMP_288RegClassID;
2006 case 320:
2007 return TTMP_320RegClassID;
2008 case 352:
2009 return TTMP_352RegClassID;
2010 case 384:
2011 return TTMP_384RegClassID;
2012 case 512:
2013 return TTMP_512RegClassID;
2014 }
2015 llvm_unreachable("Invalid register width!");
2016}
2017
2018int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
2019 using namespace AMDGPU::EncValues;
2020
2021 unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
2022 unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
2023
2024 return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
2025}
2026
2027MCOperand AMDGPUDisassembler::decodeSrcOp(const MCInst &Inst, unsigned Width,
2028 unsigned Val) const {
2029 using namespace AMDGPU::EncValues;
2030
2031 assert(Val < 1024); // enum10
2032
2033 bool IsAGPR = Val & 512;
2034 Val &= 511;
2035
2036 if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
2037 return createRegOperand(RegClassID: IsAGPR ? getAgprClassId(Width)
2038 : getVgprClassId(Width), Val: Val - VGPR_MIN);
2039 }
2040 return decodeNonVGPRSrcOp(Inst, Width, Val: Val & 0xFF);
2041}
2042
2043MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const MCInst &Inst,
2044 unsigned Width,
2045 unsigned Val) const {
2046 // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
2047 // decoded earlier.
2048 assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
2049 using namespace AMDGPU::EncValues;
2050
2051 if (Val <= SGPR_MAX) {
2052 // "SGPR_MIN <= Val" is always true and causes compilation warning.
2053 static_assert(SGPR_MIN == 0);
2054 return createSRegOperand(SRegClassID: getSgprClassId(Width), Val: Val - SGPR_MIN);
2055 }
2056
2057 int TTmpIdx = getTTmpIdx(Val);
2058 if (TTmpIdx >= 0) {
2059 return createSRegOperand(SRegClassID: getTtmpClassId(Width), Val: TTmpIdx);
2060 }
2061
2062 if ((INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) ||
2063 (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) ||
2064 Val == LITERAL_CONST)
2065 return MCOperand::createImm(Val);
2066
2067 if (Val == LITERAL64_CONST && STI.hasFeature(Feature: AMDGPU::Feature64BitLiterals)) {
2068 return decodeLiteral64Constant();
2069 }
2070
2071 switch (Width) {
2072 case 32:
2073 case 16:
2074 return decodeSpecialReg32(Val);
2075 case 64:
2076 return decodeSpecialReg64(Val);
2077 case 96:
2078 case 128:
2079 case 256:
2080 case 512:
2081 return decodeSpecialReg96Plus(Val);
2082 default:
2083 llvm_unreachable("unexpected immediate type");
2084 }
2085}
2086
2087// Bit 0 of DstY isn't stored in the instruction, because it's always the
2088// opposite of bit 0 of DstX.
2089MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
2090 unsigned Val) const {
2091 int VDstXInd =
2092 AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: AMDGPU::OpName::vdstX);
2093 assert(VDstXInd != -1);
2094 assert(Inst.getOperand(VDstXInd).isReg());
2095 unsigned XDstReg = MRI.getEncodingValue(Reg: Inst.getOperand(i: VDstXInd).getReg());
2096 Val |= ~XDstReg & 1;
2097 return createRegOperand(RegClassID: getVgprClassId(Width: 32), Val);
2098}
2099
2100MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
2101 using namespace AMDGPU;
2102
2103 switch (Val) {
2104 // clang-format off
2105 case 102: return createRegOperand(Reg: FLAT_SCR_LO);
2106 case 103: return createRegOperand(Reg: FLAT_SCR_HI);
2107 case 104: return createRegOperand(Reg: XNACK_MASK_LO);
2108 case 105: return createRegOperand(Reg: XNACK_MASK_HI);
2109 case 106: return createRegOperand(Reg: VCC_LO);
2110 case 107: return createRegOperand(Reg: VCC_HI);
2111 case 108: return createRegOperand(Reg: TBA_LO);
2112 case 109: return createRegOperand(Reg: TBA_HI);
2113 case 110: return createRegOperand(Reg: TMA_LO);
2114 case 111: return createRegOperand(Reg: TMA_HI);
2115 case 124:
2116 return isGFX11Plus() ? createRegOperand(Reg: SGPR_NULL) : createRegOperand(Reg: M0);
2117 case 125:
2118 return isGFX11Plus() ? createRegOperand(Reg: M0) : createRegOperand(Reg: SGPR_NULL);
2119 case 126: return createRegOperand(Reg: EXEC_LO);
2120 case 127: return createRegOperand(Reg: EXEC_HI);
2121 case 230: return createRegOperand(Reg: SRC_FLAT_SCRATCH_BASE_LO);
2122 case 231: return createRegOperand(Reg: SRC_FLAT_SCRATCH_BASE_HI);
2123 case 235: return createRegOperand(Reg: SRC_SHARED_BASE_LO);
2124 case 236: return createRegOperand(Reg: SRC_SHARED_LIMIT_LO);
2125 case 237: return createRegOperand(Reg: SRC_PRIVATE_BASE_LO);
2126 case 238: return createRegOperand(Reg: SRC_PRIVATE_LIMIT_LO);
2127 case 239: return createRegOperand(Reg: SRC_POPS_EXITING_WAVE_ID);
2128 case 251: return createRegOperand(Reg: SRC_VCCZ);
2129 case 252: return createRegOperand(Reg: SRC_EXECZ);
2130 case 253: return createRegOperand(Reg: SRC_SCC);
2131 case 254: return createRegOperand(Reg: LDS_DIRECT);
2132 default: break;
2133 // clang-format on
2134 }
2135 return errOperand(V: Val, ErrMsg: "unknown operand encoding " + Twine(Val));
2136}
2137
2138MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
2139 using namespace AMDGPU;
2140
2141 switch (Val) {
2142 case 102: return createRegOperand(Reg: FLAT_SCR);
2143 case 104: return createRegOperand(Reg: XNACK_MASK);
2144 case 106: return createRegOperand(Reg: VCC);
2145 case 108: return createRegOperand(Reg: TBA);
2146 case 110: return createRegOperand(Reg: TMA);
2147 case 124:
2148 if (isGFX11Plus())
2149 return createRegOperand(Reg: SGPR_NULL);
2150 break;
2151 case 125:
2152 if (!isGFX11Plus())
2153 return createRegOperand(Reg: SGPR_NULL);
2154 break;
2155 case 126: return createRegOperand(Reg: EXEC);
2156 case 230: return createRegOperand(Reg: SRC_FLAT_SCRATCH_BASE_LO);
2157 case 235: return createRegOperand(Reg: SRC_SHARED_BASE);
2158 case 236: return createRegOperand(Reg: SRC_SHARED_LIMIT);
2159 case 237: return createRegOperand(Reg: SRC_PRIVATE_BASE);
2160 case 238: return createRegOperand(Reg: SRC_PRIVATE_LIMIT);
2161 case 239: return createRegOperand(Reg: SRC_POPS_EXITING_WAVE_ID);
2162 case 251: return createRegOperand(Reg: SRC_VCCZ);
2163 case 252: return createRegOperand(Reg: SRC_EXECZ);
2164 case 253: return createRegOperand(Reg: SRC_SCC);
2165 default: break;
2166 }
2167 return errOperand(V: Val, ErrMsg: "unknown operand encoding " + Twine(Val));
2168}
2169
2170MCOperand AMDGPUDisassembler::decodeSpecialReg96Plus(unsigned Val) const {
2171 using namespace AMDGPU;
2172
2173 switch (Val) {
2174 case 124:
2175 if (isGFX11Plus())
2176 return createRegOperand(Reg: SGPR_NULL);
2177 break;
2178 case 125:
2179 if (!isGFX11Plus())
2180 return createRegOperand(Reg: SGPR_NULL);
2181 break;
2182 default:
2183 break;
2184 }
2185 return errOperand(V: Val, ErrMsg: "unknown operand encoding " + Twine(Val));
2186}
2187
2188MCOperand AMDGPUDisassembler::decodeSDWASrc(unsigned Width,
2189 const unsigned Val) const {
2190 using namespace AMDGPU::SDWA;
2191 using namespace AMDGPU::EncValues;
2192
2193 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX9) ||
2194 STI.hasFeature(Feature: AMDGPU::FeatureGFX10)) {
2195 // XXX: cast to int is needed to avoid stupid warning:
2196 // compare with unsigned is always true
2197 if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
2198 Val <= SDWA9EncValues::SRC_VGPR_MAX) {
2199 return createRegOperand(RegClassID: getVgprClassId(Width),
2200 Val: Val - SDWA9EncValues::SRC_VGPR_MIN);
2201 }
2202 if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
2203 Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
2204 : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
2205 return createSRegOperand(SRegClassID: getSgprClassId(Width),
2206 Val: Val - SDWA9EncValues::SRC_SGPR_MIN);
2207 }
2208 if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
2209 Val <= SDWA9EncValues::SRC_TTMP_MAX) {
2210 return createSRegOperand(SRegClassID: getTtmpClassId(Width),
2211 Val: Val - SDWA9EncValues::SRC_TTMP_MIN);
2212 }
2213
2214 const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
2215
2216 if ((INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) ||
2217 (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX))
2218 return MCOperand::createImm(Val: SVal);
2219
2220 return decodeSpecialReg32(Val: SVal);
2221 }
2222 if (STI.hasFeature(Feature: AMDGPU::FeatureVolcanicIslands))
2223 return createRegOperand(RegClassID: getVgprClassId(Width), Val);
2224 llvm_unreachable("unsupported target");
2225}
2226
2227MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
2228 return decodeSDWASrc(Width: 16, Val);
2229}
2230
2231MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
2232 return decodeSDWASrc(Width: 32, Val);
2233}
2234
2235MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
2236 using namespace AMDGPU::SDWA;
2237
2238 assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
2239 STI.hasFeature(AMDGPU::FeatureGFX10)) &&
2240 "SDWAVopcDst should be present only on GFX9+");
2241
2242 bool IsWave32 = STI.hasFeature(Feature: AMDGPU::FeatureWavefrontSize32);
2243
2244 if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
2245 Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
2246
2247 int TTmpIdx = getTTmpIdx(Val);
2248 if (TTmpIdx >= 0) {
2249 auto TTmpClsId = getTtmpClassId(Width: IsWave32 ? 32 : 64);
2250 return createSRegOperand(SRegClassID: TTmpClsId, Val: TTmpIdx);
2251 }
2252 if (Val > SGPR_MAX) {
2253 return IsWave32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val);
2254 }
2255 return createSRegOperand(SRegClassID: getSgprClassId(Width: IsWave32 ? 32 : 64), Val);
2256 }
2257 return createRegOperand(Reg: IsWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC);
2258}
2259
2260MCOperand AMDGPUDisassembler::decodeBoolReg(const MCInst &Inst,
2261 unsigned Val) const {
2262 return STI.hasFeature(Feature: AMDGPU::FeatureWavefrontSize32)
2263 ? decodeSrcOp(Inst, Width: 32, Val)
2264 : decodeSrcOp(Inst, Width: 64, Val);
2265}
2266
2267MCOperand AMDGPUDisassembler::decodeSplitBarrier(const MCInst &Inst,
2268 unsigned Val) const {
2269 return decodeSrcOp(Inst, Width: 32, Val);
2270}
2271
2272MCOperand AMDGPUDisassembler::decodeDpp8FI(unsigned Val) const {
2273 if (Val != AMDGPU::DPP::DPP8_FI_0 && Val != AMDGPU::DPP::DPP8_FI_1)
2274 return MCOperand();
2275 return MCOperand::createImm(Val);
2276}
2277
2278MCOperand AMDGPUDisassembler::decodeVersionImm(unsigned Imm) const {
2279 using VersionField = AMDGPU::EncodingField<7, 0>;
2280 using W64Bit = AMDGPU::EncodingBit<13>;
2281 using W32Bit = AMDGPU::EncodingBit<14>;
2282 using MDPBit = AMDGPU::EncodingBit<15>;
2283 using Encoding = AMDGPU::EncodingFields<VersionField, W64Bit, W32Bit, MDPBit>;
2284
2285 auto [Version, W64, W32, MDP] = Encoding::decode(Encoded: Imm);
2286
2287 // Decode into a plain immediate if any unused bits are raised.
2288 if (Encoding::encode(Values: Version, Values: W64, Values: W32, Values: MDP) != Imm)
2289 return MCOperand::createImm(Val: Imm);
2290
2291 const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
2292 const auto *I = find_if(
2293 Range: Versions, P: [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) {
2294 return V.Code == Version;
2295 });
2296 MCContext &Ctx = getContext();
2297 const MCExpr *E;
2298 if (I == Versions.end())
2299 E = MCConstantExpr::create(Value: Version, Ctx);
2300 else
2301 E = MCSymbolRefExpr::create(Symbol: Ctx.getOrCreateSymbol(Name: I->Symbol), Ctx);
2302
2303 if (W64)
2304 E = MCBinaryExpr::createOr(LHS: E, RHS: UCVersionW64Expr, Ctx);
2305 if (W32)
2306 E = MCBinaryExpr::createOr(LHS: E, RHS: UCVersionW32Expr, Ctx);
2307 if (MDP)
2308 E = MCBinaryExpr::createOr(LHS: E, RHS: UCVersionMDPExpr, Ctx);
2309
2310 return MCOperand::createExpr(Val: E);
2311}
2312
2313bool AMDGPUDisassembler::isVI() const {
2314 return STI.hasFeature(Feature: AMDGPU::FeatureVolcanicIslands);
2315}
2316
2317bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }
2318
2319bool AMDGPUDisassembler::isGFX90A() const {
2320 return STI.hasFeature(Feature: AMDGPU::FeatureGFX90AInsts);
2321}
2322
2323bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }
2324
2325bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }
2326
2327bool AMDGPUDisassembler::isGFX10Plus() const {
2328 return AMDGPU::isGFX10Plus(STI);
2329}
2330
2331bool AMDGPUDisassembler::isGFX11() const {
2332 return STI.hasFeature(Feature: AMDGPU::FeatureGFX11);
2333}
2334
2335bool AMDGPUDisassembler::isGFX11Plus() const {
2336 return AMDGPU::isGFX11Plus(STI);
2337}
2338
2339bool AMDGPUDisassembler::isGFX1170() const {
2340 return STI.hasFeature(Feature: AMDGPU::FeatureGFX11_7Insts);
2341}
2342
2343bool AMDGPUDisassembler::isGFX12() const {
2344 return STI.hasFeature(Feature: AMDGPU::FeatureGFX12);
2345}
2346
2347bool AMDGPUDisassembler::isGFX12Plus() const {
2348 return AMDGPU::isGFX12Plus(STI);
2349}
2350
2351bool AMDGPUDisassembler::isGFX1250() const { return AMDGPU::isGFX1250(STI); }
2352
2353bool AMDGPUDisassembler::isGFX1250Plus() const {
2354 return AMDGPU::isGFX1250Plus(STI);
2355}
2356
2357bool AMDGPUDisassembler::isGFX13() const { return AMDGPU::isGFX13(STI); }
2358
2359bool AMDGPUDisassembler::isGFX13Plus() const {
2360 return AMDGPU::isGFX13Plus(STI);
2361}
2362
2363bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
2364 return STI.hasFeature(Feature: AMDGPU::FeatureArchitectedFlatScratch);
2365}
2366
2367bool AMDGPUDisassembler::hasKernargPreload() const {
2368 return AMDGPU::hasKernargPreload(STI);
2369}
2370
2371//===----------------------------------------------------------------------===//
2372// AMDGPU specific symbol handling
2373//===----------------------------------------------------------------------===//
2374
2375/// Print a string describing the reserved bit range specified by Mask with
2376/// offset BaseBytes for use in error comments. Mask is a single continuous
2377/// range of 1s surrounded by zeros. The format here is meant to align with the
2378/// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
2379static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
2380 SmallString<32> Result;
2381 raw_svector_ostream S(Result);
2382
2383 int TrailingZeros = llvm::countr_zero(Val: Mask);
2384 int PopCount = llvm::popcount(Value: Mask);
2385
2386 if (PopCount == 1) {
2387 S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2388 } else {
2389 S << "bits in range ("
2390 << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
2391 << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2392 }
2393
2394 return Result;
2395}
2396
// Helper macros for decoding one kernel-descriptor word. They expand in
// place and rely on names from the enclosing function: FourByteBuffer (the
// word being decoded), KdStream (the output stream), Indent, and MAI for the
// comment prefix.

// Extract the field selected by MASK from FourByteBuffer.
#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))

// Print "<Indent><DIRECTIVE> <field value>" on its own line.
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n';            \
  } while (false)

// Same as PRINT_DIRECTIVE, but prefixed with the target's comment string.
#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)                        \
  do {                                                                         \
    KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " "       \
             << GET_FIELD(MASK) << '\n';                                       \
  } while (false)

// Return an invalid_argument error from the enclosing function when any bit
// selected by MASK is set in FourByteBuffer. DESC names the field and MSG is
// appended verbatim to the message.
#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG)                              \
  do {                                                                         \
    if ((FourByteBuffer & (MASK)) != 0) {                                      \
      return createStringError(std::errc::invalid_argument,                    \
                               "kernel descriptor " DESC                       \
                               " reserved %s set" MSG,                         \
                               getBitRangeFromMask((MASK), 0).c_str());        \
    }                                                                          \
  } while (false)

// Convenience forms: the description defaults to the spelling of MASK, and
// an optional message is appended after ", ".
#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)                                     \
  CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)                                   \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)                          \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
2425
2426// NOLINTNEXTLINE(readability-identifier-naming)
2427Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
2428 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2429 using namespace amdhsa;
2430 StringRef Indent = "\t";
2431
2432 // We cannot accurately backward compute #VGPRs used from
2433 // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
2434 // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
2435 // simply calculate the inverse of what the assembler does.
2436
2437 uint32_t GranulatedWorkitemVGPRCount =
2438 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
2439
2440 uint32_t NextFreeVGPR =
2441 (GranulatedWorkitemVGPRCount + 1) *
2442 AMDGPU::IsaInfo::getVGPREncodingGranule(STI, EnableWavefrontSize32);
2443
2444 KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
2445
2446 // We cannot backward compute values used to calculate
2447 // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following
2448 // directives can't be computed:
2449 // .amdhsa_reserve_vcc
2450 // .amdhsa_reserve_flat_scratch
2451 // .amdhsa_reserve_xnack_mask
2452 // They take their respective default values if not specified in the assembly.
2453 //
2454 // GRANULATED_WAVEFRONT_SGPR_COUNT
2455 // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
2456 //
2457 // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
2458 // are set to 0. So while disassembling we consider that:
2459 //
2460 // GRANULATED_WAVEFRONT_SGPR_COUNT
2461 // = f(NEXT_FREE_SGPR + 0 + 0 + 0)
2462 //
2463 // The disassembler cannot recover the original values of those 3 directives.
2464
2465 uint32_t GranulatedWavefrontSGPRCount =
2466 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
2467
2468 if (isGFX10Plus())
2469 CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
2470 "must be zero on gfx10+");
2471
2472 uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
2473 AMDGPU::IsaInfo::getSGPREncodingGranule(STI);
2474
2475 KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
2476 if (!hasArchitectedFlatScratch())
2477 KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
2478 bool ReservedXnackMask = STI.hasFeature(Feature: AMDGPU::FeatureXNACK);
2479 assert(!ReservedXnackMask || STI.hasFeature(AMDGPU::FeatureSupportsXNACK));
2480 KdStream << Indent << ".amdhsa_reserve_xnack_mask " << ReservedXnackMask
2481 << '\n';
2482 KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
2483
2484 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
2485
2486 PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
2487 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
2488 PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
2489 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
2490 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
2491 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
2492 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
2493 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
2494
2495 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);
2496
2497 if (STI.hasFeature(Feature: AMDGPU::FeatureDX10ClampAndIEEEMode))
2498 PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
2499 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
2500
2501 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);
2502
2503 if (STI.hasFeature(Feature: AMDGPU::FeatureDX10ClampAndIEEEMode))
2504 PRINT_DIRECTIVE(".amdhsa_ieee_mode",
2505 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
2506
2507 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
2508 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
2509
2510 // Bits [26].
2511 if (isGFX9Plus()) {
2512 PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
2513 } else {
2514 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
2515 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
2516 }
2517
2518 // Bits [27].
2519 if (isGFX1250Plus()) {
2520 PRINT_PSEUDO_DIRECTIVE_COMMENT("FLAT_SCRATCH_IS_NV",
2521 COMPUTE_PGM_RSRC1_GFX125_FLAT_SCRATCH_IS_NV);
2522 } else {
2523 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX120_RESERVED1,
2524 "COMPUTE_PGM_RSRC1");
2525 }
2526
2527 // Bits [28].
2528 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED2, "COMPUTE_PGM_RSRC1");
2529
2530 // Bits [29-31].
2531 if (isGFX10Plus()) {
2532 // WGP_MODE is not available on GFX1250.
2533 if (!isGFX1250Plus()) {
2534 PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
2535 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
2536 }
2537 PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
2538 PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
2539 } else {
2540 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED3,
2541 "COMPUTE_PGM_RSRC1");
2542 }
2543
2544 if (isGFX12Plus())
2545 PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
2546 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
2547
2548 return true;
2549}
2550
2551// NOLINTNEXTLINE(readability-identifier-naming)
2552Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
2553 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2554 using namespace amdhsa;
2555 StringRef Indent = "\t";
2556 if (hasArchitectedFlatScratch())
2557 PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
2558 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2559 else
2560 PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
2561 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2562 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
2563 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
2564 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
2565 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
2566 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
2567 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
2568 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
2569 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
2570 PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
2571 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
2572
2573 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
2574 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
2575 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);
2576
2577 PRINT_DIRECTIVE(
2578 ".amdhsa_exception_fp_ieee_invalid_op",
2579 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2580 PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
2581 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2582 PRINT_DIRECTIVE(
2583 ".amdhsa_exception_fp_ieee_div_zero",
2584 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2585 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
2586 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2587 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
2588 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2589 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
2590 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2591 PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
2592 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2593
2594 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");
2595
2596 return true;
2597}
2598
2599// NOLINTNEXTLINE(readability-identifier-naming)
2600Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
2601 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2602 using namespace amdhsa;
2603 StringRef Indent = "\t";
2604 if (isGFX90A()) {
2605 KdStream << Indent << ".amdhsa_accum_offset "
2606 << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
2607 << '\n';
2608
2609 PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2610
2611 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
2612 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2613 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
2614 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2615 } else if (isGFX10Plus()) {
2616 // Bits [0-3].
2617 if (!isGFX12Plus()) {
2618 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2619 PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
2620 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2621 } else {
2622 PRINT_PSEUDO_DIRECTIVE_COMMENT(
2623 "SHARED_VGPR_COUNT",
2624 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2625 }
2626 } else {
2627 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
2628 "COMPUTE_PGM_RSRC3",
2629 "must be zero on gfx12+");
2630 }
2631
2632 // Bits [4-11].
2633 if (isGFX11()) {
2634 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2635 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2636 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
2637 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2638 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
2639 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2640 } else if (isGFX12Plus()) {
2641 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2642 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2643 } else {
2644 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
2645 "COMPUTE_PGM_RSRC3",
2646 "must be zero on gfx10");
2647 }
2648
2649 // Bits [12].
2650 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
2651 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2652
2653 // Bits [13].
2654 if (isGFX12Plus()) {
2655 PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
2656 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2657 } else {
2658 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
2659 "COMPUTE_PGM_RSRC3",
2660 "must be zero on gfx10 or gfx11");
2661 }
2662
2663 // Bits [14-21].
2664 if (isGFX1250Plus()) {
2665 PRINT_DIRECTIVE(".amdhsa_named_barrier_count",
2666 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT);
2667 PRINT_PSEUDO_DIRECTIVE_COMMENT(
2668 "ENABLE_DYNAMIC_VGPR", COMPUTE_PGM_RSRC3_GFX125_ENABLE_DYNAMIC_VGPR);
2669 PRINT_PSEUDO_DIRECTIVE_COMMENT("TCP_SPLIT",
2670 COMPUTE_PGM_RSRC3_GFX125_TCP_SPLIT);
2671 PRINT_PSEUDO_DIRECTIVE_COMMENT(
2672 "ENABLE_DIDT_THROTTLE",
2673 COMPUTE_PGM_RSRC3_GFX125_ENABLE_DIDT_THROTTLE);
2674 } else {
2675 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX120_RESERVED4,
2676 "COMPUTE_PGM_RSRC3",
2677 "must be zero on gfx10+");
2678 }
2679
2680 // Bits [22-30].
2681 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED5,
2682 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2683
2684 // Bits [31].
2685 if (isGFX11Plus()) {
2686 PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
2687 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2688 } else {
2689 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED6,
2690 "COMPUTE_PGM_RSRC3",
2691 "must be zero on gfx10");
2692 }
2693 } else if (FourByteBuffer) {
2694 return createStringError(
2695 EC: std::errc::invalid_argument,
2696 Fmt: "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2697 }
2698 return true;
2699}
2700#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2701#undef PRINT_DIRECTIVE
2702#undef GET_FIELD
2703#undef CHECK_RESERVED_BITS_IMPL
2704#undef CHECK_RESERVED_BITS
2705#undef CHECK_RESERVED_BITS_MSG
2706#undef CHECK_RESERVED_BITS_DESC
2707#undef CHECK_RESERVED_BITS_DESC_MSG
2708
2709/// Create an error object to return from onSymbolStart for reserved kernel
2710/// descriptor bits being set.
2711static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2712 const char *Msg = "") {
2713 return createStringError(
2714 EC: std::errc::invalid_argument, Fmt: "kernel descriptor reserved %s set%s%s",
2715 Vals: getBitRangeFromMask(Mask, BaseBytes).c_str(), Vals: *Msg ? ", " : "", Vals: Msg);
2716}
2717
2718/// Create an error object to return from onSymbolStart for reserved kernel
2719/// descriptor bytes being set.
2720static Error createReservedKDBytesError(unsigned BaseInBytes,
2721 unsigned WidthInBytes) {
2722 // Create an error comment in the same format as the "Kernel Descriptor"
2723 // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2724 return createStringError(
2725 EC: std::errc::invalid_argument,
2726 Fmt: "kernel descriptor reserved bits in range (%u:%u) set",
2727 Vals: (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, Vals: BaseInBytes * CHAR_BIT);
2728}
2729
2730Expected<bool> AMDGPUDisassembler::decodeKernelDescriptorDirective(
2731 DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
2732 raw_string_ostream &KdStream) const {
2733#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2734 do { \
2735 KdStream << Indent << DIRECTIVE " " \
2736 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2737 } while (0)
2738
2739 uint16_t TwoByteBuffer = 0;
2740 uint32_t FourByteBuffer = 0;
2741
2742 StringRef ReservedBytes;
2743 StringRef Indent = "\t";
2744
2745 assert(Bytes.size() == 64);
2746 DataExtractor DE(Bytes, /*IsLittleEndian=*/true);
2747
2748 switch (Cursor.tell()) {
2749 case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
2750 FourByteBuffer = DE.getU32(C&: Cursor);
2751 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2752 << '\n';
2753 return true;
2754
2755 case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
2756 FourByteBuffer = DE.getU32(C&: Cursor);
2757 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2758 << FourByteBuffer << '\n';
2759 return true;
2760
2761 case amdhsa::KERNARG_SIZE_OFFSET:
2762 FourByteBuffer = DE.getU32(C&: Cursor);
2763 KdStream << Indent << ".amdhsa_kernarg_size "
2764 << FourByteBuffer << '\n';
2765 return true;
2766
2767 case amdhsa::RESERVED0_OFFSET:
2768 // 4 reserved bytes, must be 0.
2769 ReservedBytes = DE.getBytes(C&: Cursor, Length: 4);
2770 for (char B : ReservedBytes) {
2771 if (B != 0)
2772 return createReservedKDBytesError(BaseInBytes: amdhsa::RESERVED0_OFFSET, WidthInBytes: 4);
2773 }
2774 return true;
2775
2776 case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
2777 // KERNEL_CODE_ENTRY_BYTE_OFFSET
2778 // So far no directive controls this for Code Object V3, so simply skip for
2779 // disassembly.
2780 DE.skip(C&: Cursor, Length: 8);
2781 return true;
2782
2783 case amdhsa::RESERVED1_OFFSET:
2784 // 20 reserved bytes, must be 0.
2785 ReservedBytes = DE.getBytes(C&: Cursor, Length: 20);
2786 for (char B : ReservedBytes) {
2787 if (B != 0)
2788 return createReservedKDBytesError(BaseInBytes: amdhsa::RESERVED1_OFFSET, WidthInBytes: 20);
2789 }
2790 return true;
2791
2792 case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
2793 FourByteBuffer = DE.getU32(C&: Cursor);
2794 return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2795
2796 case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
2797 FourByteBuffer = DE.getU32(C&: Cursor);
2798 return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2799
2800 case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
2801 FourByteBuffer = DE.getU32(C&: Cursor);
2802 return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2803
2804 case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
2805 using namespace amdhsa;
2806 TwoByteBuffer = DE.getU16(C&: Cursor);
2807
2808 if (!hasArchitectedFlatScratch())
2809 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2810 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2811 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2812 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2813 PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2814 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2815 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2816 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2817 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2818 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2819 if (!hasArchitectedFlatScratch())
2820 PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2821 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2822 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2823 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2824
2825 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2826 return createReservedKDBitsError(Mask: KERNEL_CODE_PROPERTY_RESERVED0,
2827 BaseBytes: amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2828
2829 // Reserved for GFX9
2830 if (isGFX9() &&
2831 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2832 return createReservedKDBitsError(
2833 Mask: KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2834 BaseBytes: amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, Msg: "must be zero on gfx9");
2835 }
2836 if (isGFX10Plus()) {
2837 PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2838 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2839 }
2840
2841 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
2842 PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2843 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2844
2845 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2846 return createReservedKDBitsError(Mask: KERNEL_CODE_PROPERTY_RESERVED1,
2847 BaseBytes: amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2848 }
2849
2850 return true;
2851
2852 case amdhsa::KERNARG_PRELOAD_OFFSET:
2853 using namespace amdhsa;
2854 TwoByteBuffer = DE.getU16(C&: Cursor);
2855 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2856 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2857 KERNARG_PRELOAD_SPEC_LENGTH);
2858 }
2859
2860 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2861 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2862 KERNARG_PRELOAD_SPEC_OFFSET);
2863 }
2864 return true;
2865
2866 case amdhsa::RESERVED3_OFFSET:
2867 // 4 bytes from here are reserved, must be 0.
2868 ReservedBytes = DE.getBytes(C&: Cursor, Length: 4);
2869 for (char B : ReservedBytes) {
2870 if (B != 0)
2871 return createReservedKDBytesError(BaseInBytes: amdhsa::RESERVED3_OFFSET, WidthInBytes: 4);
2872 }
2873 return true;
2874
2875 default:
2876 llvm_unreachable("Unhandled index. Case statements cover everything.");
2877 return true;
2878 }
2879#undef PRINT_DIRECTIVE
2880}
2881
2882Expected<bool> AMDGPUDisassembler::decodeKernelDescriptor(
2883 StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2884
2885 // CP microcode requires the kernel descriptor to be 64 aligned.
2886 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2887 return createStringError(EC: std::errc::invalid_argument,
2888 Fmt: "kernel descriptor must be 64-byte aligned");
2889
2890 // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2891 // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2892 // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2893 // order. Workaround this by first looking up .amdhsa_wavefront_size32 here
2894 // when required.
2895 if (isGFX10Plus()) {
2896 uint16_t KernelCodeProperties =
2897 support::endian::read16(P: &Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
2898 E: llvm::endianness::little);
2899 EnableWavefrontSize32 =
2900 AMDHSA_BITS_GET(KernelCodeProperties,
2901 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2902 }
2903
2904 std::string Kd;
2905 raw_string_ostream KdStream(Kd);
2906 KdStream << ".amdhsa_kernel " << KdName << '\n';
2907
2908 DataExtractor::Cursor C(0);
2909 while (C && C.tell() < Bytes.size()) {
2910 Expected<bool> Res = decodeKernelDescriptorDirective(Cursor&: C, Bytes, KdStream);
2911
2912 cantFail(Err: C.takeError());
2913
2914 if (!Res)
2915 return Res;
2916 }
2917 KdStream << ".end_amdhsa_kernel\n";
2918 outs() << KdStream.str();
2919 return true;
2920}
2921
2922Expected<bool> AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
2923 uint64_t &Size,
2924 ArrayRef<uint8_t> Bytes,
2925 uint64_t Address) const {
2926 // Right now only kernel descriptor needs to be handled.
2927 // We ignore all other symbols for target specific handling.
2928 // TODO:
2929 // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2930 // Object V2 and V3 when symbols are marked protected.
2931
2932 // amd_kernel_code_t for Code Object V2.
2933 if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2934 Size = 256;
2935 return createStringError(EC: std::errc::invalid_argument,
2936 Fmt: "code object v2 is not supported");
2937 }
2938
2939 // Code Object V3 kernel descriptors.
2940 StringRef Name = Symbol.Name;
2941 if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(Suffix: StringRef(".kd"))) {
2942 Size = 64; // Size = 64 regardless of success or failure.
2943 return decodeKernelDescriptor(KdName: Name.drop_back(N: 3), Bytes, KdAddress: Address);
2944 }
2945
2946 return false;
2947}
2948
2949const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
2950 int64_t Val) {
2951 MCContext &Ctx = getContext();
2952 MCSymbol *Sym = Ctx.getOrCreateSymbol(Name: Id);
2953 // Note: only set value to Val on a new symbol in case an dissassembler
2954 // has already been initialized in this context.
2955 if (!Sym->isVariable()) {
2956 Sym->setVariableValue(MCConstantExpr::create(Value: Val, Ctx));
2957 } else {
2958 int64_t Res = ~Val;
2959 bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
2960 if (!Valid || Res != Val)
2961 Ctx.reportWarning(L: SMLoc(), Msg: "unsupported redefinition of " + Id);
2962 }
2963 return MCSymbolRefExpr::create(Symbol: Sym, Ctx);
2964}
2965
2966bool AMDGPUDisassembler::isBufferInstruction(const MCInst &MI) const {
2967 const uint64_t TSFlags = MCII->get(Opcode: MI.getOpcode()).TSFlags;
2968
2969 // Check for MUBUF and MTBUF instructions
2970 if (TSFlags & (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))
2971 return true;
2972
2973 // Check for SMEM buffer instructions (S_BUFFER_* instructions)
2974 if ((TSFlags & SIInstrFlags::SMRD) && AMDGPU::getSMEMIsBuffer(Opc: MI.getOpcode()))
2975 return true;
2976
2977 return false;
2978}
2979
2980//===----------------------------------------------------------------------===//
2981// AMDGPUSymbolizer
2982//===----------------------------------------------------------------------===//
2983
2984// Try to find symbol name for specified label
2985bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
2986 MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2987 uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2988 uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2989
2990 if (!IsBranch) {
2991 return false;
2992 }
2993
2994 auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2995 if (!Symbols)
2996 return false;
2997
2998 auto Result = llvm::find_if(Range&: *Symbols, P: [Value](const SymbolInfoTy &Val) {
2999 return Val.Addr == static_cast<uint64_t>(Value) &&
3000 Val.Type == ELF::STT_NOTYPE;
3001 });
3002 if (Result != Symbols->end()) {
3003 auto *Sym = Ctx.getOrCreateSymbol(Name: Result->Name);
3004 const auto *Add = MCSymbolRefExpr::create(Symbol: Sym, Ctx);
3005 Inst.addOperand(Op: MCOperand::createExpr(Val: Add));
3006 return true;
3007 }
3008 // Add to list of referenced addresses, so caller can synthesize a label.
3009 ReferencedAddresses.push_back(x: static_cast<uint64_t>(Value));
3010 return false;
3011}
3012
3013void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
3014 int64_t Value,
3015 uint64_t Address) {
3016 llvm_unreachable("unimplemented");
3017}
3018
3019//===----------------------------------------------------------------------===//
3020// Initialization
3021//===----------------------------------------------------------------------===//
3022
3023static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
3024 LLVMOpInfoCallback /*GetOpInfo*/,
3025 LLVMSymbolLookupCallback /*SymbolLookUp*/,
3026 void *DisInfo,
3027 MCContext *Ctx,
3028 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
3029 return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
3030}
3031
3032static MCDisassembler *createAMDGPUDisassembler(const Target &T,
3033 const MCSubtargetInfo &STI,
3034 MCContext &Ctx) {
3035 return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
3036}
3037
3038extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
3039LLVMInitializeAMDGPUDisassembler() {
3040 TargetRegistry::RegisterMCDisassembler(T&: getTheGCNTarget(),
3041 Fn: createAMDGPUDisassembler);
3042 TargetRegistry::RegisterMCSymbolizer(T&: getTheGCNTarget(),
3043 Fn: createAMDGPUSymbolizer);
3044}
3045