//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// This file contains the definition of the AMDGPU ISA disassembler.
//
//===----------------------------------------------------------------------===//

// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?

#include "Disassembler/AMDGPUDisassembler.h"
#include "MCTargetDesc/AMDGPUMCExpr.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm-c/DisassemblerTypes.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDecoder.h"
#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Compiler.h"

using namespace llvm;
using namespace llvm::MCD;

#define DEBUG_TYPE "amdgpu-disassembler"

#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                          \
                 : AMDGPU::EncValues::SGPR_MAX_SI)

using DecodeStatus = llvm::MCDisassembler::DecodeStatus;

static int64_t getInlineImmValF16(unsigned Imm);
static int64_t getInlineImmValBF16(unsigned Imm);
static int64_t getInlineImmVal32(unsigned Imm);
static int64_t getInlineImmVal64(unsigned Imm);

AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
                                       MCContext &Ctx, MCInstrInfo const *MCII)
    : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
      MAI(*Ctx.getAsmInfo()),
      HwModeRegClass(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)),
      TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
      CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
  // ToDo: AMDGPUDisassembler supports only VI ISA.
  if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
    reportFatalUsageError("disassembly not yet supported for subtarget");

  for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
    createConstantSymbolExpr(Symbol, Code);

  UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
  UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
  UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
}

void AMDGPUDisassembler::setABIVersion(unsigned Version) {
  CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(Version);
}

inline static MCDisassembler::DecodeStatus
addOperand(MCInst &Inst, const MCOperand& Opnd) {
  Inst.addOperand(Opnd);
  return Opnd.isValid() ?
    MCDisassembler::Success :
    MCDisassembler::Fail;
}

static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                AMDGPU::OpName Name) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), Name);
  if (OpIdx != -1) {
    auto *I = MI.begin();
    std::advance(I, OpIdx);
    MI.insert(I, Op);
  }
  return OpIdx;
}
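
// Illustrative usage (a minimal sketch mirroring calls made later in this
// file, e.g. for MAC opcodes): insert a dummy zero immediate at the slot that
// the opcode's operand table names src2_modifiers.
//
//   insertNamedMCOperand(MI, MCOperand::createImm(0),
//                        AMDGPU::OpName::src2_modifiers);
//
// If the opcode has no operand with that name, the call is a no-op and -1 is
// returned.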
98
static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr,
                                       const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);

  // Our branches take a simm16.
  int64_t Offset = SignExtend64<16>(Imm) * 4 + 4 + Addr;

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}
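
// Worked example (illustrative only): the 16-bit field counts dwords relative
// to the end of the 4-byte instruction. With Addr = 0x1000:
//   Imm = 0x000C -> SignExtend64<16>(0x000C) * 4 + 4 + 0x1000 = 0x1034
//   Imm = 0xFFFF -> (-1) * 4 + 4 + 0x1000 = 0x1000 (a branch to itself)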
111
static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
                                     const MCDisassembler *Decoder) {
  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  int64_t Offset;
  if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
    Offset = SignExtend64<24>(Imm);
  } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
    Offset = Imm & 0xFFFFF;
  } else { // GFX9+ supports 21-bit signed offsets.
    Offset = SignExtend64<21>(Imm);
  }
  return addOperand(Inst, MCOperand::createImm(Offset));
}
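
// Worked example (illustrative): the same encoded bits are interpreted
// differently per subtarget. For Imm = 0x1FFFFF:
//   GFX9+  : SignExtend64<21>(0x1FFFFF) = -1
//   VI     : 0x1FFFFF & 0xFFFFF = 0xFFFFF (unsigned)
//   GFX12+ : SignExtend64<24>(0x1FFFFF) = 0x1FFFFF (bit 23 is clear)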
125
126static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
127 const MCDisassembler *Decoder) {
128 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
129 return addOperand(Inst, Opnd: DAsm->decodeBoolReg(Inst, Val));
130}
131
132static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
133 uint64_t Addr,
134 const MCDisassembler *Decoder) {
135 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
136 return addOperand(Inst, Opnd: DAsm->decodeSplitBarrier(Inst, Val));
137}
138
139static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
140 const MCDisassembler *Decoder) {
141 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
142 return addOperand(Inst, Opnd: DAsm->decodeDpp8FI(Val));
143}
144
145#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
146 static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm, \
147 uint64_t /*Addr*/, \
148 const MCDisassembler *Decoder) { \
149 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
150 return addOperand(Inst, DAsm->DecoderName(Imm)); \
151 }
152
// Decoders for registers that decode directly using RegClassID. The 8-bit Imm
// is the register number. Used by VGPR-only and AGPR-only operands.
155#define DECODE_OPERAND_REG_8(RegClass) \
156 static DecodeStatus Decode##RegClass##RegisterClass( \
157 MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, \
158 const MCDisassembler *Decoder) { \
159 assert(Imm < (1 << 8) && "8-bit encoding"); \
160 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
161 return addOperand( \
162 Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm)); \
163 }
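
// For illustration, DECODE_OPERAND_REG_8(VGPR_32) expands (modulo whitespace)
// to:
//
//   static DecodeStatus DecodeVGPR_32RegisterClass(
//       MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
//       const MCDisassembler *Decoder) {
//     assert(Imm < (1 << 8) && "8-bit encoding");
//     auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
//     return addOperand(
//         Inst, DAsm->createRegOperand(AMDGPU::VGPR_32RegClassID, Imm));
//   }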
164
165#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm) \
166 static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, \
167 const MCDisassembler *Decoder) { \
168 assert(Imm < (1 << EncSize) && #EncSize "-bit encoding"); \
169 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
170 return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm)); \
171 }
172
173static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
174 unsigned OpWidth, unsigned Imm, unsigned EncImm,
175 const MCDisassembler *Decoder) {
176 assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
177 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
178 return addOperand(Inst, Opnd: DAsm->decodeSrcOp(Inst, Width: OpWidth, Val: EncImm));
179}
180
// Decoders for registers. The 7-bit Imm is the register number; decodeSrcOp
// determines the register class. Used by SGPR-only operands.
183#define DECODE_OPERAND_SREG_7(RegClass, OpWidth) \
184 DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm)
185
186#define DECODE_OPERAND_SREG_8(RegClass, OpWidth) \
187 DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm)
188
// Decoder for registers with a 10-bit Imm: Imm{7-0} is the register number,
// Imm{9} is acc (AGPR or VGPR), and Imm{8} should be 0 (see VOP3Pe_SMFMAC).
// Imm{8} is set to 1 (IS_VGPR) so the value is decoded via 'enum10' in
// decodeSrcOp. Used by AV_ register classes (AGPR-only or VGPR-only operands).
193template <unsigned OpWidth>
194static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
195 const MCDisassembler *Decoder) {
196 return decodeSrcOp(Inst, EncSize: 10, OpWidth, Imm, EncImm: Imm | AMDGPU::EncValues::IS_VGPR,
197 Decoder);
198}
199
200// Decoder for Src(9-bit encoding) registers only.
201template <unsigned OpWidth>
202static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
203 uint64_t /* Addr */,
204 const MCDisassembler *Decoder) {
205 return decodeSrcOp(Inst, EncSize: 9, OpWidth, Imm, EncImm: Imm, Decoder);
206}
207
// Decoder for 9-bit Src encodings of AGPRs: the register number is encoded in
// 9 bits; Imm{9} is set to 1 (acc) and the value is decoded via 'enum10' in
// decodeSrcOp. Registers only.
211template <unsigned OpWidth>
212static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
213 const MCDisassembler *Decoder) {
214 return decodeSrcOp(Inst, EncSize: 9, OpWidth, Imm, EncImm: Imm | 512, Decoder);
215}
216
// Decoder for 'enum10' from decodeSrcOp: Imm{0-8} is the 9-bit Src encoding
// and Imm{9} is acc. Registers only.
219template <unsigned OpWidth>
220static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
221 uint64_t /* Addr */,
222 const MCDisassembler *Decoder) {
223 return decodeSrcOp(Inst, EncSize: 10, OpWidth, Imm, EncImm: Imm, Decoder);
224}
225
// Decoder for RegisterOperands using the 9-bit Src encoding. The operand can
// be a register from RegClass or an immediate. Registers that don't belong to
// RegClass are still decoded, and the InstPrinter will report a warning. An
// immediate is decoded into a constant matching the OperandType (important
// for floating-point types).
231template <unsigned OpWidth>
232static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
233 uint64_t /* Addr */,
234 const MCDisassembler *Decoder) {
235 return decodeSrcOp(Inst, EncSize: 9, OpWidth, Imm, EncImm: Imm, Decoder);
236}
237
238// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
239// and decode using 'enum10' from decodeSrcOp.
240template <unsigned OpWidth>
241static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
242 uint64_t /* Addr */,
243 const MCDisassembler *Decoder) {
244 return decodeSrcOp(Inst, EncSize: 9, OpWidth, Imm, EncImm: Imm | 512, Decoder);
245}
246
247// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
248// when RegisterClass is used as an operand. Most often used for destination
249// operands.
250
251DECODE_OPERAND_REG_8(VGPR_32)
252DECODE_OPERAND_REG_8(VGPR_32_Lo128)
253DECODE_OPERAND_REG_8(VReg_64)
254DECODE_OPERAND_REG_8(VReg_96)
255DECODE_OPERAND_REG_8(VReg_128)
256DECODE_OPERAND_REG_8(VReg_192)
257DECODE_OPERAND_REG_8(VReg_256)
258DECODE_OPERAND_REG_8(VReg_288)
259DECODE_OPERAND_REG_8(VReg_320)
260DECODE_OPERAND_REG_8(VReg_352)
261DECODE_OPERAND_REG_8(VReg_384)
262DECODE_OPERAND_REG_8(VReg_512)
263DECODE_OPERAND_REG_8(VReg_1024)
264
265DECODE_OPERAND_SREG_7(SReg_32, 32)
266DECODE_OPERAND_SREG_7(SReg_32_XM0, 32)
267DECODE_OPERAND_SREG_7(SReg_32_XEXEC, 32)
268DECODE_OPERAND_SREG_7(SReg_32_XM0_XEXEC, 32)
269DECODE_OPERAND_SREG_7(SReg_32_XEXEC_HI, 32)
270DECODE_OPERAND_SREG_7(SReg_64_XEXEC, 64)
271DECODE_OPERAND_SREG_7(SReg_64_XEXEC_XNULL, 64)
272DECODE_OPERAND_SREG_7(SReg_96, 96)
273DECODE_OPERAND_SREG_7(SReg_128, 128)
274DECODE_OPERAND_SREG_7(SReg_128_XNULL, 128)
275DECODE_OPERAND_SREG_7(SReg_256, 256)
276DECODE_OPERAND_SREG_7(SReg_256_XNULL, 256)
277DECODE_OPERAND_SREG_7(SReg_512, 512)
278
279DECODE_OPERAND_SREG_8(SReg_64, 64)
280
281DECODE_OPERAND_REG_8(AGPR_32)
282DECODE_OPERAND_REG_8(AReg_64)
283DECODE_OPERAND_REG_8(AReg_128)
284DECODE_OPERAND_REG_8(AReg_256)
285DECODE_OPERAND_REG_8(AReg_512)
286DECODE_OPERAND_REG_8(AReg_1024)
287
288static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
289 uint64_t /*Addr*/,
290 const MCDisassembler *Decoder) {
291 assert(isUInt<10>(Imm) && "10-bit encoding expected");
292 assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");
293
294 bool IsHi = Imm & (1 << 9);
295 unsigned RegIdx = Imm & 0xff;
296 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
297 return addOperand(Inst, Opnd: DAsm->createVGPR16Operand(RegIdx, IsHi));
298}
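
// Example (illustrative): Imm = 0x203 has bit 9 set and low byte 0x03, so the
// operand is the high 16-bit half of VGPR 3; Imm = 0x003 selects the low half
// of the same register.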
299
300static DecodeStatus
301DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
302 const MCDisassembler *Decoder) {
303 assert(isUInt<8>(Imm) && "8-bit encoding expected");
304
305 bool IsHi = Imm & (1 << 7);
306 unsigned RegIdx = Imm & 0x7f;
307 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
308 return addOperand(Inst, Opnd: DAsm->createVGPR16Operand(RegIdx, IsHi));
309}
310
311template <unsigned OpWidth>
312static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
313 uint64_t /*Addr*/,
314 const MCDisassembler *Decoder) {
315 assert(isUInt<9>(Imm) && "9-bit encoding expected");
316
317 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
318 if (Imm & AMDGPU::EncValues::IS_VGPR) {
319 bool IsHi = Imm & (1 << 7);
320 unsigned RegIdx = Imm & 0x7f;
321 return addOperand(Inst, Opnd: DAsm->createVGPR16Operand(RegIdx, IsHi));
322 }
323 return addOperand(Inst, Opnd: DAsm->decodeNonVGPRSrcOp(Inst, Width: OpWidth, Val: Imm & 0xFF));
324}
325
326template <unsigned OpWidth>
327static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
328 uint64_t /*Addr*/,
329 const MCDisassembler *Decoder) {
330 assert(isUInt<10>(Imm) && "10-bit encoding expected");
331
332 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
333 if (Imm & AMDGPU::EncValues::IS_VGPR) {
334 bool IsHi = Imm & (1 << 9);
335 unsigned RegIdx = Imm & 0xff;
336 return addOperand(Inst, Opnd: DAsm->createVGPR16Operand(RegIdx, IsHi));
337 }
338 return addOperand(Inst, Opnd: DAsm->decodeNonVGPRSrcOp(Inst, Width: OpWidth, Val: Imm & 0xFF));
339}
340
341static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm,
342 uint64_t /*Addr*/,
343 const MCDisassembler *Decoder) {
344 assert(isUInt<10>(Imm) && "10-bit encoding expected");
345 assert(Imm & AMDGPU::EncValues::IS_VGPR && "VGPR expected");
346
347 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
348
349 bool IsHi = Imm & (1 << 9);
350 unsigned RegIdx = Imm & 0xff;
351 return addOperand(Inst, Opnd: DAsm->createVGPR16Operand(RegIdx, IsHi));
352}
353
354static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
355 uint64_t Addr,
356 const MCDisassembler *Decoder) {
357 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
358 return addOperand(Inst, Opnd: DAsm->decodeMandatoryLiteralConstant(Imm));
359}
360
361static DecodeStatus decodeOperand_KImmFP64(MCInst &Inst, uint64_t Imm,
362 uint64_t Addr,
363 const MCDisassembler *Decoder) {
364 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
365 return addOperand(Inst, Opnd: DAsm->decodeMandatoryLiteral64Constant(Imm));
366}
367
368static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
369 uint64_t Addr, const void *Decoder) {
370 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
371 return addOperand(Inst, Opnd: DAsm->decodeVOPDDstYOp(Inst, Val));
372}
373
374static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw,
375 const MCDisassembler *Decoder) {
376 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
377 return addOperand(Inst, Opnd: DAsm->decodeSrcOp(Inst, Width: Opw, Val: Imm | 256));
378}
379
380template <unsigned Opw>
381static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
382 uint64_t /* Addr */,
383 const MCDisassembler *Decoder) {
384 return decodeAVLdSt(Inst, Imm, Opw, Decoder);
385}
386
387static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
388 uint64_t Addr,
389 const MCDisassembler *Decoder) {
390 assert(Imm < (1 << 9) && "9-bit encoding");
391 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
392 return addOperand(Inst, Opnd: DAsm->decodeSrcOp(Inst, Width: 64, Val: Imm));
393}
394
395#define DECODE_SDWA(DecName) \
396DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
397
398DECODE_SDWA(Src32)
399DECODE_SDWA(Src16)
400DECODE_SDWA(VopcDst)
401
402static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
403 uint64_t /* Addr */,
404 const MCDisassembler *Decoder) {
405 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
406 return addOperand(Inst, Opnd: DAsm->decodeVersionImm(Imm));
407}
408
409#include "AMDGPUGenDisassemblerTables.inc"
410
411namespace {
412// Define bitwidths for various types used to instantiate the decoder.
413template <> constexpr uint32_t InsnBitWidth<uint32_t> = 32;
414template <> constexpr uint32_t InsnBitWidth<uint64_t> = 64;
415template <> constexpr uint32_t InsnBitWidth<std::bitset<96>> = 96;
416template <> constexpr uint32_t InsnBitWidth<std::bitset<128>> = 128;
417} // namespace
418
419//===----------------------------------------------------------------------===//
420//
421//===----------------------------------------------------------------------===//
422
423template <typename InsnType>
424DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table, MCInst &MI,
425 InsnType Inst, uint64_t Address,
426 raw_ostream &Comments) const {
427 assert(MI.getOpcode() == 0);
428 assert(MI.getNumOperands() == 0);
429 MCInst TmpInst;
430 HasLiteral = false;
431 const auto SavedBytes = Bytes;
432
433 SmallString<64> LocalComments;
434 raw_svector_ostream LocalCommentStream(LocalComments);
435 CommentStream = &LocalCommentStream;
436
437 DecodeStatus Res =
438 decodeInstruction(Table, TmpInst, Inst, Address, this, STI);
439
440 CommentStream = nullptr;
441
442 if (Res != MCDisassembler::Fail) {
443 MI = TmpInst;
444 Comments << LocalComments;
445 return MCDisassembler::Success;
446 }
447 Bytes = SavedBytes;
448 return MCDisassembler::Fail;
449}
450
451template <typename InsnType>
452DecodeStatus
453AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table1, const uint8_t *Table2,
454 MCInst &MI, InsnType Inst, uint64_t Address,
455 raw_ostream &Comments) const {
456 for (const uint8_t *T : {Table1, Table2}) {
457 if (DecodeStatus Res = tryDecodeInst(T, MI, Inst, Address, Comments))
458 return Res;
459 }
460 return MCDisassembler::Fail;
461}
462
463template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
464 assert(Bytes.size() >= sizeof(T));
465 const auto Res =
466 support::endian::read<T, llvm::endianness::little>(Bytes.data());
467 Bytes = Bytes.slice(N: sizeof(T));
468 return Res;
469}
470
471static inline std::bitset<96> eat12Bytes(ArrayRef<uint8_t> &Bytes) {
472 using namespace llvm::support::endian;
473 assert(Bytes.size() >= 12);
474 std::bitset<96> Lo(read<uint64_t, endianness::little>(P: Bytes.data()));
475 Bytes = Bytes.slice(N: 8);
476 std::bitset<96> Hi(read<uint32_t, endianness::little>(P: Bytes.data()));
477 Bytes = Bytes.slice(N: 4);
478 return (Hi << 64) | Lo;
479}
480
481static inline std::bitset<128> eat16Bytes(ArrayRef<uint8_t> &Bytes) {
482 using namespace llvm::support::endian;
483 assert(Bytes.size() >= 16);
484 std::bitset<128> Lo(read<uint64_t, endianness::little>(P: Bytes.data()));
485 Bytes = Bytes.slice(N: 8);
486 std::bitset<128> Hi(read<uint64_t, endianness::little>(P: Bytes.data()));
487 Bytes = Bytes.slice(N: 8);
488 return (Hi << 64) | Lo;
489}
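
// Example (illustrative): for the 12-byte sequence
//   78 56 34 12 00 00 00 00 EF CD AB 90
// eat12Bytes() reads Lo = 0x0000000012345678 and Hi = 0x90ABCDEF, producing a
// bitset whose bits [95:64] are 0x90ABCDEF and bits [63:0] are 0x12345678,
// i.e. the little-endian dwords in instruction order. eat16Bytes() assembles
// 128 bits the same way from two 64-bit reads.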
490
491void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
492 const MCInstrInfo &MCII) const {
493 const MCInstrDesc &Desc = MCII.get(Opcode: MI.getOpcode());
494 for (auto [OpNo, OpDesc] : enumerate(First: Desc.operands())) {
495 if (OpNo >= MI.getNumOperands())
496 continue;
497
498 // TODO: Fix V_DUAL_FMAMK_F32_X_FMAAK_F32_gfx12 vsrc operands,
499 // defined to take VGPR_32, but in reality allowing inline constants.
500 bool IsSrc = AMDGPU::OPERAND_SRC_FIRST <= OpDesc.OperandType &&
501 OpDesc.OperandType <= AMDGPU::OPERAND_SRC_LAST;
502 if (!IsSrc && OpDesc.OperandType != MCOI::OPERAND_REGISTER)
503 continue;
504
505 MCOperand &Op = MI.getOperand(i: OpNo);
506 if (!Op.isImm())
507 continue;
508 int64_t Imm = Op.getImm();
509 if (AMDGPU::EncValues::INLINE_INTEGER_C_MIN <= Imm &&
510 Imm <= AMDGPU::EncValues::INLINE_INTEGER_C_MAX) {
511 Op = decodeIntImmed(Imm);
512 continue;
513 }
514
515 if (Imm == AMDGPU::EncValues::LITERAL_CONST) {
516 Op = decodeLiteralConstant(Desc, OpDesc);
517 continue;
518 }
519
520 if (AMDGPU::EncValues::INLINE_FLOATING_C_MIN <= Imm &&
521 Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX) {
522 switch (OpDesc.OperandType) {
523 case AMDGPU::OPERAND_REG_IMM_BF16:
524 case AMDGPU::OPERAND_REG_IMM_V2BF16:
525 case AMDGPU::OPERAND_REG_INLINE_C_BF16:
526 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
527 Imm = getInlineImmValBF16(Imm);
528 break;
529 case AMDGPU::OPERAND_REG_IMM_FP16:
530 case AMDGPU::OPERAND_REG_IMM_INT16:
531 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
532 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
533 Imm = getInlineImmValF16(Imm);
534 break;
535 case AMDGPU::OPERAND_REG_IMM_V2FP16:
536 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
537 Imm = getInlineImmValF16(Imm);
538 break;
539 case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT: {
540 // V_PK_FMAC_F16 on GFX11+ duplicates the f16 inline constant to both
541 // halves, so we need to produce the duplicated value for correct
542 // round-trip.
543 if (isGFX11Plus()) {
544 int64_t F16Val = getInlineImmValF16(Imm);
545 Imm = (F16Val << 16) | (F16Val & 0xFFFF);
546 } else {
547 Imm = getInlineImmValF16(Imm);
548 }
549 break;
550 }
551 case AMDGPU::OPERAND_REG_IMM_FP64:
552 case AMDGPU::OPERAND_REG_IMM_INT64:
553 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
554 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
555 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
556 Imm = getInlineImmVal64(Imm);
557 break;
558 default:
559 Imm = getInlineImmVal32(Imm);
560 }
561 Op.setImm(Imm);
562 }
563 }
564}
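
// Worked example (illustrative, using the standard AMDGPU inline-constant
// encodings): a raw src value of 242 selects the inline constant +1.0, so the
// operand is rewritten to the bit pattern matching its type: 0x3F800000 for
// an f32 operand, 0x3C00 for f16, and for a GFX11+ packed-f16 splat operand
// the half value is duplicated into both halves, giving 0x3C003C00.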
565
566DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
567 ArrayRef<uint8_t> Bytes_,
568 uint64_t Address,
569 raw_ostream &CS) const {
570 unsigned MaxInstBytesNum = std::min(a: (size_t)TargetMaxInstBytes, b: Bytes_.size());
571 Bytes = Bytes_.slice(N: 0, M: MaxInstBytesNum);
572
573 // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
574 // there are fewer bytes left). This will be overridden on success.
575 Size = std::min(a: (size_t)4, b: Bytes_.size());
576
577 do {
    // ToDo: It would be better to select the encoding length with some bit
    // predicate, but none is known yet, so try everything we can.
580
581 // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
582 // encodings
583 if (isGFX1250Plus() && Bytes.size() >= 16) {
584 std::bitset<128> DecW = eat16Bytes(Bytes);
585 if (tryDecodeInst(Table: DecoderTableGFX1250128, MI, Inst: DecW, Address, Comments&: CS))
586 break;
587 Bytes = Bytes_.slice(N: 0, M: MaxInstBytesNum);
588 }
589
590 if (isGFX11Plus() && Bytes.size() >= 12) {
591 std::bitset<96> DecW = eat12Bytes(Bytes);
592
593 if (isGFX11() &&
594 tryDecodeInst(Table1: DecoderTableGFX1196, Table2: DecoderTableGFX11_FAKE1696, MI,
595 Inst: DecW, Address, Comments&: CS))
596 break;
597
598 if (isGFX1250() &&
599 tryDecodeInst(Table1: DecoderTableGFX125096, Table2: DecoderTableGFX1250_FAKE1696, MI,
600 Inst: DecW, Address, Comments&: CS))
601 break;
602
603 if (isGFX12() &&
604 tryDecodeInst(Table1: DecoderTableGFX1296, Table2: DecoderTableGFX12_FAKE1696, MI,
605 Inst: DecW, Address, Comments&: CS))
606 break;
607
608 if (isGFX12() &&
609 tryDecodeInst(Table: DecoderTableGFX12W6496, MI, Inst: DecW, Address, Comments&: CS))
610 break;
611
612 if (isGFX13() &&
613 tryDecodeInst(Table1: DecoderTableGFX1396, Table2: DecoderTableGFX13_FAKE1696, MI,
614 Inst: DecW, Address, Comments&: CS))
615 break;
616
617 if (STI.hasFeature(Feature: AMDGPU::Feature64BitLiterals)) {
618 // Return 8 bytes for a potential literal.
619 Bytes = Bytes_.slice(N: 4, M: MaxInstBytesNum - 4);
620
621 if (isGFX1250() &&
622 tryDecodeInst(Table: DecoderTableGFX125096, MI, Inst: DecW, Address, Comments&: CS))
623 break;
624 }
625
626 // Reinitialize Bytes
627 Bytes = Bytes_.slice(N: 0, M: MaxInstBytesNum);
628
629 } else if (Bytes.size() >= 16 &&
630 STI.hasFeature(Feature: AMDGPU::FeatureGFX950Insts)) {
631 std::bitset<128> DecW = eat16Bytes(Bytes);
632 if (tryDecodeInst(Table: DecoderTableGFX940128, MI, Inst: DecW, Address, Comments&: CS))
633 break;
634
635 // Reinitialize Bytes
636 Bytes = Bytes_.slice(N: 0, M: MaxInstBytesNum);
637 }
638
639 if (Bytes.size() >= 8) {
640 const uint64_t QW = eatBytes<uint64_t>(Bytes);
641
642 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX10_BEncoding) &&
643 tryDecodeInst(Table: DecoderTableGFX10_B64, MI, Inst: QW, Address, Comments&: CS))
644 break;
645
646 if (STI.hasFeature(Feature: AMDGPU::FeatureUnpackedD16VMem) &&
647 tryDecodeInst(Table: DecoderTableGFX80_UNPACKED64, MI, Inst: QW, Address, Comments&: CS))
648 break;
649
650 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX950Insts) &&
651 tryDecodeInst(Table: DecoderTableGFX95064, MI, Inst: QW, Address, Comments&: CS))
652 break;
653
654 // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
655 // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
656 // table first so we print the correct name.
657 if (STI.hasFeature(Feature: AMDGPU::FeatureFmaMixInsts) &&
658 tryDecodeInst(Table: DecoderTableGFX9_DL64, MI, Inst: QW, Address, Comments&: CS))
659 break;
660
661 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX940Insts) &&
662 tryDecodeInst(Table: DecoderTableGFX94064, MI, Inst: QW, Address, Comments&: CS))
663 break;
664
665 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX90AInsts) &&
666 tryDecodeInst(Table: DecoderTableGFX90A64, MI, Inst: QW, Address, Comments&: CS))
667 break;
668
669 if ((isVI() || isGFX9()) &&
670 tryDecodeInst(Table: DecoderTableGFX864, MI, Inst: QW, Address, Comments&: CS))
671 break;
672
673 if (isGFX9() && tryDecodeInst(Table: DecoderTableGFX964, MI, Inst: QW, Address, Comments&: CS))
674 break;
675
676 if (isGFX10() && tryDecodeInst(Table: DecoderTableGFX1064, MI, Inst: QW, Address, Comments&: CS))
677 break;
678
679 if (isGFX1250() &&
680 tryDecodeInst(Table1: DecoderTableGFX125064, Table2: DecoderTableGFX1250_FAKE1664, MI,
681 Inst: QW, Address, Comments&: CS))
682 break;
683
684 if (isGFX12() &&
685 tryDecodeInst(Table1: DecoderTableGFX1264, Table2: DecoderTableGFX12_FAKE1664, MI, Inst: QW,
686 Address, Comments&: CS))
687 break;
688
689 if (isGFX11() &&
690 tryDecodeInst(Table1: DecoderTableGFX1164, Table2: DecoderTableGFX11_FAKE1664, MI, Inst: QW,
691 Address, Comments&: CS))
692 break;
693
694 if (isGFX11() &&
695 tryDecodeInst(Table: DecoderTableGFX11W6464, MI, Inst: QW, Address, Comments&: CS))
696 break;
697
698 if (isGFX12() &&
699 tryDecodeInst(Table: DecoderTableGFX12W6464, MI, Inst: QW, Address, Comments&: CS))
700 break;
701
702 if (isGFX13() &&
703 tryDecodeInst(Table1: DecoderTableGFX1364, Table2: DecoderTableGFX13_FAKE1664, MI, Inst: QW,
704 Address, Comments&: CS))
705 break;
706
707 // Reinitialize Bytes
708 Bytes = Bytes_.slice(N: 0, M: MaxInstBytesNum);
709 }
710
711 // Try decode 32-bit instruction
712 if (Bytes.size() >= 4) {
713 const uint32_t DW = eatBytes<uint32_t>(Bytes);
714
715 if ((isVI() || isGFX9()) &&
716 tryDecodeInst(Table: DecoderTableGFX832, MI, Inst: DW, Address, Comments&: CS))
717 break;
718
719 if (tryDecodeInst(Table: DecoderTableAMDGPU32, MI, Inst: DW, Address, Comments&: CS))
720 break;
721
722 if (isGFX9() && tryDecodeInst(Table: DecoderTableGFX932, MI, Inst: DW, Address, Comments&: CS))
723 break;
724
725 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX950Insts) &&
726 tryDecodeInst(Table: DecoderTableGFX95032, MI, Inst: DW, Address, Comments&: CS))
727 break;
728
729 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX90AInsts) &&
730 tryDecodeInst(Table: DecoderTableGFX90A32, MI, Inst: DW, Address, Comments&: CS))
731 break;
732
733 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX10_BEncoding) &&
734 tryDecodeInst(Table: DecoderTableGFX10_B32, MI, Inst: DW, Address, Comments&: CS))
735 break;
736
737 if (isGFX10() && tryDecodeInst(Table: DecoderTableGFX1032, MI, Inst: DW, Address, Comments&: CS))
738 break;
739
740 if (isGFX11() &&
741 tryDecodeInst(Table1: DecoderTableGFX1132, Table2: DecoderTableGFX11_FAKE1632, MI, Inst: DW,
742 Address, Comments&: CS))
743 break;
744
745 if (isGFX1250() &&
746 tryDecodeInst(Table1: DecoderTableGFX125032, Table2: DecoderTableGFX1250_FAKE1632, MI,
747 Inst: DW, Address, Comments&: CS))
748 break;
749
750 if (isGFX12() &&
751 tryDecodeInst(Table1: DecoderTableGFX1232, Table2: DecoderTableGFX12_FAKE1632, MI, Inst: DW,
752 Address, Comments&: CS))
753 break;
754
755 if (isGFX13() &&
756 tryDecodeInst(Table1: DecoderTableGFX1332, Table2: DecoderTableGFX13_FAKE1632, MI, Inst: DW,
757 Address, Comments&: CS))
758 break;
759 }
760
761 return MCDisassembler::Fail;
762 } while (false);
763
764 DecodeStatus Status = MCDisassembler::Success;
765
766 decodeImmOperands(MI, MCII: *MCII);
767
768 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
769 if (isMacDPP(MI))
770 convertMacDPPInst(MI);
771
772 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
773 convertVOP3PDPPInst(MI);
774 else if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
775 convertVOPCDPPInst(MI); // Special VOP3 case
776 else if (AMDGPU::isVOPC64DPP(Opc: MI.getOpcode()))
777 convertVOPC64DPPInst(MI); // Special VOP3 case
778 else if (AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::dpp8) !=
779 -1)
780 convertDPP8Inst(MI);
781 else if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
782 convertVOP3DPPInst(MI); // Regular VOP3 case
783 }
784
785 convertTrue16OpSel(MI);
786
787 if (AMDGPU::isMAC(Opc: MI.getOpcode())) {
788 // Insert dummy unused src2_modifiers.
789 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
790 Name: AMDGPU::OpName::src2_modifiers);
791 }
792
793 if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
794 MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
795 // Insert dummy unused src2_modifiers.
796 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
797 Name: AMDGPU::OpName::src2_modifiers);
798 }
799
800 if ((MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
801 !AMDGPU::hasGDS(STI)) {
802 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::gds);
803 }
804
805 if (MCII->get(Opcode: MI.getOpcode()).TSFlags &
806 (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD)) {
807 int CPolPos = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
808 Name: AMDGPU::OpName::cpol);
809 if (CPolPos != -1) {
810 unsigned CPol =
811 (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
812 AMDGPU::CPol::GLC : 0;
813 if (MI.getNumOperands() <= (unsigned)CPolPos) {
814 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: CPol),
815 Name: AMDGPU::OpName::cpol);
816 } else if (CPol) {
817 MI.getOperand(i: CPolPos).setImm(MI.getOperand(i: CPolPos).getImm() | CPol);
818 }
819 }
820 }
821
822 if ((MCII->get(Opcode: MI.getOpcode()).TSFlags &
823 (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
824 (STI.hasFeature(Feature: AMDGPU::FeatureGFX90AInsts))) {
825 // GFX90A lost TFE, its place is occupied by ACC.
826 int TFEOpIdx =
827 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::tfe);
828 if (TFEOpIdx != -1) {
829 auto *TFEIter = MI.begin();
830 std::advance(i&: TFEIter, n: TFEOpIdx);
831 MI.insert(I: TFEIter, Op: MCOperand::createImm(Val: 0));
832 }
833 }
834
  // Validate buffer instruction offsets for GFX12+ - the offset must not be
  // negative.
836 if (isGFX12Plus() && isBufferInstruction(MI)) {
837 int OffsetIdx =
838 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::offset);
839 if (OffsetIdx != -1) {
840 uint32_t Imm = MI.getOperand(i: OffsetIdx).getImm();
841 int64_t SignedOffset = SignExtend64<24>(x: Imm);
842 if (SignedOffset < 0)
843 return MCDisassembler::Fail;
844 }
845 }
846
847 if (MCII->get(Opcode: MI.getOpcode()).TSFlags &
848 (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) {
849 int SWZOpIdx =
850 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::swz);
851 if (SWZOpIdx != -1) {
852 auto *SWZIter = MI.begin();
853 std::advance(i&: SWZIter, n: SWZOpIdx);
854 MI.insert(I: SWZIter, Op: MCOperand::createImm(Val: 0));
855 }
856 }
857
858 const MCInstrDesc &Desc = MCII->get(Opcode: MI.getOpcode());
859 if (Desc.TSFlags & SIInstrFlags::MIMG) {
860 int VAddr0Idx =
861 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::vaddr0);
862 int RsrcIdx =
863 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::srsrc);
864 unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
865 if (VAddr0Idx >= 0 && NSAArgs > 0) {
866 unsigned NSAWords = (NSAArgs + 3) / 4;
867 if (Bytes.size() < 4 * NSAWords)
868 return MCDisassembler::Fail;
869 for (unsigned i = 0; i < NSAArgs; ++i) {
870 const unsigned VAddrIdx = VAddr0Idx + 1 + i;
871 auto VAddrRCID =
872 MCII->getOpRegClassID(OpInfo: Desc.operands()[VAddrIdx], HwModeId: HwModeRegClass);
873 MI.insert(I: MI.begin() + VAddrIdx, Op: createRegOperand(RegClassID: VAddrRCID, Val: Bytes[i]));
874 }
875 Bytes = Bytes.slice(N: 4 * NSAWords);
876 }
877
878 convertMIMGInst(MI);
879 }
880
881 if (MCII->get(Opcode: MI.getOpcode()).TSFlags &
882 (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))
883 convertMIMGInst(MI);
884
885 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
886 convertEXPInst(MI);
887
888 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
889 convertVINTERPInst(MI);
890
891 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
892 convertSDWAInst(MI);
893
894 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::IsMAI)
895 convertMAIInst(MI);
896
897 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::IsWMMA)
898 convertWMMAInst(MI);
899
900 int VDstIn_Idx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
901 Name: AMDGPU::OpName::vdst_in);
902 if (VDstIn_Idx != -1) {
903 int Tied = MCII->get(Opcode: MI.getOpcode()).getOperandConstraint(OpNum: VDstIn_Idx,
904 Constraint: MCOI::OperandConstraint::TIED_TO);
905 if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
906 !MI.getOperand(i: VDstIn_Idx).isReg() ||
907 MI.getOperand(i: VDstIn_Idx).getReg() != MI.getOperand(i: Tied).getReg())) {
908 if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
909 MI.erase(I: &MI.getOperand(i: VDstIn_Idx));
910 insertNamedMCOperand(MI,
911 Op: MCOperand::createReg(Reg: MI.getOperand(i: Tied).getReg()),
912 Name: AMDGPU::OpName::vdst_in);
913 }
914 }
915
916 bool IsSOPK = MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
917 if (AMDGPU::hasNamedOperand(Opcode: MI.getOpcode(), NamedIdx: AMDGPU::OpName::imm) && !IsSOPK)
918 convertFMAanyK(MI);
919
920 // Some VOPC instructions, e.g., v_cmpx_f_f64, use VOP3 encoding and
921 // have EXEC as implicit destination. Issue a warning if encoding for
922 // vdst is not EXEC.
923 if ((MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::VOP3) &&
924 MCII->get(Opcode: MI.getOpcode()).getNumDefs() == 0 &&
925 MCII->get(Opcode: MI.getOpcode()).hasImplicitDefOfPhysReg(Reg: AMDGPU::EXEC)) {
926 auto ExecEncoding = MRI.getEncodingValue(Reg: AMDGPU::EXEC_LO);
927 if (Bytes_[0] != ExecEncoding)
928 Status = MCDisassembler::SoftFail;
929 }
930
931 Size = MaxInstBytesNum - Bytes.size();
932 return Status;
933}
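
// Note on the reported size (illustrative): Size is the number of bytes
// consumed from the front of Bytes_. For example, a 64-bit encoding that also
// pulls a 32-bit literal out of the byte stream leaves Bytes shorter by 12,
// so Size is reported as 12.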
934
935void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
936 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX11Insts)) {
937 // The MCInst still has these fields even though they are no longer encoded
938 // in the GFX11 instruction.
939 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::vm);
940 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::compr);
941 }
942}
943
944void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
945 convertTrue16OpSel(MI);
946 if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
947 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
948 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
949 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
950 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
951 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
952 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
953 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
954 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
955 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
956 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
957 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
958 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
959 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
960 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
961 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12) {
962 // The MCInst has this field that is not directly encoded in the
963 // instruction.
964 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::op_sel);
965 }
966}
967
968void AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
969 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX9) ||
970 STI.hasFeature(Feature: AMDGPU::FeatureGFX10)) {
971 if (AMDGPU::hasNamedOperand(Opcode: MI.getOpcode(), NamedIdx: AMDGPU::OpName::sdst))
972 // VOPC - insert clamp
973 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::clamp);
974 } else if (STI.hasFeature(Feature: AMDGPU::FeatureVolcanicIslands)) {
975 int SDst = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::sdst);
976 if (SDst != -1) {
977 // VOPC - insert VCC register as sdst
978 insertNamedMCOperand(MI, Op: createRegOperand(Reg: AMDGPU::VCC),
979 Name: AMDGPU::OpName::sdst);
980 } else {
981 // VOP1/2 - insert omod if present in instruction
982 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::omod);
983 }
984 }
985}
986
987/// Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the
988/// appropriate subregister for the used format width.
989static void adjustMFMA_F8F6F4OpRegClass(const MCRegisterInfo &MRI,
990 MCOperand &MO, uint8_t NumRegs) {
991 switch (NumRegs) {
992 case 4:
993 return MO.setReg(MRI.getSubReg(Reg: MO.getReg(), Idx: AMDGPU::sub0_sub1_sub2_sub3));
994 case 6:
995 return MO.setReg(
996 MRI.getSubReg(Reg: MO.getReg(), Idx: AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));
997 case 8:
998 if (MCRegister NewReg = MRI.getSubReg(
999 Reg: MO.getReg(), Idx: AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7)) {
1000 MO.setReg(NewReg);
1001 }
1002 return;
1003 case 12: {
1004 // There is no 384-bit subreg index defined.
1005 MCRegister BaseReg = MRI.getSubReg(Reg: MO.getReg(), Idx: AMDGPU::sub0);
1006 MCRegister NewReg = MRI.getMatchingSuperReg(
1007 Reg: BaseReg, SubIdx: AMDGPU::sub0, RC: &MRI.getRegClass(i: AMDGPU::VReg_384RegClassID));
1008 return MO.setReg(NewReg);
1009 }
1010 case 16:
1011 // No-op in cases where one operand is still f8/bf8.
1012 return;
1013 default:
1014 llvm_unreachable("Unexpected size for mfma/wmma f8f6f4 operand");
1015 }
1016}
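
// Example (illustrative): with NumRegs = 4 the operand's register tuple is
// narrowed to its first four dwords, e.g. a v[0:7] operand becomes v[0:3];
// with NumRegs = 12 the first register is re-wrapped into the matching
// VReg_384 tuple because no 384-bit subregister index exists.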
1017
/// f8f6f4 instructions have different pseudos depending on the formats used.
/// The disassembler table only has the variants with the largest register
/// classes, which assume an fp8/bf8 format for both operands. The actual
/// register class depends on the format in the blgp and cbsz operands. Adjust
/// the register classes depending on the format used.
1023void AMDGPUDisassembler::convertMAIInst(MCInst &MI) const {
1024 int BlgpIdx =
1025 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::blgp);
1026 if (BlgpIdx == -1)
1027 return;
1028
1029 int CbszIdx =
1030 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::cbsz);
1031
1032 unsigned CBSZ = MI.getOperand(i: CbszIdx).getImm();
1033 unsigned BLGP = MI.getOperand(i: BlgpIdx).getImm();
1034
1035 const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
1036 AMDGPU::getMFMA_F8F6F4_WithFormatArgs(CBSZ, BLGP, F8F8Opcode: MI.getOpcode());
1037 if (!AdjustedRegClassOpcode ||
1038 AdjustedRegClassOpcode->Opcode == MI.getOpcode())
1039 return;
1040
1041 MI.setOpcode(AdjustedRegClassOpcode->Opcode);
1042 int Src0Idx =
1043 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::src0);
1044 int Src1Idx =
1045 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::src1);
1046 adjustMFMA_F8F6F4OpRegClass(MRI, MO&: MI.getOperand(i: Src0Idx),
1047 NumRegs: AdjustedRegClassOpcode->NumRegsSrcA);
1048 adjustMFMA_F8F6F4OpRegClass(MRI, MO&: MI.getOperand(i: Src1Idx),
1049 NumRegs: AdjustedRegClassOpcode->NumRegsSrcB);
1050}
1051
1052void AMDGPUDisassembler::convertWMMAInst(MCInst &MI) const {
1053 int FmtAIdx =
1054 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::matrix_a_fmt);
1055 if (FmtAIdx == -1)
1056 return;
1057
1058 int FmtBIdx =
1059 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::matrix_b_fmt);
1060
1061 unsigned FmtA = MI.getOperand(i: FmtAIdx).getImm();
1062 unsigned FmtB = MI.getOperand(i: FmtBIdx).getImm();
1063
1064 const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
1065 AMDGPU::getWMMA_F8F6F4_WithFormatArgs(FmtA, FmtB, F8F8Opcode: MI.getOpcode());
1066 if (!AdjustedRegClassOpcode ||
1067 AdjustedRegClassOpcode->Opcode == MI.getOpcode())
1068 return;
1069
1070 MI.setOpcode(AdjustedRegClassOpcode->Opcode);
1071 int Src0Idx =
1072 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::src0);
1073 int Src1Idx =
1074 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::src1);
1075 adjustMFMA_F8F6F4OpRegClass(MRI, MO&: MI.getOperand(i: Src0Idx),
1076 NumRegs: AdjustedRegClassOpcode->NumRegsSrcA);
1077 adjustMFMA_F8F6F4OpRegClass(MRI, MO&: MI.getOperand(i: Src1Idx),
1078 NumRegs: AdjustedRegClassOpcode->NumRegsSrcB);
1079}
1080
1081struct VOPModifiers {
1082 unsigned OpSel = 0;
1083 unsigned OpSelHi = 0;
1084 unsigned NegLo = 0;
1085 unsigned NegHi = 0;
1086};
1087
1088// Reconstruct values of VOP3/VOP3P operands such as op_sel.
1089// Note that these values do not affect disassembler output,
1090// so this is only necessary for consistency with src_modifiers.
1091static VOPModifiers collectVOPModifiers(const MCInst &MI,
1092 bool IsVOP3P = false) {
1093 VOPModifiers Modifiers;
1094 unsigned Opc = MI.getOpcode();
1095 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
1096 AMDGPU::OpName::src1_modifiers,
1097 AMDGPU::OpName::src2_modifiers};
1098 for (int J = 0; J < 3; ++J) {
1099 int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: ModOps[J]);
1100 if (OpIdx == -1)
1101 continue;
1102
1103 unsigned Val = MI.getOperand(i: OpIdx).getImm();
1104
1105 Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
1106 if (IsVOP3P) {
1107 Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
1108 Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
1109 Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
1110 } else if (J == 0) {
1111 Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
1112 }
1113 }
1114
1115 return Modifiers;
1116}
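
// Example (illustrative): if src0_modifiers and src2_modifiers both have
// SISrcMods::OP_SEL_0 set, the loop above accumulates OpSel = 0b101; for
// VOP3P the OP_SEL_1 / NEG / NEG_HI bits are gathered the same way into
// OpSelHi, NegLo and NegHi.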
1117
1118// Instructions decode the op_sel/suffix bits into the src_modifier
1119// operands. Copy those bits into the src operands for true16 VGPRs.
1120void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const {
1121 const unsigned Opc = MI.getOpcode();
1122 const MCRegisterClass &ConversionRC =
1123 MRI.getRegClass(i: AMDGPU::VGPR_16RegClassID);
1124 constexpr std::array<std::tuple<AMDGPU::OpName, AMDGPU::OpName, unsigned>, 4>
      OpAndOpMods = {{{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
1126 SISrcMods::OP_SEL_0},
1127 {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
1128 SISrcMods::OP_SEL_0},
1129 {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
1130 SISrcMods::OP_SEL_0},
1131 {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
1132 SISrcMods::DST_OP_SEL}}};
1133 for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
1134 int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: OpName);
1135 int OpModsIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: OpModsName);
1136 if (OpIdx == -1 || OpModsIdx == -1)
1137 continue;
1138 MCOperand &Op = MI.getOperand(i: OpIdx);
1139 if (!Op.isReg())
1140 continue;
1141 if (!ConversionRC.contains(Reg: Op.getReg()))
1142 continue;
1143 unsigned OpEnc = MRI.getEncodingValue(Reg: Op.getReg());
1144 const MCOperand &OpMods = MI.getOperand(i: OpModsIdx);
1145 unsigned ModVal = OpMods.getImm();
1146 if (ModVal & OpSelMask) { // isHi
1147 unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
1148 Op.setReg(ConversionRC.getRegister(i: RegIdx * 2 + 1));
1149 }
1150 }
1151}
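
// Example (illustrative): if src1 decoded to a 16-bit VGPR with register
// index 7 and src1_modifiers has OP_SEL_0 set, the operand is replaced with
// ConversionRC.getRegister(7 * 2 + 1), i.e. the high half of the same VGPR.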
1152
1153// MAC opcodes have special old and src2 operands.
1154// src2 is tied to dst, while old is not tied (but assumed to be).
1155bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
1156 constexpr int DST_IDX = 0;
1157 auto Opcode = MI.getOpcode();
1158 const auto &Desc = MCII->get(Opcode);
1159 auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::old);
1160
1161 if (OldIdx != -1 && Desc.getOperandConstraint(
1162 OpNum: OldIdx, Constraint: MCOI::OperandConstraint::TIED_TO) == -1) {
1163 assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
1164 assert(Desc.getOperandConstraint(
1165 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
1166 MCOI::OperandConstraint::TIED_TO) == DST_IDX);
1167 (void)DST_IDX;
1168 return true;
1169 }
1170
1171 return false;
1172}
1173
1174// Create dummy old operand and insert dummy unused src2_modifiers
1175void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
1176 assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
1177 insertNamedMCOperand(MI, Op: MCOperand::createReg(Reg: 0), Name: AMDGPU::OpName::old);
1178 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
1179 Name: AMDGPU::OpName::src2_modifiers);
1180}
1181
1182void AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
1183 unsigned Opc = MI.getOpcode();
1184
1185 int VDstInIdx =
1186 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::vdst_in);
1187 if (VDstInIdx != -1)
1188 insertNamedMCOperand(MI, Op: MI.getOperand(i: 0), Name: AMDGPU::OpName::vdst_in);
1189
1190 unsigned DescNumOps = MCII->get(Opcode: Opc).getNumOperands();
1191 if (MI.getNumOperands() < DescNumOps &&
1192 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel)) {
1193 convertTrue16OpSel(MI);
1194 auto Mods = collectVOPModifiers(MI);
1195 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.OpSel),
1196 Name: AMDGPU::OpName::op_sel);
1197 } else {
1198 // Insert dummy unused src modifiers.
1199 if (MI.getNumOperands() < DescNumOps &&
1200 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::src0_modifiers))
1201 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
1202 Name: AMDGPU::OpName::src0_modifiers);
1203
1204 if (MI.getNumOperands() < DescNumOps &&
1205 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::src1_modifiers))
1206 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
1207 Name: AMDGPU::OpName::src1_modifiers);
1208 }
1209}
1210
1211void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
1212 convertTrue16OpSel(MI);
1213
1214 int VDstInIdx =
1215 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::vdst_in);
1216 if (VDstInIdx != -1)
1217 insertNamedMCOperand(MI, Op: MI.getOperand(i: 0), Name: AMDGPU::OpName::vdst_in);
1218
1219 unsigned Opc = MI.getOpcode();
1220 unsigned DescNumOps = MCII->get(Opcode: Opc).getNumOperands();
1221 if (MI.getNumOperands() < DescNumOps &&
1222 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel)) {
1223 auto Mods = collectVOPModifiers(MI);
1224 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.OpSel),
1225 Name: AMDGPU::OpName::op_sel);
1226 }
1227}
1228
// Given a wide register tuple \p Reg, check whether it would overflow the
// 256-entry vector register file.
// \returns \p Reg on success or NoRegister otherwise.
1231static MCRegister CheckVGPROverflow(MCRegister Reg, const MCRegisterClass &RC,
1232 const MCRegisterInfo &MRI) {
1233 unsigned NumRegs = RC.getSizeInBits() / 32;
1234 MCRegister Sub0 = MRI.getSubReg(Reg, Idx: AMDGPU::sub0);
1235 if (!Sub0)
1236 return Reg;
1237
1238 MCRegister BaseReg;
1239 if (MRI.getRegClass(i: AMDGPU::VGPR_32RegClassID).contains(Reg: Sub0))
1240 BaseReg = AMDGPU::VGPR0;
1241 else if (MRI.getRegClass(i: AMDGPU::AGPR_32RegClassID).contains(Reg: Sub0))
1242 BaseReg = AMDGPU::AGPR0;
1243
1244 assert(BaseReg && "Only vector registers expected");
1245
1246 return (Sub0 - BaseReg + NumRegs <= 256) ? Reg : MCRegister();
1247}
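
// Example (illustrative): a 256-bit tuple (NumRegs = 8) whose sub0 is v250
// would cover v250..v257; since 250 + 8 > 256 this overflows the 256-entry
// register file and NoRegister is returned, so the caller bails out.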
1248
// Note that before gfx10, the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show the address as
// if it were a single dword, which may not actually be the case.
1252void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
1253 auto TSFlags = MCII->get(Opcode: MI.getOpcode()).TSFlags;
1254
1255 int VDstIdx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
1256 Name: AMDGPU::OpName::vdst);
1257
1258 int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
1259 Name: AMDGPU::OpName::vdata);
1260 int VAddr0Idx =
1261 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::vaddr0);
1262 AMDGPU::OpName RsrcOpName = (TSFlags & SIInstrFlags::MIMG)
1263 ? AMDGPU::OpName::srsrc
1264 : AMDGPU::OpName::rsrc;
1265 int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: RsrcOpName);
1266 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
1267 Name: AMDGPU::OpName::dmask);
1268
1269 int TFEIdx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
1270 Name: AMDGPU::OpName::tfe);
1271 int D16Idx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
1272 Name: AMDGPU::OpName::d16);
1273
1274 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc: MI.getOpcode());
1275 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
1276 AMDGPU::getMIMGBaseOpcodeInfo(BaseOpcode: Info->BaseOpcode);
1277
1278 assert(VDataIdx != -1);
1279 if (BaseOpcode->BVH) {
1280 // Add A16 operand for intersect_ray instructions
1281 addOperand(Inst&: MI, Opnd: MCOperand::createImm(Val: BaseOpcode->A16));
1282 return;
1283 }
1284
1285 bool IsAtomic = (VDstIdx != -1);
1286 bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
1287 bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
1288 bool IsNSA = false;
1289 bool IsPartialNSA = false;
1290 unsigned AddrSize = Info->VAddrDwords;
1291
1292 if (isGFX10Plus()) {
1293 unsigned DimIdx =
1294 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::dim);
1295 int A16Idx =
1296 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::a16);
1297 const AMDGPU::MIMGDimInfo *Dim =
1298 AMDGPU::getMIMGDimInfoByEncoding(DimEnc: MI.getOperand(i: DimIdx).getImm());
1299 const bool IsA16 = (A16Idx != -1 && MI.getOperand(i: A16Idx).getImm());
1300
1301 AddrSize =
1302 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, IsG16Supported: AMDGPU::hasG16(STI));
1303
1304 // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
1305 // VIMAGE insts other than BVH never use vaddr4.
1306 IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
1307 Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
1308 Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
1309 if (!IsNSA) {
1310 if (!IsVSample && AddrSize > 12)
1311 AddrSize = 16;
1312 } else {
1313 if (AddrSize > Info->VAddrDwords) {
1314 if (!STI.hasFeature(Feature: AMDGPU::FeaturePartialNSAEncoding)) {
1315 // The NSA encoding does not contain enough operands for the
1316 // combination of base opcode / dimension. Should this be an error?
1317 return;
1318 }
1319 IsPartialNSA = true;
1320 }
1321 }
1322 }
1323
1324 unsigned DMask = MI.getOperand(i: DMaskIdx).getImm() & 0xf;
1325 unsigned DstSize = IsGather4 ? 4 : std::max(a: llvm::popcount(Value: DMask), b: 1);
1326
1327 bool D16 = D16Idx >= 0 && MI.getOperand(i: D16Idx).getImm();
1328 if (D16 && AMDGPU::hasPackedD16(STI)) {
1329 DstSize = (DstSize + 1) / 2;
1330 }
1331
1332 if (TFEIdx != -1 && MI.getOperand(i: TFEIdx).getImm())
1333 DstSize += 1;
1334
1335 if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
1336 return;
1337
1338 int NewOpcode =
1339 AMDGPU::getMIMGOpcode(BaseOpcode: Info->BaseOpcode, MIMGEncoding: Info->MIMGEncoding, VDataDwords: DstSize, VAddrDwords: AddrSize);
1340 if (NewOpcode == -1)
1341 return;
1342
1343 // Widen the register to the correct number of enabled channels.
1344 MCRegister NewVdata;
1345 if (DstSize != Info->VDataDwords) {
1346 auto DataRCID = MCII->getOpRegClassID(
1347 OpInfo: MCII->get(Opcode: NewOpcode).operands()[VDataIdx], HwModeId: HwModeRegClass);
1348
1349 // Get first subregister of VData
1350 MCRegister Vdata0 = MI.getOperand(i: VDataIdx).getReg();
1351 MCRegister VdataSub0 = MRI.getSubReg(Reg: Vdata0, Idx: AMDGPU::sub0);
1352 Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
1353
1354 const MCRegisterClass &NewRC = MRI.getRegClass(i: DataRCID);
1355 NewVdata = MRI.getMatchingSuperReg(Reg: Vdata0, SubIdx: AMDGPU::sub0, RC: &NewRC);
1356 NewVdata = CheckVGPROverflow(Reg: NewVdata, RC: NewRC, MRI);
1357 if (!NewVdata) {
1358 // It's possible to encode this such that the low register + enabled
1359 // components exceeds the register count.
1360 return;
1361 }
1362 }
1363
1364 // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
1365 // If using partial NSA on GFX11+ widen last address register.
1366 int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
1367 MCRegister NewVAddrSA;
1368 if (STI.hasFeature(Feature: AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
1369 AddrSize != Info->VAddrDwords) {
1370 MCRegister VAddrSA = MI.getOperand(i: VAddrSAIdx).getReg();
1371 MCRegister VAddrSubSA = MRI.getSubReg(Reg: VAddrSA, Idx: AMDGPU::sub0);
1372 VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;
1373
1374 auto AddrRCID = MCII->getOpRegClassID(
1375 OpInfo: MCII->get(Opcode: NewOpcode).operands()[VAddrSAIdx], HwModeId: HwModeRegClass);
1376
1377 const MCRegisterClass &NewRC = MRI.getRegClass(i: AddrRCID);
1378 NewVAddrSA = MRI.getMatchingSuperReg(Reg: VAddrSA, SubIdx: AMDGPU::sub0, RC: &NewRC);
1379 NewVAddrSA = CheckVGPROverflow(Reg: NewVAddrSA, RC: NewRC, MRI);
1380 if (!NewVAddrSA)
1381 return;
1382 }
1383
1384 MI.setOpcode(NewOpcode);
1385
1386 if (NewVdata != AMDGPU::NoRegister) {
1387 MI.getOperand(i: VDataIdx) = MCOperand::createReg(Reg: NewVdata);
1388
1389 if (IsAtomic) {
1390 // Atomic operations have an additional operand (a copy of data)
1391 MI.getOperand(i: VDstIdx) = MCOperand::createReg(Reg: NewVdata);
1392 }
1393 }
1394
1395 if (NewVAddrSA) {
1396 MI.getOperand(i: VAddrSAIdx) = MCOperand::createReg(Reg: NewVAddrSA);
1397 } else if (IsNSA) {
1398 assert(AddrSize <= Info->VAddrDwords);
1399 MI.erase(First: MI.begin() + VAddr0Idx + AddrSize,
1400 Last: MI.begin() + VAddr0Idx + Info->VAddrDwords);
1401 }
1402}
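
// Worked example (illustrative): for a non-gather image load with
// dmask = 0b0111, d16 = 1 on a packed-d16 target and tfe = 0, the code above
// computes DstSize = (popcount(0b0111) + 1) / 2 = 2 dwords, and the vdata
// operand is re-widened to the matching 64-bit register tuple if the generic
// table entry used a different width.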
1403
// Op_sel and neg bits are carried both in src_modifiers and in standalone
// operands. The autogenerated decoder only adds them to src_modifiers, so add
// the bits to the other operands manually.
1407void AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
1408 unsigned Opc = MI.getOpcode();
1409 unsigned DescNumOps = MCII->get(Opcode: Opc).getNumOperands();
1410 auto Mods = collectVOPModifiers(MI, IsVOP3P: true);
1411
1412 if (MI.getNumOperands() < DescNumOps &&
1413 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::vdst_in))
1414 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::vdst_in);
1415
1416 if (MI.getNumOperands() < DescNumOps &&
1417 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel))
1418 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.OpSel),
1419 Name: AMDGPU::OpName::op_sel);
1420 if (MI.getNumOperands() < DescNumOps &&
1421 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel_hi))
1422 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.OpSelHi),
1423 Name: AMDGPU::OpName::op_sel_hi);
1424 if (MI.getNumOperands() < DescNumOps &&
1425 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::neg_lo))
1426 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.NegLo),
1427 Name: AMDGPU::OpName::neg_lo);
1428 if (MI.getNumOperands() < DescNumOps &&
1429 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::neg_hi))
1430 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.NegHi),
1431 Name: AMDGPU::OpName::neg_hi);
1432}
1433
1434// Create dummy old operand and insert optional operands
1435void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
1436 unsigned Opc = MI.getOpcode();
1437 unsigned DescNumOps = MCII->get(Opcode: Opc).getNumOperands();
1438
1439 if (MI.getNumOperands() < DescNumOps &&
1440 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::old))
1441 insertNamedMCOperand(MI, Op: MCOperand::createReg(Reg: 0), Name: AMDGPU::OpName::old);
1442
1443 if (MI.getNumOperands() < DescNumOps &&
1444 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::src0_modifiers))
1445 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
1446 Name: AMDGPU::OpName::src0_modifiers);
1447
1448 if (MI.getNumOperands() < DescNumOps &&
1449 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::src1_modifiers))
1450 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
1451 Name: AMDGPU::OpName::src1_modifiers);
1452}
1453
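// Fix up true16 op_sel and add the standalone op_sel operand for VOPC64 DPP
// instructions; the autogenerated decoder only records op_sel in
// src_modifiers.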
1454void AMDGPUDisassembler::convertVOPC64DPPInst(MCInst &MI) const {
1455 unsigned Opc = MI.getOpcode();
1456 unsigned DescNumOps = MCII->get(Opcode: Opc).getNumOperands();
1457
1458 convertTrue16OpSel(MI);
1459
1460 if (MI.getNumOperands() < DescNumOps &&
1461 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel)) {
1462 VOPModifiers Mods = collectVOPModifiers(MI);
1463 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.OpSel),
1464 Name: AMDGPU::OpName::op_sel);
1465 }
1466}
1467
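// Re-insert the literal decoded from the instruction stream as the immX
// operand used by the FMA*K (fmaak/fmamk) forms.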
1468void AMDGPUDisassembler::convertFMAanyK(MCInst &MI) const {
1469 assert(HasLiteral && "Should have decoded a literal");
1470 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Literal), Name: AMDGPU::OpName::immX);
1471}
1472
1473const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
1474 return getContext().getRegisterInfo()->
1475 getRegClassName(Class: &AMDGPUMCRegisterClasses[RegClassID]);
1476}
1477
1478inline
1479MCOperand AMDGPUDisassembler::errOperand(unsigned V,
1480 const Twine& ErrMsg) const {
1481 *CommentStream << "Error: " + ErrMsg;
1482
1483 // ToDo: add support for error operands to MCInst.h
1484 // return MCOperand::createError(V);
1485 return MCOperand();
1486}
1487
1488inline MCOperand AMDGPUDisassembler::createRegOperand(MCRegister Reg) const {
1489 return MCOperand::createReg(Reg: AMDGPU::getMCReg(Reg, STI));
1490}
1491
1492inline
1493MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
1494 unsigned Val) const {
1495 const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
1496 if (Val >= RegCl.getNumRegs())
1497 return errOperand(V: Val, ErrMsg: Twine(getRegClassName(RegClassID)) +
1498 ": unknown register " + Twine(Val));
1499 return createRegOperand(Reg: RegCl.getRegister(i: Val));
1500}
1501
1502inline
1503MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
1504 unsigned Val) const {
1505 // ToDo: SI/CI have 104 SGPRs, VI has 102.
1506 // Valery: here we accept as much as we can and let the assembler sort it out.
1507 int shift = 0;
1508 switch (SRegClassID) {
1509 case AMDGPU::SGPR_32RegClassID:
1510 case AMDGPU::TTMP_32RegClassID:
1511 break;
1512 case AMDGPU::SGPR_64RegClassID:
1513 case AMDGPU::TTMP_64RegClassID:
1514 shift = 1;
1515 break;
1516 case AMDGPU::SGPR_96RegClassID:
1517 case AMDGPU::TTMP_96RegClassID:
1518 case AMDGPU::SGPR_128RegClassID:
1519 case AMDGPU::TTMP_128RegClassID:
1520 // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
1521 // this bundle?
1522 case AMDGPU::SGPR_256RegClassID:
1523 case AMDGPU::TTMP_256RegClassID:
1524 // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
1525 // this bundle?
1526 case AMDGPU::SGPR_288RegClassID:
1527 case AMDGPU::TTMP_288RegClassID:
1528 case AMDGPU::SGPR_320RegClassID:
1529 case AMDGPU::TTMP_320RegClassID:
1530 case AMDGPU::SGPR_352RegClassID:
1531 case AMDGPU::TTMP_352RegClassID:
1532 case AMDGPU::SGPR_384RegClassID:
1533 case AMDGPU::TTMP_384RegClassID:
1534 case AMDGPU::SGPR_512RegClassID:
1535 case AMDGPU::TTMP_512RegClassID:
1536 shift = 2;
1537 break;
1538 // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
1539 // this bundle?
1540 default:
1541 llvm_unreachable("unhandled register class");
1542 }
1543
1544 if (Val % (1 << shift)) {
1545 *CommentStream << "Warning: " << getRegClassName(RegClassID: SRegClassID)
1546 << ": scalar reg isn't aligned " << Val;
1547 }
1548
1549 return createRegOperand(RegClassID: SRegClassID, Val: Val >> shift);
1550}
1551
1552MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
1553 bool IsHi) const {
1554 unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
1555 return createRegOperand(RegClassID: AMDGPU::VGPR_16RegClassID, Val: RegIdxInVGPR16);
1556}
1557
1558 // Decode literals for instructions that always have a literal in the encoding.
1559MCOperand
1560AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
1561 if (HasLiteral) {
1562 assert(
1563 AMDGPU::hasVOPD(STI) &&
1564 "Should only decode multiple kimm with VOPD, check VSrc operand types");
1565 if (Literal != Val)
1566 return errOperand(V: Val, ErrMsg: "More than one unique literal is illegal");
1567 }
1568 HasLiteral = true;
1569 Literal = Val;
1570 return MCOperand::createImm(Val: Literal);
1571}
1572
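// Decode a mandatory 64-bit literal. As in the 32-bit case, an instruction may
// reference the literal more than once, but only one unique value is allowed.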
1573MCOperand
1574AMDGPUDisassembler::decodeMandatoryLiteral64Constant(uint64_t Val) const {
1575 if (HasLiteral) {
1576 if (Literal != Val)
1577 return errOperand(V: Val, ErrMsg: "More than one unique literal is illegal");
1578 }
1579 HasLiteral = true;
1580 Literal = Val;
1581
1582 bool UseLit64 = Hi_32(Value: Literal) == 0;
1583 return UseLit64 ? MCOperand::createExpr(Val: AMDGPUMCExpr::createLit(
1584 Lit: LitModifier::Lit64, Value: Literal, Ctx&: getContext()))
1585 : MCOperand::createImm(Val: Literal);
1586}
1587
1588MCOperand
1589AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc,
1590 const MCOperandInfo &OpDesc) const {
1591 // For now all literal constants are assumed to be unsigned integers.
1592 // ToDo: deal with signed/unsigned 64-bit integer constants
1593 // ToDo: deal with float/double constants
1594 if (!HasLiteral) {
1595 if (Bytes.size() < 4) {
1596 return errOperand(V: 0, ErrMsg: "cannot read literal, inst bytes left " +
1597 Twine(Bytes.size()));
1598 }
1599 HasLiteral = true;
1600 Literal = eatBytes<uint32_t>(Bytes);
1601 }
1602
1603 // For disassembly, always assume all inline constants are available.
1604 bool HasInv2Pi = true;
1605
1606 // Invalid instruction codes may contain literals for inline-only
1607 // operands, so we support them here as well.
1608 int64_t Val = Literal;
1609 bool UseLit = false;
1610 switch (OpDesc.OperandType) {
1611 default:
1612 llvm_unreachable("Unexpected operand type!");
1613 case AMDGPU::OPERAND_REG_IMM_BF16:
1614 case AMDGPU::OPERAND_REG_INLINE_C_BF16:
1615 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
1616 UseLit = AMDGPU::isInlinableLiteralBF16(Literal: Val, HasInv2Pi);
1617 break;
1618 case AMDGPU::OPERAND_REG_IMM_V2BF16:
1619 UseLit = AMDGPU::isInlinableLiteralV2BF16(Literal: Val);
1620 break;
1621 case AMDGPU::OPERAND_REG_IMM_FP16:
1622 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1623 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1624 UseLit = AMDGPU::isInlinableLiteralFP16(Literal: Val, HasInv2Pi);
1625 break;
1626 case AMDGPU::OPERAND_REG_IMM_V2FP16:
1627 UseLit = AMDGPU::isInlinableLiteralV2F16(Literal: Val);
1628 break;
1629 case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
1630 UseLit = AMDGPU::isPKFMACF16InlineConstant(Literal: Val, IsGFX11Plus: isGFX11Plus());
1631 break;
1632 case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
1633 break;
1634 case AMDGPU::OPERAND_REG_IMM_INT16:
1635 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1636 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1637 UseLit = AMDGPU::isInlinableLiteralI16(Literal: Val, HasInv2Pi);
1638 break;
1639 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1640 UseLit = AMDGPU::isInlinableLiteralV2I16(Literal: Val);
1641 break;
1642 case AMDGPU::OPERAND_REG_IMM_FP32:
1643 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1644 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1645 case AMDGPU::OPERAND_REG_IMM_INT32:
1646 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1647 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1648 case AMDGPU::OPERAND_REG_IMM_V2FP32:
1649 case AMDGPU::OPERAND_REG_IMM_V2INT32:
1650 case AMDGPU::OPERAND_KIMM32:
1651 UseLit = AMDGPU::isInlinableLiteral32(Literal: Val, HasInv2Pi);
1652 break;
1653 case AMDGPU::OPERAND_REG_IMM_FP64:
1654 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1655 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1656 Val <<= 32;
1657 break;
1658 case AMDGPU::OPERAND_REG_IMM_INT64:
1659 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1660 UseLit = AMDGPU::isInlinableLiteral64(Literal: Val, HasInv2Pi);
1661 break;
1662 case MCOI::OPERAND_REGISTER:
1663 // TODO: Disassembling V_DUAL_FMAMK_F32_X_FMAMK_F32_gfx11 hits
1664 // decoding a literal in a position of a register operand. Give
1665 // it special handling in the caller, decodeImmOperands(), instead
1666 // of quietly allowing it here.
1667 break;
1668 }
1669
1670 return UseLit ? MCOperand::createExpr(Val: AMDGPUMCExpr::createLit(
1671 Lit: LitModifier::Lit, Value: Val, Ctx&: getContext()))
1672 : MCOperand::createImm(Val);
1673}
1674
1675MCOperand AMDGPUDisassembler::decodeLiteral64Constant() const {
1676 assert(STI.hasFeature(AMDGPU::Feature64BitLiterals));
1677
1678 if (!HasLiteral) {
1679 if (Bytes.size() < 8) {
1680 return errOperand(V: 0, ErrMsg: "cannot read literal64, inst bytes left " +
1681 Twine(Bytes.size()));
1682 }
1683 HasLiteral = true;
1684 Literal = eatBytes<uint64_t>(Bytes);
1685 }
1686
1687 bool UseLit64 = Hi_32(Value: Literal) == 0;
1688 return UseLit64 ? MCOperand::createExpr(Val: AMDGPUMCExpr::createLit(
1689 Lit: LitModifier::Lit64, Value: Literal, Ctx&: getContext()))
1690 : MCOperand::createImm(Val: Literal);
1691}
1692
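// Decode an inline integer constant. Assuming the usual EncValues ranges
// (INLINE_INTEGER_C_MIN = 128, INLINE_INTEGER_C_POSITIVE_MAX = 192), encodings
// 128..192 map to 0..64 and 193..208 map to -1..-16.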
1693MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
1694 using namespace AMDGPU::EncValues;
1695
1696 assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
1697 return MCOperand::createImm(Val: (Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
1698 (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
1699 (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
1700 // Cast prevents negative overflow.
1701}
1702
1703static int64_t getInlineImmVal32(unsigned Imm) {
1704 switch (Imm) {
1705 case 240:
1706 return llvm::bit_cast<uint32_t>(from: 0.5f);
1707 case 241:
1708 return llvm::bit_cast<uint32_t>(from: -0.5f);
1709 case 242:
1710 return llvm::bit_cast<uint32_t>(from: 1.0f);
1711 case 243:
1712 return llvm::bit_cast<uint32_t>(from: -1.0f);
1713 case 244:
1714 return llvm::bit_cast<uint32_t>(from: 2.0f);
1715 case 245:
1716 return llvm::bit_cast<uint32_t>(from: -2.0f);
1717 case 246:
1718 return llvm::bit_cast<uint32_t>(from: 4.0f);
1719 case 247:
1720 return llvm::bit_cast<uint32_t>(from: -4.0f);
1721 case 248: // 1 / (2 * PI)
1722 return 0x3e22f983;
1723 default:
1724 llvm_unreachable("invalid fp inline imm");
1725 }
1726}
1727
1728static int64_t getInlineImmVal64(unsigned Imm) {
1729 switch (Imm) {
1730 case 240:
1731 return llvm::bit_cast<uint64_t>(from: 0.5);
1732 case 241:
1733 return llvm::bit_cast<uint64_t>(from: -0.5);
1734 case 242:
1735 return llvm::bit_cast<uint64_t>(from: 1.0);
1736 case 243:
1737 return llvm::bit_cast<uint64_t>(from: -1.0);
1738 case 244:
1739 return llvm::bit_cast<uint64_t>(from: 2.0);
1740 case 245:
1741 return llvm::bit_cast<uint64_t>(from: -2.0);
1742 case 246:
1743 return llvm::bit_cast<uint64_t>(from: 4.0);
1744 case 247:
1745 return llvm::bit_cast<uint64_t>(from: -4.0);
1746 case 248: // 1 / (2 * PI)
1747 return 0x3fc45f306dc9c882;
1748 default:
1749 llvm_unreachable("invalid fp inline imm");
1750 }
1751}
1752
1753static int64_t getInlineImmValF16(unsigned Imm) {
1754 switch (Imm) {
1755 case 240:
1756 return 0x3800;
1757 case 241:
1758 return 0xB800;
1759 case 242:
1760 return 0x3C00;
1761 case 243:
1762 return 0xBC00;
1763 case 244:
1764 return 0x4000;
1765 case 245:
1766 return 0xC000;
1767 case 246:
1768 return 0x4400;
1769 case 247:
1770 return 0xC400;
1771 case 248: // 1 / (2 * PI)
1772 return 0x3118;
1773 default:
1774 llvm_unreachable("invalid fp inline imm");
1775 }
1776}
1777
1778static int64_t getInlineImmValBF16(unsigned Imm) {
1779 switch (Imm) {
1780 case 240:
1781 return 0x3F00;
1782 case 241:
1783 return 0xBF00;
1784 case 242:
1785 return 0x3F80;
1786 case 243:
1787 return 0xBF80;
1788 case 244:
1789 return 0x4000;
1790 case 245:
1791 return 0xC000;
1792 case 246:
1793 return 0x4080;
1794 case 247:
1795 return 0xC080;
1796 case 248: // 1 / (2 * PI)
1797 return 0x3E22;
1798 default:
1799 llvm_unreachable("invalid fp inline imm");
1800 }
1801}
1802
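// Map an operand width in bits to the VGPR register class used to materialize
// it. 16-bit operands share the 32-bit class here; true16 halves go through
// createVGPR16Operand instead.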
1803unsigned AMDGPUDisassembler::getVgprClassId(unsigned Width) const {
1804 using namespace AMDGPU;
1805
1806 switch (Width) {
1807 case 16:
1808 case 32:
1809 return VGPR_32RegClassID;
1810 case 64:
1811 return VReg_64RegClassID;
1812 case 96:
1813 return VReg_96RegClassID;
1814 case 128:
1815 return VReg_128RegClassID;
1816 case 160:
1817 return VReg_160RegClassID;
1818 case 192:
1819 return VReg_192RegClassID;
1820 case 256:
1821 return VReg_256RegClassID;
1822 case 288:
1823 return VReg_288RegClassID;
1824 case 320:
1825 return VReg_320RegClassID;
1826 case 352:
1827 return VReg_352RegClassID;
1828 case 384:
1829 return VReg_384RegClassID;
1830 case 512:
1831 return VReg_512RegClassID;
1832 case 1024:
1833 return VReg_1024RegClassID;
1834 }
1835 llvm_unreachable("Invalid register width!");
1836}
1837
1838unsigned AMDGPUDisassembler::getAgprClassId(unsigned Width) const {
1839 using namespace AMDGPU;
1840
1841 switch (Width) {
1842 case 16:
1843 case 32:
1844 return AGPR_32RegClassID;
1845 case 64:
1846 return AReg_64RegClassID;
1847 case 96:
1848 return AReg_96RegClassID;
1849 case 128:
1850 return AReg_128RegClassID;
1851 case 160:
1852 return AReg_160RegClassID;
1853 case 256:
1854 return AReg_256RegClassID;
1855 case 288:
1856 return AReg_288RegClassID;
1857 case 320:
1858 return AReg_320RegClassID;
1859 case 352:
1860 return AReg_352RegClassID;
1861 case 384:
1862 return AReg_384RegClassID;
1863 case 512:
1864 return AReg_512RegClassID;
1865 case 1024:
1866 return AReg_1024RegClassID;
1867 }
1868 llvm_unreachable("Invalid register width!");
1869}
1870
1871unsigned AMDGPUDisassembler::getSgprClassId(unsigned Width) const {
1872 using namespace AMDGPU;
1873
1874 switch (Width) {
1875 case 16:
1876 case 32:
1877 return SGPR_32RegClassID;
1878 case 64:
1879 return SGPR_64RegClassID;
1880 case 96:
1881 return SGPR_96RegClassID;
1882 case 128:
1883 return SGPR_128RegClassID;
1884 case 160:
1885 return SGPR_160RegClassID;
1886 case 256:
1887 return SGPR_256RegClassID;
1888 case 288:
1889 return SGPR_288RegClassID;
1890 case 320:
1891 return SGPR_320RegClassID;
1892 case 352:
1893 return SGPR_352RegClassID;
1894 case 384:
1895 return SGPR_384RegClassID;
1896 case 512:
1897 return SGPR_512RegClassID;
1898 }
1899 llvm_unreachable("Invalid register width!");
1900}
1901
1902unsigned AMDGPUDisassembler::getTtmpClassId(unsigned Width) const {
1903 using namespace AMDGPU;
1904
1905 switch (Width) {
1906 case 16:
1907 case 32:
1908 return TTMP_32RegClassID;
1909 case 64:
1910 return TTMP_64RegClassID;
1911 case 128:
1912 return TTMP_128RegClassID;
1913 case 256:
1914 return TTMP_256RegClassID;
1915 case 288:
1916 return TTMP_288RegClassID;
1917 case 320:
1918 return TTMP_320RegClassID;
1919 case 352:
1920 return TTMP_352RegClassID;
1921 case 384:
1922 return TTMP_384RegClassID;
1923 case 512:
1924 return TTMP_512RegClassID;
1925 }
1926 llvm_unreachable("Invalid register width!");
1927}
1928
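// Return the 0-based trap temporary (TTMP) index for an encoded scalar
// operand, or -1 if Val is outside the TTMP range for this subtarget.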
1929int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
1930 using namespace AMDGPU::EncValues;
1931
1932 unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
1933 unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
1934
1935 return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
1936}
1937
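// Decode a 10-bit (enum10) source operand encoding: bit 9 selects AGPR vs
// VGPR, values in the VGPR range map directly to vector registers, and
// everything else is handled as an SGPR, TTMP, special register or immediate.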
1938MCOperand AMDGPUDisassembler::decodeSrcOp(const MCInst &Inst, unsigned Width,
1939 unsigned Val) const {
1940 using namespace AMDGPU::EncValues;
1941
1942 assert(Val < 1024); // enum10
1943
1944 bool IsAGPR = Val & 512;
1945 Val &= 511;
1946
1947 if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
1948 return createRegOperand(RegClassID: IsAGPR ? getAgprClassId(Width)
1949 : getVgprClassId(Width), Val: Val - VGPR_MIN);
1950 }
1951 return decodeNonVGPRSrcOp(Inst, Width, Val: Val & 0xFF);
1952}
1953
1954MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const MCInst &Inst,
1955 unsigned Width,
1956 unsigned Val) const {
1957 // Cases where Val{8} is 1 (VGPR, AGPR or true16 VGPR) should have been
1958 // decoded earlier.
1959 assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
1960 using namespace AMDGPU::EncValues;
1961
1962 if (Val <= SGPR_MAX) {
1963 // "SGPR_MIN <= Val" is always true and causes compilation warning.
1964 static_assert(SGPR_MIN == 0);
1965 return createSRegOperand(SRegClassID: getSgprClassId(Width), Val: Val - SGPR_MIN);
1966 }
1967
1968 int TTmpIdx = getTTmpIdx(Val);
1969 if (TTmpIdx >= 0) {
1970 return createSRegOperand(SRegClassID: getTtmpClassId(Width), Val: TTmpIdx);
1971 }
1972
1973 if ((INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) ||
1974 (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) ||
1975 Val == LITERAL_CONST)
1976 return MCOperand::createImm(Val);
1977
1978 if (Val == LITERAL64_CONST && STI.hasFeature(Feature: AMDGPU::Feature64BitLiterals)) {
1979 return decodeLiteral64Constant();
1980 }
1981
1982 switch (Width) {
1983 case 32:
1984 case 16:
1985 return decodeSpecialReg32(Val);
1986 case 64:
1987 return decodeSpecialReg64(Val);
1988 case 96:
1989 case 128:
1990 case 256:
1991 case 512:
1992 return decodeSpecialReg96Plus(Val);
1993 default:
1994 llvm_unreachable("unexpected immediate type");
1995 }
1996}
1997
1998// Bit 0 of DstY isn't stored in the instruction, because it's always the
1999// opposite of bit 0 of DstX.
2000MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
2001 unsigned Val) const {
2002 int VDstXInd =
2003 AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: AMDGPU::OpName::vdstX);
2004 assert(VDstXInd != -1);
2005 assert(Inst.getOperand(VDstXInd).isReg());
2006 unsigned XDstReg = MRI.getEncodingValue(Reg: Inst.getOperand(i: VDstXInd).getReg());
2007 Val |= ~XDstReg & 1;
2008 return createRegOperand(RegClassID: getVgprClassId(Width: 32), Val);
2009}
2010
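// Decode the special scalar source encodings such as VCC, EXEC, M0 and the
// aperture registers. Note that the meanings of encodings 124 and 125 (M0 vs
// SGPR_NULL) are swapped on GFX11+.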
2011MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
2012 using namespace AMDGPU;
2013
2014 switch (Val) {
2015 // clang-format off
2016 case 102: return createRegOperand(Reg: FLAT_SCR_LO);
2017 case 103: return createRegOperand(Reg: FLAT_SCR_HI);
2018 case 104: return createRegOperand(Reg: XNACK_MASK_LO);
2019 case 105: return createRegOperand(Reg: XNACK_MASK_HI);
2020 case 106: return createRegOperand(Reg: VCC_LO);
2021 case 107: return createRegOperand(Reg: VCC_HI);
2022 case 108: return createRegOperand(Reg: TBA_LO);
2023 case 109: return createRegOperand(Reg: TBA_HI);
2024 case 110: return createRegOperand(Reg: TMA_LO);
2025 case 111: return createRegOperand(Reg: TMA_HI);
2026 case 124:
2027 return isGFX11Plus() ? createRegOperand(Reg: SGPR_NULL) : createRegOperand(Reg: M0);
2028 case 125:
2029 return isGFX11Plus() ? createRegOperand(Reg: M0) : createRegOperand(Reg: SGPR_NULL);
2030 case 126: return createRegOperand(Reg: EXEC_LO);
2031 case 127: return createRegOperand(Reg: EXEC_HI);
2032 case 230: return createRegOperand(Reg: SRC_FLAT_SCRATCH_BASE_LO);
2033 case 231: return createRegOperand(Reg: SRC_FLAT_SCRATCH_BASE_HI);
2034 case 235: return createRegOperand(Reg: SRC_SHARED_BASE_LO);
2035 case 236: return createRegOperand(Reg: SRC_SHARED_LIMIT_LO);
2036 case 237: return createRegOperand(Reg: SRC_PRIVATE_BASE_LO);
2037 case 238: return createRegOperand(Reg: SRC_PRIVATE_LIMIT_LO);
2038 case 239: return createRegOperand(Reg: SRC_POPS_EXITING_WAVE_ID);
2039 case 251: return createRegOperand(Reg: SRC_VCCZ);
2040 case 252: return createRegOperand(Reg: SRC_EXECZ);
2041 case 253: return createRegOperand(Reg: SRC_SCC);
2042 case 254: return createRegOperand(Reg: LDS_DIRECT);
2043 default: break;
2044 // clang-format on
2045 }
2046 return errOperand(V: Val, ErrMsg: "unknown operand encoding " + Twine(Val));
2047}
2048
2049MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
2050 using namespace AMDGPU;
2051
2052 switch (Val) {
2053 case 102: return createRegOperand(Reg: FLAT_SCR);
2054 case 104: return createRegOperand(Reg: XNACK_MASK);
2055 case 106: return createRegOperand(Reg: VCC);
2056 case 108: return createRegOperand(Reg: TBA);
2057 case 110: return createRegOperand(Reg: TMA);
2058 case 124:
2059 if (isGFX11Plus())
2060 return createRegOperand(Reg: SGPR_NULL);
2061 break;
2062 case 125:
2063 if (!isGFX11Plus())
2064 return createRegOperand(Reg: SGPR_NULL);
2065 break;
2066 case 126: return createRegOperand(Reg: EXEC);
2067 case 230: return createRegOperand(Reg: SRC_FLAT_SCRATCH_BASE_LO);
2068 case 235: return createRegOperand(Reg: SRC_SHARED_BASE);
2069 case 236: return createRegOperand(Reg: SRC_SHARED_LIMIT);
2070 case 237: return createRegOperand(Reg: SRC_PRIVATE_BASE);
2071 case 238: return createRegOperand(Reg: SRC_PRIVATE_LIMIT);
2072 case 239: return createRegOperand(Reg: SRC_POPS_EXITING_WAVE_ID);
2073 case 251: return createRegOperand(Reg: SRC_VCCZ);
2074 case 252: return createRegOperand(Reg: SRC_EXECZ);
2075 case 253: return createRegOperand(Reg: SRC_SCC);
2076 default: break;
2077 }
2078 return errOperand(V: Val, ErrMsg: "unknown operand encoding " + Twine(Val));
2079}
2080
2081MCOperand AMDGPUDisassembler::decodeSpecialReg96Plus(unsigned Val) const {
2082 using namespace AMDGPU;
2083
2084 switch (Val) {
2085 case 124:
2086 if (isGFX11Plus())
2087 return createRegOperand(Reg: SGPR_NULL);
2088 break;
2089 case 125:
2090 if (!isGFX11Plus())
2091 return createRegOperand(Reg: SGPR_NULL);
2092 break;
2093 default:
2094 break;
2095 }
2096 return errOperand(V: Val, ErrMsg: "unknown operand encoding " + Twine(Val));
2097}
2098
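// Decode an SDWA source operand. On GFX9/GFX10 the encoding distinguishes
// VGPRs, SGPRs, TTMPs, inline constants and special registers; on Volcanic
// Islands it is a plain VGPR number.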
2099MCOperand AMDGPUDisassembler::decodeSDWASrc(unsigned Width,
2100 const unsigned Val) const {
2101 using namespace AMDGPU::SDWA;
2102 using namespace AMDGPU::EncValues;
2103
2104 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX9) ||
2105 STI.hasFeature(Feature: AMDGPU::FeatureGFX10)) {
2106 // XXX: the cast to int is needed to avoid a spurious warning that the
2107 // comparison with an unsigned value is always true.
2108 if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
2109 Val <= SDWA9EncValues::SRC_VGPR_MAX) {
2110 return createRegOperand(RegClassID: getVgprClassId(Width),
2111 Val: Val - SDWA9EncValues::SRC_VGPR_MIN);
2112 }
2113 if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
2114 Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
2115 : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
2116 return createSRegOperand(SRegClassID: getSgprClassId(Width),
2117 Val: Val - SDWA9EncValues::SRC_SGPR_MIN);
2118 }
2119 if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
2120 Val <= SDWA9EncValues::SRC_TTMP_MAX) {
2121 return createSRegOperand(SRegClassID: getTtmpClassId(Width),
2122 Val: Val - SDWA9EncValues::SRC_TTMP_MIN);
2123 }
2124
2125 const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
2126
2127 if ((INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) ||
2128 (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX))
2129 return MCOperand::createImm(Val: SVal);
2130
2131 return decodeSpecialReg32(Val: SVal);
2132 }
2133 if (STI.hasFeature(Feature: AMDGPU::FeatureVolcanicIslands))
2134 return createRegOperand(RegClassID: getVgprClassId(Width), Val);
2135 llvm_unreachable("unsupported target");
2136}
2137
2138MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
2139 return decodeSDWASrc(Width: 16, Val);
2140}
2141
2142MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
2143 return decodeSDWASrc(Width: 32, Val);
2144}
2145
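// Decode the SDWA VOPC destination: either the implicit VCC, or an explicit
// SGPR/TTMP destination when the VOPC_DST_VCC bit is set (a 64-bit pair in
// wave64, a single 32-bit register in wave32).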
2146MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
2147 using namespace AMDGPU::SDWA;
2148
2149 assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
2150 STI.hasFeature(AMDGPU::FeatureGFX10)) &&
2151 "SDWAVopcDst should be present only on GFX9+");
2152
2153 bool IsWave32 = STI.hasFeature(Feature: AMDGPU::FeatureWavefrontSize32);
2154
2155 if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
2156 Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
2157
2158 int TTmpIdx = getTTmpIdx(Val);
2159 if (TTmpIdx >= 0) {
2160 auto TTmpClsId = getTtmpClassId(Width: IsWave32 ? 32 : 64);
2161 return createSRegOperand(SRegClassID: TTmpClsId, Val: TTmpIdx);
2162 }
2163 if (Val > SGPR_MAX) {
2164 return IsWave32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val);
2165 }
2166 return createSRegOperand(SRegClassID: getSgprClassId(Width: IsWave32 ? 32 : 64), Val);
2167 }
2168 return createRegOperand(Reg: IsWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC);
2169}
2170
2171MCOperand AMDGPUDisassembler::decodeBoolReg(const MCInst &Inst,
2172 unsigned Val) const {
2173 return STI.hasFeature(Feature: AMDGPU::FeatureWavefrontSize32)
2174 ? decodeSrcOp(Inst, Width: 32, Val)
2175 : decodeSrcOp(Inst, Width: 64, Val);
2176}
2177
2178MCOperand AMDGPUDisassembler::decodeSplitBarrier(const MCInst &Inst,
2179 unsigned Val) const {
2180 return decodeSrcOp(Inst, Width: 32, Val);
2181}
2182
2183MCOperand AMDGPUDisassembler::decodeDpp8FI(unsigned Val) const {
2184 if (Val != AMDGPU::DPP::DPP8_FI_0 && Val != AMDGPU::DPP::DPP8_FI_1)
2185 return MCOperand();
2186 return MCOperand::createImm(Val);
2187}
2188
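// Decode the microcode version immediate into a symbolic expression built from
// the UC_VERSION_* symbols created in the constructor; fall back to a plain
// immediate if any unused bits are set.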
2189MCOperand AMDGPUDisassembler::decodeVersionImm(unsigned Imm) const {
2190 using VersionField = AMDGPU::EncodingField<7, 0>;
2191 using W64Bit = AMDGPU::EncodingBit<13>;
2192 using W32Bit = AMDGPU::EncodingBit<14>;
2193 using MDPBit = AMDGPU::EncodingBit<15>;
2194 using Encoding = AMDGPU::EncodingFields<VersionField, W64Bit, W32Bit, MDPBit>;
2195
2196 auto [Version, W64, W32, MDP] = Encoding::decode(Encoded: Imm);
2197
2198 // Decode into a plain immediate if any unused bits are set.
2199 if (Encoding::encode(Values: Version, Values: W64, Values: W32, Values: MDP) != Imm)
2200 return MCOperand::createImm(Val: Imm);
2201
2202 const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
2203 const auto *I = find_if(
2204 Range: Versions, P: [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) {
2205 return V.Code == Version;
2206 });
2207 MCContext &Ctx = getContext();
2208 const MCExpr *E;
2209 if (I == Versions.end())
2210 E = MCConstantExpr::create(Value: Version, Ctx);
2211 else
2212 E = MCSymbolRefExpr::create(Symbol: Ctx.getOrCreateSymbol(Name: I->Symbol), Ctx);
2213
2214 if (W64)
2215 E = MCBinaryExpr::createOr(LHS: E, RHS: UCVersionW64Expr, Ctx);
2216 if (W32)
2217 E = MCBinaryExpr::createOr(LHS: E, RHS: UCVersionW32Expr, Ctx);
2218 if (MDP)
2219 E = MCBinaryExpr::createOr(LHS: E, RHS: UCVersionMDPExpr, Ctx);
2220
2221 return MCOperand::createExpr(Val: E);
2222}
2223
2224bool AMDGPUDisassembler::isVI() const {
2225 return STI.hasFeature(Feature: AMDGPU::FeatureVolcanicIslands);
2226}
2227
2228bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }
2229
2230bool AMDGPUDisassembler::isGFX90A() const {
2231 return STI.hasFeature(Feature: AMDGPU::FeatureGFX90AInsts);
2232}
2233
2234bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }
2235
2236bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }
2237
2238bool AMDGPUDisassembler::isGFX10Plus() const {
2239 return AMDGPU::isGFX10Plus(STI);
2240}
2241
2242bool AMDGPUDisassembler::isGFX11() const {
2243 return STI.hasFeature(Feature: AMDGPU::FeatureGFX11);
2244}
2245
2246bool AMDGPUDisassembler::isGFX11Plus() const {
2247 return AMDGPU::isGFX11Plus(STI);
2248}
2249
2250bool AMDGPUDisassembler::isGFX12() const {
2251 return STI.hasFeature(Feature: AMDGPU::FeatureGFX12);
2252}
2253
2254bool AMDGPUDisassembler::isGFX12Plus() const {
2255 return AMDGPU::isGFX12Plus(STI);
2256}
2257
2258bool AMDGPUDisassembler::isGFX1250() const { return AMDGPU::isGFX1250(STI); }
2259
2260bool AMDGPUDisassembler::isGFX1250Plus() const {
2261 return AMDGPU::isGFX1250Plus(STI);
2262}
2263
2264bool AMDGPUDisassembler::isGFX13() const { return AMDGPU::isGFX13(STI); }
2265
2266bool AMDGPUDisassembler::isGFX13Plus() const {
2267 return AMDGPU::isGFX13Plus(STI);
2268}
2269
2270bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
2271 return STI.hasFeature(Feature: AMDGPU::FeatureArchitectedFlatScratch);
2272}
2273
2274bool AMDGPUDisassembler::hasKernargPreload() const {
2275 return AMDGPU::hasKernargPreload(STI);
2276}
2277
2278//===----------------------------------------------------------------------===//
2279// AMDGPU specific symbol handling
2280//===----------------------------------------------------------------------===//
2281
2282 /// Return a string describing the reserved bit range specified by Mask with
2283/// offset BaseBytes for use in error comments. Mask is a single continuous
2284/// range of 1s surrounded by zeros. The format here is meant to align with the
2285/// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
2286static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
2287 SmallString<32> Result;
2288 raw_svector_ostream S(Result);
2289
2290 int TrailingZeros = llvm::countr_zero(Val: Mask);
2291 int PopCount = llvm::popcount(Value: Mask);
2292
2293 if (PopCount == 1) {
2294 S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2295 } else {
2296 S << "bits in range ("
2297 << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
2298 << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2299 }
2300
2301 return Result;
2302}
2303
2304#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
2305#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2306 do { \
2307 KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
2308 } while (0)
2309#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
2310 do { \
2311 KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
2312 << GET_FIELD(MASK) << '\n'; \
2313 } while (0)
2314
2315#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG) \
2316 do { \
2317 if (FourByteBuffer & (MASK)) { \
2318 return createStringError(std::errc::invalid_argument, \
2319 "kernel descriptor " DESC \
2320 " reserved %s set" MSG, \
2321 getBitRangeFromMask((MASK), 0).c_str()); \
2322 } \
2323 } while (0)
2324
2325#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
2326#define CHECK_RESERVED_BITS_MSG(MASK, MSG) \
2327 CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
2328#define CHECK_RESERVED_BITS_DESC(MASK, DESC) \
2329 CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
2330#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG) \
2331 CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
2332
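// Disassemble COMPUTE_PGM_RSRC1 into the corresponding .amdhsa_* directives,
// checking that all reserved bits are zero.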
2333// NOLINTNEXTLINE(readability-identifier-naming)
2334Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
2335 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2336 using namespace amdhsa;
2337 StringRef Indent = "\t";
2338
2339 // We cannot accurately backward-compute the number of VGPRs used from
2340 // GRANULATED_WORKITEM_VGPR_COUNT. We only need the reassembled binary to
2341 // produce the same GRANULATED_WORKITEM_VGPR_COUNT, so simply calculate the
2342 // inverse of what the assembler does.
2343
2344 uint32_t GranulatedWorkitemVGPRCount =
2345 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
2346
2347 uint32_t NextFreeVGPR =
2348 (GranulatedWorkitemVGPRCount + 1) *
2349 AMDGPU::IsaInfo::getVGPREncodingGranule(STI: &STI, EnableWavefrontSize32);
2350
2351 KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
2352
2353 // We cannot backward-compute the values used to calculate
2354 // GRANULATED_WAVEFRONT_SGPR_COUNT, so the original values for the following
2355 // directives can't be recovered:
2356 // .amdhsa_reserve_vcc
2357 // .amdhsa_reserve_flat_scratch
2358 // .amdhsa_reserve_xnack_mask
2359 // They take their respective default values if not specified in the assembly.
2360 //
2361 // GRANULATED_WAVEFRONT_SGPR_COUNT
2362 // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
2363 //
2364 // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
2365 // are set to 0. So while disassembling we consider that:
2366 //
2367 // GRANULATED_WAVEFRONT_SGPR_COUNT
2368 // = f(NEXT_FREE_SGPR + 0 + 0 + 0)
2369 //
2370 // The disassembler cannot recover the original values of those 3 directives.
2371
2372 uint32_t GranulatedWavefrontSGPRCount =
2373 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
2374
2375 if (isGFX10Plus())
2376 CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
2377 "must be zero on gfx10+");
2378
2379 uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
2380 AMDGPU::IsaInfo::getSGPREncodingGranule(STI: &STI);
2381
2382 KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
2383 if (!hasArchitectedFlatScratch())
2384 KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
2385 bool ReservedXnackMask = STI.hasFeature(Feature: AMDGPU::FeatureXNACK);
2386 assert(!ReservedXnackMask || STI.hasFeature(AMDGPU::FeatureSupportsXNACK));
2387 KdStream << Indent << ".amdhsa_reserve_xnack_mask " << ReservedXnackMask
2388 << '\n';
2389 KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
2390
2391 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
2392
2393 PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
2394 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
2395 PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
2396 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
2397 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
2398 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
2399 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
2400 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
2401
2402 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);
2403
2404 if (!isGFX12Plus())
2405 PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
2406 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
2407
2408 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);
2409
2410 if (!isGFX12Plus())
2411 PRINT_DIRECTIVE(".amdhsa_ieee_mode",
2412 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
2413
2414 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
2415 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
2416
2417 // Bits [26].
2418 if (isGFX9Plus()) {
2419 PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
2420 } else {
2421 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
2422 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
2423 }
2424
2425 // Bits [27].
2426 if (isGFX1250Plus()) {
2427 PRINT_PSEUDO_DIRECTIVE_COMMENT("FLAT_SCRATCH_IS_NV",
2428 COMPUTE_PGM_RSRC1_GFX125_FLAT_SCRATCH_IS_NV);
2429 } else {
2430 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX120_RESERVED1,
2431 "COMPUTE_PGM_RSRC1");
2432 }
2433
2434 // Bits [28].
2435 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED2, "COMPUTE_PGM_RSRC1");
2436
2437 // Bits [29-31].
2438 if (isGFX10Plus()) {
2439 // WGP_MODE is not available on GFX1250.
2440 if (!isGFX1250Plus()) {
2441 PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
2442 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
2443 }
2444 PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
2445 PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
2446 } else {
2447 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED3,
2448 "COMPUTE_PGM_RSRC1");
2449 }
2450
2451 if (isGFX12Plus())
2452 PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
2453 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
2454
2455 return true;
2456}
2457
2458// NOLINTNEXTLINE(readability-identifier-naming)
2459Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
2460 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2461 using namespace amdhsa;
2462 StringRef Indent = "\t";
2463 if (hasArchitectedFlatScratch())
2464 PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
2465 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2466 else
2467 PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
2468 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2469 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
2470 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
2471 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
2472 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
2473 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
2474 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
2475 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
2476 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
2477 PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
2478 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
2479
2480 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
2481 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
2482 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);
2483
2484 PRINT_DIRECTIVE(
2485 ".amdhsa_exception_fp_ieee_invalid_op",
2486 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2487 PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
2488 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2489 PRINT_DIRECTIVE(
2490 ".amdhsa_exception_fp_ieee_div_zero",
2491 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2492 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
2493 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2494 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
2495 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2496 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
2497 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2498 PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
2499 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2500
2501 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");
2502
2503 return true;
2504}
2505
2506// NOLINTNEXTLINE(readability-identifier-naming)
2507Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
2508 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2509 using namespace amdhsa;
2510 StringRef Indent = "\t";
2511 if (isGFX90A()) {
2512 KdStream << Indent << ".amdhsa_accum_offset "
2513 << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
2514 << '\n';
2515
2516 PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2517
2518 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
2519 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2520 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
2521 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2522 } else if (isGFX10Plus()) {
2523 // Bits [0-3].
2524 if (!isGFX12Plus()) {
2525 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2526 PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
2527 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2528 } else {
2529 PRINT_PSEUDO_DIRECTIVE_COMMENT(
2530 "SHARED_VGPR_COUNT",
2531 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2532 }
2533 } else {
2534 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
2535 "COMPUTE_PGM_RSRC3",
2536 "must be zero on gfx12+");
2537 }
2538
2539 // Bits [4-11].
2540 if (isGFX11()) {
2541 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2542 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2543 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
2544 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2545 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
2546 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2547 } else if (isGFX12Plus()) {
2548 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2549 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2550 } else {
2551 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
2552 "COMPUTE_PGM_RSRC3",
2553 "must be zero on gfx10");
2554 }
2555
2556 // Bits [12].
2557 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
2558 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2559
2560 // Bits [13].
2561 if (isGFX12Plus()) {
2562 PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
2563 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2564 } else {
2565 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
2566 "COMPUTE_PGM_RSRC3",
2567 "must be zero on gfx10 or gfx11");
2568 }
2569
2570 // Bits [14-21].
2571 if (isGFX1250Plus()) {
2572 PRINT_DIRECTIVE(".amdhsa_named_barrier_count",
2573 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT);
2574 PRINT_PSEUDO_DIRECTIVE_COMMENT(
2575 "ENABLE_DYNAMIC_VGPR", COMPUTE_PGM_RSRC3_GFX125_ENABLE_DYNAMIC_VGPR);
2576 PRINT_PSEUDO_DIRECTIVE_COMMENT("TCP_SPLIT",
2577 COMPUTE_PGM_RSRC3_GFX125_TCP_SPLIT);
2578 PRINT_PSEUDO_DIRECTIVE_COMMENT(
2579 "ENABLE_DIDT_THROTTLE",
2580 COMPUTE_PGM_RSRC3_GFX125_ENABLE_DIDT_THROTTLE);
2581 } else {
2582 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX120_RESERVED4,
2583 "COMPUTE_PGM_RSRC3",
2584 "must be zero on gfx10+");
2585 }
2586
2587 // Bits [22-30].
2588 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED5,
2589 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2590
2591 // Bits [31].
2592 if (isGFX11Plus()) {
2593 PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
2594 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2595 } else {
2596 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED6,
2597 "COMPUTE_PGM_RSRC3",
2598 "must be zero on gfx10");
2599 }
2600 } else if (FourByteBuffer) {
2601 return createStringError(
2602 EC: std::errc::invalid_argument,
2603 Fmt: "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2604 }
2605 return true;
2606}
2607#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2608#undef PRINT_DIRECTIVE
2609#undef GET_FIELD
2610#undef CHECK_RESERVED_BITS_IMPL
2611#undef CHECK_RESERVED_BITS
2612#undef CHECK_RESERVED_BITS_MSG
2613#undef CHECK_RESERVED_BITS_DESC
2614#undef CHECK_RESERVED_BITS_DESC_MSG
2615
2616/// Create an error object to return from onSymbolStart for reserved kernel
2617/// descriptor bits being set.
2618static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2619 const char *Msg = "") {
2620 return createStringError(
2621 EC: std::errc::invalid_argument, Fmt: "kernel descriptor reserved %s set%s%s",
2622 Vals: getBitRangeFromMask(Mask, BaseBytes).c_str(), Vals: *Msg ? ", " : "", Vals: Msg);
2623}
2624
2625/// Create an error object to return from onSymbolStart for reserved kernel
2626/// descriptor bytes being set.
2627static Error createReservedKDBytesError(unsigned BaseInBytes,
2628 unsigned WidthInBytes) {
2629 // Create an error comment in the same format as the "Kernel Descriptor"
2630 // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2631 return createStringError(
2632 EC: std::errc::invalid_argument,
2633 Fmt: "kernel descriptor reserved bits in range (%u:%u) set",
2634 Vals: (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, Vals: BaseInBytes * CHAR_BIT);
2635}
2636
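// Decode the kernel descriptor field at the current cursor position and print
// the corresponding .amdhsa_* directive to KdStream (or verify that reserved
// bytes are zero).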
2637Expected<bool> AMDGPUDisassembler::decodeKernelDescriptorDirective(
2638 DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
2639 raw_string_ostream &KdStream) const {
2640#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2641 do { \
2642 KdStream << Indent << DIRECTIVE " " \
2643 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2644 } while (0)
2645
2646 uint16_t TwoByteBuffer = 0;
2647 uint32_t FourByteBuffer = 0;
2648
2649 StringRef ReservedBytes;
2650 StringRef Indent = "\t";
2651
2652 assert(Bytes.size() == 64);
2653 DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
2654
2655 switch (Cursor.tell()) {
2656 case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
2657 FourByteBuffer = DE.getU32(C&: Cursor);
2658 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2659 << '\n';
2660 return true;
2661
2662 case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
2663 FourByteBuffer = DE.getU32(C&: Cursor);
2664 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2665 << FourByteBuffer << '\n';
2666 return true;
2667
2668 case amdhsa::KERNARG_SIZE_OFFSET:
2669 FourByteBuffer = DE.getU32(C&: Cursor);
2670 KdStream << Indent << ".amdhsa_kernarg_size "
2671 << FourByteBuffer << '\n';
2672 return true;
2673
2674 case amdhsa::RESERVED0_OFFSET:
2675 // 4 reserved bytes, must be 0.
2676 ReservedBytes = DE.getBytes(C&: Cursor, Length: 4);
2677 for (int I = 0; I < 4; ++I) {
2678 if (ReservedBytes[I] != 0)
2679 return createReservedKDBytesError(BaseInBytes: amdhsa::RESERVED0_OFFSET, WidthInBytes: 4);
2680 }
2681 return true;
2682
2683 case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
2684 // KERNEL_CODE_ENTRY_BYTE_OFFSET
2685 // So far no directive controls this for Code Object V3, so simply skip for
2686 // disassembly.
2687 DE.skip(C&: Cursor, Length: 8);
2688 return true;
2689
2690 case amdhsa::RESERVED1_OFFSET:
2691 // 20 reserved bytes, must be 0.
2692 ReservedBytes = DE.getBytes(C&: Cursor, Length: 20);
2693 for (int I = 0; I < 20; ++I) {
2694 if (ReservedBytes[I] != 0)
2695 return createReservedKDBytesError(BaseInBytes: amdhsa::RESERVED1_OFFSET, WidthInBytes: 20);
2696 }
2697 return true;
2698
2699 case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
2700 FourByteBuffer = DE.getU32(C&: Cursor);
2701 return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2702
2703 case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
2704 FourByteBuffer = DE.getU32(C&: Cursor);
2705 return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2706
2707 case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
2708 FourByteBuffer = DE.getU32(C&: Cursor);
2709 return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2710
2711 case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
2712 using namespace amdhsa;
2713 TwoByteBuffer = DE.getU16(C&: Cursor);
2714
2715 if (!hasArchitectedFlatScratch())
2716 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2717 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2718 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2719 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2720 PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2721 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2722 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2723 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2724 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2725 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2726 if (!hasArchitectedFlatScratch())
2727 PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2728 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2729 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2730 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2731
2732 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2733 return createReservedKDBitsError(Mask: KERNEL_CODE_PROPERTY_RESERVED0,
2734 BaseBytes: amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2735
2736 // Reserved for GFX9
2737 if (isGFX9() &&
2738 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2739 return createReservedKDBitsError(
2740 Mask: KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2741 BaseBytes: amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, Msg: "must be zero on gfx9");
2742 }
2743 if (isGFX10Plus()) {
2744 PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2745 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2746 }
2747
2748 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
2749 PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2750 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2751
2752 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2753 return createReservedKDBitsError(Mask: KERNEL_CODE_PROPERTY_RESERVED1,
2754 BaseBytes: amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2755 }
2756
2757 return true;
2758
2759 case amdhsa::KERNARG_PRELOAD_OFFSET:
2760 using namespace amdhsa;
2761 TwoByteBuffer = DE.getU16(C&: Cursor);
2762 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2763 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2764 KERNARG_PRELOAD_SPEC_LENGTH);
2765 }
2766
2767 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2768 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2769 KERNARG_PRELOAD_SPEC_OFFSET);
2770 }
2771 return true;
2772
2773 case amdhsa::RESERVED3_OFFSET:
2774 // 4 bytes from here are reserved, must be 0.
2775 ReservedBytes = DE.getBytes(C&: Cursor, Length: 4);
2776 for (int I = 0; I < 4; ++I) {
2777 if (ReservedBytes[I] != 0)
2778 return createReservedKDBytesError(BaseInBytes: amdhsa::RESERVED3_OFFSET, WidthInBytes: 4);
2779 }
2780 return true;
2781
2782 default:
2783 llvm_unreachable("Unhandled index. Case statements cover everything.");
2784 return true;
2785 }
2786#undef PRINT_DIRECTIVE
2787}
2788
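// Disassemble a full 64-byte code object v3+ kernel descriptor into an
// .amdhsa_kernel / .end_amdhsa_kernel block and print it to the output stream.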
2789Expected<bool> AMDGPUDisassembler::decodeKernelDescriptor(
2790 StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2791
2792 // CP microcode requires the kernel descriptor to be 64-byte aligned.
2793 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2794 return createStringError(EC: std::errc::invalid_argument,
2795 Fmt: "kernel descriptor must be 64-byte aligned");
2796
2797 // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2798 // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2799 // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2800 // order. Work around this by first looking up .amdhsa_wavefront_size32 here
2801 // when required.
2802 if (isGFX10Plus()) {
2803 uint16_t KernelCodeProperties =
2804 support::endian::read16(P: &Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
2805 E: llvm::endianness::little);
2806 EnableWavefrontSize32 =
2807 AMDHSA_BITS_GET(KernelCodeProperties,
2808 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2809 }
2810
2811 std::string Kd;
2812 raw_string_ostream KdStream(Kd);
2813 KdStream << ".amdhsa_kernel " << KdName << '\n';
2814
2815 DataExtractor::Cursor C(0);
2816 while (C && C.tell() < Bytes.size()) {
2817 Expected<bool> Res = decodeKernelDescriptorDirective(Cursor&: C, Bytes, KdStream);
2818
2819 cantFail(Err: C.takeError());
2820
2821 if (!Res)
2822 return Res;
2823 }
2824 KdStream << ".end_amdhsa_kernel\n";
2825 outs() << KdStream.str();
2826 return true;
2827}
2828
2829Expected<bool> AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
2830 uint64_t &Size,
2831 ArrayRef<uint8_t> Bytes,
2832 uint64_t Address) const {
2833 // Right now only the kernel descriptor needs to be handled; all other symbols
2834 // are ignored for target-specific handling.
2835 // TODO:
2836 // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2837 // Object V2 and V3 when symbols are marked protected.
2838
2839 // amd_kernel_code_t for Code Object V2.
2840 if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2841 Size = 256;
2842 return createStringError(EC: std::errc::invalid_argument,
2843 Fmt: "code object v2 is not supported");
2844 }
2845
2846 // Code Object V3 kernel descriptors.
2847 StringRef Name = Symbol.Name;
2848 if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(Suffix: StringRef(".kd"))) {
2849 Size = 64; // Size = 64 regardless of success or failure.
2850 return decodeKernelDescriptor(KdName: Name.drop_back(N: 3), Bytes, KdAddress: Address);
2851 }
2852
2853 return false;
2854}
2855
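// Create (or reuse) a symbol in the MC context with the given constant value
// and return a reference expression to it.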
2856const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
2857 int64_t Val) {
2858 MCContext &Ctx = getContext();
2859 MCSymbol *Sym = Ctx.getOrCreateSymbol(Name: Id);
2860 // Note: only set the value to Val on a newly created symbol, in case a
2861 // disassembler has already been initialized in this context.
2862 if (!Sym->isVariable()) {
2863 Sym->setVariableValue(MCConstantExpr::create(Value: Val, Ctx));
2864 } else {
2865 int64_t Res = ~Val;
2866 bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
2867 if (!Valid || Res != Val)
2868 Ctx.reportWarning(L: SMLoc(), Msg: "unsupported redefinition of " + Id);
2869 }
2870 return MCSymbolRefExpr::create(Symbol: Sym, Ctx);
2871}
2872
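// Return true if MI is a buffer access: any MUBUF/MTBUF instruction, or an
// SMEM S_BUFFER_* instruction.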
2873bool AMDGPUDisassembler::isBufferInstruction(const MCInst &MI) const {
2874 const uint64_t TSFlags = MCII->get(Opcode: MI.getOpcode()).TSFlags;
2875
2876 // Check for MUBUF and MTBUF instructions
2877 if (TSFlags & (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))
2878 return true;
2879
2880 // Check for SMEM buffer instructions (S_BUFFER_* instructions)
2881 if ((TSFlags & SIInstrFlags::SMRD) && AMDGPU::getSMEMIsBuffer(Opc: MI.getOpcode()))
2882 return true;
2883
2884 return false;
2885}
2886
2887//===----------------------------------------------------------------------===//
2888// AMDGPUSymbolizer
2889//===----------------------------------------------------------------------===//
2890
2891 // Try to find the symbol name for the specified label.
2892bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
2893 MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2894 uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2895 uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2896
2897 if (!IsBranch) {
2898 return false;
2899 }
2900
2901 auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2902 if (!Symbols)
2903 return false;
2904
2905 auto Result = llvm::find_if(Range&: *Symbols, P: [Value](const SymbolInfoTy &Val) {
2906 return Val.Addr == static_cast<uint64_t>(Value) &&
2907 Val.Type == ELF::STT_NOTYPE;
2908 });
2909 if (Result != Symbols->end()) {
2910 auto *Sym = Ctx.getOrCreateSymbol(Name: Result->Name);
2911 const auto *Add = MCSymbolRefExpr::create(Symbol: Sym, Ctx);
2912 Inst.addOperand(Op: MCOperand::createExpr(Val: Add));
2913 return true;
2914 }
2915 // Add to the list of referenced addresses so the caller can synthesize a label.
2916 ReferencedAddresses.push_back(x: static_cast<uint64_t>(Value));
2917 return false;
2918}
2919
2920void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
2921 int64_t Value,
2922 uint64_t Address) {
2923 llvm_unreachable("unimplemented");
2924}
2925
2926//===----------------------------------------------------------------------===//
2927// Initialization
2928//===----------------------------------------------------------------------===//
2929
2930static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
2931 LLVMOpInfoCallback /*GetOpInfo*/,
2932 LLVMSymbolLookupCallback /*SymbolLookUp*/,
2933 void *DisInfo,
2934 MCContext *Ctx,
2935 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2936 return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2937}
2938
2939static MCDisassembler *createAMDGPUDisassembler(const Target &T,
2940 const MCSubtargetInfo &STI,
2941 MCContext &Ctx) {
2942 return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2943}
2944
2945extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
2946LLVMInitializeAMDGPUDisassembler() {
2947 TargetRegistry::RegisterMCDisassembler(T&: getTheGCNTarget(),
2948 Fn: createAMDGPUDisassembler);
2949 TargetRegistry::RegisterMCSymbolizer(T&: getTheGCNTarget(),
2950 Fn: createAMDGPUSymbolizer);
2951}
2952