1//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//===----------------------------------------------------------------------===//
10//
11/// \file
12///
13/// This file contains definition for AMDGPU ISA disassembler
14//
15//===----------------------------------------------------------------------===//
16
17// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
18
19#include "Disassembler/AMDGPUDisassembler.h"
20#include "MCTargetDesc/AMDGPUMCExpr.h"
21#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
22#include "SIDefines.h"
23#include "SIRegisterInfo.h"
24#include "TargetInfo/AMDGPUTargetInfo.h"
25#include "Utils/AMDGPUAsmUtils.h"
26#include "Utils/AMDGPUBaseInfo.h"
27#include "llvm-c/DisassemblerTypes.h"
28#include "llvm/BinaryFormat/ELF.h"
29#include "llvm/MC/MCAsmInfo.h"
30#include "llvm/MC/MCContext.h"
31#include "llvm/MC/MCDecoder.h"
32#include "llvm/MC/MCDecoderOps.h"
33#include "llvm/MC/MCExpr.h"
34#include "llvm/MC/MCInstrDesc.h"
35#include "llvm/MC/MCRegisterInfo.h"
36#include "llvm/MC/MCSubtargetInfo.h"
37#include "llvm/MC/TargetRegistry.h"
38#include "llvm/Support/AMDHSAKernelDescriptor.h"
39#include "llvm/Support/Compiler.h"
40
41using namespace llvm;
42using namespace llvm::MCD;
43
44#define DEBUG_TYPE "amdgpu-disassembler"
45
// Maximum valid SGPR encoding value; differs between generations. Expands in
// the context of an AMDGPUDisassembler member function (uses isGFX10Plus()).
#define SGPR_MAX \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \
                 : AMDGPU::EncValues::SGPR_MAX_SI)

using DecodeStatus = llvm::MCDisassembler::DecodeStatus;

// Forward declarations for the inline-immediate helpers defined elsewhere in
// this translation unit; they map a hardware inline-constant encoding to the
// corresponding literal bit pattern.
static int64_t getInlineImmValF16(unsigned Imm);
static int64_t getInlineImmValBF16(unsigned Imm);
static int64_t getInlineImmVal32(unsigned Imm);
static int64_t getInlineImmVal64(unsigned Imm);
56
// Construct a disassembler for the given subtarget. Rejects pre-VI subtargets
// (only GCN3 encoding and GFX10+ are supported) and registers the microcode
// version symbols used to print version checks symbolically.
AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
                                       MCContext &Ctx, MCInstrInfo const *MCII)
    : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
      MAI(*Ctx.getAsmInfo()),
      HwModeRegClass(STI.getHwMode(type: MCSubtargetInfo::HwMode_RegInfo)),
      TargetMaxInstBytes(MAI.getMaxInstLength(STI: &STI)),
      CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
  // ToDo: AMDGPUDisassembler supports only VI ISA.
  if (!STI.hasFeature(Feature: AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
    reportFatalUsageError(reason: "disassembly not yet supported for subtarget");

  // Register a constant symbol for each known GFX microcode version.
  for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
    createConstantSymbolExpr(Id: Symbol, Val: Code);

  UCVersionW64Expr = createConstantSymbolExpr(Id: "UC_VERSION_W64_BIT", Val: 0x2000);
  UCVersionW32Expr = createConstantSymbolExpr(Id: "UC_VERSION_W32_BIT", Val: 0x4000);
  UCVersionMDPExpr = createConstantSymbolExpr(Id: "UC_VERSION_MDP_BIT", Val: 0x8000);
}
75
76void AMDGPUDisassembler::setABIVersion(unsigned Version) {
77 CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(ABIVersion: Version);
78}
79
80inline static MCDisassembler::DecodeStatus
81addOperand(MCInst &Inst, const MCOperand& Opnd) {
82 Inst.addOperand(Op: Opnd);
83 return Opnd.isValid() ?
84 MCDisassembler::Success :
85 MCDisassembler::Fail;
86}
87
88static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
89 AMDGPU::OpName Name) {
90 int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name);
91 if (OpIdx != -1) {
92 auto *I = MI.begin();
93 std::advance(i&: I, n: OpIdx);
94 MI.insert(I, Op);
95 }
96 return OpIdx;
97}
98
99static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
100 uint64_t Addr,
101 const MCDisassembler *Decoder) {
102 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
103
104 // Our branches take a simm16.
105 int64_t Offset = SignExtend64<16>(x: Imm) * 4 + 4 + Addr;
106
107 if (DAsm->tryAddingSymbolicOperand(Inst, Value: Offset, Address: Addr, IsBranch: true, Offset: 2, OpSize: 2, InstSize: 0))
108 return MCDisassembler::Success;
109 return addOperand(Inst, Opnd: MCOperand::createImm(Val: Imm));
110}
111
112static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
113 const MCDisassembler *Decoder) {
114 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
115 int64_t Offset;
116 if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
117 Offset = SignExtend64<24>(x: Imm);
118 } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
119 Offset = Imm & 0xFFFFF;
120 } else { // GFX9+ supports 21-bit signed offsets.
121 Offset = SignExtend64<21>(x: Imm);
122 }
123 return addOperand(Inst, Opnd: MCOperand::createImm(Val: Offset));
124}
125
126static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
127 const MCDisassembler *Decoder) {
128 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
129 return addOperand(Inst, Opnd: DAsm->decodeBoolReg(Inst, Val));
130}
131
132static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
133 uint64_t Addr,
134 const MCDisassembler *Decoder) {
135 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
136 return addOperand(Inst, Opnd: DAsm->decodeSplitBarrier(Inst, Val));
137}
138
139static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
140 const MCDisassembler *Decoder) {
141 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
142 return addOperand(Inst, Opnd: DAsm->decodeDpp8FI(Val));
143}
144
// Helper macro: defines a static decoder thunk that forwards an immediate to
// the named AMDGPUDisassembler member and appends the resulting operand.
#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm, \
                                        uint64_t /*Addr*/, \
                                        const MCDisassembler *Decoder) { \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
    return addOperand(Inst, DAsm->DecoderName(Imm)); \
  }

// Decoder for registers, decode directly using RegClassID. Imm(8-bit) is
// number of register. Used by VGPR only and AGPR only operands.
#define DECODE_OPERAND_REG_8(RegClass) \
  static DecodeStatus Decode##RegClass##RegisterClass( \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, \
      const MCDisassembler *Decoder) { \
    assert(Imm < (1 << 8) && "8-bit encoding"); \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
    return addOperand( \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm)); \
  }

// Generic source-operand decoder thunk. EncSize is the encoding width (used
// only in the range assertion), OpWidth the operand's bit width, and EncImm
// the (possibly adjusted) encoding forwarded to decodeSrcOp.
#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm) \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, \
                           const MCDisassembler *Decoder) { \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding"); \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
    return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm)); \
  }
172
173static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
174 unsigned OpWidth, unsigned Imm, unsigned EncImm,
175 const MCDisassembler *Decoder) {
176 assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
177 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
178 return addOperand(Inst, Opnd: DAsm->decodeSrcOp(Inst, Width: OpWidth, Val: EncImm));
179}
180
// Decoder for registers. Imm(7-bit) is number of register, uses decodeSrcOp to
// get register class. Used by SGPR only operands.
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth) \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm)

// Same as above with an 8-bit register-number field.
#define DECODE_OPERAND_SREG_8(RegClass, OpWidth) \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm)
188
189// Decoder for registers. Imm(10-bit): Imm{7-0} is number of register,
190// Imm{9} is acc(agpr or vgpr) Imm{8} should be 0 (see VOP3Pe_SMFMAC).
191// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
192// Used by AV_ register classes (AGPR or VGPR only register operands).
193template <unsigned OpWidth>
194static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
195 const MCDisassembler *Decoder) {
196 return decodeSrcOp(Inst, EncSize: 10, OpWidth, Imm, EncImm: Imm | AMDGPU::EncValues::IS_VGPR,
197 Decoder);
198}
199
200// Decoder for Src(9-bit encoding) registers only.
201template <unsigned OpWidth>
202static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
203 uint64_t /* Addr */,
204 const MCDisassembler *Decoder) {
205 return decodeSrcOp(Inst, EncSize: 9, OpWidth, Imm, EncImm: Imm, Decoder);
206}
207
208// Decoder for Src(9-bit encoding) AGPR, register number encoded in 9bits, set
209// Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp, registers
210// only.
211template <unsigned OpWidth>
212static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
213 const MCDisassembler *Decoder) {
214 return decodeSrcOp(Inst, EncSize: 9, OpWidth, Imm, EncImm: Imm | 512, Decoder);
215}
216
217// Decoder for 'enum10' from decodeSrcOp, Imm{0-8} is 9-bit Src encoding
218// Imm{9} is acc, registers only.
219template <unsigned OpWidth>
220static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
221 uint64_t /* Addr */,
222 const MCDisassembler *Decoder) {
223 return decodeSrcOp(Inst, EncSize: 10, OpWidth, Imm, EncImm: Imm, Decoder);
224}
225
226// Decoder for RegisterOperands using 9-bit Src encoding. Operand can be
227// register from RegClass or immediate. Registers that don't belong to RegClass
228// will be decoded and InstPrinter will report warning. Immediate will be
229// decoded into constant matching the OperandType (important for floating point
230// types).
231template <unsigned OpWidth>
232static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
233 uint64_t /* Addr */,
234 const MCDisassembler *Decoder) {
235 return decodeSrcOp(Inst, EncSize: 9, OpWidth, Imm, EncImm: Imm, Decoder);
236}
237
238// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
239// and decode using 'enum10' from decodeSrcOp.
240template <unsigned OpWidth>
241static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
242 uint64_t /* Addr */,
243 const MCDisassembler *Decoder) {
244 return decodeSrcOp(Inst, EncSize: 9, OpWidth, Imm, EncImm: Imm | 512, Decoder);
245}
246
// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
// when RegisterClass is used as an operand. Most often used for destination
// operands.

// VGPR classes: 8-bit register number, decoded directly by RegClassID.
DECODE_OPERAND_REG_8(VGPR_32)
DECODE_OPERAND_REG_8(VGPR_32_Lo128)
DECODE_OPERAND_REG_8(VReg_64)
DECODE_OPERAND_REG_8(VReg_96)
DECODE_OPERAND_REG_8(VReg_128)
DECODE_OPERAND_REG_8(VReg_192)
DECODE_OPERAND_REG_8(VReg_256)
DECODE_OPERAND_REG_8(VReg_288)
DECODE_OPERAND_REG_8(VReg_320)
DECODE_OPERAND_REG_8(VReg_352)
DECODE_OPERAND_REG_8(VReg_384)
DECODE_OPERAND_REG_8(VReg_512)
DECODE_OPERAND_REG_8(VReg_1024)

// SGPR classes: 7-bit register number routed through decodeSrcOp, with the
// operand bit width as the second macro argument.
DECODE_OPERAND_SREG_7(SReg_32, 32)
DECODE_OPERAND_SREG_7(SReg_32_XM0, 32)
DECODE_OPERAND_SREG_7(SReg_32_XEXEC, 32)
DECODE_OPERAND_SREG_7(SReg_32_XM0_XEXEC, 32)
DECODE_OPERAND_SREG_7(SReg_32_XEXEC_HI, 32)
DECODE_OPERAND_SREG_7(SReg_64_XEXEC, 64)
DECODE_OPERAND_SREG_7(SReg_64_XEXEC_XNULL, 64)
DECODE_OPERAND_SREG_7(SReg_96, 96)
DECODE_OPERAND_SREG_7(SReg_128, 128)
DECODE_OPERAND_SREG_7(SReg_128_XNULL, 128)
DECODE_OPERAND_SREG_7(SReg_256, 256)
DECODE_OPERAND_SREG_7(SReg_256_XNULL, 256)
DECODE_OPERAND_SREG_7(SReg_512, 512)

// SReg_64 uses an 8-bit field.
DECODE_OPERAND_SREG_8(SReg_64, 64)

// AGPR classes: 8-bit register number, decoded directly by RegClassID.
DECODE_OPERAND_REG_8(AGPR_32)
DECODE_OPERAND_REG_8(AReg_64)
DECODE_OPERAND_REG_8(AReg_128)
DECODE_OPERAND_REG_8(AReg_256)
DECODE_OPERAND_REG_8(AReg_512)
DECODE_OPERAND_REG_8(AReg_1024)
287
288static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
289 uint64_t /*Addr*/,
290 const MCDisassembler *Decoder) {
291 assert(isUInt<10>(Imm) && "10-bit encoding expected");
292 assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");
293
294 bool IsHi = Imm & (1 << 9);
295 unsigned RegIdx = Imm & 0xff;
296 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
297 return addOperand(Inst, Opnd: DAsm->createVGPR16Operand(RegIdx, IsHi));
298}
299
300static DecodeStatus
301DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
302 const MCDisassembler *Decoder) {
303 assert(isUInt<8>(Imm) && "8-bit encoding expected");
304
305 bool IsHi = Imm & (1 << 7);
306 unsigned RegIdx = Imm & 0x7f;
307 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
308 return addOperand(Inst, Opnd: DAsm->createVGPR16Operand(RegIdx, IsHi));
309}
310
311template <unsigned OpWidth>
312static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
313 uint64_t /*Addr*/,
314 const MCDisassembler *Decoder) {
315 assert(isUInt<9>(Imm) && "9-bit encoding expected");
316
317 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
318 if (Imm & AMDGPU::EncValues::IS_VGPR) {
319 bool IsHi = Imm & (1 << 7);
320 unsigned RegIdx = Imm & 0x7f;
321 return addOperand(Inst, Opnd: DAsm->createVGPR16Operand(RegIdx, IsHi));
322 }
323 return addOperand(Inst, Opnd: DAsm->decodeNonVGPRSrcOp(Inst, Width: OpWidth, Val: Imm & 0xFF));
324}
325
326template <unsigned OpWidth>
327static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
328 uint64_t /*Addr*/,
329 const MCDisassembler *Decoder) {
330 assert(isUInt<10>(Imm) && "10-bit encoding expected");
331
332 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
333 if (Imm & AMDGPU::EncValues::IS_VGPR) {
334 bool IsHi = Imm & (1 << 9);
335 unsigned RegIdx = Imm & 0xff;
336 return addOperand(Inst, Opnd: DAsm->createVGPR16Operand(RegIdx, IsHi));
337 }
338 return addOperand(Inst, Opnd: DAsm->decodeNonVGPRSrcOp(Inst, Width: OpWidth, Val: Imm & 0xFF));
339}
340
341static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm,
342 uint64_t /*Addr*/,
343 const MCDisassembler *Decoder) {
344 assert(isUInt<10>(Imm) && "10-bit encoding expected");
345 assert(Imm & AMDGPU::EncValues::IS_VGPR && "VGPR expected");
346
347 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
348
349 bool IsHi = Imm & (1 << 9);
350 unsigned RegIdx = Imm & 0xff;
351 return addOperand(Inst, Opnd: DAsm->createVGPR16Operand(RegIdx, IsHi));
352}
353
354static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
355 uint64_t Addr,
356 const MCDisassembler *Decoder) {
357 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
358 return addOperand(Inst, Opnd: DAsm->decodeMandatoryLiteralConstant(Imm));
359}
360
361static DecodeStatus decodeOperand_KImmFP64(MCInst &Inst, uint64_t Imm,
362 uint64_t Addr,
363 const MCDisassembler *Decoder) {
364 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
365 return addOperand(Inst, Opnd: DAsm->decodeMandatoryLiteral64Constant(Imm));
366}
367
368static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
369 uint64_t Addr, const void *Decoder) {
370 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
371 return addOperand(Inst, Opnd: DAsm->decodeVOPDDstYOp(Inst, Val));
372}
373
374static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw,
375 const MCDisassembler *Decoder) {
376 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
377 return addOperand(Inst, Opnd: DAsm->decodeSrcOp(Inst, Width: Opw, Val: Imm | 256));
378}
379
380template <unsigned Opw>
381static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
382 uint64_t /* Addr */,
383 const MCDisassembler *Decoder) {
384 return decodeAVLdSt(Inst, Imm, Opw, Decoder);
385}
386
387static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
388 uint64_t Addr,
389 const MCDisassembler *Decoder) {
390 assert(Imm < (1 << 9) && "9-bit encoding");
391 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
392 return addOperand(Inst, Opnd: DAsm->decodeSrcOp(Inst, Width: 64, Val: Imm));
393}
394
// Define thunks for the SDWA operand decoders, forwarding to the
// correspondingly-named AMDGPUDisassembler members.
#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)
401
402static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
403 uint64_t /* Addr */,
404 const MCDisassembler *Decoder) {
405 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
406 return addOperand(Inst, Opnd: DAsm->decodeVersionImm(Imm));
407}
408
409#include "AMDGPUGenDisassemblerTables.inc"
410
namespace {
// Define bitwidths for various types used to instantiate the decoder.
// NOTE(review): InsnBitWidth appears to be a variable template declared by
// the generated decoder tables included above — confirm against
// AMDGPUGenDisassemblerTables.inc.
template <> constexpr uint32_t InsnBitWidth<uint32_t> = 32;
template <> constexpr uint32_t InsnBitWidth<uint64_t> = 64;
template <> constexpr uint32_t InsnBitWidth<std::bitset<96>> = 96;
template <> constexpr uint32_t InsnBitWidth<std::bitset<128>> = 128;
} // namespace
418
419//===----------------------------------------------------------------------===//
420//
421//===----------------------------------------------------------------------===//
422
// Attempt to decode one instruction using a single generated decoder table.
// On success, MI receives the decoded instruction and any comments produced
// during decoding are appended to Comments. On failure, the byte-stream
// position (the mutable member Bytes) is restored so another table can be
// tried, and nothing is emitted to Comments.
template <typename InsnType>
DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table, MCInst &MI,
                                               InsnType Inst, uint64_t Address,
                                               raw_ostream &Comments) const {
  assert(MI.getOpcode() == 0);
  assert(MI.getNumOperands() == 0);
  MCInst TmpInst;
  HasLiteral = false;
  const auto SavedBytes = Bytes; // Restored if this table fails to match.

  // Collect comments locally so a failed attempt leaves Comments untouched.
  SmallString<64> LocalComments;
  raw_svector_ostream LocalCommentStream(LocalComments);
  CommentStream = &LocalCommentStream;

  DecodeStatus Res =
      decodeInstruction(Table, TmpInst, Inst, Address, this, STI);

  CommentStream = nullptr;

  if (Res != MCDisassembler::Fail) {
    MI = TmpInst;
    Comments << LocalComments;
    return MCDisassembler::Success;
  }
  Bytes = SavedBytes;
  return MCDisassembler::Fail;
}
450
451template <typename InsnType>
452DecodeStatus
453AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table1, const uint8_t *Table2,
454 MCInst &MI, InsnType Inst, uint64_t Address,
455 raw_ostream &Comments) const {
456 for (const uint8_t *T : {Table1, Table2}) {
457 if (DecodeStatus Res = tryDecodeInst(T, MI, Inst, Address, Comments))
458 return Res;
459 }
460 return MCDisassembler::Fail;
461}
462
463template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
464 assert(Bytes.size() >= sizeof(T));
465 const auto Res =
466 support::endian::read<T, llvm::endianness::little>(Bytes.data());
467 Bytes = Bytes.slice(N: sizeof(T));
468 return Res;
469}
470
471static inline std::bitset<96> eat12Bytes(ArrayRef<uint8_t> &Bytes) {
472 using namespace llvm::support::endian;
473 assert(Bytes.size() >= 12);
474 std::bitset<96> Lo(read<uint64_t, endianness::little>(P: Bytes.data()));
475 Bytes = Bytes.slice(N: 8);
476 std::bitset<96> Hi(read<uint32_t, endianness::little>(P: Bytes.data()));
477 Bytes = Bytes.slice(N: 4);
478 return (Hi << 64) | Lo;
479}
480
481static inline std::bitset<128> eat16Bytes(ArrayRef<uint8_t> &Bytes) {
482 using namespace llvm::support::endian;
483 assert(Bytes.size() >= 16);
484 std::bitset<128> Lo(read<uint64_t, endianness::little>(P: Bytes.data()));
485 Bytes = Bytes.slice(N: 8);
486 std::bitset<128> Hi(read<uint64_t, endianness::little>(P: Bytes.data()));
487 Bytes = Bytes.slice(N: 8);
488 return (Hi << 64) | Lo;
489}
490
// Post-process the immediate operands of a freshly decoded MI: convert raw
// hardware inline-constant encodings into their literal bit patterns (using
// the operand's declared type to pick the f16/bf16/f32/f64 interpretation),
// and materialize literal constants that trail the instruction.
void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
                                           const MCInstrInfo &MCII) const {
  const MCInstrDesc &Desc = MCII.get(Opcode: MI.getOpcode());
  for (auto [OpNo, OpDesc] : enumerate(First: Desc.operands())) {
    if (OpNo >= MI.getNumOperands())
      continue;

    // TODO: Fix V_DUAL_FMAMK_F32_X_FMAAK_F32_gfx12 vsrc operands,
    // defined to take VGPR_32, but in reality allowing inline constants.
    // Only source operands (and plain register operands per the TODO above)
    // can carry inline constants or literals.
    bool IsSrc = AMDGPU::OPERAND_SRC_FIRST <= OpDesc.OperandType &&
                 OpDesc.OperandType <= AMDGPU::OPERAND_SRC_LAST;
    if (!IsSrc && OpDesc.OperandType != MCOI::OPERAND_REGISTER)
      continue;

    MCOperand &Op = MI.getOperand(i: OpNo);
    if (!Op.isImm())
      continue;
    int64_t Imm = Op.getImm();
    // Integer inline constants decode directly from the encoding range.
    if (AMDGPU::EncValues::INLINE_INTEGER_C_MIN <= Imm &&
        Imm <= AMDGPU::EncValues::INLINE_INTEGER_C_MAX) {
      Op = decodeIntImmed(Imm);
      continue;
    }

    // The literal marker means the actual value follows the instruction.
    if (Imm == AMDGPU::EncValues::LITERAL_CONST) {
      Op = decodeLiteralConstant(Desc, OpDesc);
      continue;
    }

    // Floating-point inline constants: the resulting bit pattern depends on
    // the operand's declared element type.
    if (AMDGPU::EncValues::INLINE_FLOATING_C_MIN <= Imm &&
        Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX) {
      switch (OpDesc.OperandType) {
      case AMDGPU::OPERAND_REG_IMM_BF16:
      case AMDGPU::OPERAND_REG_IMM_V2BF16:
      case AMDGPU::OPERAND_REG_INLINE_C_BF16:
      case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
        Imm = getInlineImmValBF16(Imm);
        break;
      case AMDGPU::OPERAND_REG_IMM_FP16:
      case AMDGPU::OPERAND_REG_IMM_INT16:
      case AMDGPU::OPERAND_REG_INLINE_C_FP16:
      case AMDGPU::OPERAND_REG_INLINE_C_INT16:
        Imm = getInlineImmValF16(Imm);
        break;
      case AMDGPU::OPERAND_REG_IMM_V2FP16:
      case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
        Imm = getInlineImmValF16(Imm);
        break;
      case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT: {
        // V_PK_FMAC_F16 on GFX11+ duplicates the f16 inline constant to both
        // halves, so we need to produce the duplicated value for correct
        // round-trip.
        if (isGFX11Plus()) {
          int64_t F16Val = getInlineImmValF16(Imm);
          Imm = (F16Val << 16) | (F16Val & 0xFFFF);
        } else {
          Imm = getInlineImmValF16(Imm);
        }
        break;
      }
      case AMDGPU::OPERAND_REG_IMM_FP64:
      case AMDGPU::OPERAND_REG_IMM_INT64:
      case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
      case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      case AMDGPU::OPERAND_REG_INLINE_C_INT64:
        Imm = getInlineImmVal64(Imm);
        break;
      default:
        Imm = getInlineImmVal32(Imm);
      }
      Op.setImm(Imm);
    }
  }
}
565
566DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
567 ArrayRef<uint8_t> Bytes_,
568 uint64_t Address,
569 raw_ostream &CS) const {
570 unsigned MaxInstBytesNum = std::min(a: (size_t)TargetMaxInstBytes, b: Bytes_.size());
571 Bytes = Bytes_.slice(N: 0, M: MaxInstBytesNum);
572
573 // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
574 // there are fewer bytes left). This will be overridden on success.
575 Size = std::min(a: (size_t)4, b: Bytes_.size());
576
577 do {
578 // ToDo: better to switch encoding length using some bit predicate
579 // but it is unknown yet, so try all we can
580
581 // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
582 // encodings
583 if (isGFX1250Plus() && Bytes.size() >= 16) {
584 std::bitset<128> DecW = eat16Bytes(Bytes);
585 if (tryDecodeInst(Table: DecoderTableGFX1250128, MI, Inst: DecW, Address, Comments&: CS))
586 break;
587 Bytes = Bytes_.slice(N: 0, M: MaxInstBytesNum);
588 }
589
590 if (isGFX11Plus() && Bytes.size() >= 12) {
591 std::bitset<96> DecW = eat12Bytes(Bytes);
592
593 if (isGFX1170() &&
594 tryDecodeInst(Table1: DecoderTableGFX117096, Table2: DecoderTableGFX1170_FAKE1696, MI,
595 Inst: DecW, Address, Comments&: CS))
596 break;
597
598 if (isGFX11() &&
599 tryDecodeInst(Table1: DecoderTableGFX1196, Table2: DecoderTableGFX11_FAKE1696, MI,
600 Inst: DecW, Address, Comments&: CS))
601 break;
602
603 if (isGFX1250() &&
604 tryDecodeInst(Table1: DecoderTableGFX125096, Table2: DecoderTableGFX1250_FAKE1696, MI,
605 Inst: DecW, Address, Comments&: CS))
606 break;
607
608 if (isGFX12() &&
609 tryDecodeInst(Table1: DecoderTableGFX1296, Table2: DecoderTableGFX12_FAKE1696, MI,
610 Inst: DecW, Address, Comments&: CS))
611 break;
612
613 if (isGFX12() &&
614 tryDecodeInst(Table: DecoderTableGFX12W6496, MI, Inst: DecW, Address, Comments&: CS))
615 break;
616
617 if (isGFX13() &&
618 tryDecodeInst(Table1: DecoderTableGFX1396, Table2: DecoderTableGFX13_FAKE1696, MI,
619 Inst: DecW, Address, Comments&: CS))
620 break;
621
622 if (STI.hasFeature(Feature: AMDGPU::Feature64BitLiterals)) {
623 // Return 8 bytes for a potential literal.
624 Bytes = Bytes_.slice(N: 4, M: MaxInstBytesNum - 4);
625
626 if (isGFX1250() &&
627 tryDecodeInst(Table: DecoderTableGFX125096, MI, Inst: DecW, Address, Comments&: CS))
628 break;
629 }
630
631 // Reinitialize Bytes
632 Bytes = Bytes_.slice(N: 0, M: MaxInstBytesNum);
633
634 } else if (Bytes.size() >= 16 &&
635 STI.hasFeature(Feature: AMDGPU::FeatureGFX950Insts)) {
636 std::bitset<128> DecW = eat16Bytes(Bytes);
637 if (tryDecodeInst(Table: DecoderTableGFX940128, MI, Inst: DecW, Address, Comments&: CS))
638 break;
639
640 // Reinitialize Bytes
641 Bytes = Bytes_.slice(N: 0, M: MaxInstBytesNum);
642 }
643
644 if (Bytes.size() >= 8) {
645 const uint64_t QW = eatBytes<uint64_t>(Bytes);
646
647 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX10_BEncoding) &&
648 tryDecodeInst(Table: DecoderTableGFX10_B64, MI, Inst: QW, Address, Comments&: CS))
649 break;
650
651 if (STI.hasFeature(Feature: AMDGPU::FeatureUnpackedD16VMem) &&
652 tryDecodeInst(Table: DecoderTableGFX80_UNPACKED64, MI, Inst: QW, Address, Comments&: CS))
653 break;
654
655 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX950Insts) &&
656 tryDecodeInst(Table: DecoderTableGFX95064, MI, Inst: QW, Address, Comments&: CS))
657 break;
658
659 // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
660 // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
661 // table first so we print the correct name.
662 if (STI.hasFeature(Feature: AMDGPU::FeatureFmaMixInsts) &&
663 tryDecodeInst(Table: DecoderTableGFX9_DL64, MI, Inst: QW, Address, Comments&: CS))
664 break;
665
666 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX940Insts) &&
667 tryDecodeInst(Table: DecoderTableGFX94064, MI, Inst: QW, Address, Comments&: CS))
668 break;
669
670 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX90AInsts) &&
671 tryDecodeInst(Table: DecoderTableGFX90A64, MI, Inst: QW, Address, Comments&: CS))
672 break;
673
674 if ((isVI() || isGFX9()) &&
675 tryDecodeInst(Table: DecoderTableGFX864, MI, Inst: QW, Address, Comments&: CS))
676 break;
677
678 if (isGFX9() && tryDecodeInst(Table: DecoderTableGFX964, MI, Inst: QW, Address, Comments&: CS))
679 break;
680
681 if (isGFX10() && tryDecodeInst(Table: DecoderTableGFX1064, MI, Inst: QW, Address, Comments&: CS))
682 break;
683
684 if (isGFX1250() &&
685 tryDecodeInst(Table1: DecoderTableGFX125064, Table2: DecoderTableGFX1250_FAKE1664, MI,
686 Inst: QW, Address, Comments&: CS))
687 break;
688
689 if (isGFX12() &&
690 tryDecodeInst(Table1: DecoderTableGFX1264, Table2: DecoderTableGFX12_FAKE1664, MI, Inst: QW,
691 Address, Comments&: CS))
692 break;
693
694 if (isGFX1170() &&
695 tryDecodeInst(Table1: DecoderTableGFX117064, Table2: DecoderTableGFX1170_FAKE1664, MI,
696 Inst: QW, Address, Comments&: CS))
697 break;
698
699 if (isGFX11() &&
700 tryDecodeInst(Table1: DecoderTableGFX1164, Table2: DecoderTableGFX11_FAKE1664, MI, Inst: QW,
701 Address, Comments&: CS))
702 break;
703
704 if (isGFX1170() &&
705 tryDecodeInst(Table: DecoderTableGFX1170W6464, MI, Inst: QW, Address, Comments&: CS))
706 break;
707
708 if (isGFX11() &&
709 tryDecodeInst(Table: DecoderTableGFX11W6464, MI, Inst: QW, Address, Comments&: CS))
710 break;
711
712 if (isGFX12() &&
713 tryDecodeInst(Table: DecoderTableGFX12W6464, MI, Inst: QW, Address, Comments&: CS))
714 break;
715
716 if (isGFX13() &&
717 tryDecodeInst(Table1: DecoderTableGFX1364, Table2: DecoderTableGFX13_FAKE1664, MI, Inst: QW,
718 Address, Comments&: CS))
719 break;
720
721 // Reinitialize Bytes
722 Bytes = Bytes_.slice(N: 0, M: MaxInstBytesNum);
723 }
724
725 // Try decode 32-bit instruction
726 if (Bytes.size() >= 4) {
727 const uint32_t DW = eatBytes<uint32_t>(Bytes);
728
729 if ((isVI() || isGFX9()) &&
730 tryDecodeInst(Table: DecoderTableGFX832, MI, Inst: DW, Address, Comments&: CS))
731 break;
732
733 if (tryDecodeInst(Table: DecoderTableAMDGPU32, MI, Inst: DW, Address, Comments&: CS))
734 break;
735
736 if (isGFX9() && tryDecodeInst(Table: DecoderTableGFX932, MI, Inst: DW, Address, Comments&: CS))
737 break;
738
739 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX950Insts) &&
740 tryDecodeInst(Table: DecoderTableGFX95032, MI, Inst: DW, Address, Comments&: CS))
741 break;
742
743 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX90AInsts) &&
744 tryDecodeInst(Table: DecoderTableGFX90A32, MI, Inst: DW, Address, Comments&: CS))
745 break;
746
747 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX10_BEncoding) &&
748 tryDecodeInst(Table: DecoderTableGFX10_B32, MI, Inst: DW, Address, Comments&: CS))
749 break;
750
751 if (isGFX10() && tryDecodeInst(Table: DecoderTableGFX1032, MI, Inst: DW, Address, Comments&: CS))
752 break;
753
754 if (isGFX1170() &&
755 tryDecodeInst(Table1: DecoderTableGFX117032, Table2: DecoderTableGFX1170_FAKE1632, MI,
756 Inst: DW, Address, Comments&: CS))
757 break;
758
759 if (isGFX11() &&
760 tryDecodeInst(Table1: DecoderTableGFX1132, Table2: DecoderTableGFX11_FAKE1632, MI, Inst: DW,
761 Address, Comments&: CS))
762 break;
763
764 if (isGFX1250() &&
765 tryDecodeInst(Table1: DecoderTableGFX125032, Table2: DecoderTableGFX1250_FAKE1632, MI,
766 Inst: DW, Address, Comments&: CS))
767 break;
768
769 if (isGFX12() &&
770 tryDecodeInst(Table1: DecoderTableGFX1232, Table2: DecoderTableGFX12_FAKE1632, MI, Inst: DW,
771 Address, Comments&: CS))
772 break;
773
774 if (isGFX13() &&
775 tryDecodeInst(Table1: DecoderTableGFX1332, Table2: DecoderTableGFX13_FAKE1632, MI, Inst: DW,
776 Address, Comments&: CS))
777 break;
778 }
779
780 return MCDisassembler::Fail;
781 } while (false);
782
783 DecodeStatus Status = MCDisassembler::Success;
784
785 decodeImmOperands(MI, MCII: *MCII);
786
787 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
788 if (isMacDPP(MI))
789 convertMacDPPInst(MI);
790
791 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
792 convertVOP3PDPPInst(MI);
793 else if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
794 convertVOPCDPPInst(MI); // Special VOP3 case
795 else if (AMDGPU::isVOPC64DPP(Opc: MI.getOpcode()))
796 convertVOPC64DPPInst(MI); // Special VOP3 case
797 else if (AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::dpp8) !=
798 -1)
799 convertDPP8Inst(MI);
800 else if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
801 convertVOP3DPPInst(MI); // Regular VOP3 case
802 }
803
804 convertTrue16OpSel(MI);
805
806 if (AMDGPU::isMAC(Opc: MI.getOpcode())) {
807 // Insert dummy unused src2_modifiers.
808 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
809 Name: AMDGPU::OpName::src2_modifiers);
810 }
811
812 if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
813 MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
814 // Insert dummy unused src2_modifiers.
815 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
816 Name: AMDGPU::OpName::src2_modifiers);
817 }
818
819 if ((MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
820 !AMDGPU::hasGDS(STI)) {
821 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::gds);
822 }
823
824 if (MCII->get(Opcode: MI.getOpcode()).TSFlags &
825 (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD)) {
826 int CPolPos = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
827 Name: AMDGPU::OpName::cpol);
828 if (CPolPos != -1) {
829 unsigned CPol =
830 (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
831 AMDGPU::CPol::GLC : 0;
832 if (MI.getNumOperands() <= (unsigned)CPolPos) {
833 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: CPol),
834 Name: AMDGPU::OpName::cpol);
835 } else if (CPol) {
836 MI.getOperand(i: CPolPos).setImm(MI.getOperand(i: CPolPos).getImm() | CPol);
837 }
838 }
839 }
840
841 if ((MCII->get(Opcode: MI.getOpcode()).TSFlags &
842 (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
843 (STI.hasFeature(Feature: AMDGPU::FeatureGFX90AInsts))) {
844 // GFX90A lost TFE, its place is occupied by ACC.
845 int TFEOpIdx =
846 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::tfe);
847 if (TFEOpIdx != -1) {
848 auto *TFEIter = MI.begin();
849 std::advance(i&: TFEIter, n: TFEOpIdx);
850 MI.insert(I: TFEIter, Op: MCOperand::createImm(Val: 0));
851 }
852 }
853
854 // Validate buffer instruction offsets for GFX12+ - must not be a negative.
855 if (isGFX12Plus() && isBufferInstruction(MI)) {
856 int OffsetIdx =
857 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::offset);
858 if (OffsetIdx != -1) {
859 uint32_t Imm = MI.getOperand(i: OffsetIdx).getImm();
860 int64_t SignedOffset = SignExtend64<24>(x: Imm);
861 if (SignedOffset < 0)
862 return MCDisassembler::Fail;
863 }
864 }
865
866 if (MCII->get(Opcode: MI.getOpcode()).TSFlags &
867 (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) {
868 int SWZOpIdx =
869 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::swz);
870 if (SWZOpIdx != -1) {
871 auto *SWZIter = MI.begin();
872 std::advance(i&: SWZIter, n: SWZOpIdx);
873 MI.insert(I: SWZIter, Op: MCOperand::createImm(Val: 0));
874 }
875 }
876
877 const MCInstrDesc &Desc = MCII->get(Opcode: MI.getOpcode());
878 if (Desc.TSFlags & SIInstrFlags::MIMG) {
879 int VAddr0Idx =
880 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::vaddr0);
881 int RsrcIdx =
882 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::srsrc);
883 unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
884 if (VAddr0Idx >= 0 && NSAArgs > 0) {
885 unsigned NSAWords = (NSAArgs + 3) / 4;
886 if (Bytes.size() < 4 * NSAWords)
887 return MCDisassembler::Fail;
888 for (unsigned i = 0; i < NSAArgs; ++i) {
889 const unsigned VAddrIdx = VAddr0Idx + 1 + i;
890 auto VAddrRCID =
891 MCII->getOpRegClassID(OpInfo: Desc.operands()[VAddrIdx], HwModeId: HwModeRegClass);
892 MI.insert(I: MI.begin() + VAddrIdx, Op: createRegOperand(RegClassID: VAddrRCID, Val: Bytes[i]));
893 }
894 Bytes = Bytes.slice(N: 4 * NSAWords);
895 }
896
897 convertMIMGInst(MI);
898 }
899
900 if (MCII->get(Opcode: MI.getOpcode()).TSFlags &
901 (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))
902 convertMIMGInst(MI);
903
904 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
905 convertEXPInst(MI);
906
907 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
908 convertVINTERPInst(MI);
909
910 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
911 convertSDWAInst(MI);
912
913 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::IsMAI)
914 convertMAIInst(MI);
915
916 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::IsWMMA)
917 convertWMMAInst(MI);
918
919 int VDstIn_Idx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
920 Name: AMDGPU::OpName::vdst_in);
921 if (VDstIn_Idx != -1) {
922 int Tied = MCII->get(Opcode: MI.getOpcode()).getOperandConstraint(OpNum: VDstIn_Idx,
923 Constraint: MCOI::OperandConstraint::TIED_TO);
924 if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
925 !MI.getOperand(i: VDstIn_Idx).isReg() ||
926 MI.getOperand(i: VDstIn_Idx).getReg() != MI.getOperand(i: Tied).getReg())) {
927 if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
928 MI.erase(I: &MI.getOperand(i: VDstIn_Idx));
929 insertNamedMCOperand(MI,
930 Op: MCOperand::createReg(Reg: MI.getOperand(i: Tied).getReg()),
931 Name: AMDGPU::OpName::vdst_in);
932 }
933 }
934
935 bool IsSOPK = MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
936 if (AMDGPU::hasNamedOperand(Opcode: MI.getOpcode(), NamedIdx: AMDGPU::OpName::imm) && !IsSOPK)
937 convertFMAanyK(MI);
938
939 // Some VOPC instructions, e.g., v_cmpx_f_f64, use VOP3 encoding and
940 // have EXEC as implicit destination. Issue a warning if encoding for
941 // vdst is not EXEC.
942 if ((MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::VOP3) &&
943 MCII->get(Opcode: MI.getOpcode()).getNumDefs() == 0 &&
944 MCII->get(Opcode: MI.getOpcode()).hasImplicitDefOfPhysReg(Reg: AMDGPU::EXEC)) {
945 auto ExecEncoding = MRI.getEncodingValue(Reg: AMDGPU::EXEC_LO);
946 if (Bytes_[0] != ExecEncoding)
947 Status = MCDisassembler::SoftFail;
948 }
949
950 Size = MaxInstBytesNum - Bytes.size();
951 return Status;
952}
953
954void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
955 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX11Insts)) {
956 // The MCInst still has these fields even though they are no longer encoded
957 // in the GFX11 instruction.
958 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::vm);
959 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::compr);
960 }
961}
962
963void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
964 convertTrue16OpSel(MI);
965 if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
966 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
967 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
968 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
969 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx13 ||
970 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx13 ||
971 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
972 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
973 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
974 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
975 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx13 ||
976 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx13 ||
977 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
978 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
979 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
980 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
981 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx13 ||
982 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx13 ||
983 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
984 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
985 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
986 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12 ||
987 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx13 ||
988 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx13) {
989 // The MCInst has this field that is not directly encoded in the
990 // instruction.
991 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::op_sel);
992 }
993}
994
995void AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
996 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX9) ||
997 STI.hasFeature(Feature: AMDGPU::FeatureGFX10)) {
998 if (AMDGPU::hasNamedOperand(Opcode: MI.getOpcode(), NamedIdx: AMDGPU::OpName::sdst))
999 // VOPC - insert clamp
1000 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::clamp);
1001 } else if (STI.hasFeature(Feature: AMDGPU::FeatureVolcanicIslands)) {
1002 int SDst = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::sdst);
1003 if (SDst != -1) {
1004 // VOPC - insert VCC register as sdst
1005 insertNamedMCOperand(MI, Op: createRegOperand(Reg: AMDGPU::VCC),
1006 Name: AMDGPU::OpName::sdst);
1007 } else {
1008 // VOP1/2 - insert omod if present in instruction
1009 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::omod);
1010 }
1011 }
1012}
1013
1014/// Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the
1015/// appropriate subregister for the used format width.
1016static void adjustMFMA_F8F6F4OpRegClass(const MCRegisterInfo &MRI,
1017 MCOperand &MO, uint8_t NumRegs) {
1018 switch (NumRegs) {
1019 case 4:
1020 return MO.setReg(MRI.getSubReg(Reg: MO.getReg(), Idx: AMDGPU::sub0_sub1_sub2_sub3));
1021 case 6:
1022 return MO.setReg(
1023 MRI.getSubReg(Reg: MO.getReg(), Idx: AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));
1024 case 8:
1025 if (MCRegister NewReg = MRI.getSubReg(
1026 Reg: MO.getReg(), Idx: AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7)) {
1027 MO.setReg(NewReg);
1028 }
1029 return;
1030 case 12: {
1031 // There is no 384-bit subreg index defined.
1032 MCRegister BaseReg = MRI.getSubReg(Reg: MO.getReg(), Idx: AMDGPU::sub0);
1033 MCRegister NewReg = MRI.getMatchingSuperReg(
1034 Reg: BaseReg, SubIdx: AMDGPU::sub0, RC: &MRI.getRegClass(i: AMDGPU::VReg_384RegClassID));
1035 return MO.setReg(NewReg);
1036 }
1037 case 16:
1038 // No-op in cases where one operand is still f8/bf8.
1039 return;
1040 default:
1041 llvm_unreachable("Unexpected size for mfma/wmma f8f6f4 operand");
1042 }
1043}
1044
1045/// f8f6f4 instructions have different pseudos depending on the used formats. In
1046/// the disassembler table, we only have the variants with the largest register
1047/// classes which assume using an fp8/bf8 format for both operands. The actual
1048/// register class depends on the format in blgp and cbsz operands. Adjust the
1049/// register classes depending on the used format.
1050void AMDGPUDisassembler::convertMAIInst(MCInst &MI) const {
1051 int BlgpIdx =
1052 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::blgp);
1053 if (BlgpIdx == -1)
1054 return;
1055
1056 int CbszIdx =
1057 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::cbsz);
1058
1059 unsigned CBSZ = MI.getOperand(i: CbszIdx).getImm();
1060 unsigned BLGP = MI.getOperand(i: BlgpIdx).getImm();
1061
1062 const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
1063 AMDGPU::getMFMA_F8F6F4_WithFormatArgs(CBSZ, BLGP, F8F8Opcode: MI.getOpcode());
1064 if (!AdjustedRegClassOpcode ||
1065 AdjustedRegClassOpcode->Opcode == MI.getOpcode())
1066 return;
1067
1068 MI.setOpcode(AdjustedRegClassOpcode->Opcode);
1069 int Src0Idx =
1070 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::src0);
1071 int Src1Idx =
1072 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::src1);
1073 adjustMFMA_F8F6F4OpRegClass(MRI, MO&: MI.getOperand(i: Src0Idx),
1074 NumRegs: AdjustedRegClassOpcode->NumRegsSrcA);
1075 adjustMFMA_F8F6F4OpRegClass(MRI, MO&: MI.getOperand(i: Src1Idx),
1076 NumRegs: AdjustedRegClassOpcode->NumRegsSrcB);
1077}
1078
1079void AMDGPUDisassembler::convertWMMAInst(MCInst &MI) const {
1080 int FmtAIdx =
1081 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::matrix_a_fmt);
1082 if (FmtAIdx == -1)
1083 return;
1084
1085 int FmtBIdx =
1086 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::matrix_b_fmt);
1087
1088 unsigned FmtA = MI.getOperand(i: FmtAIdx).getImm();
1089 unsigned FmtB = MI.getOperand(i: FmtBIdx).getImm();
1090
1091 const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
1092 AMDGPU::getWMMA_F8F6F4_WithFormatArgs(FmtA, FmtB, F8F8Opcode: MI.getOpcode());
1093 if (!AdjustedRegClassOpcode ||
1094 AdjustedRegClassOpcode->Opcode == MI.getOpcode())
1095 return;
1096
1097 MI.setOpcode(AdjustedRegClassOpcode->Opcode);
1098 int Src0Idx =
1099 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::src0);
1100 int Src1Idx =
1101 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::src1);
1102 adjustMFMA_F8F6F4OpRegClass(MRI, MO&: MI.getOperand(i: Src0Idx),
1103 NumRegs: AdjustedRegClassOpcode->NumRegsSrcA);
1104 adjustMFMA_F8F6F4OpRegClass(MRI, MO&: MI.getOperand(i: Src1Idx),
1105 NumRegs: AdjustedRegClassOpcode->NumRegsSrcB);
1106}
1107
// Per-instruction modifier bit-vectors reconstructed from the per-source
// *_modifiers operands (bit J corresponds to source operand J).
struct VOPModifiers {
  unsigned OpSel = 0;   // op_sel bits; bit 3 is the dst bit for VOP3
  unsigned OpSelHi = 0; // op_sel_hi bits (VOP3P only)
  unsigned NegLo = 0;   // neg/neg_lo bits (VOP3P only)
  unsigned NegHi = 0;   // neg_hi bits (VOP3P only)
};
1114
1115// Reconstruct values of VOP3/VOP3P operands such as op_sel.
1116// Note that these values do not affect disassembler output,
1117// so this is only necessary for consistency with src_modifiers.
1118static VOPModifiers collectVOPModifiers(const MCInst &MI,
1119 bool IsVOP3P = false) {
1120 VOPModifiers Modifiers;
1121 unsigned Opc = MI.getOpcode();
1122 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
1123 AMDGPU::OpName::src1_modifiers,
1124 AMDGPU::OpName::src2_modifiers};
1125 for (int J = 0; J < 3; ++J) {
1126 int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: ModOps[J]);
1127 if (OpIdx == -1)
1128 continue;
1129
1130 unsigned Val = MI.getOperand(i: OpIdx).getImm();
1131
1132 Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
1133 if (IsVOP3P) {
1134 Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
1135 Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
1136 Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
1137 } else if (J == 0) {
1138 Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
1139 }
1140 }
1141
1142 return Modifiers;
1143}
1144
1145// Instructions decode the op_sel/suffix bits into the src_modifier
1146// operands. Copy those bits into the src operands for true16 VGPRs.
1147void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const {
1148 const unsigned Opc = MI.getOpcode();
1149 const MCRegisterClass &ConversionRC =
1150 MRI.getRegClass(i: AMDGPU::VGPR_16RegClassID);
1151 constexpr std::array<std::tuple<AMDGPU::OpName, AMDGPU::OpName, unsigned>, 4>
1152 OpAndOpMods = {._M_elems: {{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
1153 SISrcMods::OP_SEL_0},
1154 {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
1155 SISrcMods::OP_SEL_0},
1156 {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
1157 SISrcMods::OP_SEL_0},
1158 {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
1159 SISrcMods::DST_OP_SEL}}};
1160 for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
1161 int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: OpName);
1162 int OpModsIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: OpModsName);
1163 if (OpIdx == -1 || OpModsIdx == -1)
1164 continue;
1165 MCOperand &Op = MI.getOperand(i: OpIdx);
1166 if (!Op.isReg())
1167 continue;
1168 if (!ConversionRC.contains(Reg: Op.getReg()))
1169 continue;
1170 unsigned OpEnc = MRI.getEncodingValue(Reg: Op.getReg());
1171 const MCOperand &OpMods = MI.getOperand(i: OpModsIdx);
1172 unsigned ModVal = OpMods.getImm();
1173 if (ModVal & OpSelMask) { // isHi
1174 unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
1175 Op.setReg(ConversionRC.getRegister(i: RegIdx * 2 + 1));
1176 }
1177 }
1178}
1179
1180// MAC opcodes have special old and src2 operands.
1181// src2 is tied to dst, while old is not tied (but assumed to be).
1182bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
1183 constexpr int DST_IDX = 0;
1184 auto Opcode = MI.getOpcode();
1185 const auto &Desc = MCII->get(Opcode);
1186 auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::old);
1187
1188 if (OldIdx != -1 && Desc.getOperandConstraint(
1189 OpNum: OldIdx, Constraint: MCOI::OperandConstraint::TIED_TO) == -1) {
1190 assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
1191 assert(Desc.getOperandConstraint(
1192 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
1193 MCOI::OperandConstraint::TIED_TO) == DST_IDX);
1194 (void)DST_IDX;
1195 return true;
1196 }
1197
1198 return false;
1199}
1200
// Create dummy old operand and insert dummy unused src2_modifiers
void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
  // Two operands will be inserted below, so the descriptor must still have
  // room for at least both of them.
  assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
  // 'old' is assumed tied to dst (see isMacDPP); a zero register is enough.
  insertNamedMCOperand(MI, Op: MCOperand::createReg(Reg: 0), Name: AMDGPU::OpName::old);
  insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
                       Name: AMDGPU::OpName::src2_modifiers);
}
1208
1209void AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
1210 unsigned Opc = MI.getOpcode();
1211
1212 int VDstInIdx =
1213 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::vdst_in);
1214 if (VDstInIdx != -1)
1215 insertNamedMCOperand(MI, Op: MI.getOperand(i: 0), Name: AMDGPU::OpName::vdst_in);
1216
1217 unsigned DescNumOps = MCII->get(Opcode: Opc).getNumOperands();
1218 if (MI.getNumOperands() < DescNumOps &&
1219 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel)) {
1220 convertTrue16OpSel(MI);
1221 auto Mods = collectVOPModifiers(MI);
1222 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.OpSel),
1223 Name: AMDGPU::OpName::op_sel);
1224 } else {
1225 // Insert dummy unused src modifiers.
1226 if (MI.getNumOperands() < DescNumOps &&
1227 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::src0_modifiers))
1228 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
1229 Name: AMDGPU::OpName::src0_modifiers);
1230
1231 if (MI.getNumOperands() < DescNumOps &&
1232 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::src1_modifiers))
1233 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
1234 Name: AMDGPU::OpName::src1_modifiers);
1235 }
1236}
1237
1238void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
1239 convertTrue16OpSel(MI);
1240
1241 int VDstInIdx =
1242 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::vdst_in);
1243 if (VDstInIdx != -1)
1244 insertNamedMCOperand(MI, Op: MI.getOperand(i: 0), Name: AMDGPU::OpName::vdst_in);
1245
1246 unsigned Opc = MI.getOpcode();
1247 unsigned DescNumOps = MCII->get(Opcode: Opc).getNumOperands();
1248 if (MI.getNumOperands() < DescNumOps &&
1249 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel)) {
1250 auto Mods = collectVOPModifiers(MI);
1251 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.OpSel),
1252 Name: AMDGPU::OpName::op_sel);
1253 }
1254}
1255
1256// Given a wide tuple \p Reg check if it will overflow 256 registers.
1257// \returns \p Reg on success or NoRegister otherwise.
1258static MCRegister CheckVGPROverflow(MCRegister Reg, const MCRegisterClass &RC,
1259 const MCRegisterInfo &MRI) {
1260 unsigned NumRegs = RC.getSizeInBits() / 32;
1261 MCRegister Sub0 = MRI.getSubReg(Reg, Idx: AMDGPU::sub0);
1262 if (!Sub0)
1263 return Reg;
1264
1265 MCRegister BaseReg;
1266 if (MRI.getRegClass(i: AMDGPU::VGPR_32RegClassID).contains(Reg: Sub0))
1267 BaseReg = AMDGPU::VGPR0;
1268 else if (MRI.getRegClass(i: AMDGPU::AGPR_32RegClassID).contains(Reg: Sub0))
1269 BaseReg = AMDGPU::AGPR0;
1270
1271 assert(BaseReg && "Only vector registers expected");
1272
1273 return (Sub0 - BaseReg + NumRegs <= 256) ? Reg : MCRegister();
1274}
1275
1276// Note that before gfx10, the MIMG encoding provided no information about
1277// VADDR size. Consequently, decoded instructions always show address as if it
1278// has 1 dword, which could be not really so.
void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
  auto TSFlags = MCII->get(Opcode: MI.getOpcode()).TSFlags;

  // vdst is only present on atomics (the returned value); vdata is the image
  // data operand common to all forms.
  int VDstIdx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
                                           Name: AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
                                            Name: AMDGPU::OpName::vdata);
  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::vaddr0);
  // MIMG names the resource operand 'srsrc'; VIMAGE/VSAMPLE use 'rsrc'.
  AMDGPU::OpName RsrcOpName = (TSFlags & SIInstrFlags::MIMG)
                                  ? AMDGPU::OpName::srsrc
                                  : AMDGPU::OpName::rsrc;
  int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: RsrcOpName);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
                                            Name: AMDGPU::OpName::dmask);

  int TFEIdx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
                                          Name: AMDGPU::OpName::tfe);
  int D16Idx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
                                          Name: AMDGPU::OpName::d16);

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc: MI.getOpcode());
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(BaseOpcode: Info->BaseOpcode);

  assert(VDataIdx != -1);
  if (BaseOpcode->BVH) {
    // Add A16 operand for intersect_ray instructions
    addOperand(Inst&: MI, Opnd: MCOperand::createImm(Val: BaseOpcode->A16));
    return;
  }

  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
  bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
  bool IsNSA = false;
  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

  // On GFX10+ the real address size can be derived from dim/a16; earlier
  // encodings carry no VADDR size information, so keep the table default.
  if (isGFX10Plus()) {
    unsigned DimIdx =
        AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::dim);
    int A16Idx =
        AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::a16);
    const AMDGPU::MIMGDimInfo *Dim =
        AMDGPU::getMIMGDimInfoByEncoding(DimEnc: MI.getOperand(i: DimIdx).getImm());
    const bool IsA16 = (A16Idx != -1 && MI.getOperand(i: A16Idx).getImm());

    AddrSize =
        AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, IsG16Supported: AMDGPU::hasG16(STI));

    // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
    // VIMAGE insts other than BVH never use vaddr4.
    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
    if (!IsNSA) {
      if (!IsVSample && AddrSize > 12)
        AddrSize = 16;
    } else {
      if (AddrSize > Info->VAddrDwords) {
        if (!STI.hasFeature(Feature: AMDGPU::FeaturePartialNSAEncoding)) {
          // The NSA encoding does not contain enough operands for the
          // combination of base opcode / dimension. Should this be an error?
          return;
        }
        IsPartialNSA = true;
      }
    }
  }

  // Data size: one dword per enabled dmask channel (gather4 always 4),
  // halved for packed D16, plus one for TFE status.
  unsigned DMask = MI.getOperand(i: DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(a: llvm::popcount(Value: DMask), b: 1);

  bool D16 = D16Idx >= 0 && MI.getOperand(i: D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }

  if (TFEIdx != -1 && MI.getOperand(i: TFEIdx).getImm())
    DstSize += 1;

  // Already decoded with the correct widths; nothing to rewrite.
  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
    return;

  int NewOpcode =
      AMDGPU::getMIMGOpcode(BaseOpcode: Info->BaseOpcode, MIMGEncoding: Info->MIMGEncoding, VDataDwords: DstSize, VAddrDwords: AddrSize);
  if (NewOpcode == -1)
    return;

  // Widen the register to the correct number of enabled channels.
  MCRegister NewVdata;
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->getOpRegClassID(
        OpInfo: MCII->get(Opcode: NewOpcode).operands()[VDataIdx], HwModeId: HwModeRegClass);

    // Get first subregister of VData
    MCRegister Vdata0 = MI.getOperand(i: VDataIdx).getReg();
    MCRegister VdataSub0 = MRI.getSubReg(Reg: Vdata0, Idx: AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;

    const MCRegisterClass &NewRC = MRI.getRegClass(i: DataRCID);
    NewVdata = MRI.getMatchingSuperReg(Reg: Vdata0, SubIdx: AMDGPU::sub0, RC: &NewRC);
    NewVdata = CheckVGPROverflow(Reg: NewVdata, RC: NewRC, MRI);
    if (!NewVdata) {
      // It's possible to encode this such that the low register + enabled
      // components exceeds the register count.
      return;
    }
  }

  // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
  // If using partial NSA on GFX11+ widen last address register.
  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
  MCRegister NewVAddrSA;
  if (STI.hasFeature(Feature: AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    MCRegister VAddrSA = MI.getOperand(i: VAddrSAIdx).getReg();
    MCRegister VAddrSubSA = MRI.getSubReg(Reg: VAddrSA, Idx: AMDGPU::sub0);
    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

    auto AddrRCID = MCII->getOpRegClassID(
        OpInfo: MCII->get(Opcode: NewOpcode).operands()[VAddrSAIdx], HwModeId: HwModeRegClass);

    const MCRegisterClass &NewRC = MRI.getRegClass(i: AddrRCID);
    NewVAddrSA = MRI.getMatchingSuperReg(Reg: VAddrSA, SubIdx: AMDGPU::sub0, RC: &NewRC);
    NewVAddrSA = CheckVGPROverflow(Reg: NewVAddrSA, RC: NewRC, MRI);
    if (!NewVAddrSA)
      return;
  }

  // All checks passed; commit the rewritten opcode and operands.
  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {
    MI.getOperand(i: VDataIdx) = MCOperand::createReg(Reg: NewVdata);

    if (IsAtomic) {
      // Atomic operations have an additional operand (a copy of data)
      MI.getOperand(i: VDstIdx) = MCOperand::createReg(Reg: NewVdata);
    }
  }

  if (NewVAddrSA) {
    MI.getOperand(i: VAddrSAIdx) = MCOperand::createReg(Reg: NewVAddrSA);
  } else if (IsNSA) {
    // Full NSA with a narrower address: drop the now-unused trailing vaddrs.
    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(First: MI.begin() + VAddr0Idx + AddrSize,
             Last: MI.begin() + VAddr0Idx + Info->VAddrDwords);
  }
}
1430
1431// Opsel and neg bits are used in src_modifiers and standalone operands. Autogen
1432// decoder only adds to src_modifiers, so manually add the bits to the other
1433// operands.
1434void AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
1435 unsigned Opc = MI.getOpcode();
1436 unsigned DescNumOps = MCII->get(Opcode: Opc).getNumOperands();
1437 auto Mods = collectVOPModifiers(MI, IsVOP3P: true);
1438
1439 if (MI.getNumOperands() < DescNumOps &&
1440 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::vdst_in))
1441 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::vdst_in);
1442
1443 if (MI.getNumOperands() < DescNumOps &&
1444 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel))
1445 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.OpSel),
1446 Name: AMDGPU::OpName::op_sel);
1447 if (MI.getNumOperands() < DescNumOps &&
1448 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel_hi))
1449 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.OpSelHi),
1450 Name: AMDGPU::OpName::op_sel_hi);
1451 if (MI.getNumOperands() < DescNumOps &&
1452 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::neg_lo))
1453 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.NegLo),
1454 Name: AMDGPU::OpName::neg_lo);
1455 if (MI.getNumOperands() < DescNumOps &&
1456 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::neg_hi))
1457 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.NegHi),
1458 Name: AMDGPU::OpName::neg_hi);
1459}
1460
1461// Create dummy old operand and insert optional operands
1462void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
1463 unsigned Opc = MI.getOpcode();
1464 unsigned DescNumOps = MCII->get(Opcode: Opc).getNumOperands();
1465
1466 if (MI.getNumOperands() < DescNumOps &&
1467 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::old))
1468 insertNamedMCOperand(MI, Op: MCOperand::createReg(Reg: 0), Name: AMDGPU::OpName::old);
1469
1470 if (MI.getNumOperands() < DescNumOps &&
1471 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::src0_modifiers))
1472 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
1473 Name: AMDGPU::OpName::src0_modifiers);
1474
1475 if (MI.getNumOperands() < DescNumOps &&
1476 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::src1_modifiers))
1477 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
1478 Name: AMDGPU::OpName::src1_modifiers);
1479}
1480
1481void AMDGPUDisassembler::convertVOPC64DPPInst(MCInst &MI) const {
1482 unsigned Opc = MI.getOpcode();
1483 unsigned DescNumOps = MCII->get(Opcode: Opc).getNumOperands();
1484
1485 convertTrue16OpSel(MI);
1486
1487 if (MI.getNumOperands() < DescNumOps &&
1488 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel)) {
1489 VOPModifiers Mods = collectVOPModifiers(MI);
1490 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.OpSel),
1491 Name: AMDGPU::OpName::op_sel);
1492 }
1493}
1494
// Attach the literal constant consumed earlier in decoding as the immX
// operand of an fmaak/fmamk-style instruction.
void AMDGPUDisassembler::convertFMAanyK(MCInst &MI) const {
  assert(HasLiteral && "Should have decoded a literal");
  insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Literal), Name: AMDGPU::OpName::immX);
}
1499
1500const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
1501 return getContext().getRegisterInfo()->
1502 getRegClassName(Class: &AMDGPUMCRegisterClasses[RegClassID]);
1503}
1504
// Emit a decode-error note into the disassembly comment stream and return an
// empty (invalid) operand as a placeholder.
inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}
1514
// Wrap \p Reg in an MCOperand, mapping it to the subtarget-specific MC
// register first.
inline MCOperand AMDGPUDisassembler::createRegOperand(MCRegister Reg) const {
  return MCOperand::createReg(Reg: AMDGPU::getMCReg(Reg, STI));
}
1518
1519inline
1520MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
1521 unsigned Val) const {
1522 const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
1523 if (Val >= RegCl.getNumRegs())
1524 return errOperand(V: Val, ErrMsg: Twine(getRegClassName(RegClassID)) +
1525 ": unknown register " + Twine(Val));
1526 return createRegOperand(Reg: RegCl.getRegister(i: Val));
1527}
1528
// Build a scalar-register operand from encoding value \p Val. The encoding
// counts in 32-bit register granules, so for wider classes the value is
// shifted right by log2(alignment) to obtain the class-relative index.
inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI - 102
  // Valery: here we accepting as much as we can, let assembler sort it out
  int shift = 0;  // log2 of the class's register alignment in dwords
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::TTMP_96RegClassID:
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
    // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
    // this bundle?
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

  // A misaligned encoding is suspicious but tolerated; warn and continue.
  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(RegClassID: SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(RegClassID: SRegClassID, Val: Val >> shift);
}
1578
1579MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
1580 bool IsHi) const {
1581 unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
1582 return createRegOperand(RegClassID: AMDGPU::VGPR_16RegClassID, Val: RegIdxInVGPR16);
1583}
1584
// Decode Literals for insts which always have a literal in the encoding
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
  if (HasLiteral) {
    // Only VOPD may legitimately reference the literal slot twice, and then
    // both halves must agree on a single value.
    assert(
        AMDGPU::hasVOPD(STI) &&
        "Should only decode multiple kimm with VOPD, check VSrc operand types");
    if (Literal != Val)
      return errOperand(V: Val, ErrMsg: "More than one unique literal is illegal");
  }
  // Record the literal so later operands of the same instruction reuse it.
  HasLiteral = true;
  Literal = Val;
  return MCOperand::createImm(Val: Literal);
}
1599
1600MCOperand
1601AMDGPUDisassembler::decodeMandatoryLiteral64Constant(uint64_t Val) const {
1602 if (HasLiteral) {
1603 if (Literal != Val)
1604 return errOperand(V: Val, ErrMsg: "More than one unique literal is illegal");
1605 }
1606 HasLiteral = true;
1607 Literal = Val;
1608
1609 bool UseLit64 = Hi_32(Value: Literal) == 0;
1610 return UseLit64 ? MCOperand::createExpr(Val: AMDGPUMCExpr::createLit(
1611 Lit: LitModifier::Lit64, Value: Literal, Ctx&: getContext()))
1612 : MCOperand::createImm(Val: Literal);
1613}
1614
MCOperand
AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc,
                                          const MCOperandInfo &OpDesc) const {
  // Decode the 32-bit literal dword trailing the instruction (consumed
  // lazily on first use) and, when the raw value would otherwise print as an
  // inline constant for this operand type, wrap it in a lit() modifier so it
  // round-trips through the assembler as a literal.
  // For now all literal constants are supposed to be unsigned integer
  // ToDo: deal with signed/unsigned 64-bit integer constants
  // ToDo: deal with float/double constants
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(V: 0, ErrMsg: "cannot read literal, inst bytes left " +
                               Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = eatBytes<uint32_t>(Bytes);
  }

  // For disassembling always assume all inline constants are available.
  bool HasInv2Pi = true;

  // Invalid instruction codes may contain literals for inline-only
  // operands, so we support them here as well.
  int64_t Val = Literal;
  // UseLit: the value collides with an inline-constant encoding for this
  // operand type and must be printed as lit(...).
  bool UseLit = false;
  switch (OpDesc.OperandType) {
  default:
    llvm_unreachable("Unexpected operand type!");
  case AMDGPU::OPERAND_REG_IMM_BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
    UseLit = AMDGPU::isInlinableLiteralBF16(Literal: Val, HasInv2Pi);
    break;
  case AMDGPU::OPERAND_REG_IMM_V2BF16:
    UseLit = AMDGPU::isInlinableLiteralV2BF16(Literal: Val);
    break;
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    UseLit = AMDGPU::isInlinableLiteralFP16(Literal: Val, HasInv2Pi);
    break;
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    UseLit = AMDGPU::isInlinableLiteralV2F16(Literal: Val);
    break;
  case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
    UseLit = AMDGPU::isPKFMACF16InlineConstant(Literal: Val, IsGFX11Plus: isGFX11Plus());
    break;
  case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
    // Operand type never inlines; the raw immediate is always correct.
    break;
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    UseLit = AMDGPU::isInlinableLiteralI16(Literal: Val, HasInv2Pi);
    break;
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
    UseLit = AMDGPU::isInlinableLiteralV2I16(Literal: Val);
    break;
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_KIMM32:
    UseLit = AMDGPU::isInlinableLiteral32(Literal: Val, HasInv2Pi);
    break;
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    // A 32-bit literal for a 64-bit FP operand occupies the high dword.
    Val <<= 32;
    break;
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    UseLit = AMDGPU::isInlinableLiteral64(Literal: Val, HasInv2Pi);
    break;
  case MCOI::OPERAND_REGISTER:
    // TODO: Disassembling V_DUAL_FMAMK_F32_X_FMAMK_F32_gfx11 hits
    // decoding a literal in a position of a register operand. Give
    // it special handling in the caller, decodeImmOperands(), instead
    // of quietly allowing it here.
    break;
  }

  return UseLit ? MCOperand::createExpr(Val: AMDGPUMCExpr::createLit(
                      Lit: LitModifier::Lit, Value: Val, Ctx&: getContext()))
                : MCOperand::createImm(Val);
}
1701
1702MCOperand AMDGPUDisassembler::decodeLiteral64Constant() const {
1703 assert(STI.hasFeature(AMDGPU::Feature64BitLiterals));
1704
1705 if (!HasLiteral) {
1706 if (Bytes.size() < 8) {
1707 return errOperand(V: 0, ErrMsg: "cannot read literal64, inst bytes left " +
1708 Twine(Bytes.size()));
1709 }
1710 HasLiteral = true;
1711 Literal = eatBytes<uint64_t>(Bytes);
1712 }
1713
1714 bool UseLit64 = Hi_32(Value: Literal) == 0;
1715 return UseLit64 ? MCOperand::createExpr(Val: AMDGPUMCExpr::createLit(
1716 Lit: LitModifier::Lit64, Value: Literal, Ctx&: getContext()))
1717 : MCOperand::createImm(Val: Literal);
1718}
1719
1720MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
1721 using namespace AMDGPU::EncValues;
1722
1723 assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
1724 return MCOperand::createImm(Val: (Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
1725 (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
1726 (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
1727 // Cast prevents negative overflow.
1728}
1729
// Map a floating-point inline-constant encoding (240-248) to its IEEE-754
// single-precision bit pattern, mirroring the F16/BF16 helpers below.
static int64_t getInlineImmVal32(unsigned Imm) {
  // clang-format off
  switch (Imm) {
  case 240: return 0x3F000000; // 0.5
  case 241: return 0xBF000000; // -0.5
  case 242: return 0x3F800000; // 1.0
  case 243: return 0xBF800000; // -1.0
  case 244: return 0x40000000; // 2.0
  case 245: return 0xC0000000; // -2.0
  case 246: return 0x40800000; // 4.0
  case 247: return 0xC0800000; // -4.0
  case 248: return 0x3e22f983; // 1 / (2 * PI)
  default: llvm_unreachable("invalid fp inline imm");
  }
  // clang-format on
}
1754
// Map a floating-point inline-constant encoding (240-248) to its IEEE-754
// double-precision bit pattern.
static int64_t getInlineImmVal64(unsigned Imm) {
  // clang-format off
  switch (Imm) {
  case 240: return static_cast<int64_t>(0x3FE0000000000000ULL); // 0.5
  case 241: return static_cast<int64_t>(0xBFE0000000000000ULL); // -0.5
  case 242: return static_cast<int64_t>(0x3FF0000000000000ULL); // 1.0
  case 243: return static_cast<int64_t>(0xBFF0000000000000ULL); // -1.0
  case 244: return static_cast<int64_t>(0x4000000000000000ULL); // 2.0
  case 245: return static_cast<int64_t>(0xC000000000000000ULL); // -2.0
  case 246: return static_cast<int64_t>(0x4010000000000000ULL); // 4.0
  case 247: return static_cast<int64_t>(0xC010000000000000ULL); // -4.0
  case 248: return static_cast<int64_t>(0x3fc45f306dc9c882ULL); // 1 / (2 * PI)
  default: llvm_unreachable("invalid fp inline imm");
  }
  // clang-format on
}
1779
// Map a floating-point inline-constant encoding (240-248) to its IEEE-754
// half-precision bit pattern.
static int64_t getInlineImmValF16(unsigned Imm) {
  // Bit patterns for 0.5, -0.5, 1.0, -1.0, 2.0, -2.0, 4.0, -4.0 and
  // 1/(2*PI), indexed by Imm - 240.
  static constexpr uint16_t F16Bits[9] = {0x3800, 0xB800, 0x3C00,
                                          0xBC00, 0x4000, 0xC000,
                                          0x4400, 0xC400, 0x3118};
  if (Imm < 240 || Imm > 248)
    llvm_unreachable("invalid fp inline imm");
  return F16Bits[Imm - 240];
}
1804
// Map a floating-point inline-constant encoding (240-248) to its bfloat16
// bit pattern.
static int64_t getInlineImmValBF16(unsigned Imm) {
  // Bit patterns for 0.5, -0.5, 1.0, -1.0, 2.0, -2.0, 4.0, -4.0 and
  // 1/(2*PI), indexed by Imm - 240.
  static constexpr uint16_t BF16Bits[9] = {0x3F00, 0xBF00, 0x3F80,
                                           0xBF80, 0x4000, 0xC000,
                                           0x4080, 0xC080, 0x3E22};
  if (Imm < 240 || Imm > 248)
    llvm_unreachable("invalid fp inline imm");
  return BF16Bits[Imm - 240];
}
1829
1830unsigned AMDGPUDisassembler::getVgprClassId(unsigned Width) const {
1831 using namespace AMDGPU;
1832
1833 switch (Width) {
1834 case 16:
1835 case 32:
1836 return VGPR_32RegClassID;
1837 case 64:
1838 return VReg_64RegClassID;
1839 case 96:
1840 return VReg_96RegClassID;
1841 case 128:
1842 return VReg_128RegClassID;
1843 case 160:
1844 return VReg_160RegClassID;
1845 case 192:
1846 return VReg_192RegClassID;
1847 case 256:
1848 return VReg_256RegClassID;
1849 case 288:
1850 return VReg_288RegClassID;
1851 case 320:
1852 return VReg_320RegClassID;
1853 case 352:
1854 return VReg_352RegClassID;
1855 case 384:
1856 return VReg_384RegClassID;
1857 case 512:
1858 return VReg_512RegClassID;
1859 case 1024:
1860 return VReg_1024RegClassID;
1861 }
1862 llvm_unreachable("Invalid register width!");
1863}
1864
1865unsigned AMDGPUDisassembler::getAgprClassId(unsigned Width) const {
1866 using namespace AMDGPU;
1867
1868 switch (Width) {
1869 case 16:
1870 case 32:
1871 return AGPR_32RegClassID;
1872 case 64:
1873 return AReg_64RegClassID;
1874 case 96:
1875 return AReg_96RegClassID;
1876 case 128:
1877 return AReg_128RegClassID;
1878 case 160:
1879 return AReg_160RegClassID;
1880 case 256:
1881 return AReg_256RegClassID;
1882 case 288:
1883 return AReg_288RegClassID;
1884 case 320:
1885 return AReg_320RegClassID;
1886 case 352:
1887 return AReg_352RegClassID;
1888 case 384:
1889 return AReg_384RegClassID;
1890 case 512:
1891 return AReg_512RegClassID;
1892 case 1024:
1893 return AReg_1024RegClassID;
1894 }
1895 llvm_unreachable("Invalid register width!");
1896}
1897
1898unsigned AMDGPUDisassembler::getSgprClassId(unsigned Width) const {
1899 using namespace AMDGPU;
1900
1901 switch (Width) {
1902 case 16:
1903 case 32:
1904 return SGPR_32RegClassID;
1905 case 64:
1906 return SGPR_64RegClassID;
1907 case 96:
1908 return SGPR_96RegClassID;
1909 case 128:
1910 return SGPR_128RegClassID;
1911 case 160:
1912 return SGPR_160RegClassID;
1913 case 256:
1914 return SGPR_256RegClassID;
1915 case 288:
1916 return SGPR_288RegClassID;
1917 case 320:
1918 return SGPR_320RegClassID;
1919 case 352:
1920 return SGPR_352RegClassID;
1921 case 384:
1922 return SGPR_384RegClassID;
1923 case 512:
1924 return SGPR_512RegClassID;
1925 }
1926 llvm_unreachable("Invalid register width!");
1927}
1928
1929unsigned AMDGPUDisassembler::getTtmpClassId(unsigned Width) const {
1930 using namespace AMDGPU;
1931
1932 switch (Width) {
1933 case 16:
1934 case 32:
1935 return TTMP_32RegClassID;
1936 case 64:
1937 return TTMP_64RegClassID;
1938 case 128:
1939 return TTMP_128RegClassID;
1940 case 256:
1941 return TTMP_256RegClassID;
1942 case 288:
1943 return TTMP_288RegClassID;
1944 case 320:
1945 return TTMP_320RegClassID;
1946 case 352:
1947 return TTMP_352RegClassID;
1948 case 384:
1949 return TTMP_384RegClassID;
1950 case 512:
1951 return TTMP_512RegClassID;
1952 }
1953 llvm_unreachable("Invalid register width!");
1954}
1955
1956int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
1957 using namespace AMDGPU::EncValues;
1958
1959 unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
1960 unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
1961
1962 return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
1963}
1964
1965MCOperand AMDGPUDisassembler::decodeSrcOp(const MCInst &Inst, unsigned Width,
1966 unsigned Val) const {
1967 using namespace AMDGPU::EncValues;
1968
1969 assert(Val < 1024); // enum10
1970
1971 bool IsAGPR = Val & 512;
1972 Val &= 511;
1973
1974 if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
1975 return createRegOperand(RegClassID: IsAGPR ? getAgprClassId(Width)
1976 : getVgprClassId(Width), Val: Val - VGPR_MIN);
1977 }
1978 return decodeNonVGPRSrcOp(Inst, Width, Val: Val & 0xFF);
1979}
1980
MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const MCInst &Inst,
                                                 unsigned Width,
                                                 unsigned Val) const {
  // Decode an 8-bit scalar source encoding, trying in order: SGPRs, ttmps,
  // inline constants / literal markers, then special registers.
  // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
  // decoded earlier.
  assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
  using namespace AMDGPU::EncValues;

  if (Val <= SGPR_MAX) {
    // "SGPR_MIN <= Val" is always true and causes compilation warning.
    static_assert(SGPR_MIN == 0);
    return createSRegOperand(SRegClassID: getSgprClassId(Width), Val: Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(SRegClassID: getTtmpClassId(Width), Val: TTmpIdx);
  }

  // Inline constants and the 32-bit literal marker are passed through as raw
  // immediates here.
  if ((INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) ||
      (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) ||
      Val == LITERAL_CONST)
    return MCOperand::createImm(Val);

  // The 64-bit literal marker consumes the trailing 8-byte literal directly.
  if (Val == LITERAL64_CONST && STI.hasFeature(Feature: AMDGPU::Feature64BitLiterals)) {
    return decodeLiteral64Constant();
  }

  // Remaining encodings name special registers, which depend on the operand
  // width.
  switch (Width) {
  case 32:
  case 16:
    return decodeSpecialReg32(Val);
  case 64:
    return decodeSpecialReg64(Val);
  case 96:
  case 128:
  case 256:
  case 512:
    return decodeSpecialReg96Plus(Val);
  default:
    llvm_unreachable("unexpected immediate type");
  }
}
2024
2025// Bit 0 of DstY isn't stored in the instruction, because it's always the
2026// opposite of bit 0 of DstX.
2027MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
2028 unsigned Val) const {
2029 int VDstXInd =
2030 AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: AMDGPU::OpName::vdstX);
2031 assert(VDstXInd != -1);
2032 assert(Inst.getOperand(VDstXInd).isReg());
2033 unsigned XDstReg = MRI.getEncodingValue(Reg: Inst.getOperand(i: VDstXInd).getReg());
2034 Val |= ~XDstReg & 1;
2035 return createRegOperand(RegClassID: getVgprClassId(Width: 32), Val);
2036}
2037
// Map a scalar-source encoding that is not an SGPR/ttmp/inline constant onto
// the corresponding 32-bit special register (half-pairs and src_* pseudos).
MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  // clang-format off
  case 102: return createRegOperand(Reg: FLAT_SCR_LO);
  case 103: return createRegOperand(Reg: FLAT_SCR_HI);
  case 104: return createRegOperand(Reg: XNACK_MASK_LO);
  case 105: return createRegOperand(Reg: XNACK_MASK_HI);
  case 106: return createRegOperand(Reg: VCC_LO);
  case 107: return createRegOperand(Reg: VCC_HI);
  case 108: return createRegOperand(Reg: TBA_LO);
  case 109: return createRegOperand(Reg: TBA_HI);
  case 110: return createRegOperand(Reg: TMA_LO);
  case 111: return createRegOperand(Reg: TMA_HI);
  // Encodings 124/125 swapped meanings (m0 vs null) on GFX11.
  case 124:
    return isGFX11Plus() ? createRegOperand(Reg: SGPR_NULL) : createRegOperand(Reg: M0);
  case 125:
    return isGFX11Plus() ? createRegOperand(Reg: M0) : createRegOperand(Reg: SGPR_NULL);
  case 126: return createRegOperand(Reg: EXEC_LO);
  case 127: return createRegOperand(Reg: EXEC_HI);
  case 230: return createRegOperand(Reg: SRC_FLAT_SCRATCH_BASE_LO);
  case 231: return createRegOperand(Reg: SRC_FLAT_SCRATCH_BASE_HI);
  case 235: return createRegOperand(Reg: SRC_SHARED_BASE_LO);
  case 236: return createRegOperand(Reg: SRC_SHARED_LIMIT_LO);
  case 237: return createRegOperand(Reg: SRC_PRIVATE_BASE_LO);
  case 238: return createRegOperand(Reg: SRC_PRIVATE_LIMIT_LO);
  case 239: return createRegOperand(Reg: SRC_POPS_EXITING_WAVE_ID);
  case 251: return createRegOperand(Reg: SRC_VCCZ);
  case 252: return createRegOperand(Reg: SRC_EXECZ);
  case 253: return createRegOperand(Reg: SRC_SCC);
  case 254: return createRegOperand(Reg: LDS_DIRECT);
  default: break;
  // clang-format on
  }
  return errOperand(V: Val, ErrMsg: "unknown operand encoding " + Twine(Val));
}
2075
// Map a scalar-source encoding onto the corresponding 64-bit special
// register (full register pairs such as vcc, exec, flat_scratch).
MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(Reg: FLAT_SCR);
  case 104: return createRegOperand(Reg: XNACK_MASK);
  case 106: return createRegOperand(Reg: VCC);
  case 108: return createRegOperand(Reg: TBA);
  case 110: return createRegOperand(Reg: TMA);
  // Null moved from encoding 125 to 124 on GFX11; the other value has no
  // 64-bit meaning and falls through to the error path.
  case 124:
    if (isGFX11Plus())
      return createRegOperand(Reg: SGPR_NULL);
    break;
  case 125:
    if (!isGFX11Plus())
      return createRegOperand(Reg: SGPR_NULL);
    break;
  case 126: return createRegOperand(Reg: EXEC);
  // NOTE(review): returns the _LO half for encoding 230 even in the 64-bit
  // decoder, unlike the src_shared/src_private pairs below — confirm intended.
  case 230: return createRegOperand(Reg: SRC_FLAT_SCRATCH_BASE_LO);
  case 235: return createRegOperand(Reg: SRC_SHARED_BASE);
  case 236: return createRegOperand(Reg: SRC_SHARED_LIMIT);
  case 237: return createRegOperand(Reg: SRC_PRIVATE_BASE);
  case 238: return createRegOperand(Reg: SRC_PRIVATE_LIMIT);
  case 239: return createRegOperand(Reg: SRC_POPS_EXITING_WAVE_ID);
  case 251: return createRegOperand(Reg: SRC_VCCZ);
  case 252: return createRegOperand(Reg: SRC_EXECZ);
  case 253: return createRegOperand(Reg: SRC_SCC);
  default: break;
  }
  return errOperand(V: Val, ErrMsg: "unknown operand encoding " + Twine(Val));
}
2107
2108MCOperand AMDGPUDisassembler::decodeSpecialReg96Plus(unsigned Val) const {
2109 using namespace AMDGPU;
2110
2111 switch (Val) {
2112 case 124:
2113 if (isGFX11Plus())
2114 return createRegOperand(Reg: SGPR_NULL);
2115 break;
2116 case 125:
2117 if (!isGFX11Plus())
2118 return createRegOperand(Reg: SGPR_NULL);
2119 break;
2120 default:
2121 break;
2122 }
2123 return errOperand(V: Val, ErrMsg: "unknown operand encoding " + Twine(Val));
2124}
2125
MCOperand AMDGPUDisassembler::decodeSDWASrc(unsigned Width,
                                            const unsigned Val) const {
  // Decode an SDWA source field. On GFX9/GFX10 the field is extended and
  // encodes, in order of the checks below: VGPRs, SGPRs, ttmps, and finally
  // values biased by SRC_SGPR_MIN that reuse the ordinary scalar encoding
  // space (inline constants and special registers).
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

  if (STI.hasFeature(Feature: AMDGPU::FeatureGFX9) ||
      STI.hasFeature(Feature: AMDGPU::FeatureGFX10)) {
    // XXX: cast to int is needed to avoid stupid warning:
    // compare with unsigned is always true
    if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
      return createRegOperand(RegClassID: getVgprClassId(Width),
                              Val: Val - SDWA9EncValues::SRC_VGPR_MIN);
    }
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
                              : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
      return createSRegOperand(SRegClassID: getSgprClassId(Width),
                               Val: Val - SDWA9EncValues::SRC_SGPR_MIN);
    }
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
      return createSRegOperand(SRegClassID: getTtmpClassId(Width),
                               Val: Val - SDWA9EncValues::SRC_TTMP_MIN);
    }

    // Remaining values map onto the plain scalar-source encoding after
    // removing the SGPR bias.
    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if ((INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) ||
        (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX))
      return MCOperand::createImm(Val: SVal);

    return decodeSpecialReg32(Val: SVal);
  }
  // On VI the SDWA src field only ever encodes a VGPR.
  if (STI.hasFeature(Feature: AMDGPU::FeatureVolcanicIslands))
    return createRegOperand(RegClassID: getVgprClassId(Width), Val);
  llvm_unreachable("unsupported target");
}
2164
2165MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
2166 return decodeSDWASrc(Width: 16, Val);
2167}
2168
2169MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
2170 return decodeSDWASrc(Width: 32, Val);
2171}
2172
MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
  // Decode the SDWA VOPC destination: either the implicit VCC, or — when the
  // VCC_MASK bit is set — an explicit SGPR/ttmp/special register whose width
  // follows the wavefront size.
  using namespace AMDGPU::SDWA;

  assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
          STI.hasFeature(AMDGPU::FeatureGFX10)) &&
         "SDWAVopcDst should be present only on GFX9+");

  bool IsWave32 = STI.hasFeature(Feature: AMDGPU::FeatureWavefrontSize32);

  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;

    int TTmpIdx = getTTmpIdx(Val);
    if (TTmpIdx >= 0) {
      auto TTmpClsId = getTtmpClassId(Width: IsWave32 ? 32 : 64);
      return createSRegOperand(SRegClassID: TTmpClsId, Val: TTmpIdx);
    }
    // Encodings past the SGPR range name special registers.
    if (Val > SGPR_MAX) {
      return IsWave32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val);
    }
    return createSRegOperand(SRegClassID: getSgprClassId(Width: IsWave32 ? 32 : 64), Val);
  }
  return createRegOperand(Reg: IsWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC);
}
2197
2198MCOperand AMDGPUDisassembler::decodeBoolReg(const MCInst &Inst,
2199 unsigned Val) const {
2200 return STI.hasFeature(Feature: AMDGPU::FeatureWavefrontSize32)
2201 ? decodeSrcOp(Inst, Width: 32, Val)
2202 : decodeSrcOp(Inst, Width: 64, Val);
2203}
2204
2205MCOperand AMDGPUDisassembler::decodeSplitBarrier(const MCInst &Inst,
2206 unsigned Val) const {
2207 return decodeSrcOp(Inst, Width: 32, Val);
2208}
2209
2210MCOperand AMDGPUDisassembler::decodeDpp8FI(unsigned Val) const {
2211 if (Val != AMDGPU::DPP::DPP8_FI_0 && Val != AMDGPU::DPP::DPP8_FI_1)
2212 return MCOperand();
2213 return MCOperand::createImm(Val);
2214}
2215
MCOperand AMDGPUDisassembler::decodeVersionImm(unsigned Imm) const {
  // Decode a version immediate: a version code in bits [7:0] plus three
  // single-bit flags (W64 at bit 13, W32 at bit 14, MDP at bit 15), rendered
  // symbolically when possible.
  using VersionField = AMDGPU::EncodingField<7, 0>;
  using W64Bit = AMDGPU::EncodingBit<13>;
  using W32Bit = AMDGPU::EncodingBit<14>;
  using MDPBit = AMDGPU::EncodingBit<15>;
  using Encoding = AMDGPU::EncodingFields<VersionField, W64Bit, W32Bit, MDPBit>;

  auto [Version, W64, W32, MDP] = Encoding::decode(Encoded: Imm);

  // Decode into a plain immediate if any unused bits are raised.
  if (Encoding::encode(Values: Version, Values: W64, Values: W32, Values: MDP) != Imm)
    return MCOperand::createImm(Val: Imm);

  // Prefer the symbolic name of a known microcode version; fall back to the
  // raw version constant.
  const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
  const auto *I = find_if(
      Range: Versions, P: [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) {
        return V.Code == Version;
      });
  MCContext &Ctx = getContext();
  const MCExpr *E;
  if (I == Versions.end())
    E = MCConstantExpr::create(Value: Version, Ctx);
  else
    E = MCSymbolRefExpr::create(Symbol: Ctx.getOrCreateSymbol(Name: I->Symbol), Ctx);

  // OR in a symbolic expression for each flag bit that is set.
  if (W64)
    E = MCBinaryExpr::createOr(LHS: E, RHS: UCVersionW64Expr, Ctx);
  if (W32)
    E = MCBinaryExpr::createOr(LHS: E, RHS: UCVersionW32Expr, Ctx);
  if (MDP)
    E = MCBinaryExpr::createOr(LHS: E, RHS: UCVersionMDPExpr, Ctx);

  return MCOperand::createExpr(Val: E);
}
2250
// Subtarget generation and feature predicates: thin wrappers over STI
// feature bits and the shared AMDGPUBaseInfo helpers so decoder code can
// query the target generation concisely.

bool AMDGPUDisassembler::isVI() const {
  return STI.hasFeature(Feature: AMDGPU::FeatureVolcanicIslands);
}

bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }

bool AMDGPUDisassembler::isGFX90A() const {
  return STI.hasFeature(Feature: AMDGPU::FeatureGFX90AInsts);
}

bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }

bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }

bool AMDGPUDisassembler::isGFX10Plus() const {
  return AMDGPU::isGFX10Plus(STI);
}

bool AMDGPUDisassembler::isGFX11() const {
  return STI.hasFeature(Feature: AMDGPU::FeatureGFX11);
}

bool AMDGPUDisassembler::isGFX11Plus() const {
  return AMDGPU::isGFX11Plus(STI);
}

bool AMDGPUDisassembler::isGFX1170() const {
  return STI.hasFeature(Feature: AMDGPU::FeatureGFX11_7Insts);
}

bool AMDGPUDisassembler::isGFX12() const {
  return STI.hasFeature(Feature: AMDGPU::FeatureGFX12);
}

bool AMDGPUDisassembler::isGFX12Plus() const {
  return AMDGPU::isGFX12Plus(STI);
}

bool AMDGPUDisassembler::isGFX1250() const { return AMDGPU::isGFX1250(STI); }

bool AMDGPUDisassembler::isGFX1250Plus() const {
  return AMDGPU::isGFX1250Plus(STI);
}

bool AMDGPUDisassembler::isGFX13() const { return AMDGPU::isGFX13(STI); }

bool AMDGPUDisassembler::isGFX13Plus() const {
  return AMDGPU::isGFX13Plus(STI);
}

// True when flat-scratch initialization is architected (set up by hardware
// rather than reserved SGPRs).
bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
  return STI.hasFeature(Feature: AMDGPU::FeatureArchitectedFlatScratch);
}

// True when the target supports preloading kernel arguments into SGPRs.
bool AMDGPUDisassembler::hasKernargPreload() const {
  return AMDGPU::hasKernargPreload(STI);
}
2308
2309//===----------------------------------------------------------------------===//
2310// AMDGPU specific symbol handling
2311//===----------------------------------------------------------------------===//
2312
/// Print a string describing the reserved bit range specified by Mask with
/// offset BaseBytes for use in error comments. Mask is a single continuous
/// range of 1s surrounded by zeros. The format here is meant to align with the
/// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
  // A contiguous mask is fully described by its lowest set bit and its
  // population count.
  const int LowBit = llvm::countr_zero(Mask);
  const int NumBits = llvm::popcount(Mask);
  const unsigned Base = BaseBytes * CHAR_BIT;

  SmallString<32> Result;
  raw_svector_ostream S(Result);
  if (NumBits == 1)
    S << "bit (" << (LowBit + Base) << ')';
  else
    S << "bits in range (" << (LowBit + NumBits - 1 + Base) << ':'
      << (LowBit + Base) << ')';

  return Result;
}
2334
// Helpers shared by the kernel-descriptor decoders below. They rely on a
// uint32_t 'FourByteBuffer' holding the current descriptor word, a
// 'KdStream' to print directives to, and an 'Indent' string in scope.

// Extract a bit field of the current descriptor word.
#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
// Print an assembler directive followed by the field's value.
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n';            \
  } while (0)
// Print a field that has no assembler directive as an assembly comment.
#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)                        \
  do {                                                                         \
    KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " "       \
             << GET_FIELD(MASK) << '\n';                                       \
  } while (0)

// Fail decoding with a descriptive error if any reserved bit in MASK is set.
#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG)                              \
  do {                                                                         \
    if (FourByteBuffer & (MASK)) {                                             \
      return createStringError(std::errc::invalid_argument,                    \
                               "kernel descriptor " DESC                       \
                               " reserved %s set" MSG,                         \
                               getBitRangeFromMask((MASK), 0).c_str());        \
    }                                                                          \
  } while (0)

// Convenience wrappers choosing the field description and optional message.
#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)                                     \
  CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)                                   \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)                          \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
2363
// NOLINTNEXTLINE(readability-identifier-naming)
Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
    uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
  // Disassemble the COMPUTE_PGM_RSRC1 word of a kernel descriptor into
  // .amdhsa_* directives on KdStream. Returns an error if any reserved bit
  // is set; returns true otherwise.
  using namespace amdhsa;
  StringRef Indent = "\t";

  // We cannot accurately backward compute #VGPRs used from
  // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
  // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
  // simply calculate the inverse of what the assembler does.

  uint32_t GranulatedWorkitemVGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);

  uint32_t NextFreeVGPR =
      (GranulatedWorkitemVGPRCount + 1) *
      AMDGPU::IsaInfo::getVGPREncodingGranule(STI: &STI, EnableWavefrontSize32);

  KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';

  // We cannot backward compute values used to calculate
  // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following
  // directives can't be computed:
  // .amdhsa_reserve_vcc
  // .amdhsa_reserve_flat_scratch
  // .amdhsa_reserve_xnack_mask
  // They take their respective default values if not specified in the assembly.
  //
  // GRANULATED_WAVEFRONT_SGPR_COUNT
  //    = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
  //
  // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
  // are set to 0. So while disassembling we consider that:
  //
  // GRANULATED_WAVEFRONT_SGPR_COUNT
  //    = f(NEXT_FREE_SGPR + 0 + 0 + 0)
  //
  // The disassembler cannot recover the original values of those 3 directives.

  uint32_t GranulatedWavefrontSGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);

  if (isGFX10Plus())
    CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                            "must be zero on gfx10+");

  uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
                          AMDGPU::IsaInfo::getSGPREncodingGranule(STI: &STI);

  KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
  if (!hasArchitectedFlatScratch())
    KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
  bool ReservedXnackMask = STI.hasFeature(Feature: AMDGPU::FeatureXNACK);
  assert(!ReservedXnackMask || STI.hasFeature(AMDGPU::FeatureSupportsXNACK));
  KdStream << Indent << ".amdhsa_reserve_xnack_mask " << ReservedXnackMask
           << '\n';
  KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";

  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);

  PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);

  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);

  if (STI.hasFeature(Feature: AMDGPU::FeatureDX10ClampAndIEEEMode))
    PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);

  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);

  if (STI.hasFeature(Feature: AMDGPU::FeatureDX10ClampAndIEEEMode))
    PRINT_DIRECTIVE(".amdhsa_ieee_mode",
                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);

  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);

  // Bits [26].
  if (isGFX9Plus()) {
    PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
  } else {
    CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
                                 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
  }

  // Bits [27].
  if (isGFX1250Plus()) {
    PRINT_PSEUDO_DIRECTIVE_COMMENT("FLAT_SCRATCH_IS_NV",
                                   COMPUTE_PGM_RSRC1_GFX125_FLAT_SCRATCH_IS_NV);
  } else {
    CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX120_RESERVED1,
                             "COMPUTE_PGM_RSRC1");
  }

  // Bits [28].
  CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED2, "COMPUTE_PGM_RSRC1");

  // Bits [29-31].
  if (isGFX10Plus()) {
    // WGP_MODE is not available on GFX1250.
    if (!isGFX1250Plus()) {
      PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
                      COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
    }
    PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
    PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
  } else {
    CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED3,
                             "COMPUTE_PGM_RSRC1");
  }

  if (isGFX12Plus())
    PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
                    COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);

  return true;
}
2488
// NOLINTNEXTLINE(readability-identifier-naming)
Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
    uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
  // Disassemble the COMPUTE_PGM_RSRC2 word of a kernel descriptor into
  // .amdhsa_* directives on KdStream (system SGPR/VGPR enables and exception
  // enables). Returns an error if any reserved bit is set.
  using namespace amdhsa;
  StringRef Indent = "\t";
  // The same bit is named differently depending on whether flat scratch is
  // architected.
  if (hasArchitectedFlatScratch())
    PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  else
    PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
  PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
                  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);

  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);

  PRINT_DIRECTIVE(
      ".amdhsa_exception_fp_ieee_invalid_op",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
  PRINT_DIRECTIVE(
      ".amdhsa_exception_fp_ieee_div_zero",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
  PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);

  CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");

  return true;
}
2536
2537// NOLINTNEXTLINE(readability-identifier-naming)
2538Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
2539 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2540 using namespace amdhsa;
2541 StringRef Indent = "\t";
2542 if (isGFX90A()) {
2543 KdStream << Indent << ".amdhsa_accum_offset "
2544 << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
2545 << '\n';
2546
2547 PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2548
2549 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
2550 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2551 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
2552 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2553 } else if (isGFX10Plus()) {
2554 // Bits [0-3].
2555 if (!isGFX12Plus()) {
2556 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2557 PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
2558 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2559 } else {
2560 PRINT_PSEUDO_DIRECTIVE_COMMENT(
2561 "SHARED_VGPR_COUNT",
2562 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2563 }
2564 } else {
2565 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
2566 "COMPUTE_PGM_RSRC3",
2567 "must be zero on gfx12+");
2568 }
2569
2570 // Bits [4-11].
2571 if (isGFX11()) {
2572 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2573 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2574 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
2575 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2576 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
2577 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2578 } else if (isGFX12Plus()) {
2579 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2580 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2581 } else {
2582 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
2583 "COMPUTE_PGM_RSRC3",
2584 "must be zero on gfx10");
2585 }
2586
2587 // Bits [12].
2588 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
2589 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2590
2591 // Bits [13].
2592 if (isGFX12Plus()) {
2593 PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
2594 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2595 } else {
2596 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
2597 "COMPUTE_PGM_RSRC3",
2598 "must be zero on gfx10 or gfx11");
2599 }
2600
2601 // Bits [14-21].
2602 if (isGFX1250Plus()) {
2603 PRINT_DIRECTIVE(".amdhsa_named_barrier_count",
2604 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT);
2605 PRINT_PSEUDO_DIRECTIVE_COMMENT(
2606 "ENABLE_DYNAMIC_VGPR", COMPUTE_PGM_RSRC3_GFX125_ENABLE_DYNAMIC_VGPR);
2607 PRINT_PSEUDO_DIRECTIVE_COMMENT("TCP_SPLIT",
2608 COMPUTE_PGM_RSRC3_GFX125_TCP_SPLIT);
2609 PRINT_PSEUDO_DIRECTIVE_COMMENT(
2610 "ENABLE_DIDT_THROTTLE",
2611 COMPUTE_PGM_RSRC3_GFX125_ENABLE_DIDT_THROTTLE);
2612 } else {
2613 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX120_RESERVED4,
2614 "COMPUTE_PGM_RSRC3",
2615 "must be zero on gfx10+");
2616 }
2617
2618 // Bits [22-30].
2619 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED5,
2620 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2621
2622 // Bits [31].
2623 if (isGFX11Plus()) {
2624 PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
2625 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2626 } else {
2627 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED6,
2628 "COMPUTE_PGM_RSRC3",
2629 "must be zero on gfx10");
2630 }
2631 } else if (FourByteBuffer) {
2632 return createStringError(
2633 EC: std::errc::invalid_argument,
2634 Fmt: "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2635 }
2636 return true;
2637}
2638#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2639#undef PRINT_DIRECTIVE
2640#undef GET_FIELD
2641#undef CHECK_RESERVED_BITS_IMPL
2642#undef CHECK_RESERVED_BITS
2643#undef CHECK_RESERVED_BITS_MSG
2644#undef CHECK_RESERVED_BITS_DESC
2645#undef CHECK_RESERVED_BITS_DESC_MSG
2646
2647/// Create an error object to return from onSymbolStart for reserved kernel
2648/// descriptor bits being set.
2649static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2650 const char *Msg = "") {
2651 return createStringError(
2652 EC: std::errc::invalid_argument, Fmt: "kernel descriptor reserved %s set%s%s",
2653 Vals: getBitRangeFromMask(Mask, BaseBytes).c_str(), Vals: *Msg ? ", " : "", Vals: Msg);
2654}
2655
2656/// Create an error object to return from onSymbolStart for reserved kernel
2657/// descriptor bytes being set.
2658static Error createReservedKDBytesError(unsigned BaseInBytes,
2659 unsigned WidthInBytes) {
2660 // Create an error comment in the same format as the "Kernel Descriptor"
2661 // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2662 return createStringError(
2663 EC: std::errc::invalid_argument,
2664 Fmt: "kernel descriptor reserved bits in range (%u:%u) set",
2665 Vals: (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, Vals: BaseInBytes * CHAR_BIT);
2666}
2667
2668Expected<bool> AMDGPUDisassembler::decodeKernelDescriptorDirective(
2669 DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
2670 raw_string_ostream &KdStream) const {
2671#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2672 do { \
2673 KdStream << Indent << DIRECTIVE " " \
2674 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2675 } while (0)
2676
2677 uint16_t TwoByteBuffer = 0;
2678 uint32_t FourByteBuffer = 0;
2679
2680 StringRef ReservedBytes;
2681 StringRef Indent = "\t";
2682
2683 assert(Bytes.size() == 64);
2684 DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
2685
2686 switch (Cursor.tell()) {
2687 case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
2688 FourByteBuffer = DE.getU32(C&: Cursor);
2689 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2690 << '\n';
2691 return true;
2692
2693 case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
2694 FourByteBuffer = DE.getU32(C&: Cursor);
2695 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2696 << FourByteBuffer << '\n';
2697 return true;
2698
2699 case amdhsa::KERNARG_SIZE_OFFSET:
2700 FourByteBuffer = DE.getU32(C&: Cursor);
2701 KdStream << Indent << ".amdhsa_kernarg_size "
2702 << FourByteBuffer << '\n';
2703 return true;
2704
2705 case amdhsa::RESERVED0_OFFSET:
2706 // 4 reserved bytes, must be 0.
2707 ReservedBytes = DE.getBytes(C&: Cursor, Length: 4);
2708 for (char B : ReservedBytes) {
2709 if (B != 0)
2710 return createReservedKDBytesError(BaseInBytes: amdhsa::RESERVED0_OFFSET, WidthInBytes: 4);
2711 }
2712 return true;
2713
2714 case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
2715 // KERNEL_CODE_ENTRY_BYTE_OFFSET
2716 // So far no directive controls this for Code Object V3, so simply skip for
2717 // disassembly.
2718 DE.skip(C&: Cursor, Length: 8);
2719 return true;
2720
2721 case amdhsa::RESERVED1_OFFSET:
2722 // 20 reserved bytes, must be 0.
2723 ReservedBytes = DE.getBytes(C&: Cursor, Length: 20);
2724 for (char B : ReservedBytes) {
2725 if (B != 0)
2726 return createReservedKDBytesError(BaseInBytes: amdhsa::RESERVED1_OFFSET, WidthInBytes: 20);
2727 }
2728 return true;
2729
2730 case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
2731 FourByteBuffer = DE.getU32(C&: Cursor);
2732 return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2733
2734 case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
2735 FourByteBuffer = DE.getU32(C&: Cursor);
2736 return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2737
2738 case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
2739 FourByteBuffer = DE.getU32(C&: Cursor);
2740 return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2741
2742 case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
2743 using namespace amdhsa;
2744 TwoByteBuffer = DE.getU16(C&: Cursor);
2745
2746 if (!hasArchitectedFlatScratch())
2747 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2748 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2749 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2750 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2751 PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2752 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2753 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2754 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2755 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2756 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2757 if (!hasArchitectedFlatScratch())
2758 PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2759 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2760 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2761 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2762
2763 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2764 return createReservedKDBitsError(Mask: KERNEL_CODE_PROPERTY_RESERVED0,
2765 BaseBytes: amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2766
2767 // Reserved for GFX9
2768 if (isGFX9() &&
2769 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2770 return createReservedKDBitsError(
2771 Mask: KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2772 BaseBytes: amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, Msg: "must be zero on gfx9");
2773 }
2774 if (isGFX10Plus()) {
2775 PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2776 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2777 }
2778
2779 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
2780 PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2781 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2782
2783 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2784 return createReservedKDBitsError(Mask: KERNEL_CODE_PROPERTY_RESERVED1,
2785 BaseBytes: amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2786 }
2787
2788 return true;
2789
2790 case amdhsa::KERNARG_PRELOAD_OFFSET:
2791 using namespace amdhsa;
2792 TwoByteBuffer = DE.getU16(C&: Cursor);
2793 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2794 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2795 KERNARG_PRELOAD_SPEC_LENGTH);
2796 }
2797
2798 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2799 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2800 KERNARG_PRELOAD_SPEC_OFFSET);
2801 }
2802 return true;
2803
2804 case amdhsa::RESERVED3_OFFSET:
2805 // 4 bytes from here are reserved, must be 0.
2806 ReservedBytes = DE.getBytes(C&: Cursor, Length: 4);
2807 for (char B : ReservedBytes) {
2808 if (B != 0)
2809 return createReservedKDBytesError(BaseInBytes: amdhsa::RESERVED3_OFFSET, WidthInBytes: 4);
2810 }
2811 return true;
2812
2813 default:
2814 llvm_unreachable("Unhandled index. Case statements cover everything.");
2815 return true;
2816 }
2817#undef PRINT_DIRECTIVE
2818}
2819
2820Expected<bool> AMDGPUDisassembler::decodeKernelDescriptor(
2821 StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2822
2823 // CP microcode requires the kernel descriptor to be 64 aligned.
2824 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2825 return createStringError(EC: std::errc::invalid_argument,
2826 Fmt: "kernel descriptor must be 64-byte aligned");
2827
2828 // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2829 // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2830 // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2831 // order. Workaround this by first looking up .amdhsa_wavefront_size32 here
2832 // when required.
2833 if (isGFX10Plus()) {
2834 uint16_t KernelCodeProperties =
2835 support::endian::read16(P: &Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
2836 E: llvm::endianness::little);
2837 EnableWavefrontSize32 =
2838 AMDHSA_BITS_GET(KernelCodeProperties,
2839 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2840 }
2841
2842 std::string Kd;
2843 raw_string_ostream KdStream(Kd);
2844 KdStream << ".amdhsa_kernel " << KdName << '\n';
2845
2846 DataExtractor::Cursor C(0);
2847 while (C && C.tell() < Bytes.size()) {
2848 Expected<bool> Res = decodeKernelDescriptorDirective(Cursor&: C, Bytes, KdStream);
2849
2850 cantFail(Err: C.takeError());
2851
2852 if (!Res)
2853 return Res;
2854 }
2855 KdStream << ".end_amdhsa_kernel\n";
2856 outs() << KdStream.str();
2857 return true;
2858}
2859
2860Expected<bool> AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
2861 uint64_t &Size,
2862 ArrayRef<uint8_t> Bytes,
2863 uint64_t Address) const {
2864 // Right now only kernel descriptor needs to be handled.
2865 // We ignore all other symbols for target specific handling.
2866 // TODO:
2867 // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2868 // Object V2 and V3 when symbols are marked protected.
2869
2870 // amd_kernel_code_t for Code Object V2.
2871 if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2872 Size = 256;
2873 return createStringError(EC: std::errc::invalid_argument,
2874 Fmt: "code object v2 is not supported");
2875 }
2876
2877 // Code Object V3 kernel descriptors.
2878 StringRef Name = Symbol.Name;
2879 if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(Suffix: StringRef(".kd"))) {
2880 Size = 64; // Size = 64 regardless of success or failure.
2881 return decodeKernelDescriptor(KdName: Name.drop_back(N: 3), Bytes, KdAddress: Address);
2882 }
2883
2884 return false;
2885}
2886
2887const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
2888 int64_t Val) {
2889 MCContext &Ctx = getContext();
2890 MCSymbol *Sym = Ctx.getOrCreateSymbol(Name: Id);
2891 // Note: only set value to Val on a new symbol in case an dissassembler
2892 // has already been initialized in this context.
2893 if (!Sym->isVariable()) {
2894 Sym->setVariableValue(MCConstantExpr::create(Value: Val, Ctx));
2895 } else {
2896 int64_t Res = ~Val;
2897 bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
2898 if (!Valid || Res != Val)
2899 Ctx.reportWarning(L: SMLoc(), Msg: "unsupported redefinition of " + Id);
2900 }
2901 return MCSymbolRefExpr::create(Symbol: Sym, Ctx);
2902}
2903
2904bool AMDGPUDisassembler::isBufferInstruction(const MCInst &MI) const {
2905 const uint64_t TSFlags = MCII->get(Opcode: MI.getOpcode()).TSFlags;
2906
2907 // Check for MUBUF and MTBUF instructions
2908 if (TSFlags & (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))
2909 return true;
2910
2911 // Check for SMEM buffer instructions (S_BUFFER_* instructions)
2912 if ((TSFlags & SIInstrFlags::SMRD) && AMDGPU::getSMEMIsBuffer(Opc: MI.getOpcode()))
2913 return true;
2914
2915 return false;
2916}
2917
2918//===----------------------------------------------------------------------===//
2919// AMDGPUSymbolizer
2920//===----------------------------------------------------------------------===//
2921
2922// Try to find symbol name for specified label
2923bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
2924 MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2925 uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2926 uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2927
2928 if (!IsBranch) {
2929 return false;
2930 }
2931
2932 auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2933 if (!Symbols)
2934 return false;
2935
2936 auto Result = llvm::find_if(Range&: *Symbols, P: [Value](const SymbolInfoTy &Val) {
2937 return Val.Addr == static_cast<uint64_t>(Value) &&
2938 Val.Type == ELF::STT_NOTYPE;
2939 });
2940 if (Result != Symbols->end()) {
2941 auto *Sym = Ctx.getOrCreateSymbol(Name: Result->Name);
2942 const auto *Add = MCSymbolRefExpr::create(Symbol: Sym, Ctx);
2943 Inst.addOperand(Op: MCOperand::createExpr(Val: Add));
2944 return true;
2945 }
2946 // Add to list of referenced addresses, so caller can synthesize a label.
2947 ReferencedAddresses.push_back(x: static_cast<uint64_t>(Value));
2948 return false;
2949}
2950
// PC-relative load annotation is not implemented for AMDGPU; reaching this
// hook is a programming error.
void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
                                                       int64_t Value,
                                                       uint64_t Address) {
  llvm_unreachable("unimplemented");
}
2956
2957//===----------------------------------------------------------------------===//
2958// Initialization
2959//===----------------------------------------------------------------------===//
2960
2961static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
2962 LLVMOpInfoCallback /*GetOpInfo*/,
2963 LLVMSymbolLookupCallback /*SymbolLookUp*/,
2964 void *DisInfo,
2965 MCContext *Ctx,
2966 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2967 return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2968}
2969
2970static MCDisassembler *createAMDGPUDisassembler(const Target &T,
2971 const MCSubtargetInfo &STI,
2972 MCContext &Ctx) {
2973 return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2974}
2975
2976extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
2977LLVMInitializeAMDGPUDisassembler() {
2978 TargetRegistry::RegisterMCDisassembler(T&: getTheGCNTarget(),
2979 Fn: createAMDGPUDisassembler);
2980 TargetRegistry::RegisterMCSymbolizer(T&: getTheGCNTarget(),
2981 Fn: createAMDGPUSymbolizer);
2982}
2983