1//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//===----------------------------------------------------------------------===//
10//
11/// \file
12///
13/// This file contains definition for AMDGPU ISA disassembler
14//
15//===----------------------------------------------------------------------===//
16
17// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
18
19#include "Disassembler/AMDGPUDisassembler.h"
20#include "MCTargetDesc/AMDGPUMCExpr.h"
21#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
22#include "SIDefines.h"
23#include "SIRegisterInfo.h"
24#include "TargetInfo/AMDGPUTargetInfo.h"
25#include "Utils/AMDGPUAsmUtils.h"
26#include "Utils/AMDGPUBaseInfo.h"
27#include "llvm-c/DisassemblerTypes.h"
28#include "llvm/BinaryFormat/ELF.h"
29#include "llvm/MC/MCAsmInfo.h"
30#include "llvm/MC/MCContext.h"
31#include "llvm/MC/MCDecoder.h"
32#include "llvm/MC/MCDecoderOps.h"
33#include "llvm/MC/MCExpr.h"
34#include "llvm/MC/MCInstrDesc.h"
35#include "llvm/MC/MCRegisterInfo.h"
36#include "llvm/MC/MCSubtargetInfo.h"
37#include "llvm/MC/TargetRegistry.h"
38#include "llvm/Support/AMDHSAKernelDescriptor.h"
39#include "llvm/Support/Compiler.h"
40
41using namespace llvm;
42using namespace llvm::MCD;
43
44#define DEBUG_TYPE "amdgpu-disassembler"
45
// Highest directly-encodable SGPR index; GFX10 widened the encodable range.
#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)

using DecodeStatus = llvm::MCDisassembler::DecodeStatus;

// Forward declarations of helpers (defined later in this file) that map an
// inline-constant encoding to the bit pattern it represents for each type.
static int64_t getInlineImmValF16(unsigned Imm);
static int64_t getInlineImmValBF16(unsigned Imm);
static int64_t getInlineImmVal32(unsigned Imm);
static int64_t getInlineImmVal64(unsigned Imm);
56
57AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
58 MCContext &Ctx, MCInstrInfo const *MCII)
59 : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
60 MAI(*Ctx.getAsmInfo()),
61 HwModeRegClass(STI.getHwMode(type: MCSubtargetInfo::HwMode_RegInfo)),
62 TargetMaxInstBytes(MAI.getMaxInstLength(STI: &STI)),
63 CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
64 // ToDo: AMDGPUDisassembler supports only VI ISA.
65 if (!STI.hasFeature(Feature: AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
66 reportFatalUsageError(reason: "disassembly not yet supported for subtarget");
67
68 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
69 createConstantSymbolExpr(Id: Symbol, Val: Code);
70
71 UCVersionW64Expr = createConstantSymbolExpr(Id: "UC_VERSION_W64_BIT", Val: 0x2000);
72 UCVersionW32Expr = createConstantSymbolExpr(Id: "UC_VERSION_W32_BIT", Val: 0x4000);
73 UCVersionMDPExpr = createConstantSymbolExpr(Id: "UC_VERSION_MDP_BIT", Val: 0x8000);
74}
75
76void AMDGPUDisassembler::setABIVersion(unsigned Version) {
77 CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(ABIVersion: Version);
78}
79
80inline static MCDisassembler::DecodeStatus
81addOperand(MCInst &Inst, const MCOperand& Opnd) {
82 Inst.addOperand(Op: Opnd);
83 return Opnd.isValid() ?
84 MCDisassembler::Success :
85 MCDisassembler::Fail;
86}
87
88static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
89 AMDGPU::OpName Name) {
90 int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name);
91 if (OpIdx != -1) {
92 auto *I = MI.begin();
93 std::advance(i&: I, n: OpIdx);
94 MI.insert(I, Op);
95 }
96 return OpIdx;
97}
98
99static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
100 uint64_t Addr,
101 const MCDisassembler *Decoder) {
102 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
103
104 // Our branches take a simm16.
105 int64_t Offset = SignExtend64<16>(x: Imm) * 4 + 4 + Addr;
106
107 if (DAsm->tryAddingSymbolicOperand(Inst, Value: Offset, Address: Addr, IsBranch: true, Offset: 2, OpSize: 2, InstSize: 0))
108 return MCDisassembler::Success;
109 return addOperand(Inst, Opnd: MCOperand::createImm(Val: Imm));
110}
111
112static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
113 const MCDisassembler *Decoder) {
114 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
115 int64_t Offset;
116 if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
117 Offset = SignExtend64<24>(x: Imm);
118 } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
119 Offset = Imm & 0xFFFFF;
120 } else { // GFX9+ supports 21-bit signed offsets.
121 Offset = SignExtend64<21>(x: Imm);
122 }
123 return addOperand(Inst, Opnd: MCOperand::createImm(Val: Offset));
124}
125
126static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
127 const MCDisassembler *Decoder) {
128 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
129 return addOperand(Inst, Opnd: DAsm->decodeBoolReg(Inst, Val));
130}
131
132static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
133 uint64_t Addr,
134 const MCDisassembler *Decoder) {
135 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
136 return addOperand(Inst, Opnd: DAsm->decodeSplitBarrier(Inst, Val));
137}
138
139static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
140 const MCDisassembler *Decoder) {
141 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
142 return addOperand(Inst, Opnd: DAsm->decodeDpp8FI(Val));
143}
144
// Defines a tablegen-callable decoder function that forwards the raw
// immediate to the named AMDGPUDisassembler member decoder and appends the
// resulting operand. Used below for the SDWA operand decoders.
#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }
152
// Decoder for registers, decode directly using RegClassID. Imm(8-bit) is
// number of register. Used by VGPR only and AGPR only operands.
// Expands to Decode<RegClass>RegisterClass, the name tablegen emits for a
// plain register-class operand.
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }
164
// Defines a decoder for a source operand encoded in EncSize bits: asserts the
// raw value fits the encoding, then forwards EncImm to decodeSrcOp for an
// operand of OpWidth bits.
#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm)                           \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);              \
    return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm));         \
  }
172
// Common worker for the templated Src-operand decoders below: verifies the
// immediate fits its EncSize-bit encoding, then decodes EncImm via the
// disassembler's decodeSrcOp for an operand of OpWidth bits.
static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
                                unsigned OpWidth, unsigned Imm, unsigned EncImm,
                                const MCDisassembler *Decoder) {
  assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
  const auto *Disasm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, Disasm->decodeSrcOp(Inst, OpWidth, EncImm));
}
180
// Decoder for registers. Imm(7-bit) is number of register, uses decodeSrcOp to
// get register class. Used by SGPR only operands.
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm)

// Same, but for SGPR operands whose register number uses 8 bits.
#define DECODE_OPERAND_SREG_8(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm)
188
189// Decoder for registers. Imm(10-bit): Imm{7-0} is number of register,
190// Imm{9} is acc(agpr or vgpr) Imm{8} should be 0 (see VOP3Pe_SMFMAC).
191// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
192// Used by AV_ register classes (AGPR or VGPR only register operands).
193template <unsigned OpWidth>
194static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
195 const MCDisassembler *Decoder) {
196 return decodeSrcOp(Inst, EncSize: 10, OpWidth, Imm, EncImm: Imm | AMDGPU::EncValues::IS_VGPR,
197 Decoder);
198}
199
200// Decoder for Src(9-bit encoding) registers only.
201template <unsigned OpWidth>
202static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
203 uint64_t /* Addr */,
204 const MCDisassembler *Decoder) {
205 return decodeSrcOp(Inst, EncSize: 9, OpWidth, Imm, EncImm: Imm, Decoder);
206}
207
208// Decoder for Src(9-bit encoding) AGPR, register number encoded in 9bits, set
209// Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp, registers
210// only.
211template <unsigned OpWidth>
212static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
213 const MCDisassembler *Decoder) {
214 return decodeSrcOp(Inst, EncSize: 9, OpWidth, Imm, EncImm: Imm | 512, Decoder);
215}
216
217// Decoder for 'enum10' from decodeSrcOp, Imm{0-8} is 9-bit Src encoding
218// Imm{9} is acc, registers only.
219template <unsigned OpWidth>
220static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
221 uint64_t /* Addr */,
222 const MCDisassembler *Decoder) {
223 return decodeSrcOp(Inst, EncSize: 10, OpWidth, Imm, EncImm: Imm, Decoder);
224}
225
226// Decoder for RegisterOperands using 9-bit Src encoding. Operand can be
227// register from RegClass or immediate. Registers that don't belong to RegClass
228// will be decoded and InstPrinter will report warning. Immediate will be
229// decoded into constant matching the OperandType (important for floating point
230// types).
231template <unsigned OpWidth>
232static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
233 uint64_t /* Addr */,
234 const MCDisassembler *Decoder) {
235 return decodeSrcOp(Inst, EncSize: 9, OpWidth, Imm, EncImm: Imm, Decoder);
236}
237
238// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
239// and decode using 'enum10' from decodeSrcOp.
240template <unsigned OpWidth>
241static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
242 uint64_t /* Addr */,
243 const MCDisassembler *Decoder) {
244 return decodeSrcOp(Inst, EncSize: 9, OpWidth, Imm, EncImm: Imm | 512, Decoder);
245}
246
// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
// when RegisterClass is used as an operand. Most often used for destination
// operands.

// VGPR register classes, 8-bit register-number encoding.
DECODE_OPERAND_REG_8(VGPR_32)
DECODE_OPERAND_REG_8(VGPR_32_Lo128)
DECODE_OPERAND_REG_8(VReg_64)
DECODE_OPERAND_REG_8(VReg_96)
DECODE_OPERAND_REG_8(VReg_128)
DECODE_OPERAND_REG_8(VReg_192)
DECODE_OPERAND_REG_8(VReg_256)
DECODE_OPERAND_REG_8(VReg_288)
DECODE_OPERAND_REG_8(VReg_320)
DECODE_OPERAND_REG_8(VReg_352)
DECODE_OPERAND_REG_8(VReg_384)
DECODE_OPERAND_REG_8(VReg_512)
DECODE_OPERAND_REG_8(VReg_1024)

// SGPR register classes, 7-bit register-number encoding; the second argument
// is the operand width in bits.
DECODE_OPERAND_SREG_7(SReg_32, 32)
DECODE_OPERAND_SREG_7(SReg_32_XM0, 32)
DECODE_OPERAND_SREG_7(SReg_32_XEXEC, 32)
DECODE_OPERAND_SREG_7(SReg_32_XM0_XEXEC, 32)
DECODE_OPERAND_SREG_7(SReg_32_XEXEC_HI, 32)
DECODE_OPERAND_SREG_7(SReg_64_XEXEC, 64)
DECODE_OPERAND_SREG_7(SReg_64_XEXEC_XNULL, 64)
DECODE_OPERAND_SREG_7(SReg_96, 96)
DECODE_OPERAND_SREG_7(SReg_128, 128)
DECODE_OPERAND_SREG_7(SReg_128_XNULL, 128)
DECODE_OPERAND_SREG_7(SReg_256, 256)
DECODE_OPERAND_SREG_7(SReg_256_XNULL, 256)
DECODE_OPERAND_SREG_7(SReg_512, 512)

// SReg_64 needs the 8-bit encoding.
DECODE_OPERAND_SREG_8(SReg_64, 64)

// AGPR register classes, 8-bit register-number encoding.
DECODE_OPERAND_REG_8(AGPR_32)
DECODE_OPERAND_REG_8(AReg_64)
DECODE_OPERAND_REG_8(AReg_128)
DECODE_OPERAND_REG_8(AReg_256)
DECODE_OPERAND_REG_8(AReg_512)
DECODE_OPERAND_REG_8(AReg_1024)
287
// Decode a 16-bit VGPR operand: Imm{7-0} selects the 32-bit VGPR, Imm{9}
// selects its high half. Imm{8} must be clear.
static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
                                               uint64_t /*Addr*/,
                                               const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  const unsigned RegIdx = Imm & 0xff;
  const bool IsHi = (Imm & (1 << 9)) != 0;
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}
299
// Decode a 16-bit VGPR restricted to the low 128 registers: Imm{6-0} is the
// register number, Imm{7} selects the high half.
static DecodeStatus
DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
                                 const MCDisassembler *Decoder) {
  assert(isUInt<8>(Imm) && "8-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  const unsigned RegIdx = Imm & 0x7f;
  const bool IsHi = (Imm & (1 << 7)) != 0;
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}
310
// Decode a true16 VSrc operand restricted to the low 128 VGPRs. A set IS_VGPR
// bit means a 16-bit VGPR (Imm{7} = hi half); otherwise the low 8 bits are an
// SGPR / inline-constant / literal encoding handled by decodeNonVGPRSrcOp.
template <unsigned OpWidth>
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
                                                uint64_t /*Addr*/,
                                                const MCDisassembler *Decoder) {
  assert(isUInt<9>(Imm) && "9-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & AMDGPU::EncValues::IS_VGPR) {
    const bool IsHi = (Imm & (1 << 7)) != 0;
    return addOperand(Inst, DAsm->createVGPR16Operand(Imm & 0x7f, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
}
325
// Decode a true16 VSrc operand with the full 10-bit encoding. A set IS_VGPR
// bit means a 16-bit VGPR (Imm{9} = hi half); otherwise the low 8 bits are an
// SGPR / inline-constant / literal encoding handled by decodeNonVGPRSrcOp.
template <unsigned OpWidth>
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  if (Imm & AMDGPU::EncValues::IS_VGPR) {
    const bool IsHi = (Imm & (1 << 9)) != 0;
    return addOperand(Inst, DAsm->createVGPR16Operand(Imm & 0xff, IsHi));
  }
  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
}
340
// Decode a 16-bit VGPR using the 10-bit encoding; the IS_VGPR bit is required
// to be set. Imm{7-0} is the register, Imm{9} selects the high half.
static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm,
                                          uint64_t /*Addr*/,
                                          const MCDisassembler *Decoder) {
  assert(isUInt<10>(Imm) && "10-bit encoding expected");
  assert(Imm & AMDGPU::EncValues::IS_VGPR && "VGPR expected");

  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  const unsigned RegIdx = Imm & 0xff;
  const bool IsHi = (Imm & (1 << 9)) != 0;
  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
}
353
354static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
355 uint64_t Addr,
356 const MCDisassembler *Decoder) {
357 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
358 return addOperand(Inst, Opnd: DAsm->decodeMandatoryLiteralConstant(Imm));
359}
360
361static DecodeStatus decodeOperand_KImmFP64(MCInst &Inst, uint64_t Imm,
362 uint64_t Addr,
363 const MCDisassembler *Decoder) {
364 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
365 return addOperand(Inst, Opnd: DAsm->decodeMandatoryLiteral64Constant(Imm));
366}
367
368static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
369 uint64_t Addr, const void *Decoder) {
370 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
371 return addOperand(Inst, Opnd: DAsm->decodeVOPDDstYOp(Inst, Val));
372}
373
374static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw,
375 const MCDisassembler *Decoder) {
376 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
377 return addOperand(Inst, Opnd: DAsm->decodeSrcOp(Inst, Width: Opw, Val: Imm | 256));
378}
379
380template <unsigned Opw>
381static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
382 uint64_t /* Addr */,
383 const MCDisassembler *Decoder) {
384 return decodeAVLdSt(Inst, Imm, Opw, Decoder);
385}
386
// Decode a 64-bit floating-point VSrc operand from its 9-bit encoding.
static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
  assert(Imm < (1 << 9) && "9-bit encoding");
  const auto *Disasm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, Disasm->decodeSrcOp(Inst, 64, Imm));
}
394
// SDWA operand decoders: each expands (via DECODE_OPERAND) to a static
// function wrapping the same-named AMDGPUDisassembler member decoder.
#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)
401
402static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
403 uint64_t /* Addr */,
404 const MCDisassembler *Decoder) {
405 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
406 return addOperand(Inst, Opnd: DAsm->decodeVersionImm(Imm));
407}
408
409#include "AMDGPUGenDisassemblerTables.inc"
410
namespace {
// Define bitwidths for various types used to instantiate the decoder.
// Encodings wider than 64 bits are carried in std::bitset.
template <> constexpr uint32_t InsnBitWidth<uint32_t> = 32;
template <> constexpr uint32_t InsnBitWidth<uint64_t> = 64;
template <> constexpr uint32_t InsnBitWidth<std::bitset<96>> = 96;
template <> constexpr uint32_t InsnBitWidth<std::bitset<128>> = 128;
} // namespace
418
419//===----------------------------------------------------------------------===//
420//
421//===----------------------------------------------------------------------===//
422
423template <typename InsnType>
424DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table, MCInst &MI,
425 InsnType Inst, uint64_t Address,
426 raw_ostream &Comments) const {
427 assert(MI.getOpcode() == 0);
428 assert(MI.getNumOperands() == 0);
429 MCInst TmpInst;
430 HasLiteral = false;
431 const auto SavedBytes = Bytes;
432
433 SmallString<64> LocalComments;
434 raw_svector_ostream LocalCommentStream(LocalComments);
435 CommentStream = &LocalCommentStream;
436
437 DecodeStatus Res =
438 decodeInstruction(Table, TmpInst, Inst, Address, this, STI);
439
440 CommentStream = nullptr;
441
442 if (Res != MCDisassembler::Fail) {
443 MI = TmpInst;
444 Comments << LocalComments;
445 return MCDisassembler::Success;
446 }
447 Bytes = SavedBytes;
448 return MCDisassembler::Fail;
449}
450
// Two-table variant: try the primary table first, then the fallback.
template <typename InsnType>
DecodeStatus
AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table1, const uint8_t *Table2,
                                  MCInst &MI, InsnType Inst, uint64_t Address,
                                  raw_ostream &Comments) const {
  if (DecodeStatus Res = tryDecodeInst(Table1, MI, Inst, Address, Comments))
    return Res;
  return tryDecodeInst(Table2, MI, Inst, Address, Comments);
}
462
463template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
464 assert(Bytes.size() >= sizeof(T));
465 const auto Res =
466 support::endian::read<T, llvm::endianness::little>(Bytes.data());
467 Bytes = Bytes.slice(N: sizeof(T));
468 return Res;
469}
470
471static inline std::bitset<96> eat12Bytes(ArrayRef<uint8_t> &Bytes) {
472 using namespace llvm::support::endian;
473 assert(Bytes.size() >= 12);
474 std::bitset<96> Lo(read<uint64_t, endianness::little>(P: Bytes.data()));
475 Bytes = Bytes.slice(N: 8);
476 std::bitset<96> Hi(read<uint32_t, endianness::little>(P: Bytes.data()));
477 Bytes = Bytes.slice(N: 4);
478 return (Hi << 64) | Lo;
479}
480
481static inline std::bitset<128> eat16Bytes(ArrayRef<uint8_t> &Bytes) {
482 using namespace llvm::support::endian;
483 assert(Bytes.size() >= 16);
484 std::bitset<128> Lo(read<uint64_t, endianness::little>(P: Bytes.data()));
485 Bytes = Bytes.slice(N: 8);
486 std::bitset<128> Hi(read<uint64_t, endianness::little>(P: Bytes.data()));
487 Bytes = Bytes.slice(N: 8);
488 return (Hi << 64) | Lo;
489}
490
// Post-process the immediate operands of a freshly decoded MI: expand inline
// constant encodings into the actual values they represent (per operand type)
// and materialize literal constants. Only source-like operands are touched.
void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
                                           const MCInstrInfo &MCII) const {
  const MCInstrDesc &Desc = MCII.get(Opcode: MI.getOpcode());
  for (auto [OpNo, OpDesc] : enumerate(First: Desc.operands())) {
    // Some described operands may not have been decoded yet.
    if (OpNo >= MI.getNumOperands())
      continue;

    // TODO: Fix V_DUAL_FMAMK_F32_X_FMAAK_F32_gfx12 vsrc operands,
    // defined to take VGPR_32, but in reality allowing inline constants.
    bool IsSrc = AMDGPU::OPERAND_SRC_FIRST <= OpDesc.OperandType &&
                 OpDesc.OperandType <= AMDGPU::OPERAND_SRC_LAST;
    if (!IsSrc && OpDesc.OperandType != MCOI::OPERAND_REGISTER)
      continue;

    MCOperand &Op = MI.getOperand(i: OpNo);
    if (!Op.isImm())
      continue;
    int64_t Imm = Op.getImm();
    // Integer inline constants map directly to small signed integers.
    if (AMDGPU::EncValues::INLINE_INTEGER_C_MIN <= Imm &&
        Imm <= AMDGPU::EncValues::INLINE_INTEGER_C_MAX) {
      Op = decodeIntImmed(Imm);
      continue;
    }

    // The literal marker means the value lives in a trailing literal dword.
    if (Imm == AMDGPU::EncValues::LITERAL_CONST) {
      Op = decodeLiteralConstant(Desc, OpDesc);
      continue;
    }

    // Floating-point inline constants: the bit pattern depends on the
    // operand's type, so dispatch on OperandType.
    if (AMDGPU::EncValues::INLINE_FLOATING_C_MIN <= Imm &&
        Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX) {
      switch (OpDesc.OperandType) {
      case AMDGPU::OPERAND_REG_IMM_BF16:
      case AMDGPU::OPERAND_REG_IMM_V2BF16:
      case AMDGPU::OPERAND_REG_INLINE_C_BF16:
      case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
        Imm = getInlineImmValBF16(Imm);
        break;
      case AMDGPU::OPERAND_REG_IMM_FP16:
      case AMDGPU::OPERAND_REG_IMM_INT16:
      case AMDGPU::OPERAND_REG_INLINE_C_FP16:
      case AMDGPU::OPERAND_REG_INLINE_C_INT16:
        Imm = getInlineImmValF16(Imm);
        break;
      case AMDGPU::OPERAND_REG_IMM_V2FP16:
      case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
        Imm = getInlineImmValF16(Imm);
        break;
      case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT: {
        // V_PK_FMAC_F16 on GFX11+ duplicates the f16 inline constant to both
        // halves, so we need to produce the duplicated value for correct
        // round-trip.
        if (isGFX11Plus()) {
          int64_t F16Val = getInlineImmValF16(Imm);
          Imm = (F16Val << 16) | (F16Val & 0xFFFF);
        } else {
          Imm = getInlineImmValF16(Imm);
        }
        break;
      }
      case AMDGPU::OPERAND_REG_IMM_FP64:
      case AMDGPU::OPERAND_REG_IMM_INT64:
      case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
      case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      case AMDGPU::OPERAND_REG_INLINE_C_INT64:
        Imm = getInlineImmVal64(Imm);
        break;
      default:
        // All remaining operand types use the 32-bit interpretation.
        Imm = getInlineImmVal32(Imm);
      }
      Op.setImm(Imm);
    }
  }
}
565
566DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
567 ArrayRef<uint8_t> Bytes_,
568 uint64_t Address,
569 raw_ostream &CS) const {
570 unsigned MaxInstBytesNum = std::min(a: (size_t)TargetMaxInstBytes, b: Bytes_.size());
571 Bytes = Bytes_.slice(N: 0, M: MaxInstBytesNum);
572
573 // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
574 // there are fewer bytes left). This will be overridden on success.
575 Size = std::min(a: (size_t)4, b: Bytes_.size());
576
577 do {
578 // ToDo: better to switch encoding length using some bit predicate
579 // but it is unknown yet, so try all we can
580
581 // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
582 // encodings
583 if (isGFX1250Plus() && Bytes.size() >= 16) {
584 std::bitset<128> DecW = eat16Bytes(Bytes);
585 if (tryDecodeInst(Table: DecoderTableGFX1250128, MI, Inst: DecW, Address, Comments&: CS))
586 break;
587 Bytes = Bytes_.slice(N: 0, M: MaxInstBytesNum);
588 }
589
590 if (isGFX11Plus() && Bytes.size() >= 12) {
591 std::bitset<96> DecW = eat12Bytes(Bytes);
592
593 if (isGFX11() &&
594 tryDecodeInst(Table1: DecoderTableGFX1196, Table2: DecoderTableGFX11_FAKE1696, MI,
595 Inst: DecW, Address, Comments&: CS))
596 break;
597
598 if (isGFX1250() &&
599 tryDecodeInst(Table1: DecoderTableGFX125096, Table2: DecoderTableGFX1250_FAKE1696, MI,
600 Inst: DecW, Address, Comments&: CS))
601 break;
602
603 if (isGFX12() &&
604 tryDecodeInst(Table1: DecoderTableGFX1296, Table2: DecoderTableGFX12_FAKE1696, MI,
605 Inst: DecW, Address, Comments&: CS))
606 break;
607
608 if (isGFX12() &&
609 tryDecodeInst(Table: DecoderTableGFX12W6496, MI, Inst: DecW, Address, Comments&: CS))
610 break;
611
612 if (isGFX13() &&
613 tryDecodeInst(Table1: DecoderTableGFX1396, Table2: DecoderTableGFX13_FAKE1696, MI,
614 Inst: DecW, Address, Comments&: CS))
615 break;
616
617 if (STI.hasFeature(Feature: AMDGPU::Feature64BitLiterals)) {
618 // Return 8 bytes for a potential literal.
619 Bytes = Bytes_.slice(N: 4, M: MaxInstBytesNum - 4);
620
621 if (isGFX1250() &&
622 tryDecodeInst(Table: DecoderTableGFX125096, MI, Inst: DecW, Address, Comments&: CS))
623 break;
624 }
625
626 // Reinitialize Bytes
627 Bytes = Bytes_.slice(N: 0, M: MaxInstBytesNum);
628
629 } else if (Bytes.size() >= 16 &&
630 STI.hasFeature(Feature: AMDGPU::FeatureGFX950Insts)) {
631 std::bitset<128> DecW = eat16Bytes(Bytes);
632 if (tryDecodeInst(Table: DecoderTableGFX940128, MI, Inst: DecW, Address, Comments&: CS))
633 break;
634
635 // Reinitialize Bytes
636 Bytes = Bytes_.slice(N: 0, M: MaxInstBytesNum);
637 }
638
639 if (Bytes.size() >= 8) {
640 const uint64_t QW = eatBytes<uint64_t>(Bytes);
641
642 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX10_BEncoding) &&
643 tryDecodeInst(Table: DecoderTableGFX10_B64, MI, Inst: QW, Address, Comments&: CS))
644 break;
645
646 if (STI.hasFeature(Feature: AMDGPU::FeatureUnpackedD16VMem) &&
647 tryDecodeInst(Table: DecoderTableGFX80_UNPACKED64, MI, Inst: QW, Address, Comments&: CS))
648 break;
649
650 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX950Insts) &&
651 tryDecodeInst(Table: DecoderTableGFX95064, MI, Inst: QW, Address, Comments&: CS))
652 break;
653
654 // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
655 // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
656 // table first so we print the correct name.
657 if (STI.hasFeature(Feature: AMDGPU::FeatureFmaMixInsts) &&
658 tryDecodeInst(Table: DecoderTableGFX9_DL64, MI, Inst: QW, Address, Comments&: CS))
659 break;
660
661 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX940Insts) &&
662 tryDecodeInst(Table: DecoderTableGFX94064, MI, Inst: QW, Address, Comments&: CS))
663 break;
664
665 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX90AInsts) &&
666 tryDecodeInst(Table: DecoderTableGFX90A64, MI, Inst: QW, Address, Comments&: CS))
667 break;
668
669 if ((isVI() || isGFX9()) &&
670 tryDecodeInst(Table: DecoderTableGFX864, MI, Inst: QW, Address, Comments&: CS))
671 break;
672
673 if (isGFX9() && tryDecodeInst(Table: DecoderTableGFX964, MI, Inst: QW, Address, Comments&: CS))
674 break;
675
676 if (isGFX10() && tryDecodeInst(Table: DecoderTableGFX1064, MI, Inst: QW, Address, Comments&: CS))
677 break;
678
679 if (isGFX1250() &&
680 tryDecodeInst(Table1: DecoderTableGFX125064, Table2: DecoderTableGFX1250_FAKE1664, MI,
681 Inst: QW, Address, Comments&: CS))
682 break;
683
684 if (isGFX12() &&
685 tryDecodeInst(Table1: DecoderTableGFX1264, Table2: DecoderTableGFX12_FAKE1664, MI, Inst: QW,
686 Address, Comments&: CS))
687 break;
688
689 if (isGFX1170() &&
690 tryDecodeInst(Table: DecoderTableGFX117064, MI, Inst: QW, Address, Comments&: CS))
691 break;
692
693 if (isGFX11() &&
694 tryDecodeInst(Table1: DecoderTableGFX1164, Table2: DecoderTableGFX11_FAKE1664, MI, Inst: QW,
695 Address, Comments&: CS))
696 break;
697
698 if (isGFX1170() &&
699 tryDecodeInst(Table: DecoderTableGFX1170W6464, MI, Inst: QW, Address, Comments&: CS))
700 break;
701
702 if (isGFX11() &&
703 tryDecodeInst(Table: DecoderTableGFX11W6464, MI, Inst: QW, Address, Comments&: CS))
704 break;
705
706 if (isGFX12() &&
707 tryDecodeInst(Table: DecoderTableGFX12W6464, MI, Inst: QW, Address, Comments&: CS))
708 break;
709
710 if (isGFX13() &&
711 tryDecodeInst(Table1: DecoderTableGFX1364, Table2: DecoderTableGFX13_FAKE1664, MI, Inst: QW,
712 Address, Comments&: CS))
713 break;
714
715 // Reinitialize Bytes
716 Bytes = Bytes_.slice(N: 0, M: MaxInstBytesNum);
717 }
718
719 // Try decode 32-bit instruction
720 if (Bytes.size() >= 4) {
721 const uint32_t DW = eatBytes<uint32_t>(Bytes);
722
723 if ((isVI() || isGFX9()) &&
724 tryDecodeInst(Table: DecoderTableGFX832, MI, Inst: DW, Address, Comments&: CS))
725 break;
726
727 if (tryDecodeInst(Table: DecoderTableAMDGPU32, MI, Inst: DW, Address, Comments&: CS))
728 break;
729
730 if (isGFX9() && tryDecodeInst(Table: DecoderTableGFX932, MI, Inst: DW, Address, Comments&: CS))
731 break;
732
733 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX950Insts) &&
734 tryDecodeInst(Table: DecoderTableGFX95032, MI, Inst: DW, Address, Comments&: CS))
735 break;
736
737 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX90AInsts) &&
738 tryDecodeInst(Table: DecoderTableGFX90A32, MI, Inst: DW, Address, Comments&: CS))
739 break;
740
741 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX10_BEncoding) &&
742 tryDecodeInst(Table: DecoderTableGFX10_B32, MI, Inst: DW, Address, Comments&: CS))
743 break;
744
745 if (isGFX10() && tryDecodeInst(Table: DecoderTableGFX1032, MI, Inst: DW, Address, Comments&: CS))
746 break;
747
748 if (isGFX11() &&
749 tryDecodeInst(Table1: DecoderTableGFX1132, Table2: DecoderTableGFX11_FAKE1632, MI, Inst: DW,
750 Address, Comments&: CS))
751 break;
752
753 if (isGFX1250() &&
754 tryDecodeInst(Table1: DecoderTableGFX125032, Table2: DecoderTableGFX1250_FAKE1632, MI,
755 Inst: DW, Address, Comments&: CS))
756 break;
757
758 if (isGFX12() &&
759 tryDecodeInst(Table1: DecoderTableGFX1232, Table2: DecoderTableGFX12_FAKE1632, MI, Inst: DW,
760 Address, Comments&: CS))
761 break;
762
763 if (isGFX13() &&
764 tryDecodeInst(Table1: DecoderTableGFX1332, Table2: DecoderTableGFX13_FAKE1632, MI, Inst: DW,
765 Address, Comments&: CS))
766 break;
767 }
768
769 return MCDisassembler::Fail;
770 } while (false);
771
772 DecodeStatus Status = MCDisassembler::Success;
773
774 decodeImmOperands(MI, MCII: *MCII);
775
776 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
777 if (isMacDPP(MI))
778 convertMacDPPInst(MI);
779
780 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
781 convertVOP3PDPPInst(MI);
782 else if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
783 convertVOPCDPPInst(MI); // Special VOP3 case
784 else if (AMDGPU::isVOPC64DPP(Opc: MI.getOpcode()))
785 convertVOPC64DPPInst(MI); // Special VOP3 case
786 else if (AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::dpp8) !=
787 -1)
788 convertDPP8Inst(MI);
789 else if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
790 convertVOP3DPPInst(MI); // Regular VOP3 case
791 }
792
793 convertTrue16OpSel(MI);
794
795 if (AMDGPU::isMAC(Opc: MI.getOpcode())) {
796 // Insert dummy unused src2_modifiers.
797 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
798 Name: AMDGPU::OpName::src2_modifiers);
799 }
800
801 if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
802 MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
803 // Insert dummy unused src2_modifiers.
804 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
805 Name: AMDGPU::OpName::src2_modifiers);
806 }
807
808 if ((MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
809 !AMDGPU::hasGDS(STI)) {
810 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::gds);
811 }
812
813 if (MCII->get(Opcode: MI.getOpcode()).TSFlags &
814 (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD)) {
815 int CPolPos = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
816 Name: AMDGPU::OpName::cpol);
817 if (CPolPos != -1) {
818 unsigned CPol =
819 (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
820 AMDGPU::CPol::GLC : 0;
821 if (MI.getNumOperands() <= (unsigned)CPolPos) {
822 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: CPol),
823 Name: AMDGPU::OpName::cpol);
824 } else if (CPol) {
825 MI.getOperand(i: CPolPos).setImm(MI.getOperand(i: CPolPos).getImm() | CPol);
826 }
827 }
828 }
829
830 if ((MCII->get(Opcode: MI.getOpcode()).TSFlags &
831 (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
832 (STI.hasFeature(Feature: AMDGPU::FeatureGFX90AInsts))) {
833 // GFX90A lost TFE, its place is occupied by ACC.
834 int TFEOpIdx =
835 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::tfe);
836 if (TFEOpIdx != -1) {
837 auto *TFEIter = MI.begin();
838 std::advance(i&: TFEIter, n: TFEOpIdx);
839 MI.insert(I: TFEIter, Op: MCOperand::createImm(Val: 0));
840 }
841 }
842
843 // Validate buffer instruction offsets for GFX12+ - must not be a negative.
844 if (isGFX12Plus() && isBufferInstruction(MI)) {
845 int OffsetIdx =
846 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::offset);
847 if (OffsetIdx != -1) {
848 uint32_t Imm = MI.getOperand(i: OffsetIdx).getImm();
849 int64_t SignedOffset = SignExtend64<24>(x: Imm);
850 if (SignedOffset < 0)
851 return MCDisassembler::Fail;
852 }
853 }
854
855 if (MCII->get(Opcode: MI.getOpcode()).TSFlags &
856 (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) {
857 int SWZOpIdx =
858 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::swz);
859 if (SWZOpIdx != -1) {
860 auto *SWZIter = MI.begin();
861 std::advance(i&: SWZIter, n: SWZOpIdx);
862 MI.insert(I: SWZIter, Op: MCOperand::createImm(Val: 0));
863 }
864 }
865
866 const MCInstrDesc &Desc = MCII->get(Opcode: MI.getOpcode());
867 if (Desc.TSFlags & SIInstrFlags::MIMG) {
868 int VAddr0Idx =
869 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::vaddr0);
870 int RsrcIdx =
871 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::srsrc);
872 unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
873 if (VAddr0Idx >= 0 && NSAArgs > 0) {
874 unsigned NSAWords = (NSAArgs + 3) / 4;
875 if (Bytes.size() < 4 * NSAWords)
876 return MCDisassembler::Fail;
877 for (unsigned i = 0; i < NSAArgs; ++i) {
878 const unsigned VAddrIdx = VAddr0Idx + 1 + i;
879 auto VAddrRCID =
880 MCII->getOpRegClassID(OpInfo: Desc.operands()[VAddrIdx], HwModeId: HwModeRegClass);
881 MI.insert(I: MI.begin() + VAddrIdx, Op: createRegOperand(RegClassID: VAddrRCID, Val: Bytes[i]));
882 }
883 Bytes = Bytes.slice(N: 4 * NSAWords);
884 }
885
886 convertMIMGInst(MI);
887 }
888
889 if (MCII->get(Opcode: MI.getOpcode()).TSFlags &
890 (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))
891 convertMIMGInst(MI);
892
893 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
894 convertEXPInst(MI);
895
896 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
897 convertVINTERPInst(MI);
898
899 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
900 convertSDWAInst(MI);
901
902 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::IsMAI)
903 convertMAIInst(MI);
904
905 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::IsWMMA)
906 convertWMMAInst(MI);
907
908 int VDstIn_Idx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
909 Name: AMDGPU::OpName::vdst_in);
910 if (VDstIn_Idx != -1) {
911 int Tied = MCII->get(Opcode: MI.getOpcode()).getOperandConstraint(OpNum: VDstIn_Idx,
912 Constraint: MCOI::OperandConstraint::TIED_TO);
913 if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
914 !MI.getOperand(i: VDstIn_Idx).isReg() ||
915 MI.getOperand(i: VDstIn_Idx).getReg() != MI.getOperand(i: Tied).getReg())) {
916 if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
917 MI.erase(I: &MI.getOperand(i: VDstIn_Idx));
918 insertNamedMCOperand(MI,
919 Op: MCOperand::createReg(Reg: MI.getOperand(i: Tied).getReg()),
920 Name: AMDGPU::OpName::vdst_in);
921 }
922 }
923
924 bool IsSOPK = MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
925 if (AMDGPU::hasNamedOperand(Opcode: MI.getOpcode(), NamedIdx: AMDGPU::OpName::imm) && !IsSOPK)
926 convertFMAanyK(MI);
927
928 // Some VOPC instructions, e.g., v_cmpx_f_f64, use VOP3 encoding and
929 // have EXEC as implicit destination. Issue a warning if encoding for
930 // vdst is not EXEC.
931 if ((MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::VOP3) &&
932 MCII->get(Opcode: MI.getOpcode()).getNumDefs() == 0 &&
933 MCII->get(Opcode: MI.getOpcode()).hasImplicitDefOfPhysReg(Reg: AMDGPU::EXEC)) {
934 auto ExecEncoding = MRI.getEncodingValue(Reg: AMDGPU::EXEC_LO);
935 if (Bytes_[0] != ExecEncoding)
936 Status = MCDisassembler::SoftFail;
937 }
938
939 Size = MaxInstBytesNum - Bytes.size();
940 return Status;
941}
942
943void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
944 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX11Insts)) {
945 // The MCInst still has these fields even though they are no longer encoded
946 // in the GFX11 instruction.
947 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::vm);
948 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::compr);
949 }
950}
951
952void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
953 convertTrue16OpSel(MI);
954 if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
955 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
956 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
957 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
958 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
959 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
960 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
961 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
962 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
963 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
964 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
965 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
966 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
967 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
968 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
969 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12) {
970 // The MCInst has this field that is not directly encoded in the
971 // instruction.
972 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::op_sel);
973 }
974}
975
976void AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
977 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX9) ||
978 STI.hasFeature(Feature: AMDGPU::FeatureGFX10)) {
979 if (AMDGPU::hasNamedOperand(Opcode: MI.getOpcode(), NamedIdx: AMDGPU::OpName::sdst))
980 // VOPC - insert clamp
981 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::clamp);
982 } else if (STI.hasFeature(Feature: AMDGPU::FeatureVolcanicIslands)) {
983 int SDst = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::sdst);
984 if (SDst != -1) {
985 // VOPC - insert VCC register as sdst
986 insertNamedMCOperand(MI, Op: createRegOperand(Reg: AMDGPU::VCC),
987 Name: AMDGPU::OpName::sdst);
988 } else {
989 // VOP1/2 - insert omod if present in instruction
990 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::omod);
991 }
992 }
993}
994
995/// Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the
996/// appropriate subregister for the used format width.
997static void adjustMFMA_F8F6F4OpRegClass(const MCRegisterInfo &MRI,
998 MCOperand &MO, uint8_t NumRegs) {
999 switch (NumRegs) {
1000 case 4:
1001 return MO.setReg(MRI.getSubReg(Reg: MO.getReg(), Idx: AMDGPU::sub0_sub1_sub2_sub3));
1002 case 6:
1003 return MO.setReg(
1004 MRI.getSubReg(Reg: MO.getReg(), Idx: AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));
1005 case 8:
1006 if (MCRegister NewReg = MRI.getSubReg(
1007 Reg: MO.getReg(), Idx: AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7)) {
1008 MO.setReg(NewReg);
1009 }
1010 return;
1011 case 12: {
1012 // There is no 384-bit subreg index defined.
1013 MCRegister BaseReg = MRI.getSubReg(Reg: MO.getReg(), Idx: AMDGPU::sub0);
1014 MCRegister NewReg = MRI.getMatchingSuperReg(
1015 Reg: BaseReg, SubIdx: AMDGPU::sub0, RC: &MRI.getRegClass(i: AMDGPU::VReg_384RegClassID));
1016 return MO.setReg(NewReg);
1017 }
1018 case 16:
1019 // No-op in cases where one operand is still f8/bf8.
1020 return;
1021 default:
1022 llvm_unreachable("Unexpected size for mfma/wmma f8f6f4 operand");
1023 }
1024}
1025
1026/// f8f6f4 instructions have different pseudos depending on the used formats. In
1027/// the disassembler table, we only have the variants with the largest register
1028/// classes which assume using an fp8/bf8 format for both operands. The actual
1029/// register class depends on the format in blgp and cbsz operands. Adjust the
1030/// register classes depending on the used format.
1031void AMDGPUDisassembler::convertMAIInst(MCInst &MI) const {
1032 int BlgpIdx =
1033 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::blgp);
1034 if (BlgpIdx == -1)
1035 return;
1036
1037 int CbszIdx =
1038 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::cbsz);
1039
1040 unsigned CBSZ = MI.getOperand(i: CbszIdx).getImm();
1041 unsigned BLGP = MI.getOperand(i: BlgpIdx).getImm();
1042
1043 const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
1044 AMDGPU::getMFMA_F8F6F4_WithFormatArgs(CBSZ, BLGP, F8F8Opcode: MI.getOpcode());
1045 if (!AdjustedRegClassOpcode ||
1046 AdjustedRegClassOpcode->Opcode == MI.getOpcode())
1047 return;
1048
1049 MI.setOpcode(AdjustedRegClassOpcode->Opcode);
1050 int Src0Idx =
1051 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::src0);
1052 int Src1Idx =
1053 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::src1);
1054 adjustMFMA_F8F6F4OpRegClass(MRI, MO&: MI.getOperand(i: Src0Idx),
1055 NumRegs: AdjustedRegClassOpcode->NumRegsSrcA);
1056 adjustMFMA_F8F6F4OpRegClass(MRI, MO&: MI.getOperand(i: Src1Idx),
1057 NumRegs: AdjustedRegClassOpcode->NumRegsSrcB);
1058}
1059
1060void AMDGPUDisassembler::convertWMMAInst(MCInst &MI) const {
1061 int FmtAIdx =
1062 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::matrix_a_fmt);
1063 if (FmtAIdx == -1)
1064 return;
1065
1066 int FmtBIdx =
1067 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::matrix_b_fmt);
1068
1069 unsigned FmtA = MI.getOperand(i: FmtAIdx).getImm();
1070 unsigned FmtB = MI.getOperand(i: FmtBIdx).getImm();
1071
1072 const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
1073 AMDGPU::getWMMA_F8F6F4_WithFormatArgs(FmtA, FmtB, F8F8Opcode: MI.getOpcode());
1074 if (!AdjustedRegClassOpcode ||
1075 AdjustedRegClassOpcode->Opcode == MI.getOpcode())
1076 return;
1077
1078 MI.setOpcode(AdjustedRegClassOpcode->Opcode);
1079 int Src0Idx =
1080 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::src0);
1081 int Src1Idx =
1082 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::src1);
1083 adjustMFMA_F8F6F4OpRegClass(MRI, MO&: MI.getOperand(i: Src0Idx),
1084 NumRegs: AdjustedRegClassOpcode->NumRegsSrcA);
1085 adjustMFMA_F8F6F4OpRegClass(MRI, MO&: MI.getOperand(i: Src1Idx),
1086 NumRegs: AdjustedRegClassOpcode->NumRegsSrcB);
1087}
1088
// Bitfields of per-source modifier bits gathered from the src*_modifiers
// operands by collectVOPModifiers(). Each field holds one bit per source
// operand (bit J corresponds to srcJ); for VOP3, OpSel bit 3 carries the
// destination op_sel bit.
struct VOPModifiers {
  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;
};
1095
1096// Reconstruct values of VOP3/VOP3P operands such as op_sel.
1097// Note that these values do not affect disassembler output,
1098// so this is only necessary for consistency with src_modifiers.
1099static VOPModifiers collectVOPModifiers(const MCInst &MI,
1100 bool IsVOP3P = false) {
1101 VOPModifiers Modifiers;
1102 unsigned Opc = MI.getOpcode();
1103 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
1104 AMDGPU::OpName::src1_modifiers,
1105 AMDGPU::OpName::src2_modifiers};
1106 for (int J = 0; J < 3; ++J) {
1107 int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: ModOps[J]);
1108 if (OpIdx == -1)
1109 continue;
1110
1111 unsigned Val = MI.getOperand(i: OpIdx).getImm();
1112
1113 Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
1114 if (IsVOP3P) {
1115 Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
1116 Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
1117 Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
1118 } else if (J == 0) {
1119 Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
1120 }
1121 }
1122
1123 return Modifiers;
1124}
1125
1126// Instructions decode the op_sel/suffix bits into the src_modifier
1127// operands. Copy those bits into the src operands for true16 VGPRs.
1128void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const {
1129 const unsigned Opc = MI.getOpcode();
1130 const MCRegisterClass &ConversionRC =
1131 MRI.getRegClass(i: AMDGPU::VGPR_16RegClassID);
1132 constexpr std::array<std::tuple<AMDGPU::OpName, AMDGPU::OpName, unsigned>, 4>
1133 OpAndOpMods = {._M_elems: {{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
1134 SISrcMods::OP_SEL_0},
1135 {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
1136 SISrcMods::OP_SEL_0},
1137 {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
1138 SISrcMods::OP_SEL_0},
1139 {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
1140 SISrcMods::DST_OP_SEL}}};
1141 for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
1142 int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: OpName);
1143 int OpModsIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: OpModsName);
1144 if (OpIdx == -1 || OpModsIdx == -1)
1145 continue;
1146 MCOperand &Op = MI.getOperand(i: OpIdx);
1147 if (!Op.isReg())
1148 continue;
1149 if (!ConversionRC.contains(Reg: Op.getReg()))
1150 continue;
1151 unsigned OpEnc = MRI.getEncodingValue(Reg: Op.getReg());
1152 const MCOperand &OpMods = MI.getOperand(i: OpModsIdx);
1153 unsigned ModVal = OpMods.getImm();
1154 if (ModVal & OpSelMask) { // isHi
1155 unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
1156 Op.setReg(ConversionRC.getRegister(i: RegIdx * 2 + 1));
1157 }
1158 }
1159}
1160
1161// MAC opcodes have special old and src2 operands.
1162// src2 is tied to dst, while old is not tied (but assumed to be).
1163bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
1164 constexpr int DST_IDX = 0;
1165 auto Opcode = MI.getOpcode();
1166 const auto &Desc = MCII->get(Opcode);
1167 auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::old);
1168
1169 if (OldIdx != -1 && Desc.getOperandConstraint(
1170 OpNum: OldIdx, Constraint: MCOI::OperandConstraint::TIED_TO) == -1) {
1171 assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
1172 assert(Desc.getOperandConstraint(
1173 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
1174 MCOI::OperandConstraint::TIED_TO) == DST_IDX);
1175 (void)DST_IDX;
1176 return true;
1177 }
1178
1179 return false;
1180}
1181
1182// Create dummy old operand and insert dummy unused src2_modifiers
1183void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
1184 assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
1185 insertNamedMCOperand(MI, Op: MCOperand::createReg(Reg: 0), Name: AMDGPU::OpName::old);
1186 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
1187 Name: AMDGPU::OpName::src2_modifiers);
1188}
1189
1190void AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
1191 unsigned Opc = MI.getOpcode();
1192
1193 int VDstInIdx =
1194 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::vdst_in);
1195 if (VDstInIdx != -1)
1196 insertNamedMCOperand(MI, Op: MI.getOperand(i: 0), Name: AMDGPU::OpName::vdst_in);
1197
1198 unsigned DescNumOps = MCII->get(Opcode: Opc).getNumOperands();
1199 if (MI.getNumOperands() < DescNumOps &&
1200 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel)) {
1201 convertTrue16OpSel(MI);
1202 auto Mods = collectVOPModifiers(MI);
1203 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.OpSel),
1204 Name: AMDGPU::OpName::op_sel);
1205 } else {
1206 // Insert dummy unused src modifiers.
1207 if (MI.getNumOperands() < DescNumOps &&
1208 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::src0_modifiers))
1209 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
1210 Name: AMDGPU::OpName::src0_modifiers);
1211
1212 if (MI.getNumOperands() < DescNumOps &&
1213 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::src1_modifiers))
1214 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
1215 Name: AMDGPU::OpName::src1_modifiers);
1216 }
1217}
1218
1219void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
1220 convertTrue16OpSel(MI);
1221
1222 int VDstInIdx =
1223 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::vdst_in);
1224 if (VDstInIdx != -1)
1225 insertNamedMCOperand(MI, Op: MI.getOperand(i: 0), Name: AMDGPU::OpName::vdst_in);
1226
1227 unsigned Opc = MI.getOpcode();
1228 unsigned DescNumOps = MCII->get(Opcode: Opc).getNumOperands();
1229 if (MI.getNumOperands() < DescNumOps &&
1230 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel)) {
1231 auto Mods = collectVOPModifiers(MI);
1232 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.OpSel),
1233 Name: AMDGPU::OpName::op_sel);
1234 }
1235}
1236
1237// Given a wide tuple \p Reg check if it will overflow 256 registers.
1238// \returns \p Reg on success or NoRegister otherwise.
1239static MCRegister CheckVGPROverflow(MCRegister Reg, const MCRegisterClass &RC,
1240 const MCRegisterInfo &MRI) {
1241 unsigned NumRegs = RC.getSizeInBits() / 32;
1242 MCRegister Sub0 = MRI.getSubReg(Reg, Idx: AMDGPU::sub0);
1243 if (!Sub0)
1244 return Reg;
1245
1246 MCRegister BaseReg;
1247 if (MRI.getRegClass(i: AMDGPU::VGPR_32RegClassID).contains(Reg: Sub0))
1248 BaseReg = AMDGPU::VGPR0;
1249 else if (MRI.getRegClass(i: AMDGPU::AGPR_32RegClassID).contains(Reg: Sub0))
1250 BaseReg = AMDGPU::AGPR0;
1251
1252 assert(BaseReg && "Only vector registers expected");
1253
1254 return (Sub0 - BaseReg + NumRegs <= 256) ? Reg : MCRegister();
1255}
1256
// Note that before gfx10, the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show address as if it
// has 1 dword, which could be not really so.
//
// This pass fixes up the decoded MCInst: it widens vdata/vdst to the number
// of dwords implied by dmask/tfe/d16, widens (or trims, for NSA) the address
// operands to the size implied by dim/a16, and switches to the table opcode
// variant that matches those sizes.
void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
  auto TSFlags = MCII->get(Opcode: MI.getOpcode()).TSFlags;

  // vdst is only present on atomics (holds the returned pre-op value).
  int VDstIdx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
                                           Name: AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
                                            Name: AMDGPU::OpName::vdata);
  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::vaddr0);
  // MIMG names its resource operand 'srsrc'; VIMAGE/VSAMPLE use 'rsrc'.
  AMDGPU::OpName RsrcOpName = (TSFlags & SIInstrFlags::MIMG)
                                  ? AMDGPU::OpName::srsrc
                                  : AMDGPU::OpName::rsrc;
  int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: RsrcOpName);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
                                            Name: AMDGPU::OpName::dmask);

  int TFEIdx   = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
                                            Name: AMDGPU::OpName::tfe);
  int D16Idx   = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
                                            Name: AMDGPU::OpName::d16);

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc: MI.getOpcode());
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(BaseOpcode: Info->BaseOpcode);

  assert(VDataIdx != -1);
  if (BaseOpcode->BVH) {
    // Add A16 operand for intersect_ray instructions
    addOperand(Inst&: MI, Opnd: MCOperand::createImm(Val: BaseOpcode->A16));
    return;
  }

  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
  bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
  bool IsNSA = false;
  bool IsPartialNSA = false;
  unsigned AddrSize = Info->VAddrDwords;

  if (isGFX10Plus()) {
    // On GFX10+ the address size can be derived from dim/a16.
    unsigned DimIdx =
        AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::dim);
    int A16Idx =
        AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::a16);
    const AMDGPU::MIMGDimInfo *Dim =
        AMDGPU::getMIMGDimInfoByEncoding(DimEnc: MI.getOperand(i: DimIdx).getImm());
    const bool IsA16 = (A16Idx != -1 && MI.getOperand(i: A16Idx).getImm());

    AddrSize =
        AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, IsG16Supported: AMDGPU::hasG16(STI));

    // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
    // VIMAGE insts other than BVH never use vaddr4.
    IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
            Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
    if (!IsNSA) {
      if (!IsVSample && AddrSize > 12)
        AddrSize = 16;
    } else {
      if (AddrSize > Info->VAddrDwords) {
        if (!STI.hasFeature(Feature: AMDGPU::FeaturePartialNSAEncoding)) {
          // The NSA encoding does not contain enough operands for the
          // combination of base opcode / dimension. Should this be an error?
          return;
        }
        IsPartialNSA = true;
      }
    }
  }

  // Data size in dwords: one per enabled dmask channel (gather4 always
  // returns 4), halved for packed d16, plus one if tfe writes a status dword.
  unsigned DMask = MI.getOperand(i: DMaskIdx).getImm() & 0xf;
  unsigned DstSize = IsGather4 ? 4 : std::max(a: llvm::popcount(Value: DMask), b: 1);

  bool D16 = D16Idx >= 0 && MI.getOperand(i: D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }

  if (TFEIdx != -1 && MI.getOperand(i: TFEIdx).getImm())
    DstSize += 1;

  // Decoded sizes already match the opcode variant - nothing to convert.
  if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
    return;

  int NewOpcode =
      AMDGPU::getMIMGOpcode(BaseOpcode: Info->BaseOpcode, MIMGEncoding: Info->MIMGEncoding, VDataDwords: DstSize, VAddrDwords: AddrSize);
  if (NewOpcode == -1)
    return;

  // Widen the register to the correct number of enabled channels.
  MCRegister NewVdata;
  if (DstSize != Info->VDataDwords) {
    auto DataRCID = MCII->getOpRegClassID(
        OpInfo: MCII->get(Opcode: NewOpcode).operands()[VDataIdx], HwModeId: HwModeRegClass);

    // Get first subregister of VData
    MCRegister Vdata0 = MI.getOperand(i: VDataIdx).getReg();
    MCRegister VdataSub0 = MRI.getSubReg(Reg: Vdata0, Idx: AMDGPU::sub0);
    Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;

    const MCRegisterClass &NewRC = MRI.getRegClass(i: DataRCID);
    NewVdata = MRI.getMatchingSuperReg(Reg: Vdata0, SubIdx: AMDGPU::sub0, RC: &NewRC);
    NewVdata = CheckVGPROverflow(Reg: NewVdata, RC: NewRC, MRI);
    if (!NewVdata) {
      // It's possible to encode this such that the low register + enabled
      // components exceeds the register count.
      return;
    }
  }

  // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
  // If using partial NSA on GFX11+ widen last address register.
  int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
  MCRegister NewVAddrSA;
  if (STI.hasFeature(Feature: AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
      AddrSize != Info->VAddrDwords) {
    MCRegister VAddrSA = MI.getOperand(i: VAddrSAIdx).getReg();
    MCRegister VAddrSubSA = MRI.getSubReg(Reg: VAddrSA, Idx: AMDGPU::sub0);
    VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

    auto AddrRCID = MCII->getOpRegClassID(
        OpInfo: MCII->get(Opcode: NewOpcode).operands()[VAddrSAIdx], HwModeId: HwModeRegClass);

    const MCRegisterClass &NewRC = MRI.getRegClass(i: AddrRCID);
    NewVAddrSA = MRI.getMatchingSuperReg(Reg: VAddrSA, SubIdx: AMDGPU::sub0, RC: &NewRC);
    NewVAddrSA = CheckVGPROverflow(Reg: NewVAddrSA, RC: NewRC, MRI);
    if (!NewVAddrSA)
      return;
  }

  MI.setOpcode(NewOpcode);

  if (NewVdata != AMDGPU::NoRegister) {
    MI.getOperand(i: VDataIdx) = MCOperand::createReg(Reg: NewVdata);

    if (IsAtomic) {
      // Atomic operations have an additional operand (a copy of data)
      MI.getOperand(i: VDstIdx) = MCOperand::createReg(Reg: NewVdata);
    }
  }

  if (NewVAddrSA) {
    MI.getOperand(i: VAddrSAIdx) = MCOperand::createReg(Reg: NewVAddrSA);
  } else if (IsNSA) {
    // Full NSA with a smaller address: drop the now-unused trailing vaddrs.
    assert(AddrSize <= Info->VAddrDwords);
    MI.erase(First: MI.begin() + VAddr0Idx + AddrSize,
             Last: MI.begin() + VAddr0Idx + Info->VAddrDwords);
  }
}
1411
1412// Opsel and neg bits are used in src_modifiers and standalone operands. Autogen
1413// decoder only adds to src_modifiers, so manually add the bits to the other
1414// operands.
1415void AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
1416 unsigned Opc = MI.getOpcode();
1417 unsigned DescNumOps = MCII->get(Opcode: Opc).getNumOperands();
1418 auto Mods = collectVOPModifiers(MI, IsVOP3P: true);
1419
1420 if (MI.getNumOperands() < DescNumOps &&
1421 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::vdst_in))
1422 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::vdst_in);
1423
1424 if (MI.getNumOperands() < DescNumOps &&
1425 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel))
1426 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.OpSel),
1427 Name: AMDGPU::OpName::op_sel);
1428 if (MI.getNumOperands() < DescNumOps &&
1429 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel_hi))
1430 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.OpSelHi),
1431 Name: AMDGPU::OpName::op_sel_hi);
1432 if (MI.getNumOperands() < DescNumOps &&
1433 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::neg_lo))
1434 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.NegLo),
1435 Name: AMDGPU::OpName::neg_lo);
1436 if (MI.getNumOperands() < DescNumOps &&
1437 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::neg_hi))
1438 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.NegHi),
1439 Name: AMDGPU::OpName::neg_hi);
1440}
1441
1442// Create dummy old operand and insert optional operands
1443void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
1444 unsigned Opc = MI.getOpcode();
1445 unsigned DescNumOps = MCII->get(Opcode: Opc).getNumOperands();
1446
1447 if (MI.getNumOperands() < DescNumOps &&
1448 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::old))
1449 insertNamedMCOperand(MI, Op: MCOperand::createReg(Reg: 0), Name: AMDGPU::OpName::old);
1450
1451 if (MI.getNumOperands() < DescNumOps &&
1452 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::src0_modifiers))
1453 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
1454 Name: AMDGPU::OpName::src0_modifiers);
1455
1456 if (MI.getNumOperands() < DescNumOps &&
1457 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::src1_modifiers))
1458 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
1459 Name: AMDGPU::OpName::src1_modifiers);
1460}
1461
1462void AMDGPUDisassembler::convertVOPC64DPPInst(MCInst &MI) const {
1463 unsigned Opc = MI.getOpcode();
1464 unsigned DescNumOps = MCII->get(Opcode: Opc).getNumOperands();
1465
1466 convertTrue16OpSel(MI);
1467
1468 if (MI.getNumOperands() < DescNumOps &&
1469 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel)) {
1470 VOPModifiers Mods = collectVOPModifiers(MI);
1471 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.OpSel),
1472 Name: AMDGPU::OpName::op_sel);
1473 }
1474}
1475
// Attach the literal decoded earlier in this instruction as the named immX
// operand (used by the FMA*K-style opcodes this converter is called for).
void AMDGPUDisassembler::convertFMAanyK(MCInst &MI) const {
  assert(HasLiteral && "Should have decoded a literal");
  insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Literal), Name: AMDGPU::OpName::immX);
}
1480
1481const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
1482 return getContext().getRegisterInfo()->
1483 getRegClassName(Class: &AMDGPUMCRegisterClasses[RegClassID]);
1484}
1485
// Report a decoding problem on the disassembler's comment stream and return
// an empty (invalid) operand; \p V is the offending encoded value, currently
// unused in the message.
inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}
1495
// Wrap \p Reg in a register operand, first mapping it to the MC register
// appropriate for the current subtarget.
inline MCOperand AMDGPUDisassembler::createRegOperand(MCRegister Reg) const {
  return MCOperand::createReg(Reg: AMDGPU::getMCReg(Reg, STI));
}
1499
1500inline
1501MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
1502 unsigned Val) const {
1503 const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
1504 if (Val >= RegCl.getNumRegs())
1505 return errOperand(V: Val, ErrMsg: Twine(getRegClassName(RegClassID)) +
1506 ": unknown register " + Twine(Val));
1507 return createRegOperand(Reg: RegCl.getRegister(i: Val));
1508}
1509
// Build a scalar register operand from an encoded value. Wide SGPR/TTMP
// tuples are encoded as aligned dword indices, so the raw value is shifted
// right by the tuple's alignment (log2 dwords) to get the class-relative
// register index.
inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI - 102
  // Valery: here we accepting as much as we can, let assembler sort it out
  int shift = 0;
  switch (SRegClassID) {
  // 32-bit classes: encoding is the register index itself.
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  // 64-bit classes: encoding must be 2-dword aligned.
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  // 96-bit and wider classes: encoding must be 4-dword aligned.
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::TTMP_96RegClassID:
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
    // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
    // this bundle?
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::TTMP_288RegClassID:
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::TTMP_320RegClassID:
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::TTMP_352RegClassID:
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::TTMP_384RegClassID:
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

  // Misalignment is tolerated (truncated below) but worth flagging.
  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(RegClassID: SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(RegClassID: SRegClassID, Val: Val >> shift);
}
1559
1560MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
1561 bool IsHi) const {
1562 unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
1563 return createRegOperand(RegClassID: AMDGPU::VGPR_16RegClassID, Val: RegIdxInVGPR16);
1564}
1565
// Decode Literals for insts which always have a literal in the encoding
//
// Records \p Val as this instruction's literal; if a literal was already
// decoded (only legal for VOPD, which can reference the shared literal from
// both halves) it must match, otherwise an error operand is returned.
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
  if (HasLiteral) {
    assert(
        AMDGPU::hasVOPD(STI) &&
        "Should only decode multiple kimm with VOPD, check VSrc operand types");
    if (Literal != Val)
      return errOperand(V: Val, ErrMsg: "More than one unique literal is illegal");
  }
  HasLiteral = true;
  Literal = Val;
  return MCOperand::createImm(Val: Literal);
}
1580
1581MCOperand
1582AMDGPUDisassembler::decodeMandatoryLiteral64Constant(uint64_t Val) const {
1583 if (HasLiteral) {
1584 if (Literal != Val)
1585 return errOperand(V: Val, ErrMsg: "More than one unique literal is illegal");
1586 }
1587 HasLiteral = true;
1588 Literal = Val;
1589
1590 bool UseLit64 = Hi_32(Value: Literal) == 0;
1591 return UseLit64 ? MCOperand::createExpr(Val: AMDGPUMCExpr::createLit(
1592 Lit: LitModifier::Lit64, Value: Literal, Ctx&: getContext()))
1593 : MCOperand::createImm(Val: Literal);
1594}
1595
// Decode the 32-bit literal dword trailing the instruction. The dword is
// consumed from the byte stream once and cached in HasLiteral/Literal so
// every literal operand of the instruction sees the same value. OpDesc
// selects how the raw 32 bits are interpreted for this operand.
MCOperand
AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc,
                                          const MCOperandInfo &OpDesc) const {
  // For now all literal constants are supposed to be unsigned integer
  // ToDo: deal with signed/unsigned 64-bit integer constants
  // ToDo: deal with float/double constants
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(V: 0, ErrMsg: "cannot read literal, inst bytes left " +
                                Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = eatBytes<uint32_t>(Bytes);
  }

  // For disassembling always assume all inline constants are available.
  bool HasInv2Pi = true;

  // Invalid instruction codes may contain literals for inline-only
  // operands, so we support them here as well.
  int64_t Val = Literal;
  bool UseLit = false;
  switch (OpDesc.OperandType) {
  default:
    llvm_unreachable("Unexpected operand type!");
  case AMDGPU::OPERAND_REG_IMM_BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
    UseLit = AMDGPU::isInlinableLiteralBF16(Literal: Val, HasInv2Pi);
    break;
  case AMDGPU::OPERAND_REG_IMM_V2BF16:
    UseLit = AMDGPU::isInlinableLiteralV2BF16(Literal: Val);
    break;
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    UseLit = AMDGPU::isInlinableLiteralFP16(Literal: Val, HasInv2Pi);
    break;
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    UseLit = AMDGPU::isInlinableLiteralV2F16(Literal: Val);
    break;
  case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
    UseLit = AMDGPU::isPKFMACF16InlineConstant(Literal: Val, IsGFX11Plus: isGFX11Plus());
    break;
  case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
    // Never printed as an inline constant; keep the plain immediate.
    break;
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    UseLit = AMDGPU::isInlinableLiteralI16(Literal: Val, HasInv2Pi);
    break;
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
    UseLit = AMDGPU::isInlinableLiteralV2I16(Literal: Val);
    break;
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_KIMM32:
    UseLit = AMDGPU::isInlinableLiteral32(Literal: Val, HasInv2Pi);
    break;
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    // The 32-bit literal supplies the high half of a 64-bit FP value;
    // shift it into place (low half is zero).
    Val <<= 32;
    break;
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    UseLit = AMDGPU::isInlinableLiteral64(Literal: Val, HasInv2Pi);
    break;
  case MCOI::OPERAND_REGISTER:
    // TODO: Disassembling V_DUAL_FMAMK_F32_X_FMAMK_F32_gfx11 hits
    // decoding a literal in a position of a register operand. Give
    // it special handling in the caller, decodeImmOperands(), instead
    // of quietly allowing it here.
    break;
  }

  // If the value would also be expressible as an inline constant, wrap it
  // in a lit() expression so it is kept as an explicit literal.
  return UseLit ? MCOperand::createExpr(Val: AMDGPUMCExpr::createLit(
                      Lit: LitModifier::Lit, Value: Val, Ctx&: getContext()))
                : MCOperand::createImm(Val);
}
1682
1683MCOperand AMDGPUDisassembler::decodeLiteral64Constant() const {
1684 assert(STI.hasFeature(AMDGPU::Feature64BitLiterals));
1685
1686 if (!HasLiteral) {
1687 if (Bytes.size() < 8) {
1688 return errOperand(V: 0, ErrMsg: "cannot read literal64, inst bytes left " +
1689 Twine(Bytes.size()));
1690 }
1691 HasLiteral = true;
1692 Literal = eatBytes<uint64_t>(Bytes);
1693 }
1694
1695 bool UseLit64 = Hi_32(Value: Literal) == 0;
1696 return UseLit64 ? MCOperand::createExpr(Val: AMDGPUMCExpr::createLit(
1697 Lit: LitModifier::Lit64, Value: Literal, Ctx&: getContext()))
1698 : MCOperand::createImm(Val: Literal);
1699}
1700
1701MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
1702 using namespace AMDGPU::EncValues;
1703
1704 assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
1705 return MCOperand::createImm(Val: (Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
1706 (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
1707 (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
1708 // Cast prevents negative overflow.
1709}
1710
1711static int64_t getInlineImmVal32(unsigned Imm) {
1712 switch (Imm) {
1713 case 240:
1714 return llvm::bit_cast<uint32_t>(from: 0.5f);
1715 case 241:
1716 return llvm::bit_cast<uint32_t>(from: -0.5f);
1717 case 242:
1718 return llvm::bit_cast<uint32_t>(from: 1.0f);
1719 case 243:
1720 return llvm::bit_cast<uint32_t>(from: -1.0f);
1721 case 244:
1722 return llvm::bit_cast<uint32_t>(from: 2.0f);
1723 case 245:
1724 return llvm::bit_cast<uint32_t>(from: -2.0f);
1725 case 246:
1726 return llvm::bit_cast<uint32_t>(from: 4.0f);
1727 case 247:
1728 return llvm::bit_cast<uint32_t>(from: -4.0f);
1729 case 248: // 1 / (2 * PI)
1730 return 0x3e22f983;
1731 default:
1732 llvm_unreachable("invalid fp inline imm");
1733 }
1734}
1735
1736static int64_t getInlineImmVal64(unsigned Imm) {
1737 switch (Imm) {
1738 case 240:
1739 return llvm::bit_cast<uint64_t>(from: 0.5);
1740 case 241:
1741 return llvm::bit_cast<uint64_t>(from: -0.5);
1742 case 242:
1743 return llvm::bit_cast<uint64_t>(from: 1.0);
1744 case 243:
1745 return llvm::bit_cast<uint64_t>(from: -1.0);
1746 case 244:
1747 return llvm::bit_cast<uint64_t>(from: 2.0);
1748 case 245:
1749 return llvm::bit_cast<uint64_t>(from: -2.0);
1750 case 246:
1751 return llvm::bit_cast<uint64_t>(from: 4.0);
1752 case 247:
1753 return llvm::bit_cast<uint64_t>(from: -4.0);
1754 case 248: // 1 / (2 * PI)
1755 return 0x3fc45f306dc9c882;
1756 default:
1757 llvm_unreachable("invalid fp inline imm");
1758 }
1759}
1760
1761static int64_t getInlineImmValF16(unsigned Imm) {
1762 switch (Imm) {
1763 case 240:
1764 return 0x3800;
1765 case 241:
1766 return 0xB800;
1767 case 242:
1768 return 0x3C00;
1769 case 243:
1770 return 0xBC00;
1771 case 244:
1772 return 0x4000;
1773 case 245:
1774 return 0xC000;
1775 case 246:
1776 return 0x4400;
1777 case 247:
1778 return 0xC400;
1779 case 248: // 1 / (2 * PI)
1780 return 0x3118;
1781 default:
1782 llvm_unreachable("invalid fp inline imm");
1783 }
1784}
1785
1786static int64_t getInlineImmValBF16(unsigned Imm) {
1787 switch (Imm) {
1788 case 240:
1789 return 0x3F00;
1790 case 241:
1791 return 0xBF00;
1792 case 242:
1793 return 0x3F80;
1794 case 243:
1795 return 0xBF80;
1796 case 244:
1797 return 0x4000;
1798 case 245:
1799 return 0xC000;
1800 case 246:
1801 return 0x4080;
1802 case 247:
1803 return 0xC080;
1804 case 248: // 1 / (2 * PI)
1805 return 0x3E22;
1806 default:
1807 llvm_unreachable("invalid fp inline imm");
1808 }
1809}
1810
1811unsigned AMDGPUDisassembler::getVgprClassId(unsigned Width) const {
1812 using namespace AMDGPU;
1813
1814 switch (Width) {
1815 case 16:
1816 case 32:
1817 return VGPR_32RegClassID;
1818 case 64:
1819 return VReg_64RegClassID;
1820 case 96:
1821 return VReg_96RegClassID;
1822 case 128:
1823 return VReg_128RegClassID;
1824 case 160:
1825 return VReg_160RegClassID;
1826 case 192:
1827 return VReg_192RegClassID;
1828 case 256:
1829 return VReg_256RegClassID;
1830 case 288:
1831 return VReg_288RegClassID;
1832 case 320:
1833 return VReg_320RegClassID;
1834 case 352:
1835 return VReg_352RegClassID;
1836 case 384:
1837 return VReg_384RegClassID;
1838 case 512:
1839 return VReg_512RegClassID;
1840 case 1024:
1841 return VReg_1024RegClassID;
1842 }
1843 llvm_unreachable("Invalid register width!");
1844}
1845
1846unsigned AMDGPUDisassembler::getAgprClassId(unsigned Width) const {
1847 using namespace AMDGPU;
1848
1849 switch (Width) {
1850 case 16:
1851 case 32:
1852 return AGPR_32RegClassID;
1853 case 64:
1854 return AReg_64RegClassID;
1855 case 96:
1856 return AReg_96RegClassID;
1857 case 128:
1858 return AReg_128RegClassID;
1859 case 160:
1860 return AReg_160RegClassID;
1861 case 256:
1862 return AReg_256RegClassID;
1863 case 288:
1864 return AReg_288RegClassID;
1865 case 320:
1866 return AReg_320RegClassID;
1867 case 352:
1868 return AReg_352RegClassID;
1869 case 384:
1870 return AReg_384RegClassID;
1871 case 512:
1872 return AReg_512RegClassID;
1873 case 1024:
1874 return AReg_1024RegClassID;
1875 }
1876 llvm_unreachable("Invalid register width!");
1877}
1878
1879unsigned AMDGPUDisassembler::getSgprClassId(unsigned Width) const {
1880 using namespace AMDGPU;
1881
1882 switch (Width) {
1883 case 16:
1884 case 32:
1885 return SGPR_32RegClassID;
1886 case 64:
1887 return SGPR_64RegClassID;
1888 case 96:
1889 return SGPR_96RegClassID;
1890 case 128:
1891 return SGPR_128RegClassID;
1892 case 160:
1893 return SGPR_160RegClassID;
1894 case 256:
1895 return SGPR_256RegClassID;
1896 case 288:
1897 return SGPR_288RegClassID;
1898 case 320:
1899 return SGPR_320RegClassID;
1900 case 352:
1901 return SGPR_352RegClassID;
1902 case 384:
1903 return SGPR_384RegClassID;
1904 case 512:
1905 return SGPR_512RegClassID;
1906 }
1907 llvm_unreachable("Invalid register width!");
1908}
1909
1910unsigned AMDGPUDisassembler::getTtmpClassId(unsigned Width) const {
1911 using namespace AMDGPU;
1912
1913 switch (Width) {
1914 case 16:
1915 case 32:
1916 return TTMP_32RegClassID;
1917 case 64:
1918 return TTMP_64RegClassID;
1919 case 128:
1920 return TTMP_128RegClassID;
1921 case 256:
1922 return TTMP_256RegClassID;
1923 case 288:
1924 return TTMP_288RegClassID;
1925 case 320:
1926 return TTMP_320RegClassID;
1927 case 352:
1928 return TTMP_352RegClassID;
1929 case 384:
1930 return TTMP_384RegClassID;
1931 case 512:
1932 return TTMP_512RegClassID;
1933 }
1934 llvm_unreachable("Invalid register width!");
1935}
1936
1937int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
1938 using namespace AMDGPU::EncValues;
1939
1940 unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
1941 unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
1942
1943 return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
1944}
1945
1946MCOperand AMDGPUDisassembler::decodeSrcOp(const MCInst &Inst, unsigned Width,
1947 unsigned Val) const {
1948 using namespace AMDGPU::EncValues;
1949
1950 assert(Val < 1024); // enum10
1951
1952 bool IsAGPR = Val & 512;
1953 Val &= 511;
1954
1955 if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
1956 return createRegOperand(RegClassID: IsAGPR ? getAgprClassId(Width)
1957 : getVgprClassId(Width), Val: Val - VGPR_MIN);
1958 }
1959 return decodeNonVGPRSrcOp(Inst, Width, Val: Val & 0xFF);
1960}
1961
// Decode a source-operand encoding once the VGPR/AGPR range has been ruled
// out. Tries, in this order: SGPRs, trap-temporary registers, inline
// constants / literal markers (passed through as immediates), and finally
// width-specific special registers. Unknown encodings become error operands.
MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const MCInst &Inst,
                                                 unsigned Width,
                                                 unsigned Val) const {
  // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
  // decoded earlier.
  assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
  using namespace AMDGPU::EncValues;

  if (Val <= SGPR_MAX) {
    // "SGPR_MIN <= Val" is always true and causes compilation warning.
    static_assert(SGPR_MIN == 0);
    return createSRegOperand(SRegClassID: getSgprClassId(Width), Val: Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(SRegClassID: getTtmpClassId(Width), Val: TTmpIdx);
  }

  // Inline constants and the 32-bit literal marker are kept as raw
  // immediate encodings here.
  if ((INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) ||
      (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) ||
      Val == LITERAL_CONST)
    return MCOperand::createImm(Val);

  // The 64-bit literal marker is only valid with Feature64BitLiterals.
  if (Val == LITERAL64_CONST && STI.hasFeature(Feature: AMDGPU::Feature64BitLiterals)) {
    return decodeLiteral64Constant();
  }

  switch (Width) {
  case 32:
  case 16:
    return decodeSpecialReg32(Val);
  case 64:
    return decodeSpecialReg64(Val);
  case 96:
  case 128:
  case 256:
  case 512:
    return decodeSpecialReg96Plus(Val);
  default:
    llvm_unreachable("unexpected immediate type");
  }
}
2005
// Bit 0 of DstY isn't stored in the instruction, because it's always the
// opposite of bit 0 of DstX.
MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
                                               unsigned Val) const {
  // Recover the implicit low bit from the already-decoded vdstX register
  // operand of the same VOPD instruction.
  int VDstXInd =
      AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: AMDGPU::OpName::vdstX);
  assert(VDstXInd != -1);
  assert(Inst.getOperand(VDstXInd).isReg());
  unsigned XDstReg = MRI.getEncodingValue(Reg: Inst.getOperand(i: VDstXInd).getReg());
  // DstY{0} = !DstX{0}.
  Val |= ~XDstReg & 1;
  return createRegOperand(RegClassID: getVgprClassId(Width: 32), Val);
}
2018
// Decode a special-register encoding for 32-bit (and 16-bit) operands into
// the corresponding register operand. Unknown encodings produce an error
// operand.
MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  // clang-format off
  case 102: return createRegOperand(Reg: FLAT_SCR_LO);
  case 103: return createRegOperand(Reg: FLAT_SCR_HI);
  case 104: return createRegOperand(Reg: XNACK_MASK_LO);
  case 105: return createRegOperand(Reg: XNACK_MASK_HI);
  case 106: return createRegOperand(Reg: VCC_LO);
  case 107: return createRegOperand(Reg: VCC_HI);
  case 108: return createRegOperand(Reg: TBA_LO);
  case 109: return createRegOperand(Reg: TBA_HI);
  case 110: return createRegOperand(Reg: TMA_LO);
  case 111: return createRegOperand(Reg: TMA_HI);
  // M0 and SGPR_NULL swap encodings 124/125 on GFX11+.
  case 124:
    return isGFX11Plus() ? createRegOperand(Reg: SGPR_NULL) : createRegOperand(Reg: M0);
  case 125:
    return isGFX11Plus() ? createRegOperand(Reg: M0) : createRegOperand(Reg: SGPR_NULL);
  case 126: return createRegOperand(Reg: EXEC_LO);
  case 127: return createRegOperand(Reg: EXEC_HI);
  case 230: return createRegOperand(Reg: SRC_FLAT_SCRATCH_BASE_LO);
  case 231: return createRegOperand(Reg: SRC_FLAT_SCRATCH_BASE_HI);
  case 235: return createRegOperand(Reg: SRC_SHARED_BASE_LO);
  case 236: return createRegOperand(Reg: SRC_SHARED_LIMIT_LO);
  case 237: return createRegOperand(Reg: SRC_PRIVATE_BASE_LO);
  case 238: return createRegOperand(Reg: SRC_PRIVATE_LIMIT_LO);
  case 239: return createRegOperand(Reg: SRC_POPS_EXITING_WAVE_ID);
  case 251: return createRegOperand(Reg: SRC_VCCZ);
  case 252: return createRegOperand(Reg: SRC_EXECZ);
  case 253: return createRegOperand(Reg: SRC_SCC);
  case 254: return createRegOperand(Reg: LDS_DIRECT);
  default: break;
  // clang-format on
  }
  return errOperand(V: Val, ErrMsg: "unknown operand encoding " + Twine(Val));
}
2056
// Decode a special-register encoding for 64-bit operands (register pairs).
// Unknown encodings produce an error operand.
MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(Reg: FLAT_SCR);
  case 104: return createRegOperand(Reg: XNACK_MASK);
  case 106: return createRegOperand(Reg: VCC);
  case 108: return createRegOperand(Reg: TBA);
  case 110: return createRegOperand(Reg: TMA);
  // SGPR_NULL moved from encoding 125 to 124 on GFX11+.
  case 124:
    if (isGFX11Plus())
      return createRegOperand(Reg: SGPR_NULL);
    break;
  case 125:
    if (!isGFX11Plus())
      return createRegOperand(Reg: SGPR_NULL);
    break;
  case 126: return createRegOperand(Reg: EXEC);
  case 230: return createRegOperand(Reg: SRC_FLAT_SCRATCH_BASE_LO);
  case 235: return createRegOperand(Reg: SRC_SHARED_BASE);
  case 236: return createRegOperand(Reg: SRC_SHARED_LIMIT);
  case 237: return createRegOperand(Reg: SRC_PRIVATE_BASE);
  case 238: return createRegOperand(Reg: SRC_PRIVATE_LIMIT);
  case 239: return createRegOperand(Reg: SRC_POPS_EXITING_WAVE_ID);
  case 251: return createRegOperand(Reg: SRC_VCCZ);
  case 252: return createRegOperand(Reg: SRC_EXECZ);
  case 253: return createRegOperand(Reg: SRC_SCC);
  default: break;
  }
  return errOperand(V: Val, ErrMsg: "unknown operand encoding " + Twine(Val));
}
2088
2089MCOperand AMDGPUDisassembler::decodeSpecialReg96Plus(unsigned Val) const {
2090 using namespace AMDGPU;
2091
2092 switch (Val) {
2093 case 124:
2094 if (isGFX11Plus())
2095 return createRegOperand(Reg: SGPR_NULL);
2096 break;
2097 case 125:
2098 if (!isGFX11Plus())
2099 return createRegOperand(Reg: SGPR_NULL);
2100 break;
2101 default:
2102 break;
2103 }
2104 return errOperand(V: Val, ErrMsg: "unknown operand encoding " + Twine(Val));
2105}
2106
// Decode an SDWA source operand. On GFX9/GFX10 the encoding covers VGPRs,
// SGPRs, TTMPs, inline constants and special registers; on Volcanic Islands
// it can only name a VGPR.
MCOperand AMDGPUDisassembler::decodeSDWASrc(unsigned Width,
                                            const unsigned Val) const {
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

  if (STI.hasFeature(Feature: AMDGPU::FeatureGFX9) ||
      STI.hasFeature(Feature: AMDGPU::FeatureGFX10)) {
    // XXX: cast to int is needed to avoid stupid warning:
    // compare with unsigned is always true
    if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
      return createRegOperand(RegClassID: getVgprClassId(Width),
                              Val: Val - SDWA9EncValues::SRC_VGPR_MIN);
    }
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
                              : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
      return createSRegOperand(SRegClassID: getSgprClassId(Width),
                               Val: Val - SDWA9EncValues::SRC_SGPR_MIN);
    }
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
      return createSRegOperand(SRegClassID: getTtmpClassId(Width),
                               Val: Val - SDWA9EncValues::SRC_TTMP_MIN);
    }

    // Remaining encodings are offset by the SGPR base; rebase before
    // checking for inline constants or special registers.
    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if ((INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) ||
        (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX))
      return MCOperand::createImm(Val: SVal);

    return decodeSpecialReg32(Val: SVal);
  }
  if (STI.hasFeature(Feature: AMDGPU::FeatureVolcanicIslands))
    return createRegOperand(RegClassID: getVgprClassId(Width), Val);
  llvm_unreachable("unsupported target");
}
2145
// Decode a 16-bit SDWA source operand.
MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
  return decodeSDWASrc(Width: 16, Val);
}
2149
// Decode a 32-bit SDWA source operand.
MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
  return decodeSDWASrc(Width: 32, Val);
}
2153
// Decode the VOPC destination of an SDWA instruction: either the implicit
// VCC, or an explicit SGPR/TTMP/special register selected by the
// VOPC_DST_VCC_MASK bit in the encoding.
MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
  using namespace AMDGPU::SDWA;

  assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
          STI.hasFeature(AMDGPU::FeatureGFX10)) &&
         "SDWAVopcDst should be present only on GFX9+");

  // A wave32 condition mask is one 32-bit register; wave64 needs a pair.
  bool IsWave32 = STI.hasFeature(Feature: AMDGPU::FeatureWavefrontSize32);

  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;

    int TTmpIdx = getTTmpIdx(Val);
    if (TTmpIdx >= 0) {
      auto TTmpClsId = getTtmpClassId(Width: IsWave32 ? 32 : 64);
      return createSRegOperand(SRegClassID: TTmpClsId, Val: TTmpIdx);
    }
    if (Val > SGPR_MAX) {
      // Beyond the SGPR range: special registers.
      return IsWave32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val);
    }
    return createSRegOperand(SRegClassID: getSgprClassId(Width: IsWave32 ? 32 : 64), Val);
  }
  return createRegOperand(Reg: IsWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC);
}
2178
2179MCOperand AMDGPUDisassembler::decodeBoolReg(const MCInst &Inst,
2180 unsigned Val) const {
2181 return STI.hasFeature(Feature: AMDGPU::FeatureWavefrontSize32)
2182 ? decodeSrcOp(Inst, Width: 32, Val)
2183 : decodeSrcOp(Inst, Width: 64, Val);
2184}
2185
// Split-barrier operands decode like ordinary 32-bit sources.
MCOperand AMDGPUDisassembler::decodeSplitBarrier(const MCInst &Inst,
                                                 unsigned Val) const {
  return decodeSrcOp(Inst, Width: 32, Val);
}
2190
2191MCOperand AMDGPUDisassembler::decodeDpp8FI(unsigned Val) const {
2192 if (Val != AMDGPU::DPP::DPP8_FI_0 && Val != AMDGPU::DPP::DPP8_FI_1)
2193 return MCOperand();
2194 return MCOperand::createImm(Val);
2195}
2196
// Decode a microcode-version immediate into a symbolic expression: a symbol
// for a known GFX version code (or a plain constant for an unknown one),
// OR'ed with the W64/W32/MDP bit expressions that are set. Falls back to a
// raw immediate when any bits outside the defined fields are set.
MCOperand AMDGPUDisassembler::decodeVersionImm(unsigned Imm) const {
  using VersionField = AMDGPU::EncodingField<7, 0>;
  using W64Bit = AMDGPU::EncodingBit<13>;
  using W32Bit = AMDGPU::EncodingBit<14>;
  using MDPBit = AMDGPU::EncodingBit<15>;
  using Encoding = AMDGPU::EncodingFields<VersionField, W64Bit, W32Bit, MDPBit>;

  auto [Version, W64, W32, MDP] = Encoding::decode(Encoded: Imm);

  // Decode into a plain immediate if any unused bits are raised.
  if (Encoding::encode(Values: Version, Values: W64, Values: W32, Values: MDP) != Imm)
    return MCOperand::createImm(Val: Imm);

  // Look up a symbolic name for the version code.
  const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
  const auto *I = find_if(
      Range: Versions, P: [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) {
        return V.Code == Version;
      });
  MCContext &Ctx = getContext();
  const MCExpr *E;
  if (I == Versions.end())
    E = MCConstantExpr::create(Value: Version, Ctx);
  else
    E = MCSymbolRefExpr::create(Symbol: Ctx.getOrCreateSymbol(Name: I->Symbol), Ctx);

  if (W64)
    E = MCBinaryExpr::createOr(LHS: E, RHS: UCVersionW64Expr, Ctx);
  if (W32)
    E = MCBinaryExpr::createOr(LHS: E, RHS: UCVersionW32Expr, Ctx);
  if (MDP)
    E = MCBinaryExpr::createOr(LHS: E, RHS: UCVersionMDPExpr, Ctx);

  return MCOperand::createExpr(Val: E);
}
2231
// Subtarget generation / feature predicates used throughout the decoder.
// Each simply forwards to the corresponding subtarget feature check.
bool AMDGPUDisassembler::isVI() const {
  return STI.hasFeature(Feature: AMDGPU::FeatureVolcanicIslands);
}

bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }

bool AMDGPUDisassembler::isGFX90A() const {
  return STI.hasFeature(Feature: AMDGPU::FeatureGFX90AInsts);
}

bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }

bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }

bool AMDGPUDisassembler::isGFX10Plus() const {
  return AMDGPU::isGFX10Plus(STI);
}

bool AMDGPUDisassembler::isGFX11() const {
  return STI.hasFeature(Feature: AMDGPU::FeatureGFX11);
}

bool AMDGPUDisassembler::isGFX11Plus() const {
  return AMDGPU::isGFX11Plus(STI);
}

bool AMDGPUDisassembler::isGFX1170() const { return AMDGPU::isGFX1170(STI); }

bool AMDGPUDisassembler::isGFX12() const {
  return STI.hasFeature(Feature: AMDGPU::FeatureGFX12);
}

bool AMDGPUDisassembler::isGFX12Plus() const {
  return AMDGPU::isGFX12Plus(STI);
}

bool AMDGPUDisassembler::isGFX1250() const { return AMDGPU::isGFX1250(STI); }

bool AMDGPUDisassembler::isGFX1250Plus() const {
  return AMDGPU::isGFX1250Plus(STI);
}

bool AMDGPUDisassembler::isGFX13() const { return AMDGPU::isGFX13(STI); }

bool AMDGPUDisassembler::isGFX13Plus() const {
  return AMDGPU::isGFX13Plus(STI);
}

bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
  return STI.hasFeature(Feature: AMDGPU::FeatureArchitectedFlatScratch);
}

bool AMDGPUDisassembler::hasKernargPreload() const {
  return AMDGPU::hasKernargPreload(STI);
}
2287
2288//===----------------------------------------------------------------------===//
2289// AMDGPU specific symbol handling
2290//===----------------------------------------------------------------------===//
2291
2292/// Print a string describing the reserved bit range specified by Mask with
2293/// offset BaseBytes for use in error comments. Mask is a single continuous
2294/// range of 1s surrounded by zeros. The format here is meant to align with the
2295/// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
2296static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
2297 SmallString<32> Result;
2298 raw_svector_ostream S(Result);
2299
2300 int TrailingZeros = llvm::countr_zero(Val: Mask);
2301 int PopCount = llvm::popcount(Value: Mask);
2302
2303 if (PopCount == 1) {
2304 S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2305 } else {
2306 S << "bits in range ("
2307 << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
2308 << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2309 }
2310
2311 return Result;
2312}
2313
2314#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
2315#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2316 do { \
2317 KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
2318 } while (0)
2319#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
2320 do { \
2321 KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
2322 << GET_FIELD(MASK) << '\n'; \
2323 } while (0)
2324
2325#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG) \
2326 do { \
2327 if (FourByteBuffer & (MASK)) { \
2328 return createStringError(std::errc::invalid_argument, \
2329 "kernel descriptor " DESC \
2330 " reserved %s set" MSG, \
2331 getBitRangeFromMask((MASK), 0).c_str()); \
2332 } \
2333 } while (0)
2334
2335#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
2336#define CHECK_RESERVED_BITS_MSG(MASK, MSG) \
2337 CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
2338#define CHECK_RESERVED_BITS_DESC(MASK, DESC) \
2339 CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
2340#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG) \
2341 CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
2342
// Disassemble the COMPUTE_PGM_RSRC1 dword of an AMDHSA kernel descriptor
// into .amdhsa_* directives, written to KdStream. Returns an error when any
// bit that is reserved for the current subtarget is set.
// NOLINTNEXTLINE(readability-identifier-naming)
Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
    uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
  using namespace amdhsa;
  StringRef Indent = "\t";

  // We cannot accurately backward compute #VGPRs used from
  // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
  // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
  // simply calculate the inverse of what the assembler does.

  uint32_t GranulatedWorkitemVGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);

  uint32_t NextFreeVGPR =
      (GranulatedWorkitemVGPRCount + 1) *
      AMDGPU::IsaInfo::getVGPREncodingGranule(STI: &STI, EnableWavefrontSize32);

  KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';

  // We cannot backward compute values used to calculate
  // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following
  // directives can't be computed:
  // .amdhsa_reserve_vcc
  // .amdhsa_reserve_flat_scratch
  // .amdhsa_reserve_xnack_mask
  // They take their respective default values if not specified in the assembly.
  //
  // GRANULATED_WAVEFRONT_SGPR_COUNT
  //    = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
  //
  // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
  // are set to 0. So while disassembling we consider that:
  //
  // GRANULATED_WAVEFRONT_SGPR_COUNT
  //    = f(NEXT_FREE_SGPR + 0 + 0 + 0)
  //
  // The disassembler cannot recover the original values of those 3 directives.

  uint32_t GranulatedWavefrontSGPRCount =
      GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);

  if (isGFX10Plus())
    CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                            "must be zero on gfx10+");

  uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
                          AMDGPU::IsaInfo::getSGPREncodingGranule(STI: &STI);

  // Emit the unrecoverable reserve directives with their assumed-zero values
  // (see the comment block above).
  KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
  if (!hasArchitectedFlatScratch())
    KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
  bool ReservedXnackMask = STI.hasFeature(Feature: AMDGPU::FeatureXNACK);
  assert(!ReservedXnackMask || STI.hasFeature(AMDGPU::FeatureSupportsXNACK));
  KdStream << Indent << ".amdhsa_reserve_xnack_mask " << ReservedXnackMask
           << '\n';
  KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";

  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);

  PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
  PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
                  COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);

  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);

  if (!isGFX12Plus())
    PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);

  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);

  if (!isGFX12Plus())
    PRINT_DIRECTIVE(".amdhsa_ieee_mode",
                    COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);

  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);

  // Bits [26].
  if (isGFX9Plus()) {
    PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
  } else {
    CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
                                 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
  }

  // Bits [27].
  if (isGFX1250Plus()) {
    PRINT_PSEUDO_DIRECTIVE_COMMENT("FLAT_SCRATCH_IS_NV",
                                   COMPUTE_PGM_RSRC1_GFX125_FLAT_SCRATCH_IS_NV);
  } else {
    CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX120_RESERVED1,
                             "COMPUTE_PGM_RSRC1");
  }

  // Bits [28].
  CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED2, "COMPUTE_PGM_RSRC1");

  // Bits [29-31].
  if (isGFX10Plus()) {
    // WGP_MODE is not available on GFX1250.
    if (!isGFX1250Plus()) {
      PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
                      COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
    }
    PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
    PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
  } else {
    CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED3,
                             "COMPUTE_PGM_RSRC1");
  }

  if (isGFX12Plus())
    PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
                    COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);

  return true;
}
2467
// Disassemble the COMPUTE_PGM_RSRC2 dword of an AMDHSA kernel descriptor
// into .amdhsa_* directives, written to KdStream. Returns an error when any
// reserved bit is set.
// NOLINTNEXTLINE(readability-identifier-naming)
Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
    uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
  using namespace amdhsa;
  StringRef Indent = "\t";
  // The same bit is spelled differently depending on whether flat scratch
  // is architected on this subtarget.
  if (hasArchitectedFlatScratch())
    PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  else
    PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
                    COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
  PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
                  COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
  PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
                  COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);

  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
  CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);

  PRINT_DIRECTIVE(
      ".amdhsa_exception_fp_ieee_invalid_op",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
  PRINT_DIRECTIVE(
      ".amdhsa_exception_fp_ieee_div_zero",
      COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
  PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
  PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
                  COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);

  CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");

  return true;
}
2515
2516// NOLINTNEXTLINE(readability-identifier-naming)
2517Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
2518 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2519 using namespace amdhsa;
2520 StringRef Indent = "\t";
2521 if (isGFX90A()) {
2522 KdStream << Indent << ".amdhsa_accum_offset "
2523 << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
2524 << '\n';
2525
2526 PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2527
2528 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
2529 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2530 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
2531 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2532 } else if (isGFX10Plus()) {
2533 // Bits [0-3].
2534 if (!isGFX12Plus()) {
2535 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2536 PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
2537 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2538 } else {
2539 PRINT_PSEUDO_DIRECTIVE_COMMENT(
2540 "SHARED_VGPR_COUNT",
2541 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2542 }
2543 } else {
2544 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
2545 "COMPUTE_PGM_RSRC3",
2546 "must be zero on gfx12+");
2547 }
2548
2549 // Bits [4-11].
2550 if (isGFX11()) {
2551 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2552 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2553 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
2554 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2555 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
2556 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2557 } else if (isGFX12Plus()) {
2558 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2559 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2560 } else {
2561 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
2562 "COMPUTE_PGM_RSRC3",
2563 "must be zero on gfx10");
2564 }
2565
2566 // Bits [12].
2567 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
2568 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2569
2570 // Bits [13].
2571 if (isGFX12Plus()) {
2572 PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
2573 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2574 } else {
2575 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
2576 "COMPUTE_PGM_RSRC3",
2577 "must be zero on gfx10 or gfx11");
2578 }
2579
2580 // Bits [14-21].
2581 if (isGFX1250Plus()) {
2582 PRINT_DIRECTIVE(".amdhsa_named_barrier_count",
2583 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT);
2584 PRINT_PSEUDO_DIRECTIVE_COMMENT(
2585 "ENABLE_DYNAMIC_VGPR", COMPUTE_PGM_RSRC3_GFX125_ENABLE_DYNAMIC_VGPR);
2586 PRINT_PSEUDO_DIRECTIVE_COMMENT("TCP_SPLIT",
2587 COMPUTE_PGM_RSRC3_GFX125_TCP_SPLIT);
2588 PRINT_PSEUDO_DIRECTIVE_COMMENT(
2589 "ENABLE_DIDT_THROTTLE",
2590 COMPUTE_PGM_RSRC3_GFX125_ENABLE_DIDT_THROTTLE);
2591 } else {
2592 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX120_RESERVED4,
2593 "COMPUTE_PGM_RSRC3",
2594 "must be zero on gfx10+");
2595 }
2596
2597 // Bits [22-30].
2598 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED5,
2599 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2600
2601 // Bits [31].
2602 if (isGFX11Plus()) {
2603 PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
2604 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2605 } else {
2606 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED6,
2607 "COMPUTE_PGM_RSRC3",
2608 "must be zero on gfx10");
2609 }
2610 } else if (FourByteBuffer) {
2611 return createStringError(
2612 EC: std::errc::invalid_argument,
2613 Fmt: "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2614 }
2615 return true;
2616}
2617#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2618#undef PRINT_DIRECTIVE
2619#undef GET_FIELD
2620#undef CHECK_RESERVED_BITS_IMPL
2621#undef CHECK_RESERVED_BITS
2622#undef CHECK_RESERVED_BITS_MSG
2623#undef CHECK_RESERVED_BITS_DESC
2624#undef CHECK_RESERVED_BITS_DESC_MSG
2625
2626/// Create an error object to return from onSymbolStart for reserved kernel
2627/// descriptor bits being set.
2628static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2629 const char *Msg = "") {
2630 return createStringError(
2631 EC: std::errc::invalid_argument, Fmt: "kernel descriptor reserved %s set%s%s",
2632 Vals: getBitRangeFromMask(Mask, BaseBytes).c_str(), Vals: *Msg ? ", " : "", Vals: Msg);
2633}
2634
2635/// Create an error object to return from onSymbolStart for reserved kernel
2636/// descriptor bytes being set.
2637static Error createReservedKDBytesError(unsigned BaseInBytes,
2638 unsigned WidthInBytes) {
2639 // Create an error comment in the same format as the "Kernel Descriptor"
2640 // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2641 return createStringError(
2642 EC: std::errc::invalid_argument,
2643 Fmt: "kernel descriptor reserved bits in range (%u:%u) set",
2644 Vals: (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, Vals: BaseInBytes * CHAR_BIT);
2645}
2646
2647Expected<bool> AMDGPUDisassembler::decodeKernelDescriptorDirective(
2648 DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
2649 raw_string_ostream &KdStream) const {
2650#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2651 do { \
2652 KdStream << Indent << DIRECTIVE " " \
2653 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2654 } while (0)
2655
2656 uint16_t TwoByteBuffer = 0;
2657 uint32_t FourByteBuffer = 0;
2658
2659 StringRef ReservedBytes;
2660 StringRef Indent = "\t";
2661
2662 assert(Bytes.size() == 64);
2663 DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
2664
2665 switch (Cursor.tell()) {
2666 case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
2667 FourByteBuffer = DE.getU32(C&: Cursor);
2668 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2669 << '\n';
2670 return true;
2671
2672 case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
2673 FourByteBuffer = DE.getU32(C&: Cursor);
2674 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2675 << FourByteBuffer << '\n';
2676 return true;
2677
2678 case amdhsa::KERNARG_SIZE_OFFSET:
2679 FourByteBuffer = DE.getU32(C&: Cursor);
2680 KdStream << Indent << ".amdhsa_kernarg_size "
2681 << FourByteBuffer << '\n';
2682 return true;
2683
2684 case amdhsa::RESERVED0_OFFSET:
2685 // 4 reserved bytes, must be 0.
2686 ReservedBytes = DE.getBytes(C&: Cursor, Length: 4);
2687 for (char B : ReservedBytes) {
2688 if (B != 0)
2689 return createReservedKDBytesError(BaseInBytes: amdhsa::RESERVED0_OFFSET, WidthInBytes: 4);
2690 }
2691 return true;
2692
2693 case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
2694 // KERNEL_CODE_ENTRY_BYTE_OFFSET
2695 // So far no directive controls this for Code Object V3, so simply skip for
2696 // disassembly.
2697 DE.skip(C&: Cursor, Length: 8);
2698 return true;
2699
2700 case amdhsa::RESERVED1_OFFSET:
2701 // 20 reserved bytes, must be 0.
2702 ReservedBytes = DE.getBytes(C&: Cursor, Length: 20);
2703 for (char B : ReservedBytes) {
2704 if (B != 0)
2705 return createReservedKDBytesError(BaseInBytes: amdhsa::RESERVED1_OFFSET, WidthInBytes: 20);
2706 }
2707 return true;
2708
2709 case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
2710 FourByteBuffer = DE.getU32(C&: Cursor);
2711 return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2712
2713 case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
2714 FourByteBuffer = DE.getU32(C&: Cursor);
2715 return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2716
2717 case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
2718 FourByteBuffer = DE.getU32(C&: Cursor);
2719 return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2720
2721 case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
2722 using namespace amdhsa;
2723 TwoByteBuffer = DE.getU16(C&: Cursor);
2724
2725 if (!hasArchitectedFlatScratch())
2726 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2727 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2728 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2729 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2730 PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2731 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2732 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2733 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2734 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2735 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2736 if (!hasArchitectedFlatScratch())
2737 PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2738 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2739 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2740 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2741
2742 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2743 return createReservedKDBitsError(Mask: KERNEL_CODE_PROPERTY_RESERVED0,
2744 BaseBytes: amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2745
2746 // Reserved for GFX9
2747 if (isGFX9() &&
2748 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2749 return createReservedKDBitsError(
2750 Mask: KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2751 BaseBytes: amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, Msg: "must be zero on gfx9");
2752 }
2753 if (isGFX10Plus()) {
2754 PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2755 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2756 }
2757
2758 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
2759 PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2760 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2761
2762 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2763 return createReservedKDBitsError(Mask: KERNEL_CODE_PROPERTY_RESERVED1,
2764 BaseBytes: amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2765 }
2766
2767 return true;
2768
2769 case amdhsa::KERNARG_PRELOAD_OFFSET:
2770 using namespace amdhsa;
2771 TwoByteBuffer = DE.getU16(C&: Cursor);
2772 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2773 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2774 KERNARG_PRELOAD_SPEC_LENGTH);
2775 }
2776
2777 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2778 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2779 KERNARG_PRELOAD_SPEC_OFFSET);
2780 }
2781 return true;
2782
2783 case amdhsa::RESERVED3_OFFSET:
2784 // 4 bytes from here are reserved, must be 0.
2785 ReservedBytes = DE.getBytes(C&: Cursor, Length: 4);
2786 for (char B : ReservedBytes) {
2787 if (B != 0)
2788 return createReservedKDBytesError(BaseInBytes: amdhsa::RESERVED3_OFFSET, WidthInBytes: 4);
2789 }
2790 return true;
2791
2792 default:
2793 llvm_unreachable("Unhandled index. Case statements cover everything.");
2794 return true;
2795 }
2796#undef PRINT_DIRECTIVE
2797}
2798
2799Expected<bool> AMDGPUDisassembler::decodeKernelDescriptor(
2800 StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2801
2802 // CP microcode requires the kernel descriptor to be 64 aligned.
2803 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2804 return createStringError(EC: std::errc::invalid_argument,
2805 Fmt: "kernel descriptor must be 64-byte aligned");
2806
2807 // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2808 // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2809 // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2810 // order. Workaround this by first looking up .amdhsa_wavefront_size32 here
2811 // when required.
2812 if (isGFX10Plus()) {
2813 uint16_t KernelCodeProperties =
2814 support::endian::read16(P: &Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
2815 E: llvm::endianness::little);
2816 EnableWavefrontSize32 =
2817 AMDHSA_BITS_GET(KernelCodeProperties,
2818 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2819 }
2820
2821 std::string Kd;
2822 raw_string_ostream KdStream(Kd);
2823 KdStream << ".amdhsa_kernel " << KdName << '\n';
2824
2825 DataExtractor::Cursor C(0);
2826 while (C && C.tell() < Bytes.size()) {
2827 Expected<bool> Res = decodeKernelDescriptorDirective(Cursor&: C, Bytes, KdStream);
2828
2829 cantFail(Err: C.takeError());
2830
2831 if (!Res)
2832 return Res;
2833 }
2834 KdStream << ".end_amdhsa_kernel\n";
2835 outs() << KdStream.str();
2836 return true;
2837}
2838
2839Expected<bool> AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
2840 uint64_t &Size,
2841 ArrayRef<uint8_t> Bytes,
2842 uint64_t Address) const {
2843 // Right now only kernel descriptor needs to be handled.
2844 // We ignore all other symbols for target specific handling.
2845 // TODO:
2846 // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2847 // Object V2 and V3 when symbols are marked protected.
2848
2849 // amd_kernel_code_t for Code Object V2.
2850 if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2851 Size = 256;
2852 return createStringError(EC: std::errc::invalid_argument,
2853 Fmt: "code object v2 is not supported");
2854 }
2855
2856 // Code Object V3 kernel descriptors.
2857 StringRef Name = Symbol.Name;
2858 if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(Suffix: StringRef(".kd"))) {
2859 Size = 64; // Size = 64 regardless of success or failure.
2860 return decodeKernelDescriptor(KdName: Name.drop_back(N: 3), Bytes, KdAddress: Address);
2861 }
2862
2863 return false;
2864}
2865
2866const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
2867 int64_t Val) {
2868 MCContext &Ctx = getContext();
2869 MCSymbol *Sym = Ctx.getOrCreateSymbol(Name: Id);
2870 // Note: only set value to Val on a new symbol in case an dissassembler
2871 // has already been initialized in this context.
2872 if (!Sym->isVariable()) {
2873 Sym->setVariableValue(MCConstantExpr::create(Value: Val, Ctx));
2874 } else {
2875 int64_t Res = ~Val;
2876 bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
2877 if (!Valid || Res != Val)
2878 Ctx.reportWarning(L: SMLoc(), Msg: "unsupported redefinition of " + Id);
2879 }
2880 return MCSymbolRefExpr::create(Symbol: Sym, Ctx);
2881}
2882
2883bool AMDGPUDisassembler::isBufferInstruction(const MCInst &MI) const {
2884 const uint64_t TSFlags = MCII->get(Opcode: MI.getOpcode()).TSFlags;
2885
2886 // Check for MUBUF and MTBUF instructions
2887 if (TSFlags & (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))
2888 return true;
2889
2890 // Check for SMEM buffer instructions (S_BUFFER_* instructions)
2891 if ((TSFlags & SIInstrFlags::SMRD) && AMDGPU::getSMEMIsBuffer(Opc: MI.getOpcode()))
2892 return true;
2893
2894 return false;
2895}
2896
2897//===----------------------------------------------------------------------===//
2898// AMDGPUSymbolizer
2899//===----------------------------------------------------------------------===//
2900
2901// Try to find symbol name for specified label
2902bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
2903 MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2904 uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2905 uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2906
2907 if (!IsBranch) {
2908 return false;
2909 }
2910
2911 auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2912 if (!Symbols)
2913 return false;
2914
2915 auto Result = llvm::find_if(Range&: *Symbols, P: [Value](const SymbolInfoTy &Val) {
2916 return Val.Addr == static_cast<uint64_t>(Value) &&
2917 Val.Type == ELF::STT_NOTYPE;
2918 });
2919 if (Result != Symbols->end()) {
2920 auto *Sym = Ctx.getOrCreateSymbol(Name: Result->Name);
2921 const auto *Add = MCSymbolRefExpr::create(Symbol: Sym, Ctx);
2922 Inst.addOperand(Op: MCOperand::createExpr(Val: Add));
2923 return true;
2924 }
2925 // Add to list of referenced addresses, so caller can synthesize a label.
2926 ReferencedAddresses.push_back(x: static_cast<uint64_t>(Value));
2927 return false;
2928}
2929
2930void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
2931 int64_t Value,
2932 uint64_t Address) {
2933 llvm_unreachable("unimplemented");
2934}
2935
2936//===----------------------------------------------------------------------===//
2937// Initialization
2938//===----------------------------------------------------------------------===//
2939
2940static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
2941 LLVMOpInfoCallback /*GetOpInfo*/,
2942 LLVMSymbolLookupCallback /*SymbolLookUp*/,
2943 void *DisInfo,
2944 MCContext *Ctx,
2945 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2946 return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2947}
2948
2949static MCDisassembler *createAMDGPUDisassembler(const Target &T,
2950 const MCSubtargetInfo &STI,
2951 MCContext &Ctx) {
2952 return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2953}
2954
2955extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
2956LLVMInitializeAMDGPUDisassembler() {
2957 TargetRegistry::RegisterMCDisassembler(T&: getTheGCNTarget(),
2958 Fn: createAMDGPUDisassembler);
2959 TargetRegistry::RegisterMCSymbolizer(T&: getTheGCNTarget(),
2960 Fn: createAMDGPUSymbolizer);
2961}
2962