1//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//===----------------------------------------------------------------------===//
10//
11/// \file
12///
/// This file contains the definition of the AMDGPU ISA disassembler.
14//
15//===----------------------------------------------------------------------===//
16
17// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
18
19#include "Disassembler/AMDGPUDisassembler.h"
20#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21#include "SIDefines.h"
22#include "SIRegisterInfo.h"
23#include "TargetInfo/AMDGPUTargetInfo.h"
24#include "Utils/AMDGPUAsmUtils.h"
25#include "Utils/AMDGPUBaseInfo.h"
26#include "llvm-c/DisassemblerTypes.h"
27#include "llvm/BinaryFormat/ELF.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCContext.h"
30#include "llvm/MC/MCDecoderOps.h"
31#include "llvm/MC/MCExpr.h"
32#include "llvm/MC/MCInstrDesc.h"
33#include "llvm/MC/MCRegisterInfo.h"
34#include "llvm/MC/MCSubtargetInfo.h"
35#include "llvm/MC/TargetRegistry.h"
36#include "llvm/Support/AMDHSAKernelDescriptor.h"
37#include "llvm/Support/Compiler.h"
38
39using namespace llvm;
40
41#define DEBUG_TYPE "amdgpu-disassembler"
42
43#define SGPR_MAX \
44 (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \
45 : AMDGPU::EncValues::SGPR_MAX_SI)
46
47using DecodeStatus = llvm::MCDisassembler::DecodeStatus;
48
49static int64_t getInlineImmValF16(unsigned Imm);
50static int64_t getInlineImmValBF16(unsigned Imm);
51static int64_t getInlineImmVal32(unsigned Imm);
52static int64_t getInlineImmVal64(unsigned Imm);
53
54AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
55 MCContext &Ctx, MCInstrInfo const *MCII)
56 : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
57 MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(STI: &STI)),
58 CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
59 // ToDo: AMDGPUDisassembler supports only VI ISA.
60 if (!STI.hasFeature(Feature: AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
61 reportFatalUsageError(reason: "disassembly not yet supported for subtarget");
62
63 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
64 createConstantSymbolExpr(Id: Symbol, Val: Code);
65
66 UCVersionW64Expr = createConstantSymbolExpr(Id: "UC_VERSION_W64_BIT", Val: 0x2000);
67 UCVersionW32Expr = createConstantSymbolExpr(Id: "UC_VERSION_W32_BIT", Val: 0x4000);
68 UCVersionMDPExpr = createConstantSymbolExpr(Id: "UC_VERSION_MDP_BIT", Val: 0x8000);
69}
70
71void AMDGPUDisassembler::setABIVersion(unsigned Version) {
72 CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(ABIVersion: Version);
73}
74
75inline static MCDisassembler::DecodeStatus
76addOperand(MCInst &Inst, const MCOperand& Opnd) {
77 Inst.addOperand(Op: Opnd);
78 return Opnd.isValid() ?
79 MCDisassembler::Success :
80 MCDisassembler::Fail;
81}
82
83static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
84 AMDGPU::OpName Name) {
85 int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name);
86 if (OpIdx != -1) {
87 auto *I = MI.begin();
88 std::advance(i&: I, n: OpIdx);
89 MI.insert(I, Op);
90 }
91 return OpIdx;
92}
93
94static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
95 uint64_t Addr,
96 const MCDisassembler *Decoder) {
97 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
98
99 // Our branches take a simm16.
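  // For example, Imm = 0xFFFC (simm16 = -4) at Addr = 0x100 gives a target of
  // 0x100 + 4 + (-4 * 4) = 0xF4.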
100 int64_t Offset = SignExtend64<16>(x: Imm) * 4 + 4 + Addr;
101
102 if (DAsm->tryAddingSymbolicOperand(Inst, Value: Offset, Address: Addr, IsBranch: true, Offset: 2, OpSize: 2, InstSize: 0))
103 return MCDisassembler::Success;
104 return addOperand(Inst, Opnd: MCOperand::createImm(Val: Imm));
105}
106
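// Examples of the subtarget-dependent offset widths handled below: on GFX12+
// Imm = 0xFFFFFF decodes to -1, while on VI the same value is masked to the
// 20-bit unsigned offset 0xFFFFF.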
107static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
108 const MCDisassembler *Decoder) {
109 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
110 int64_t Offset;
111 if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
112 Offset = SignExtend64<24>(x: Imm);
113 } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
114 Offset = Imm & 0xFFFFF;
115 } else { // GFX9+ supports 21-bit signed offsets.
116 Offset = SignExtend64<21>(x: Imm);
117 }
118 return addOperand(Inst, Opnd: MCOperand::createImm(Val: Offset));
119}
120
121static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
122 const MCDisassembler *Decoder) {
123 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
124 return addOperand(Inst, Opnd: DAsm->decodeBoolReg(Val));
125}
126
127static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
128 uint64_t Addr,
129 const MCDisassembler *Decoder) {
130 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
131 return addOperand(Inst, Opnd: DAsm->decodeSplitBarrier(Val));
132}
133
134static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
135 const MCDisassembler *Decoder) {
136 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
137 return addOperand(Inst, Opnd: DAsm->decodeDpp8FI(Val));
138}
139
140#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
141 static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm, \
142 uint64_t /*Addr*/, \
143 const MCDisassembler *Decoder) { \
144 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
145 return addOperand(Inst, DAsm->DecoderName(Imm)); \
146 }
147
// Decoder for registers; decodes directly using the RegClassID. Imm (8 bits)
// is the register number. Used by VGPR-only and AGPR-only operands.
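// For example, DECODE_OPERAND_REG_8(VGPR_32) expands to
// DecodeVGPR_32RegisterClass, which maps Imm = 5 straight to v5.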
150#define DECODE_OPERAND_REG_8(RegClass) \
151 static DecodeStatus Decode##RegClass##RegisterClass( \
152 MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, \
153 const MCDisassembler *Decoder) { \
154 assert(Imm < (1 << 8) && "8-bit encoding"); \
155 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
156 return addOperand( \
157 Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm)); \
158 }
159
160#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm) \
161 static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, \
162 const MCDisassembler *Decoder) { \
163 assert(Imm < (1 << EncSize) && #EncSize "-bit encoding"); \
164 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
165 return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm)); \
166 }
167
168static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
169 unsigned OpWidth, unsigned Imm, unsigned EncImm,
170 const MCDisassembler *Decoder) {
171 assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
172 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
173 return addOperand(Inst, Opnd: DAsm->decodeSrcOp(Width: OpWidth, Val: EncImm));
174}
175
// Decoder for registers. Imm (7 bits) is the register number; decodeSrcOp is
// used to get the register class. Used by SGPR-only operands.
178#define DECODE_OPERAND_SREG_7(RegClass, OpWidth) \
179 DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm)
180
181#define DECODE_OPERAND_SREG_8(RegClass, OpWidth) \
182 DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm)
183
// Decoder for registers. Imm (10 bits): Imm{7-0} is the register number,
// Imm{9} is acc (AGPR or VGPR), and Imm{8} should be 0 (see VOP3Pe_SMFMAC).
// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
// Used by AV_ register classes (AGPR-only or VGPR-only register operands).
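// For example, Imm = 0x205 (acc set, register number 5) is expected to decode
// to a5, and Imm = 0x005 to v5, once IS_VGPR is ORed in below.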
188template <unsigned OpWidth>
189static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
190 const MCDisassembler *Decoder) {
191 return decodeSrcOp(Inst, EncSize: 10, OpWidth, Imm, EncImm: Imm | AMDGPU::EncValues::IS_VGPR,
192 Decoder);
193}
194
195// Decoder for Src(9-bit encoding) registers only.
196template <unsigned OpWidth>
197static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
198 uint64_t /* Addr */,
199 const MCDisassembler *Decoder) {
200 return decodeSrcOp(Inst, EncSize: 9, OpWidth, Imm, EncImm: Imm, Decoder);
201}
202
// Decoder for Src (9-bit encoding) AGPRs. The register number is encoded in 9
// bits; set Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp.
// Registers only.
206template <unsigned OpWidth>
207static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
208 const MCDisassembler *Decoder) {
209 return decodeSrcOp(Inst, EncSize: 9, OpWidth, Imm, EncImm: Imm | 512, Decoder);
210}
211
// Decoder for 'enum10' from decodeSrcOp: Imm{0-8} is the 9-bit Src encoding
// and Imm{9} is acc. Registers only.
214template <unsigned OpWidth>
215static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
216 uint64_t /* Addr */,
217 const MCDisassembler *Decoder) {
218 return decodeSrcOp(Inst, EncSize: 10, OpWidth, Imm, EncImm: Imm, Decoder);
219}
220
// Decoder for RegisterOperands using the 9-bit Src encoding. The operand can
// be a register from RegClass or an immediate. Registers that don't belong to
// RegClass will still be decoded, and the InstPrinter will report a warning.
// An immediate will be decoded into a constant matching the OperandType
// (important for floating-point types).
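// For instance, with this encoding Imm = 0x102 selects v2, values in the
// inline-constant range decode to immediates (e.g. 128 becomes 0), and
// Imm = 255 requests the 32-bit literal that follows the instruction.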
226template <unsigned OpWidth>
227static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
228 uint64_t /* Addr */,
229 const MCDisassembler *Decoder) {
230 return decodeSrcOp(Inst, EncSize: 9, OpWidth, Imm, EncImm: Imm, Decoder);
231}
232
233// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
234// and decode using 'enum10' from decodeSrcOp.
235template <unsigned OpWidth>
236static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
237 uint64_t /* Addr */,
238 const MCDisassembler *Decoder) {
239 return decodeSrcOp(Inst, EncSize: 9, OpWidth, Imm, EncImm: Imm | 512, Decoder);
240}
241
242// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
243// when RegisterClass is used as an operand. Most often used for destination
244// operands.
245
246DECODE_OPERAND_REG_8(VGPR_32)
247DECODE_OPERAND_REG_8(VGPR_32_Lo128)
248DECODE_OPERAND_REG_8(VReg_64)
249DECODE_OPERAND_REG_8(VReg_96)
250DECODE_OPERAND_REG_8(VReg_128)
251DECODE_OPERAND_REG_8(VReg_192)
252DECODE_OPERAND_REG_8(VReg_256)
253DECODE_OPERAND_REG_8(VReg_288)
254DECODE_OPERAND_REG_8(VReg_320)
255DECODE_OPERAND_REG_8(VReg_352)
256DECODE_OPERAND_REG_8(VReg_384)
257DECODE_OPERAND_REG_8(VReg_512)
258DECODE_OPERAND_REG_8(VReg_1024)
259
260DECODE_OPERAND_SREG_7(SReg_32, 32)
261DECODE_OPERAND_SREG_7(SReg_32_XM0, 32)
262DECODE_OPERAND_SREG_7(SReg_32_XEXEC, 32)
263DECODE_OPERAND_SREG_7(SReg_32_XM0_XEXEC, 32)
264DECODE_OPERAND_SREG_7(SReg_32_XEXEC_HI, 32)
265DECODE_OPERAND_SREG_7(SReg_64_XEXEC, 64)
266DECODE_OPERAND_SREG_7(SReg_64_XEXEC_XNULL, 64)
267DECODE_OPERAND_SREG_7(SReg_96, 96)
268DECODE_OPERAND_SREG_7(SReg_128, 128)
269DECODE_OPERAND_SREG_7(SReg_128_XNULL, 128)
270DECODE_OPERAND_SREG_7(SReg_256, 256)
271DECODE_OPERAND_SREG_7(SReg_256_XNULL, 256)
272DECODE_OPERAND_SREG_7(SReg_512, 512)
273
274DECODE_OPERAND_SREG_8(SReg_64, 64)
275
276DECODE_OPERAND_REG_8(AGPR_32)
277DECODE_OPERAND_REG_8(AReg_64)
278DECODE_OPERAND_REG_8(AReg_128)
279DECODE_OPERAND_REG_8(AReg_256)
280DECODE_OPERAND_REG_8(AReg_512)
281DECODE_OPERAND_REG_8(AReg_1024)
282
283static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
284 uint64_t /*Addr*/,
285 const MCDisassembler *Decoder) {
286 assert(isUInt<10>(Imm) && "10-bit encoding expected");
287 assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");
288
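  // For example, Imm = 0x203 has bit 9 set and RegIdx = 3, selecting the high
  // 16-bit half of v3; Imm = 0x003 selects the low half.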
289 bool IsHi = Imm & (1 << 9);
290 unsigned RegIdx = Imm & 0xff;
291 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
292 return addOperand(Inst, Opnd: DAsm->createVGPR16Operand(RegIdx, IsHi));
293}
294
295static DecodeStatus
296DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
297 const MCDisassembler *Decoder) {
298 assert(isUInt<8>(Imm) && "8-bit encoding expected");
299
300 bool IsHi = Imm & (1 << 7);
301 unsigned RegIdx = Imm & 0x7f;
302 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
303 return addOperand(Inst, Opnd: DAsm->createVGPR16Operand(RegIdx, IsHi));
304}
305
306template <unsigned OpWidth>
307static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
308 uint64_t /*Addr*/,
309 const MCDisassembler *Decoder) {
310 assert(isUInt<9>(Imm) && "9-bit encoding expected");
311
312 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
313 if (Imm & AMDGPU::EncValues::IS_VGPR) {
314 bool IsHi = Imm & (1 << 7);
315 unsigned RegIdx = Imm & 0x7f;
316 return addOperand(Inst, Opnd: DAsm->createVGPR16Operand(RegIdx, IsHi));
317 }
318 return addOperand(Inst, Opnd: DAsm->decodeNonVGPRSrcOp(Width: OpWidth, Val: Imm & 0xFF));
319}
320
321template <unsigned OpWidth>
322static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
323 uint64_t /*Addr*/,
324 const MCDisassembler *Decoder) {
325 assert(isUInt<10>(Imm) && "10-bit encoding expected");
326
327 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
328 if (Imm & AMDGPU::EncValues::IS_VGPR) {
329 bool IsHi = Imm & (1 << 9);
330 unsigned RegIdx = Imm & 0xff;
331 return addOperand(Inst, Opnd: DAsm->createVGPR16Operand(RegIdx, IsHi));
332 }
333 return addOperand(Inst, Opnd: DAsm->decodeNonVGPRSrcOp(Width: OpWidth, Val: Imm & 0xFF));
334}
335
336static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm,
337 uint64_t /*Addr*/,
338 const MCDisassembler *Decoder) {
339 assert(isUInt<10>(Imm) && "10-bit encoding expected");
340 assert(Imm & AMDGPU::EncValues::IS_VGPR && "VGPR expected");
341
342 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
343
344 bool IsHi = Imm & (1 << 9);
345 unsigned RegIdx = Imm & 0xff;
346 return addOperand(Inst, Opnd: DAsm->createVGPR16Operand(RegIdx, IsHi));
347}
348
349static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
350 uint64_t Addr,
351 const MCDisassembler *Decoder) {
352 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
353 return addOperand(Inst, Opnd: DAsm->decodeMandatoryLiteralConstant(Imm));
354}
355
356static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
357 uint64_t Addr, const void *Decoder) {
358 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
359 return addOperand(Inst, Opnd: DAsm->decodeVOPDDstYOp(Inst, Val));
360}
361
362static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
363 const MCRegisterInfo *MRI) {
364 if (OpIdx < 0)
365 return false;
366
367 const MCOperand &Op = Inst.getOperand(i: OpIdx);
368 if (!Op.isReg())
369 return false;
370
371 MCRegister Sub = MRI->getSubReg(Reg: Op.getReg(), Idx: AMDGPU::sub0);
372 auto Reg = Sub ? Sub : Op.getReg();
373 return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
374}
375
376static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw,
377 const MCDisassembler *Decoder) {
378 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
379 if (!DAsm->isGFX90A()) {
380 Imm &= 511;
381 } else {
    // If an atomic has both vdata and vdst, their register classes are tied.
    // The acc bit is decoded along with vdst, the first operand, so we need to
    // change the register class to AGPR if vdst was an AGPR.
    // If a DS instruction has both data0 and data1, their register classes
    // are also tied.
387 unsigned Opc = Inst.getOpcode();
388 uint64_t TSFlags = DAsm->getMCII()->get(Opcode: Opc).TSFlags;
389 AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
390 ? AMDGPU::OpName::data0
391 : AMDGPU::OpName::vdata;
392 const MCRegisterInfo *MRI = DAsm->getContext().getRegisterInfo();
393 int DataIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: DataName);
394 if ((int)Inst.getNumOperands() == DataIdx) {
395 int DstIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::vdst);
396 if (IsAGPROperand(Inst, OpIdx: DstIdx, MRI))
397 Imm |= 512;
398 }
399
400 if (TSFlags & SIInstrFlags::DS) {
401 int Data2Idx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: AMDGPU::OpName::data1);
402 if ((int)Inst.getNumOperands() == Data2Idx &&
403 IsAGPROperand(Inst, OpIdx: DataIdx, MRI))
404 Imm |= 512;
405 }
406 }
407 return addOperand(Inst, Opnd: DAsm->decodeSrcOp(Width: Opw, Val: Imm | 256));
408}
409
410template <unsigned Opw>
411static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
412 uint64_t /* Addr */,
413 const MCDisassembler *Decoder) {
414 return decodeAVLdSt(Inst, Imm, Opw, Decoder);
415}
416
417static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
418 uint64_t Addr,
419 const MCDisassembler *Decoder) {
420 assert(Imm < (1 << 9) && "9-bit encoding");
421 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
422 return addOperand(Inst, Opnd: DAsm->decodeSrcOp(Width: 64, Val: Imm));
423}
424
425#define DECODE_SDWA(DecName) \
426DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
427
428DECODE_SDWA(Src32)
429DECODE_SDWA(Src16)
430DECODE_SDWA(VopcDst)
431
432static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
433 uint64_t /* Addr */,
434 const MCDisassembler *Decoder) {
435 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
436 return addOperand(Inst, Opnd: DAsm->decodeVersionImm(Imm));
437}
438
439#include "AMDGPUGenDisassemblerTables.inc"
440
441//===----------------------------------------------------------------------===//
442//
443//===----------------------------------------------------------------------===//
444
445template <typename InsnType>
446DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table, MCInst &MI,
447 InsnType Inst, uint64_t Address,
448 raw_ostream &Comments) const {
449 assert(MI.getOpcode() == 0);
450 assert(MI.getNumOperands() == 0);
451 MCInst TmpInst;
452 HasLiteral = false;
453 const auto SavedBytes = Bytes;
454
455 SmallString<64> LocalComments;
456 raw_svector_ostream LocalCommentStream(LocalComments);
457 CommentStream = &LocalCommentStream;
458
459 DecodeStatus Res =
460 decodeInstruction(Table, TmpInst, Inst, Address, this, STI);
461
462 CommentStream = nullptr;
463
464 if (Res != MCDisassembler::Fail) {
465 MI = TmpInst;
466 Comments << LocalComments;
467 return MCDisassembler::Success;
468 }
469 Bytes = SavedBytes;
470 return MCDisassembler::Fail;
471}
472
473template <typename InsnType>
474DecodeStatus
475AMDGPUDisassembler::tryDecodeInst(const uint8_t *Table1, const uint8_t *Table2,
476 MCInst &MI, InsnType Inst, uint64_t Address,
477 raw_ostream &Comments) const {
478 for (const uint8_t *T : {Table1, Table2}) {
479 if (DecodeStatus Res = tryDecodeInst(T, MI, Inst, Address, Comments))
480 return Res;
481 }
482 return MCDisassembler::Fail;
483}
484
485template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
486 assert(Bytes.size() >= sizeof(T));
487 const auto Res =
488 support::endian::read<T, llvm::endianness::little>(Bytes.data());
489 Bytes = Bytes.slice(N: sizeof(T));
490 return Res;
491}
492
493static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) {
494 assert(Bytes.size() >= 12);
495 uint64_t Lo =
496 support::endian::read<uint64_t, llvm::endianness::little>(P: Bytes.data());
497 Bytes = Bytes.slice(N: 8);
498 uint64_t Hi =
499 support::endian::read<uint32_t, llvm::endianness::little>(P: Bytes.data());
500 Bytes = Bytes.slice(N: 4);
501 return DecoderUInt128(Lo, Hi);
502}
503
504static inline DecoderUInt128 eat16Bytes(ArrayRef<uint8_t> &Bytes) {
505 assert(Bytes.size() >= 16);
506 uint64_t Lo =
507 support::endian::read<uint64_t, llvm::endianness::little>(P: Bytes.data());
508 Bytes = Bytes.slice(N: 8);
509 uint64_t Hi =
510 support::endian::read<uint64_t, llvm::endianness::little>(P: Bytes.data());
511 Bytes = Bytes.slice(N: 8);
512 return DecoderUInt128(Lo, Hi);
513}
514
515void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
516 const MCInstrInfo &MCII) const {
517 const MCInstrDesc &Desc = MCII.get(Opcode: MI.getOpcode());
518 for (auto [OpNo, OpDesc] : enumerate(First: Desc.operands())) {
519 if (OpNo >= MI.getNumOperands())
520 continue;
521
    // TODO: Fix V_DUAL_FMAMK_F32_X_FMAAK_F32_gfx12 vsrc operands, which are
    // defined to take VGPR_32 but in reality allow inline constants.
524 bool IsSrc = AMDGPU::OPERAND_SRC_FIRST <= OpDesc.OperandType &&
525 OpDesc.OperandType <= AMDGPU::OPERAND_SRC_LAST;
526 if (!IsSrc && OpDesc.OperandType != MCOI::OPERAND_REGISTER)
527 continue;
528
529 MCOperand &Op = MI.getOperand(i: OpNo);
530 if (!Op.isImm())
531 continue;
532 int64_t Imm = Op.getImm();
533 if (AMDGPU::EncValues::INLINE_INTEGER_C_MIN <= Imm &&
534 Imm <= AMDGPU::EncValues::INLINE_INTEGER_C_MAX) {
535 Op = decodeIntImmed(Imm);
536 continue;
537 }
538
539 if (Imm == AMDGPU::EncValues::LITERAL_CONST) {
540 Op = decodeLiteralConstant(ExtendFP64: OpDesc.OperandType ==
541 AMDGPU::OPERAND_REG_IMM_FP64);
542 continue;
543 }
544
545 if (AMDGPU::EncValues::INLINE_FLOATING_C_MIN <= Imm &&
546 Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX) {
547 switch (OpDesc.OperandType) {
548 case AMDGPU::OPERAND_REG_IMM_BF16:
549 case AMDGPU::OPERAND_REG_IMM_V2BF16:
550 case AMDGPU::OPERAND_REG_INLINE_C_BF16:
551 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
552 Imm = getInlineImmValBF16(Imm);
553 break;
554 case AMDGPU::OPERAND_REG_IMM_FP16:
555 case AMDGPU::OPERAND_REG_IMM_INT16:
556 case AMDGPU::OPERAND_REG_IMM_V2FP16:
557 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
558 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
559 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
560 Imm = getInlineImmValF16(Imm);
561 break;
562 case AMDGPU::OPERAND_REG_IMM_FP64:
563 case AMDGPU::OPERAND_REG_IMM_INT64:
564 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
565 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
566 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
567 Imm = getInlineImmVal64(Imm);
568 break;
569 default:
570 Imm = getInlineImmVal32(Imm);
571 }
572 Op.setImm(Imm);
573 }
574 }
575}
576
577DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
578 ArrayRef<uint8_t> Bytes_,
579 uint64_t Address,
580 raw_ostream &CS) const {
581 unsigned MaxInstBytesNum = std::min(a: (size_t)TargetMaxInstBytes, b: Bytes_.size());
582 Bytes = Bytes_.slice(N: 0, M: MaxInstBytesNum);
583
584 // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
585 // there are fewer bytes left). This will be overridden on success.
586 Size = std::min(a: (size_t)4, b: Bytes_.size());
587
588 do {
    // ToDo: it would be better to switch on the encoding length using some bit
    // predicate, but that is unknown yet, so try everything we can.

    // Try to decode DPP and SDWA first to resolve the conflict with the VOP1
    // and VOP2 encodings.
    if (isGFX11Plus() && Bytes.size() >= 12) {
595 DecoderUInt128 DecW = eat12Bytes(Bytes);
596
597 if (isGFX11() &&
598 tryDecodeInst(Table1: DecoderTableGFX1196, Table2: DecoderTableGFX11_FAKE1696, MI,
599 Inst: DecW, Address, Comments&: CS))
600 break;
601
602 if (isGFX1250() &&
603 tryDecodeInst(Table1: DecoderTableGFX125096, Table2: DecoderTableGFX1250_FAKE1696, MI,
604 Inst: DecW, Address, Comments&: CS))
605 break;
606
607 if (isGFX12() &&
608 tryDecodeInst(Table1: DecoderTableGFX1296, Table2: DecoderTableGFX12_FAKE1696, MI,
609 Inst: DecW, Address, Comments&: CS))
610 break;
611
612 if (isGFX12() &&
613 tryDecodeInst(Table: DecoderTableGFX12W6496, MI, Inst: DecW, Address, Comments&: CS))
614 break;
615
616 // Reinitialize Bytes
617 Bytes = Bytes_.slice(N: 0, M: MaxInstBytesNum);
618
619 } else if (Bytes.size() >= 16 &&
620 STI.hasFeature(Feature: AMDGPU::FeatureGFX950Insts)) {
621 DecoderUInt128 DecW = eat16Bytes(Bytes);
622 if (tryDecodeInst(Table: DecoderTableGFX940128, MI, Inst: DecW, Address, Comments&: CS))
623 break;
624
625 // Reinitialize Bytes
626 Bytes = Bytes_.slice(N: 0, M: MaxInstBytesNum);
627 }
628
629 if (Bytes.size() >= 8) {
630 const uint64_t QW = eatBytes<uint64_t>(Bytes);
631
632 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX10_BEncoding) &&
633 tryDecodeInst(Table: DecoderTableGFX10_B64, MI, Inst: QW, Address, Comments&: CS))
634 break;
635
636 if (STI.hasFeature(Feature: AMDGPU::FeatureUnpackedD16VMem) &&
637 tryDecodeInst(Table: DecoderTableGFX80_UNPACKED64, MI, Inst: QW, Address, Comments&: CS))
638 break;
639
640 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX950Insts) &&
641 tryDecodeInst(Table: DecoderTableGFX95064, MI, Inst: QW, Address, Comments&: CS))
642 break;
643
644 // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
645 // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
646 // table first so we print the correct name.
647 if (STI.hasFeature(Feature: AMDGPU::FeatureFmaMixInsts) &&
648 tryDecodeInst(Table: DecoderTableGFX9_DL64, MI, Inst: QW, Address, Comments&: CS))
649 break;
650
651 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX940Insts) &&
652 tryDecodeInst(Table: DecoderTableGFX94064, MI, Inst: QW, Address, Comments&: CS))
653 break;
654
655 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX90AInsts) &&
656 tryDecodeInst(Table: DecoderTableGFX90A64, MI, Inst: QW, Address, Comments&: CS))
657 break;
658
659 if ((isVI() || isGFX9()) &&
660 tryDecodeInst(Table: DecoderTableGFX864, MI, Inst: QW, Address, Comments&: CS))
661 break;
662
663 if (isGFX9() && tryDecodeInst(Table: DecoderTableGFX964, MI, Inst: QW, Address, Comments&: CS))
664 break;
665
666 if (isGFX10() && tryDecodeInst(Table: DecoderTableGFX1064, MI, Inst: QW, Address, Comments&: CS))
667 break;
668
669 if (isGFX1250() &&
670 tryDecodeInst(Table1: DecoderTableGFX125064, Table2: DecoderTableGFX1250_FAKE1664, MI,
671 Inst: QW, Address, Comments&: CS))
672 break;
673
674 if (isGFX12() &&
675 tryDecodeInst(Table1: DecoderTableGFX1264, Table2: DecoderTableGFX12_FAKE1664, MI, Inst: QW,
676 Address, Comments&: CS))
677 break;
678
679 if (isGFX11() &&
680 tryDecodeInst(Table1: DecoderTableGFX1164, Table2: DecoderTableGFX11_FAKE1664, MI, Inst: QW,
681 Address, Comments&: CS))
682 break;
683
684 if (isGFX11() &&
685 tryDecodeInst(Table: DecoderTableGFX11W6464, MI, Inst: QW, Address, Comments&: CS))
686 break;
687
688 if (isGFX12() &&
689 tryDecodeInst(Table: DecoderTableGFX12W6464, MI, Inst: QW, Address, Comments&: CS))
690 break;
691
692 // Reinitialize Bytes
693 Bytes = Bytes_.slice(N: 0, M: MaxInstBytesNum);
694 }
695
696 // Try decode 32-bit instruction
697 if (Bytes.size() >= 4) {
698 const uint32_t DW = eatBytes<uint32_t>(Bytes);
699
700 if ((isVI() || isGFX9()) &&
701 tryDecodeInst(Table: DecoderTableGFX832, MI, Inst: DW, Address, Comments&: CS))
702 break;
703
704 if (tryDecodeInst(Table: DecoderTableAMDGPU32, MI, Inst: DW, Address, Comments&: CS))
705 break;
706
707 if (isGFX9() && tryDecodeInst(Table: DecoderTableGFX932, MI, Inst: DW, Address, Comments&: CS))
708 break;
709
710 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX950Insts) &&
711 tryDecodeInst(Table: DecoderTableGFX95032, MI, Inst: DW, Address, Comments&: CS))
712 break;
713
714 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX90AInsts) &&
715 tryDecodeInst(Table: DecoderTableGFX90A32, MI, Inst: DW, Address, Comments&: CS))
716 break;
717
718 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX10_BEncoding) &&
719 tryDecodeInst(Table: DecoderTableGFX10_B32, MI, Inst: DW, Address, Comments&: CS))
720 break;
721
722 if (isGFX10() && tryDecodeInst(Table: DecoderTableGFX1032, MI, Inst: DW, Address, Comments&: CS))
723 break;
724
725 if (isGFX11() &&
726 tryDecodeInst(Table1: DecoderTableGFX1132, Table2: DecoderTableGFX11_FAKE1632, MI, Inst: DW,
727 Address, Comments&: CS))
728 break;
729
730 if (isGFX1250() &&
731 tryDecodeInst(Table1: DecoderTableGFX125032, Table2: DecoderTableGFX1250_FAKE1632, MI,
732 Inst: DW, Address, Comments&: CS))
733 break;
734
735 if (isGFX12() &&
736 tryDecodeInst(Table1: DecoderTableGFX1232, Table2: DecoderTableGFX12_FAKE1632, MI, Inst: DW,
737 Address, Comments&: CS))
738 break;
739 }
740
741 return MCDisassembler::Fail;
742 } while (false);
743
744 DecodeStatus Status = MCDisassembler::Success;
745
746 decodeImmOperands(MI, MCII: *MCII);
747
748 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
749 if (isMacDPP(MI))
750 convertMacDPPInst(MI);
751
752 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
753 convertVOP3PDPPInst(MI);
754 else if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
755 convertVOPCDPPInst(MI); // Special VOP3 case
756 else if (AMDGPU::isVOPC64DPP(Opc: MI.getOpcode()))
757 convertVOPC64DPPInst(MI); // Special VOP3 case
758 else if (AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::dpp8) !=
759 -1)
760 convertDPP8Inst(MI);
761 else if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
762 convertVOP3DPPInst(MI); // Regular VOP3 case
763 }
764
765 convertTrue16OpSel(MI);
766
767 if (AMDGPU::isMAC(Opc: MI.getOpcode())) {
768 // Insert dummy unused src2_modifiers.
769 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
770 Name: AMDGPU::OpName::src2_modifiers);
771 }
772
773 if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
774 MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
775 // Insert dummy unused src2_modifiers.
776 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
777 Name: AMDGPU::OpName::src2_modifiers);
778 }
779
780 if ((MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
781 !AMDGPU::hasGDS(STI)) {
782 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::gds);
783 }
784
785 if (MCII->get(Opcode: MI.getOpcode()).TSFlags &
786 (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD)) {
787 int CPolPos = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
788 Name: AMDGPU::OpName::cpol);
789 if (CPolPos != -1) {
790 unsigned CPol =
791 (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
792 AMDGPU::CPol::GLC : 0;
793 if (MI.getNumOperands() <= (unsigned)CPolPos) {
794 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: CPol),
795 Name: AMDGPU::OpName::cpol);
796 } else if (CPol) {
797 MI.getOperand(i: CPolPos).setImm(MI.getOperand(i: CPolPos).getImm() | CPol);
798 }
799 }
800 }
801
802 if ((MCII->get(Opcode: MI.getOpcode()).TSFlags &
803 (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
804 (STI.hasFeature(Feature: AMDGPU::FeatureGFX90AInsts))) {
    // GFX90A lost TFE; its place is occupied by ACC.
806 int TFEOpIdx =
807 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::tfe);
808 if (TFEOpIdx != -1) {
809 auto *TFEIter = MI.begin();
810 std::advance(i&: TFEIter, n: TFEOpIdx);
811 MI.insert(I: TFEIter, Op: MCOperand::createImm(Val: 0));
812 }
813 }
814
815 if (MCII->get(Opcode: MI.getOpcode()).TSFlags &
816 (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) {
817 int SWZOpIdx =
818 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::swz);
819 if (SWZOpIdx != -1) {
820 auto *SWZIter = MI.begin();
821 std::advance(i&: SWZIter, n: SWZOpIdx);
822 MI.insert(I: SWZIter, Op: MCOperand::createImm(Val: 0));
823 }
824 }
825
826 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::MIMG) {
827 int VAddr0Idx =
828 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::vaddr0);
829 int RsrcIdx =
830 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::srsrc);
831 unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
832 if (VAddr0Idx >= 0 && NSAArgs > 0) {
833 unsigned NSAWords = (NSAArgs + 3) / 4;
834 if (Bytes.size() < 4 * NSAWords)
835 return MCDisassembler::Fail;
836 for (unsigned i = 0; i < NSAArgs; ++i) {
837 const unsigned VAddrIdx = VAddr0Idx + 1 + i;
838 auto VAddrRCID =
839 MCII->get(Opcode: MI.getOpcode()).operands()[VAddrIdx].RegClass;
840 MI.insert(I: MI.begin() + VAddrIdx, Op: createRegOperand(RegClassID: VAddrRCID, Val: Bytes[i]));
841 }
842 Bytes = Bytes.slice(N: 4 * NSAWords);
843 }
844
845 convertMIMGInst(MI);
846 }
847
848 if (MCII->get(Opcode: MI.getOpcode()).TSFlags &
849 (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))
850 convertMIMGInst(MI);
851
852 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
853 convertEXPInst(MI);
854
855 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
856 convertVINTERPInst(MI);
857
858 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
859 convertSDWAInst(MI);
860
861 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::IsMAI)
862 convertMAIInst(MI);
863
864 int VDstIn_Idx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
865 Name: AMDGPU::OpName::vdst_in);
866 if (VDstIn_Idx != -1) {
867 int Tied = MCII->get(Opcode: MI.getOpcode()).getOperandConstraint(OpNum: VDstIn_Idx,
868 Constraint: MCOI::OperandConstraint::TIED_TO);
869 if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
870 !MI.getOperand(i: VDstIn_Idx).isReg() ||
871 MI.getOperand(i: VDstIn_Idx).getReg() != MI.getOperand(i: Tied).getReg())) {
872 if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
873 MI.erase(I: &MI.getOperand(i: VDstIn_Idx));
874 insertNamedMCOperand(MI,
875 Op: MCOperand::createReg(Reg: MI.getOperand(i: Tied).getReg()),
876 Name: AMDGPU::OpName::vdst_in);
877 }
878 }
879
880 bool IsSOPK = MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
881 if (AMDGPU::hasNamedOperand(Opcode: MI.getOpcode(), NamedIdx: AMDGPU::OpName::imm) && !IsSOPK)
882 convertFMAanyK(MI);
883
  // Some VOPC instructions, e.g., v_cmpx_f_f64, use the VOP3 encoding and have
  // EXEC as an implicit destination. Issue a warning if the encoding for vdst
  // is not EXEC.
887 if ((MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::VOP3) &&
888 MCII->get(Opcode: MI.getOpcode()).hasImplicitDefOfPhysReg(Reg: AMDGPU::EXEC)) {
889 auto ExecEncoding = MRI.getEncodingValue(Reg: AMDGPU::EXEC_LO);
890 if (Bytes_[0] != ExecEncoding)
891 Status = MCDisassembler::SoftFail;
892 }
893
894 Size = MaxInstBytesNum - Bytes.size();
895 return Status;
896}
897
898void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
899 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX11Insts)) {
900 // The MCInst still has these fields even though they are no longer encoded
901 // in the GFX11 instruction.
902 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::vm);
903 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::compr);
904 }
905}
906
907void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
908 convertTrue16OpSel(MI);
909 if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
910 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
911 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
912 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
913 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
914 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
915 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
916 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
917 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
918 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
919 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
920 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
921 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
922 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
923 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
924 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12) {
925 // The MCInst has this field that is not directly encoded in the
926 // instruction.
927 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::op_sel);
928 }
929}
930
931void AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
932 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX9) ||
933 STI.hasFeature(Feature: AMDGPU::FeatureGFX10)) {
934 if (AMDGPU::hasNamedOperand(Opcode: MI.getOpcode(), NamedIdx: AMDGPU::OpName::sdst))
935 // VOPC - insert clamp
936 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::clamp);
937 } else if (STI.hasFeature(Feature: AMDGPU::FeatureVolcanicIslands)) {
938 int SDst = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::sdst);
939 if (SDst != -1) {
940 // VOPC - insert VCC register as sdst
941 insertNamedMCOperand(MI, Op: createRegOperand(RegId: AMDGPU::VCC),
942 Name: AMDGPU::OpName::sdst);
943 } else {
944 // VOP1/2 - insert omod if present in instruction
945 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::omod);
946 }
947 }
948}
949
950/// Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the
951/// appropriate subregister for the used format width.
952static void adjustMFMA_F8F6F4OpRegClass(const MCRegisterInfo &MRI,
953 MCOperand &MO, uint8_t NumRegs) {
954 switch (NumRegs) {
955 case 4:
956 return MO.setReg(MRI.getSubReg(Reg: MO.getReg(), Idx: AMDGPU::sub0_sub1_sub2_sub3));
957 case 6:
958 return MO.setReg(
959 MRI.getSubReg(Reg: MO.getReg(), Idx: AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));
960 case 8:
961 // No-op in cases where one operand is still f8/bf8.
962 return;
963 default:
964 llvm_unreachable("Unexpected size for mfma f8f6f4 operand");
965 }
966}
967
/// f8f6f4 instructions have different pseudos depending on the formats used.
/// The disassembler table only contains the variants with the largest register
/// classes, which assume an fp8/bf8 format for both operands. The actual
/// register class depends on the format selected by the blgp and cbsz
/// operands, so adjust the register classes accordingly.
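/// For example, if cbsz selects a 6-register (fp6/bf6) format for src A, the
/// operand decoded with the 8-register class is narrowed to its
/// sub0_sub1_sub2_sub3_sub4_sub5 subregister.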
973void AMDGPUDisassembler::convertMAIInst(MCInst &MI) const {
974 int BlgpIdx =
975 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::blgp);
976 if (BlgpIdx == -1)
977 return;
978
979 int CbszIdx =
980 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::cbsz);
981
982 unsigned CBSZ = MI.getOperand(i: CbszIdx).getImm();
983 unsigned BLGP = MI.getOperand(i: BlgpIdx).getImm();
984
985 const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
986 AMDGPU::getMFMA_F8F6F4_WithFormatArgs(CBSZ, BLGP, F8F8Opcode: MI.getOpcode());
987 if (!AdjustedRegClassOpcode ||
988 AdjustedRegClassOpcode->Opcode == MI.getOpcode())
989 return;
990
991 MI.setOpcode(AdjustedRegClassOpcode->Opcode);
992 int Src0Idx =
993 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::src0);
994 int Src1Idx =
995 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::src1);
996 adjustMFMA_F8F6F4OpRegClass(MRI, MO&: MI.getOperand(i: Src0Idx),
997 NumRegs: AdjustedRegClassOpcode->NumRegsSrcA);
998 adjustMFMA_F8F6F4OpRegClass(MRI, MO&: MI.getOperand(i: Src1Idx),
999 NumRegs: AdjustedRegClassOpcode->NumRegsSrcB);
1000}
1001
1002struct VOPModifiers {
1003 unsigned OpSel = 0;
1004 unsigned OpSelHi = 0;
1005 unsigned NegLo = 0;
1006 unsigned NegHi = 0;
1007};
1008
1009// Reconstruct values of VOP3/VOP3P operands such as op_sel.
1010// Note that these values do not affect disassembler output,
1011// so this is only necessary for consistency with src_modifiers.
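// For example, if src1_modifiers has SISrcMods::OP_SEL_0 set, bit 1 of the
// reconstructed op_sel value below is set.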
1012static VOPModifiers collectVOPModifiers(const MCInst &MI,
1013 bool IsVOP3P = false) {
1014 VOPModifiers Modifiers;
1015 unsigned Opc = MI.getOpcode();
1016 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
1017 AMDGPU::OpName::src1_modifiers,
1018 AMDGPU::OpName::src2_modifiers};
1019 for (int J = 0; J < 3; ++J) {
1020 int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: ModOps[J]);
1021 if (OpIdx == -1)
1022 continue;
1023
1024 unsigned Val = MI.getOperand(i: OpIdx).getImm();
1025
1026 Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
1027 if (IsVOP3P) {
1028 Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
1029 Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
1030 Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
1031 } else if (J == 0) {
1032 Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
1033 }
1034 }
1035
1036 return Modifiers;
1037}
1038
1039// Instructions decode the op_sel/suffix bits into the src_modifier
1040// operands. Copy those bits into the src operands for true16 VGPRs.
1041void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const {
1042 const unsigned Opc = MI.getOpcode();
1043 const MCRegisterClass &ConversionRC =
1044 MRI.getRegClass(i: AMDGPU::VGPR_16RegClassID);
1045 constexpr std::array<std::tuple<AMDGPU::OpName, AMDGPU::OpName, unsigned>, 4>
      OpAndOpMods = {{{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
1047 SISrcMods::OP_SEL_0},
1048 {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
1049 SISrcMods::OP_SEL_0},
1050 {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
1051 SISrcMods::OP_SEL_0},
1052 {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
1053 SISrcMods::DST_OP_SEL}}};
1054 for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
1055 int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: OpName);
1056 int OpModsIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, Name: OpModsName);
1057 if (OpIdx == -1 || OpModsIdx == -1)
1058 continue;
1059 MCOperand &Op = MI.getOperand(i: OpIdx);
1060 if (!Op.isReg())
1061 continue;
1062 if (!ConversionRC.contains(Reg: Op.getReg()))
1063 continue;
1064 unsigned OpEnc = MRI.getEncodingValue(Reg: Op.getReg());
1065 const MCOperand &OpMods = MI.getOperand(i: OpModsIdx);
1066 unsigned ModVal = OpMods.getImm();
1067 if (ModVal & OpSelMask) { // isHi
1068 unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
1069 Op.setReg(ConversionRC.getRegister(i: RegIdx * 2 + 1));
1070 }
1071 }
1072}
1073
1074// MAC opcodes have special old and src2 operands.
1075// src2 is tied to dst, while old is not tied (but assumed to be).
1076bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
1077 constexpr int DST_IDX = 0;
1078 auto Opcode = MI.getOpcode();
1079 const auto &Desc = MCII->get(Opcode);
1080 auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, Name: AMDGPU::OpName::old);
1081
1082 if (OldIdx != -1 && Desc.getOperandConstraint(
1083 OpNum: OldIdx, Constraint: MCOI::OperandConstraint::TIED_TO) == -1) {
1084 assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
1085 assert(Desc.getOperandConstraint(
1086 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
1087 MCOI::OperandConstraint::TIED_TO) == DST_IDX);
1088 (void)DST_IDX;
1089 return true;
1090 }
1091
1092 return false;
1093}
1094
1095// Create dummy old operand and insert dummy unused src2_modifiers
1096void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
1097 assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
1098 insertNamedMCOperand(MI, Op: MCOperand::createReg(Reg: 0), Name: AMDGPU::OpName::old);
1099 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
1100 Name: AMDGPU::OpName::src2_modifiers);
1101}
1102
1103void AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
1104 unsigned Opc = MI.getOpcode();
1105
1106 int VDstInIdx =
1107 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::vdst_in);
1108 if (VDstInIdx != -1)
1109 insertNamedMCOperand(MI, Op: MI.getOperand(i: 0), Name: AMDGPU::OpName::vdst_in);
1110
1111 unsigned DescNumOps = MCII->get(Opcode: Opc).getNumOperands();
1112 if (MI.getNumOperands() < DescNumOps &&
1113 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel)) {
1114 convertTrue16OpSel(MI);
1115 auto Mods = collectVOPModifiers(MI);
1116 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.OpSel),
1117 Name: AMDGPU::OpName::op_sel);
1118 } else {
1119 // Insert dummy unused src modifiers.
1120 if (MI.getNumOperands() < DescNumOps &&
1121 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::src0_modifiers))
1122 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
1123 Name: AMDGPU::OpName::src0_modifiers);
1124
1125 if (MI.getNumOperands() < DescNumOps &&
1126 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::src1_modifiers))
1127 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
1128 Name: AMDGPU::OpName::src1_modifiers);
1129 }
1130}
1131
1132void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
1133 convertTrue16OpSel(MI);
1134
1135 int VDstInIdx =
1136 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::vdst_in);
1137 if (VDstInIdx != -1)
1138 insertNamedMCOperand(MI, Op: MI.getOperand(i: 0), Name: AMDGPU::OpName::vdst_in);
1139
1140 unsigned Opc = MI.getOpcode();
1141 unsigned DescNumOps = MCII->get(Opcode: Opc).getNumOperands();
1142 if (MI.getNumOperands() < DescNumOps &&
1143 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel)) {
1144 auto Mods = collectVOPModifiers(MI);
1145 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.OpSel),
1146 Name: AMDGPU::OpName::op_sel);
1147 }
1148}
1149
// Note that before gfx10 the MIMG encoding provided no information about VADDR
// size. Consequently, decoded instructions always show the address as if it
// were one dword, which may not actually be the case.
1153void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
1154 auto TSFlags = MCII->get(Opcode: MI.getOpcode()).TSFlags;
1155
1156 int VDstIdx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
1157 Name: AMDGPU::OpName::vdst);
1158
1159 int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
1160 Name: AMDGPU::OpName::vdata);
1161 int VAddr0Idx =
1162 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::vaddr0);
1163 AMDGPU::OpName RsrcOpName = (TSFlags & SIInstrFlags::MIMG)
1164 ? AMDGPU::OpName::srsrc
1165 : AMDGPU::OpName::rsrc;
1166 int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: RsrcOpName);
1167 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
1168 Name: AMDGPU::OpName::dmask);
1169
1170 int TFEIdx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
1171 Name: AMDGPU::OpName::tfe);
1172 int D16Idx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(),
1173 Name: AMDGPU::OpName::d16);
1174
1175 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc: MI.getOpcode());
1176 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
1177 AMDGPU::getMIMGBaseOpcodeInfo(BaseOpcode: Info->BaseOpcode);
1178
1179 assert(VDataIdx != -1);
1180 if (BaseOpcode->BVH) {
1181 // Add A16 operand for intersect_ray instructions
1182 addOperand(Inst&: MI, Opnd: MCOperand::createImm(Val: BaseOpcode->A16));
1183 return;
1184 }
1185
1186 bool IsAtomic = (VDstIdx != -1);
1187 bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
1188 bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
1189 bool IsNSA = false;
1190 bool IsPartialNSA = false;
1191 unsigned AddrSize = Info->VAddrDwords;
1192
1193 if (isGFX10Plus()) {
1194 unsigned DimIdx =
1195 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::dim);
1196 int A16Idx =
1197 AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), Name: AMDGPU::OpName::a16);
1198 const AMDGPU::MIMGDimInfo *Dim =
1199 AMDGPU::getMIMGDimInfoByEncoding(DimEnc: MI.getOperand(i: DimIdx).getImm());
1200 const bool IsA16 = (A16Idx != -1 && MI.getOperand(i: A16Idx).getImm());
1201
1202 AddrSize =
1203 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, IsG16Supported: AMDGPU::hasG16(STI));
1204
1205 // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
1206 // VIMAGE insts other than BVH never use vaddr4.
1207 IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
1208 Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
1209 Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
1210 if (!IsNSA) {
1211 if (!IsVSample && AddrSize > 12)
1212 AddrSize = 16;
1213 } else {
1214 if (AddrSize > Info->VAddrDwords) {
1215 if (!STI.hasFeature(Feature: AMDGPU::FeaturePartialNSAEncoding)) {
1216 // The NSA encoding does not contain enough operands for the
1217 // combination of base opcode / dimension. Should this be an error?
1218 return;
1219 }
1220 IsPartialNSA = true;
1221 }
1222 }
1223 }
1224
1225 unsigned DMask = MI.getOperand(i: DMaskIdx).getImm() & 0xf;
1226 unsigned DstSize = IsGather4 ? 4 : std::max(a: llvm::popcount(Value: DMask), b: 1);
1227
1228 bool D16 = D16Idx >= 0 && MI.getOperand(i: D16Idx).getImm();
1229 if (D16 && AMDGPU::hasPackedD16(STI)) {
1230 DstSize = (DstSize + 1) / 2;
1231 }
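  // For example, DMask = 0b1011 enables three channels, so DstSize = 3, or 2
  // once packed D16 halves it.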
1232
1233 if (TFEIdx != -1 && MI.getOperand(i: TFEIdx).getImm())
1234 DstSize += 1;
1235
1236 if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
1237 return;
1238
1239 int NewOpcode =
1240 AMDGPU::getMIMGOpcode(BaseOpcode: Info->BaseOpcode, MIMGEncoding: Info->MIMGEncoding, VDataDwords: DstSize, VAddrDwords: AddrSize);
1241 if (NewOpcode == -1)
1242 return;
1243
1244 // Widen the register to the correct number of enabled channels.
1245 MCRegister NewVdata;
1246 if (DstSize != Info->VDataDwords) {
1247 auto DataRCID = MCII->get(Opcode: NewOpcode).operands()[VDataIdx].RegClass;
1248
1249 // Get first subregister of VData
1250 MCRegister Vdata0 = MI.getOperand(i: VDataIdx).getReg();
1251 MCRegister VdataSub0 = MRI.getSubReg(Reg: Vdata0, Idx: AMDGPU::sub0);
1252 Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
1253
1254 NewVdata = MRI.getMatchingSuperReg(Reg: Vdata0, SubIdx: AMDGPU::sub0,
1255 RC: &MRI.getRegClass(i: DataRCID));
1256 if (!NewVdata) {
1257 // It's possible to encode this such that the low register + enabled
1258 // components exceeds the register count.
1259 return;
1260 }
1261 }
1262
  // If not using NSA on GFX10+, widen the vaddr0 address register to the
  // correct size. If using partial NSA on GFX11+, widen the last address
  // register.
1265 int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
1266 MCRegister NewVAddrSA;
1267 if (STI.hasFeature(Feature: AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
1268 AddrSize != Info->VAddrDwords) {
1269 MCRegister VAddrSA = MI.getOperand(i: VAddrSAIdx).getReg();
1270 MCRegister VAddrSubSA = MRI.getSubReg(Reg: VAddrSA, Idx: AMDGPU::sub0);
1271 VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;
1272
1273 auto AddrRCID = MCII->get(Opcode: NewOpcode).operands()[VAddrSAIdx].RegClass;
1274 NewVAddrSA = MRI.getMatchingSuperReg(Reg: VAddrSA, SubIdx: AMDGPU::sub0,
1275 RC: &MRI.getRegClass(i: AddrRCID));
1276 if (!NewVAddrSA)
1277 return;
1278 }
1279
1280 MI.setOpcode(NewOpcode);
1281
1282 if (NewVdata != AMDGPU::NoRegister) {
1283 MI.getOperand(i: VDataIdx) = MCOperand::createReg(Reg: NewVdata);
1284
1285 if (IsAtomic) {
1286 // Atomic operations have an additional operand (a copy of data)
1287 MI.getOperand(i: VDstIdx) = MCOperand::createReg(Reg: NewVdata);
1288 }
1289 }
1290
1291 if (NewVAddrSA) {
1292 MI.getOperand(i: VAddrSAIdx) = MCOperand::createReg(Reg: NewVAddrSA);
1293 } else if (IsNSA) {
1294 assert(AddrSize <= Info->VAddrDwords);
1295 MI.erase(First: MI.begin() + VAddr0Idx + AddrSize,
1296 Last: MI.begin() + VAddr0Idx + Info->VAddrDwords);
1297 }
1298}
1299
// The op_sel and neg bits are used both in src_modifiers and in standalone
// operands. The autogenerated decoder only adds them to src_modifiers, so
// manually add the bits to the other operands.
1303void AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
1304 unsigned Opc = MI.getOpcode();
1305 unsigned DescNumOps = MCII->get(Opcode: Opc).getNumOperands();
1306 auto Mods = collectVOPModifiers(MI, IsVOP3P: true);
1307
1308 if (MI.getNumOperands() < DescNumOps &&
1309 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::vdst_in))
1310 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0), Name: AMDGPU::OpName::vdst_in);
1311
1312 if (MI.getNumOperands() < DescNumOps &&
1313 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel))
1314 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.OpSel),
1315 Name: AMDGPU::OpName::op_sel);
1316 if (MI.getNumOperands() < DescNumOps &&
1317 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel_hi))
1318 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.OpSelHi),
1319 Name: AMDGPU::OpName::op_sel_hi);
1320 if (MI.getNumOperands() < DescNumOps &&
1321 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::neg_lo))
1322 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.NegLo),
1323 Name: AMDGPU::OpName::neg_lo);
1324 if (MI.getNumOperands() < DescNumOps &&
1325 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::neg_hi))
1326 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.NegHi),
1327 Name: AMDGPU::OpName::neg_hi);
1328}
1329
1330// Create dummy old operand and insert optional operands
1331void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
1332 unsigned Opc = MI.getOpcode();
1333 unsigned DescNumOps = MCII->get(Opcode: Opc).getNumOperands();
1334
1335 if (MI.getNumOperands() < DescNumOps &&
1336 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::old))
1337 insertNamedMCOperand(MI, Op: MCOperand::createReg(Reg: 0), Name: AMDGPU::OpName::old);
1338
1339 if (MI.getNumOperands() < DescNumOps &&
1340 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::src0_modifiers))
1341 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
1342 Name: AMDGPU::OpName::src0_modifiers);
1343
1344 if (MI.getNumOperands() < DescNumOps &&
1345 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::src1_modifiers))
1346 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: 0),
1347 Name: AMDGPU::OpName::src1_modifiers);
1348}
1349
1350void AMDGPUDisassembler::convertVOPC64DPPInst(MCInst &MI) const {
1351 unsigned Opc = MI.getOpcode();
1352 unsigned DescNumOps = MCII->get(Opcode: Opc).getNumOperands();
1353
1354 convertTrue16OpSel(MI);
1355
1356 if (MI.getNumOperands() < DescNumOps &&
1357 AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::op_sel)) {
1358 VOPModifiers Mods = collectVOPModifiers(MI);
1359 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Mods.OpSel),
1360 Name: AMDGPU::OpName::op_sel);
1361 }
1362}
1363
1364void AMDGPUDisassembler::convertFMAanyK(MCInst &MI) const {
1365 assert(HasLiteral && "Should have decoded a literal");
1366 insertNamedMCOperand(MI, Op: MCOperand::createImm(Val: Literal), Name: AMDGPU::OpName::immX);
1367}
1368
1369const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
1370 return getContext().getRegisterInfo()->
1371 getRegClassName(Class: &AMDGPUMCRegisterClasses[RegClassID]);
1372}
1373
1374inline
1375MCOperand AMDGPUDisassembler::errOperand(unsigned V,
1376 const Twine& ErrMsg) const {
1377 *CommentStream << "Error: " + ErrMsg;
1378
1379 // ToDo: add support for error operands to MCInst.h
1380 // return MCOperand::createError(V);
1381 return MCOperand();
1382}
1383
1384inline
1385MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
1386 return MCOperand::createReg(Reg: AMDGPU::getMCReg(Reg: RegId, STI));
1387}
1388
1389inline
1390MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
1391 unsigned Val) const {
1392 const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
1393 if (Val >= RegCl.getNumRegs())
1394 return errOperand(V: Val, ErrMsg: Twine(getRegClassName(RegClassID)) +
1395 ": unknown register " + Twine(Val));
1396 return createRegOperand(RegId: RegCl.getRegister(i: Val));
1397}
1398
1399inline
1400MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
1401 unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI - 102
  // Valery: here we accept as much as we can and let the assembler sort it out
1404 int shift = 0;
1405 switch (SRegClassID) {
1406 case AMDGPU::SGPR_32RegClassID:
1407 case AMDGPU::TTMP_32RegClassID:
1408 break;
1409 case AMDGPU::SGPR_64RegClassID:
1410 case AMDGPU::TTMP_64RegClassID:
1411 shift = 1;
1412 break;
1413 case AMDGPU::SGPR_96RegClassID:
1414 case AMDGPU::TTMP_96RegClassID:
1415 case AMDGPU::SGPR_128RegClassID:
1416 case AMDGPU::TTMP_128RegClassID:
1417 // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
1418 // this bundle?
1419 case AMDGPU::SGPR_256RegClassID:
1420 case AMDGPU::TTMP_256RegClassID:
1421 // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
1422 // this bundle?
1423 case AMDGPU::SGPR_288RegClassID:
1424 case AMDGPU::TTMP_288RegClassID:
1425 case AMDGPU::SGPR_320RegClassID:
1426 case AMDGPU::TTMP_320RegClassID:
1427 case AMDGPU::SGPR_352RegClassID:
1428 case AMDGPU::TTMP_352RegClassID:
1429 case AMDGPU::SGPR_384RegClassID:
1430 case AMDGPU::TTMP_384RegClassID:
1431 case AMDGPU::SGPR_512RegClassID:
1432 case AMDGPU::TTMP_512RegClassID:
1433 shift = 2;
1434 break;
1435 // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
1436 // this bundle?
1437 default:
1438 llvm_unreachable("unhandled register class");
1439 }
1440
1441 if (Val % (1 << shift)) {
1442 *CommentStream << "Warning: " << getRegClassName(RegClassID: SRegClassID)
1443 << ": scalar reg isn't aligned " << Val;
1444 }
1445
1446 return createRegOperand(RegClassID: SRegClassID, Val: Val >> shift);
1447}
1448
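// Each 32-bit VGPR contributes two entries to the VGPR_16 register class as
// indexed here: 2*RegIdx selects the low half and 2*RegIdx+1 the high half,
// so e.g. RegIdx = 3 with IsHi = true yields index 7 (the high half of v3).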
1449MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
1450 bool IsHi) const {
1451 unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
1452 return createRegOperand(RegClassID: AMDGPU::VGPR_16RegClassID, Val: RegIdxInVGPR16);
1453}
1454
// Decode literals for instructions that always have a literal in the encoding.
1456MCOperand
1457AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
1458 if (HasLiteral) {
1459 assert(
1460 AMDGPU::hasVOPD(STI) &&
1461 "Should only decode multiple kimm with VOPD, check VSrc operand types");
1462 if (Literal != Val)
1463 return errOperand(V: Val, ErrMsg: "More than one unique literal is illegal");
1464 }
1465 HasLiteral = true;
1466 Literal = Val;
1467 return MCOperand::createImm(Val: Literal);
1468}
1469
1470MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const {
  // For now all literal constants are treated as unsigned integers.
1472 // ToDo: deal with signed/unsigned 64-bit integer constants
1473 // ToDo: deal with float/double constants
1474 if (!HasLiteral) {
1475 if (Bytes.size() < 4) {
1476 return errOperand(V: 0, ErrMsg: "cannot read literal, inst bytes left " +
1477 Twine(Bytes.size()));
1478 }
1479 HasLiteral = true;
1480 Literal = Literal64 = eatBytes<uint32_t>(Bytes);
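    // For f64 operands only the high 32 bits of the literal are encoded; shift
    // them into place so that e.g. the encoded word 0x40590000 becomes
    // 0x4059000000000000 (100.0).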
1481 if (ExtendFP64)
1482 Literal64 <<= 32;
1483 }
1484 return MCOperand::createImm(Val: ExtendFP64 ? Literal64 : Literal);
1485}
1486
1487MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
1488 using namespace AMDGPU::EncValues;
1489
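  // Inline integer operands occupy a contiguous encoding range; with the
  // usual SI/VI-style operand encoding (128..208) this maps 128 -> 0,
  // 129..192 -> 1..64 and 193..208 -> -1..-16.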
1490 assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
1491 return MCOperand::createImm(Val: (Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
1492 (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
1493 (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
1494 // Cast prevents negative overflow.
1495}
1496
1497static int64_t getInlineImmVal32(unsigned Imm) {
1498 switch (Imm) {
1499 case 240:
1500 return llvm::bit_cast<uint32_t>(from: 0.5f);
1501 case 241:
1502 return llvm::bit_cast<uint32_t>(from: -0.5f);
1503 case 242:
1504 return llvm::bit_cast<uint32_t>(from: 1.0f);
1505 case 243:
1506 return llvm::bit_cast<uint32_t>(from: -1.0f);
1507 case 244:
1508 return llvm::bit_cast<uint32_t>(from: 2.0f);
1509 case 245:
1510 return llvm::bit_cast<uint32_t>(from: -2.0f);
1511 case 246:
1512 return llvm::bit_cast<uint32_t>(from: 4.0f);
1513 case 247:
1514 return llvm::bit_cast<uint32_t>(from: -4.0f);
1515 case 248: // 1 / (2 * PI)
1516 return 0x3e22f983;
1517 default:
1518 llvm_unreachable("invalid fp inline imm");
1519 }
1520}
1521
1522static int64_t getInlineImmVal64(unsigned Imm) {
1523 switch (Imm) {
1524 case 240:
1525 return llvm::bit_cast<uint64_t>(from: 0.5);
1526 case 241:
1527 return llvm::bit_cast<uint64_t>(from: -0.5);
1528 case 242:
1529 return llvm::bit_cast<uint64_t>(from: 1.0);
1530 case 243:
1531 return llvm::bit_cast<uint64_t>(from: -1.0);
1532 case 244:
1533 return llvm::bit_cast<uint64_t>(from: 2.0);
1534 case 245:
1535 return llvm::bit_cast<uint64_t>(from: -2.0);
1536 case 246:
1537 return llvm::bit_cast<uint64_t>(from: 4.0);
1538 case 247:
1539 return llvm::bit_cast<uint64_t>(from: -4.0);
1540 case 248: // 1 / (2 * PI)
1541 return 0x3fc45f306dc9c882;
1542 default:
1543 llvm_unreachable("invalid fp inline imm");
1544 }
1545}
1546
1547static int64_t getInlineImmValF16(unsigned Imm) {
1548 switch (Imm) {
1549 case 240:
1550 return 0x3800;
1551 case 241:
1552 return 0xB800;
1553 case 242:
1554 return 0x3C00;
1555 case 243:
1556 return 0xBC00;
1557 case 244:
1558 return 0x4000;
1559 case 245:
1560 return 0xC000;
1561 case 246:
1562 return 0x4400;
1563 case 247:
1564 return 0xC400;
1565 case 248: // 1 / (2 * PI)
1566 return 0x3118;
1567 default:
1568 llvm_unreachable("invalid fp inline imm");
1569 }
1570}
1571
1572static int64_t getInlineImmValBF16(unsigned Imm) {
1573 switch (Imm) {
1574 case 240:
1575 return 0x3F00;
1576 case 241:
1577 return 0xBF00;
1578 case 242:
1579 return 0x3F80;
1580 case 243:
1581 return 0xBF80;
1582 case 244:
1583 return 0x4000;
1584 case 245:
1585 return 0xC000;
1586 case 246:
1587 return 0x4080;
1588 case 247:
1589 return 0xC080;
1590 case 248: // 1 / (2 * PI)
1591 return 0x3E22;
1592 default:
1593 llvm_unreachable("invalid fp inline imm");
1594 }
1595}
1596
1597unsigned AMDGPUDisassembler::getVgprClassId(unsigned Width) const {
1598 using namespace AMDGPU;
1599
1600 switch (Width) {
1601 case 16:
1602 case 32:
1603 return VGPR_32RegClassID;
1604 case 64:
1605 return VReg_64RegClassID;
1606 case 96:
1607 return VReg_96RegClassID;
1608 case 128:
1609 return VReg_128RegClassID;
1610 case 160:
1611 return VReg_160RegClassID;
1612 case 192:
1613 return VReg_192RegClassID;
1614 case 256:
1615 return VReg_256RegClassID;
1616 case 288:
1617 return VReg_288RegClassID;
1618 case 320:
1619 return VReg_320RegClassID;
1620 case 352:
1621 return VReg_352RegClassID;
1622 case 384:
1623 return VReg_384RegClassID;
1624 case 512:
1625 return VReg_512RegClassID;
1626 case 1024:
1627 return VReg_1024RegClassID;
1628 }
1629 llvm_unreachable("Invalid register width!");
1630}
1631
1632unsigned AMDGPUDisassembler::getAgprClassId(unsigned Width) const {
1633 using namespace AMDGPU;
1634
1635 switch (Width) {
1636 case 16:
1637 case 32:
1638 return AGPR_32RegClassID;
1639 case 64:
1640 return AReg_64RegClassID;
1641 case 96:
1642 return AReg_96RegClassID;
1643 case 128:
1644 return AReg_128RegClassID;
1645 case 160:
1646 return AReg_160RegClassID;
1647 case 256:
1648 return AReg_256RegClassID;
1649 case 288:
1650 return AReg_288RegClassID;
1651 case 320:
1652 return AReg_320RegClassID;
1653 case 352:
1654 return AReg_352RegClassID;
1655 case 384:
1656 return AReg_384RegClassID;
1657 case 512:
1658 return AReg_512RegClassID;
1659 case 1024:
1660 return AReg_1024RegClassID;
1661 }
1662 llvm_unreachable("Invalid register width!");
1663}
1664
1665unsigned AMDGPUDisassembler::getSgprClassId(unsigned Width) const {
1666 using namespace AMDGPU;
1667
1668 switch (Width) {
1669 case 16:
1670 case 32:
1671 return SGPR_32RegClassID;
1672 case 64:
1673 return SGPR_64RegClassID;
1674 case 96:
1675 return SGPR_96RegClassID;
1676 case 128:
1677 return SGPR_128RegClassID;
1678 case 160:
1679 return SGPR_160RegClassID;
1680 case 256:
1681 return SGPR_256RegClassID;
1682 case 288:
1683 return SGPR_288RegClassID;
1684 case 320:
1685 return SGPR_320RegClassID;
1686 case 352:
1687 return SGPR_352RegClassID;
1688 case 384:
1689 return SGPR_384RegClassID;
1690 case 512:
1691 return SGPR_512RegClassID;
1692 }
1693 llvm_unreachable("Invalid register width!");
1694}
1695
1696unsigned AMDGPUDisassembler::getTtmpClassId(unsigned Width) const {
1697 using namespace AMDGPU;
1698
1699 switch (Width) {
1700 case 16:
1701 case 32:
1702 return TTMP_32RegClassID;
1703 case 64:
1704 return TTMP_64RegClassID;
1705 case 128:
1706 return TTMP_128RegClassID;
1707 case 256:
1708 return TTMP_256RegClassID;
1709 case 288:
1710 return TTMP_288RegClassID;
1711 case 320:
1712 return TTMP_320RegClassID;
1713 case 352:
1714 return TTMP_352RegClassID;
1715 case 384:
1716 return TTMP_384RegClassID;
1717 case 512:
1718 return TTMP_512RegClassID;
1719 }
1720 llvm_unreachable("Invalid register width!");
1721}
1722
1723int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
1724 using namespace AMDGPU::EncValues;
1725
1726 unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
1727 unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
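  // E.g. on GFX9+ the trap temporaries start at source encoding 108, so
  // Val = 110 yields index 2 (ttmp2); anything outside the TTMP window
  // returns -1.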
1728
1729 return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
1730}
1731
1732MCOperand AMDGPUDisassembler::decodeSrcOp(unsigned Width, unsigned Val) const {
1733 using namespace AMDGPU::EncValues;
1734
1735 assert(Val < 1024); // enum10
1736
1737 bool IsAGPR = Val & 512;
1738 Val &= 511;
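  // Bit 9 of the 10-bit ("enum10") encoding selects the AGPR file; the low 9
  // bits follow the normal operand encoding, so e.g. Val = 261 (256 + 5)
  // decodes to v5 while Val = 773 (512 + 256 + 5) decodes to a5.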
1739
1740 if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
1741 return createRegOperand(RegClassID: IsAGPR ? getAgprClassId(Width)
1742 : getVgprClassId(Width), Val: Val - VGPR_MIN);
1743 }
1744 return decodeNonVGPRSrcOp(Width, Val: Val & 0xFF);
1745}
1746
1747MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(unsigned Width,
1748 unsigned Val) const {
  // Cases where Val{8} is 1 (VGPR, AGPR, or true16 VGPR) should have been
  // decoded earlier.
1751 assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
1752 using namespace AMDGPU::EncValues;
1753
1754 if (Val <= SGPR_MAX) {
    // "SGPR_MIN <= Val" is always true and would cause a compilation warning.
1756 static_assert(SGPR_MIN == 0);
1757 return createSRegOperand(SRegClassID: getSgprClassId(Width), Val: Val - SGPR_MIN);
1758 }
1759
1760 int TTmpIdx = getTTmpIdx(Val);
1761 if (TTmpIdx >= 0) {
1762 return createSRegOperand(SRegClassID: getTtmpClassId(Width), Val: TTmpIdx);
1763 }
1764
1765 if ((INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) ||
1766 (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) ||
1767 Val == LITERAL_CONST)
1768 return MCOperand::createImm(Val);
1769
1770 switch (Width) {
1771 case 32:
1772 case 16:
1773 return decodeSpecialReg32(Val);
1774 case 64:
1775 return decodeSpecialReg64(Val);
1776 case 96:
1777 case 128:
1778 case 256:
1779 case 512:
1780 return decodeSpecialReg96Plus(Val);
1781 default:
1782 llvm_unreachable("unexpected immediate type");
1783 }
1784}
1785
1786// Bit 0 of DstY isn't stored in the instruction, because it's always the
1787// opposite of bit 0 of DstX.
1788MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
1789 unsigned Val) const {
1790 int VDstXInd =
1791 AMDGPU::getNamedOperandIdx(Opcode: Inst.getOpcode(), Name: AMDGPU::OpName::vdstX);
1792 assert(VDstXInd != -1);
1793 assert(Inst.getOperand(VDstXInd).isReg());
1794 unsigned XDstReg = MRI.getEncodingValue(Reg: Inst.getOperand(i: VDstXInd).getReg());
1795 Val |= ~XDstReg & 1;
1796 return createRegOperand(RegClassID: getVgprClassId(Width: 32), Val);
1797}
1798
1799MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
1800 using namespace AMDGPU;
1801
1802 switch (Val) {
1803 // clang-format off
1804 case 102: return createRegOperand(RegId: FLAT_SCR_LO);
1805 case 103: return createRegOperand(RegId: FLAT_SCR_HI);
1806 case 104: return createRegOperand(RegId: XNACK_MASK_LO);
1807 case 105: return createRegOperand(RegId: XNACK_MASK_HI);
1808 case 106: return createRegOperand(RegId: VCC_LO);
1809 case 107: return createRegOperand(RegId: VCC_HI);
1810 case 108: return createRegOperand(RegId: TBA_LO);
1811 case 109: return createRegOperand(RegId: TBA_HI);
1812 case 110: return createRegOperand(RegId: TMA_LO);
1813 case 111: return createRegOperand(RegId: TMA_HI);
1814 case 124:
1815 return isGFX11Plus() ? createRegOperand(RegId: SGPR_NULL) : createRegOperand(RegId: M0);
1816 case 125:
1817 return isGFX11Plus() ? createRegOperand(RegId: M0) : createRegOperand(RegId: SGPR_NULL);
1818 case 126: return createRegOperand(RegId: EXEC_LO);
1819 case 127: return createRegOperand(RegId: EXEC_HI);
1820 case 235: return createRegOperand(RegId: SRC_SHARED_BASE_LO);
1821 case 236: return createRegOperand(RegId: SRC_SHARED_LIMIT_LO);
1822 case 237: return createRegOperand(RegId: SRC_PRIVATE_BASE_LO);
1823 case 238: return createRegOperand(RegId: SRC_PRIVATE_LIMIT_LO);
1824 case 239: return createRegOperand(RegId: SRC_POPS_EXITING_WAVE_ID);
1825 case 251: return createRegOperand(RegId: SRC_VCCZ);
1826 case 252: return createRegOperand(RegId: SRC_EXECZ);
1827 case 253: return createRegOperand(RegId: SRC_SCC);
1828 case 254: return createRegOperand(RegId: LDS_DIRECT);
1829 default: break;
1830 // clang-format on
1831 }
1832 return errOperand(V: Val, ErrMsg: "unknown operand encoding " + Twine(Val));
1833}
1834
1835MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
1836 using namespace AMDGPU;
1837
1838 switch (Val) {
1839 case 102: return createRegOperand(RegId: FLAT_SCR);
1840 case 104: return createRegOperand(RegId: XNACK_MASK);
1841 case 106: return createRegOperand(RegId: VCC);
1842 case 108: return createRegOperand(RegId: TBA);
1843 case 110: return createRegOperand(RegId: TMA);
1844 case 124:
1845 if (isGFX11Plus())
1846 return createRegOperand(RegId: SGPR_NULL);
1847 break;
1848 case 125:
1849 if (!isGFX11Plus())
1850 return createRegOperand(RegId: SGPR_NULL);
1851 break;
1852 case 126: return createRegOperand(RegId: EXEC);
1853 case 235: return createRegOperand(RegId: SRC_SHARED_BASE);
1854 case 236: return createRegOperand(RegId: SRC_SHARED_LIMIT);
1855 case 237: return createRegOperand(RegId: SRC_PRIVATE_BASE);
1856 case 238: return createRegOperand(RegId: SRC_PRIVATE_LIMIT);
1857 case 239: return createRegOperand(RegId: SRC_POPS_EXITING_WAVE_ID);
1858 case 251: return createRegOperand(RegId: SRC_VCCZ);
1859 case 252: return createRegOperand(RegId: SRC_EXECZ);
1860 case 253: return createRegOperand(RegId: SRC_SCC);
1861 default: break;
1862 }
1863 return errOperand(V: Val, ErrMsg: "unknown operand encoding " + Twine(Val));
1864}
1865
1866MCOperand AMDGPUDisassembler::decodeSpecialReg96Plus(unsigned Val) const {
1867 using namespace AMDGPU;
1868
1869 switch (Val) {
1870 case 124:
1871 if (isGFX11Plus())
1872 return createRegOperand(RegId: SGPR_NULL);
1873 break;
1874 case 125:
1875 if (!isGFX11Plus())
1876 return createRegOperand(RegId: SGPR_NULL);
1877 break;
1878 default:
1879 break;
1880 }
1881 return errOperand(V: Val, ErrMsg: "unknown operand encoding " + Twine(Val));
1882}
1883
1884MCOperand AMDGPUDisassembler::decodeSDWASrc(unsigned Width,
1885 const unsigned Val) const {
1886 using namespace AMDGPU::SDWA;
1887 using namespace AMDGPU::EncValues;
1888
1889 if (STI.hasFeature(Feature: AMDGPU::FeatureGFX9) ||
1890 STI.hasFeature(Feature: AMDGPU::FeatureGFX10)) {
    // XXX: the cast to int is needed to avoid a warning that the comparison
    // with an unsigned value is always true.
1893 if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
1894 Val <= SDWA9EncValues::SRC_VGPR_MAX) {
1895 return createRegOperand(RegClassID: getVgprClassId(Width),
1896 Val: Val - SDWA9EncValues::SRC_VGPR_MIN);
1897 }
1898 if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
1899 Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
1900 : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
1901 return createSRegOperand(SRegClassID: getSgprClassId(Width),
1902 Val: Val - SDWA9EncValues::SRC_SGPR_MIN);
1903 }
1904 if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
1905 Val <= SDWA9EncValues::SRC_TTMP_MAX) {
1906 return createSRegOperand(SRegClassID: getTtmpClassId(Width),
1907 Val: Val - SDWA9EncValues::SRC_TTMP_MIN);
1908 }
1909
1910 const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
1911
1912 if ((INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) ||
1913 (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX))
1914 return MCOperand::createImm(Val: SVal);
1915
1916 return decodeSpecialReg32(Val: SVal);
1917 }
1918 if (STI.hasFeature(Feature: AMDGPU::FeatureVolcanicIslands))
1919 return createRegOperand(RegClassID: getVgprClassId(Width), Val);
1920 llvm_unreachable("unsupported target");
1921}
1922
1923MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
1924 return decodeSDWASrc(Width: 16, Val);
1925}
1926
1927MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
1928 return decodeSDWASrc(Width: 32, Val);
1929}
1930
1931MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
1932 using namespace AMDGPU::SDWA;
1933
1934 assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
1935 STI.hasFeature(AMDGPU::FeatureGFX10)) &&
1936 "SDWAVopcDst should be present only on GFX9+");
1937
1938 bool IsWave32 = STI.hasFeature(Feature: AMDGPU::FeatureWavefrontSize32);
1939
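  // The destination field either encodes VCC (the common case) or, when
  // VOPC_DST_VCC_MASK is set, an explicit scalar destination: a single SGPR
  // in wave32 mode or an SGPR (or TTMP) pair in wave64 mode.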
1940 if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
1941 Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
1942
1943 int TTmpIdx = getTTmpIdx(Val);
1944 if (TTmpIdx >= 0) {
1945 auto TTmpClsId = getTtmpClassId(Width: IsWave32 ? 32 : 64);
1946 return createSRegOperand(SRegClassID: TTmpClsId, Val: TTmpIdx);
1947 }
1948 if (Val > SGPR_MAX) {
1949 return IsWave32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val);
1950 }
1951 return createSRegOperand(SRegClassID: getSgprClassId(Width: IsWave32 ? 32 : 64), Val);
1952 }
1953 return createRegOperand(RegId: IsWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC);
1954}
1955
1956MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
1957 return STI.hasFeature(Feature: AMDGPU::FeatureWavefrontSize32) ? decodeSrcOp(Width: 32, Val)
1958 : decodeSrcOp(Width: 64, Val);
1959}
1960
1961MCOperand AMDGPUDisassembler::decodeSplitBarrier(unsigned Val) const {
1962 return decodeSrcOp(Width: 32, Val);
1963}
1964
1965MCOperand AMDGPUDisassembler::decodeDpp8FI(unsigned Val) const {
1966 if (Val != AMDGPU::DPP::DPP8_FI_0 && Val != AMDGPU::DPP::DPP8_FI_1)
1967 return MCOperand();
1968 return MCOperand::createImm(Val);
1969}
1970
1971MCOperand AMDGPUDisassembler::decodeVersionImm(unsigned Imm) const {
1972 using VersionField = AMDGPU::EncodingField<7, 0>;
1973 using W64Bit = AMDGPU::EncodingBit<13>;
1974 using W32Bit = AMDGPU::EncodingBit<14>;
1975 using MDPBit = AMDGPU::EncodingBit<15>;
1976 using Encoding = AMDGPU::EncodingFields<VersionField, W64Bit, W32Bit, MDPBit>;
1977
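  // The immediate packs an 8-bit microcode version code together with the
  // W64/W32/MDP flag bits defined above. When the fields round-trip, the
  // operand is printed symbolically, e.g. a recognized version code with the
  // W32 bit set is rendered as an OR of the matching version symbol and
  // UC_VERSION_W32_BIT.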
1978 auto [Version, W64, W32, MDP] = Encoding::decode(Encoded: Imm);
1979
  // Decode into a plain immediate if any unused bits are set.
1981 if (Encoding::encode(Values: Version, Values: W64, Values: W32, Values: MDP) != Imm)
1982 return MCOperand::createImm(Val: Imm);
1983
1984 const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
1985 const auto *I = find_if(
1986 Range: Versions, P: [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) {
1987 return V.Code == Version;
1988 });
1989 MCContext &Ctx = getContext();
1990 const MCExpr *E;
1991 if (I == Versions.end())
1992 E = MCConstantExpr::create(Value: Version, Ctx);
1993 else
1994 E = MCSymbolRefExpr::create(Symbol: Ctx.getOrCreateSymbol(Name: I->Symbol), Ctx);
1995
1996 if (W64)
1997 E = MCBinaryExpr::createOr(LHS: E, RHS: UCVersionW64Expr, Ctx);
1998 if (W32)
1999 E = MCBinaryExpr::createOr(LHS: E, RHS: UCVersionW32Expr, Ctx);
2000 if (MDP)
2001 E = MCBinaryExpr::createOr(LHS: E, RHS: UCVersionMDPExpr, Ctx);
2002
2003 return MCOperand::createExpr(Val: E);
2004}
2005
2006bool AMDGPUDisassembler::isVI() const {
2007 return STI.hasFeature(Feature: AMDGPU::FeatureVolcanicIslands);
2008}
2009
2010bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }
2011
2012bool AMDGPUDisassembler::isGFX90A() const {
2013 return STI.hasFeature(Feature: AMDGPU::FeatureGFX90AInsts);
2014}
2015
2016bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }
2017
2018bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }
2019
2020bool AMDGPUDisassembler::isGFX10Plus() const {
2021 return AMDGPU::isGFX10Plus(STI);
2022}
2023
2024bool AMDGPUDisassembler::isGFX11() const {
2025 return STI.hasFeature(Feature: AMDGPU::FeatureGFX11);
2026}
2027
2028bool AMDGPUDisassembler::isGFX11Plus() const {
2029 return AMDGPU::isGFX11Plus(STI);
2030}
2031
2032bool AMDGPUDisassembler::isGFX12() const {
2033 return STI.hasFeature(Feature: AMDGPU::FeatureGFX12);
2034}
2035
2036bool AMDGPUDisassembler::isGFX12Plus() const {
2037 return AMDGPU::isGFX12Plus(STI);
2038}
2039
2040bool AMDGPUDisassembler::isGFX1250() const { return AMDGPU::isGFX1250(STI); }
2041
2042bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
2043 return STI.hasFeature(Feature: AMDGPU::FeatureArchitectedFlatScratch);
2044}
2045
2046bool AMDGPUDisassembler::hasKernargPreload() const {
2047 return AMDGPU::hasKernargPreload(STI);
2048}
2049
2050//===----------------------------------------------------------------------===//
2051// AMDGPU specific symbol handling
2052//===----------------------------------------------------------------------===//
2053
/// Return a string describing the reserved bit range specified by Mask with
/// offset BaseBytes, for use in error comments. Mask is a single contiguous
/// range of 1s surrounded by zeros. The format here is meant to align with the
/// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
2058static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
2059 SmallString<32> Result;
2060 raw_svector_ostream S(Result);
2061
2062 int TrailingZeros = llvm::countr_zero(Val: Mask);
2063 int PopCount = llvm::popcount(Value: Mask);
2064
2065 if (PopCount == 1) {
2066 S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2067 } else {
2068 S << "bits in range ("
2069 << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
2070 << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2071 }
2072
2073 return Result;
2074}
2075
2076#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
2077#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2078 do { \
2079 KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
2080 } while (0)
2081#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
2082 do { \
2083 KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
2084 << GET_FIELD(MASK) << '\n'; \
2085 } while (0)
2086
2087#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG) \
2088 do { \
2089 if (FourByteBuffer & (MASK)) { \
2090 return createStringError(std::errc::invalid_argument, \
2091 "kernel descriptor " DESC \
2092 " reserved %s set" MSG, \
2093 getBitRangeFromMask((MASK), 0).c_str()); \
2094 } \
2095 } while (0)
2096
2097#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
2098#define CHECK_RESERVED_BITS_MSG(MASK, MSG) \
2099 CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
2100#define CHECK_RESERVED_BITS_DESC(MASK, DESC) \
2101 CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
2102#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG) \
2103 CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
2104
2105// NOLINTNEXTLINE(readability-identifier-naming)
2106Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
2107 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2108 using namespace amdhsa;
2109 StringRef Indent = "\t";
2110
2111 // We cannot accurately backward compute #VGPRs used from
2112 // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
2113 // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
2114 // simply calculate the inverse of what the assembler does.
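  // E.g. with a VGPR encoding granule of 4, a GRANULATED_WORKITEM_VGPR_COUNT
  // of 2 is re-emitted as ".amdhsa_next_free_vgpr 12".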
2115
2116 uint32_t GranulatedWorkitemVGPRCount =
2117 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
2118
2119 uint32_t NextFreeVGPR =
2120 (GranulatedWorkitemVGPRCount + 1) *
2121 AMDGPU::IsaInfo::getVGPREncodingGranule(STI: &STI, EnableWavefrontSize32);
2122
2123 KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
2124
  // We cannot backward compute the values used to calculate
  // GRANULATED_WAVEFRONT_SGPR_COUNT, hence the original values for the
  // following directives can't be recovered:
2128 // .amdhsa_reserve_vcc
2129 // .amdhsa_reserve_flat_scratch
2130 // .amdhsa_reserve_xnack_mask
2131 // They take their respective default values if not specified in the assembly.
2132 //
2133 // GRANULATED_WAVEFRONT_SGPR_COUNT
2134 // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
2135 //
2136 // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
2137 // are set to 0. So while disassembling we consider that:
2138 //
2139 // GRANULATED_WAVEFRONT_SGPR_COUNT
2140 // = f(NEXT_FREE_SGPR + 0 + 0 + 0)
2141 //
2142 // The disassembler cannot recover the original values of those 3 directives.
2143
2144 uint32_t GranulatedWavefrontSGPRCount =
2145 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
2146
2147 if (isGFX10Plus())
2148 CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
2149 "must be zero on gfx10+");
2150
2151 uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
2152 AMDGPU::IsaInfo::getSGPREncodingGranule(STI: &STI);
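  // E.g. with the usual SGPR encoding granule of 8, a
  // GRANULATED_WAVEFRONT_SGPR_COUNT of 1 is re-emitted as
  // ".amdhsa_next_free_sgpr 16".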
2153
2154 KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
2155 if (!hasArchitectedFlatScratch())
2156 KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
2157 KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
2158 KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
2159
2160 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
2161
2162 PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
2163 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
2164 PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
2165 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
2166 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
2167 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
2168 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
2169 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
2170
2171 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);
2172
2173 if (!isGFX12Plus())
2174 PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
2175 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
2176
2177 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);
2178
2179 if (!isGFX12Plus())
2180 PRINT_DIRECTIVE(".amdhsa_ieee_mode",
2181 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
2182
2183 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
2184 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
2185
2186 if (isGFX9Plus())
2187 PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
2188
2189 if (!isGFX9Plus())
2190 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
2191 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
2192
2193 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED1, "COMPUTE_PGM_RSRC1");
2194
2195 if (!isGFX10Plus())
2196 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2,
2197 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx10");
2198
2199 if (isGFX10Plus()) {
2200 PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
2201 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
2202 PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
2203 PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
2204 }
2205
2206 if (isGFX12Plus())
2207 PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
2208 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
2209
2210 return true;
2211}
2212
2213// NOLINTNEXTLINE(readability-identifier-naming)
2214Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
2215 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2216 using namespace amdhsa;
2217 StringRef Indent = "\t";
2218 if (hasArchitectedFlatScratch())
2219 PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
2220 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2221 else
2222 PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
2223 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2224 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
2225 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
2226 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
2227 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
2228 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
2229 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
2230 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
2231 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
2232 PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
2233 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
2234
2235 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
2236 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
2237 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);
2238
2239 PRINT_DIRECTIVE(
2240 ".amdhsa_exception_fp_ieee_invalid_op",
2241 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2242 PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
2243 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2244 PRINT_DIRECTIVE(
2245 ".amdhsa_exception_fp_ieee_div_zero",
2246 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2247 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
2248 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2249 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
2250 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2251 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
2252 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2253 PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
2254 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2255
2256 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");
2257
2258 return true;
2259}
2260
2261// NOLINTNEXTLINE(readability-identifier-naming)
2262Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
2263 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2264 using namespace amdhsa;
2265 StringRef Indent = "\t";
2266 if (isGFX90A()) {
2267 KdStream << Indent << ".amdhsa_accum_offset "
2268 << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
2269 << '\n';
2270
2271 PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2272
2273 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
2274 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2275 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
2276 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2277 } else if (isGFX10Plus()) {
2278 // Bits [0-3].
2279 if (!isGFX12Plus()) {
2280 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2281 PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
2282 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2283 } else {
2284 PRINT_PSEUDO_DIRECTIVE_COMMENT(
2285 "SHARED_VGPR_COUNT",
2286 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2287 }
2288 } else {
2289 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
2290 "COMPUTE_PGM_RSRC3",
2291 "must be zero on gfx12+");
2292 }
2293
2294 // Bits [4-11].
2295 if (isGFX11()) {
2296 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2297 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2298 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
2299 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2300 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
2301 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2302 } else if (isGFX12Plus()) {
2303 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2304 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2305 } else {
2306 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
2307 "COMPUTE_PGM_RSRC3",
2308 "must be zero on gfx10");
2309 }
2310
2311 // Bits [12].
2312 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
2313 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2314
2315 // Bits [13].
2316 if (isGFX12Plus()) {
2317 PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
2318 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2319 } else {
2320 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
2321 "COMPUTE_PGM_RSRC3",
2322 "must be zero on gfx10 or gfx11");
2323 }
2324
2325 // Bits [14-30].
2326 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED4,
2327 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2328
2329 // Bits [31].
2330 if (isGFX11Plus()) {
2331 PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
2332 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2333 } else {
2334 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED5,
2335 "COMPUTE_PGM_RSRC3",
2336 "must be zero on gfx10");
2337 }
2338 } else if (FourByteBuffer) {
2339 return createStringError(
2340 EC: std::errc::invalid_argument,
2341 Fmt: "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2342 }
2343 return true;
2344}
2345#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2346#undef PRINT_DIRECTIVE
2347#undef GET_FIELD
2348#undef CHECK_RESERVED_BITS_IMPL
2349#undef CHECK_RESERVED_BITS
2350#undef CHECK_RESERVED_BITS_MSG
2351#undef CHECK_RESERVED_BITS_DESC
2352#undef CHECK_RESERVED_BITS_DESC_MSG
2353
2354/// Create an error object to return from onSymbolStart for reserved kernel
2355/// descriptor bits being set.
2356static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2357 const char *Msg = "") {
2358 return createStringError(
2359 EC: std::errc::invalid_argument, Fmt: "kernel descriptor reserved %s set%s%s",
2360 Vals: getBitRangeFromMask(Mask, BaseBytes).c_str(), Vals: *Msg ? ", " : "", Vals: Msg);
2361}
2362
2363/// Create an error object to return from onSymbolStart for reserved kernel
2364/// descriptor bytes being set.
2365static Error createReservedKDBytesError(unsigned BaseInBytes,
2366 unsigned WidthInBytes) {
2367 // Create an error comment in the same format as the "Kernel Descriptor"
2368 // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2369 return createStringError(
2370 EC: std::errc::invalid_argument,
2371 Fmt: "kernel descriptor reserved bits in range (%u:%u) set",
2372 Vals: (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, Vals: BaseInBytes * CHAR_BIT);
2373}
2374
2375Expected<bool> AMDGPUDisassembler::decodeKernelDescriptorDirective(
2376 DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
2377 raw_string_ostream &KdStream) const {
2378#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2379 do { \
2380 KdStream << Indent << DIRECTIVE " " \
2381 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2382 } while (0)
2383
2384 uint16_t TwoByteBuffer = 0;
2385 uint32_t FourByteBuffer = 0;
2386
2387 StringRef ReservedBytes;
2388 StringRef Indent = "\t";
2389
2390 assert(Bytes.size() == 64);
2391 DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
2392
2393 switch (Cursor.tell()) {
2394 case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
2395 FourByteBuffer = DE.getU32(C&: Cursor);
2396 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2397 << '\n';
2398 return true;
2399
2400 case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
2401 FourByteBuffer = DE.getU32(C&: Cursor);
2402 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2403 << FourByteBuffer << '\n';
2404 return true;
2405
2406 case amdhsa::KERNARG_SIZE_OFFSET:
2407 FourByteBuffer = DE.getU32(C&: Cursor);
2408 KdStream << Indent << ".amdhsa_kernarg_size "
2409 << FourByteBuffer << '\n';
2410 return true;
2411
2412 case amdhsa::RESERVED0_OFFSET:
2413 // 4 reserved bytes, must be 0.
2414 ReservedBytes = DE.getBytes(C&: Cursor, Length: 4);
2415 for (int I = 0; I < 4; ++I) {
2416 if (ReservedBytes[I] != 0)
2417 return createReservedKDBytesError(BaseInBytes: amdhsa::RESERVED0_OFFSET, WidthInBytes: 4);
2418 }
2419 return true;
2420
2421 case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
2422 // KERNEL_CODE_ENTRY_BYTE_OFFSET
2423 // So far no directive controls this for Code Object V3, so simply skip for
2424 // disassembly.
2425 DE.skip(C&: Cursor, Length: 8);
2426 return true;
2427
2428 case amdhsa::RESERVED1_OFFSET:
2429 // 20 reserved bytes, must be 0.
2430 ReservedBytes = DE.getBytes(C&: Cursor, Length: 20);
2431 for (int I = 0; I < 20; ++I) {
2432 if (ReservedBytes[I] != 0)
2433 return createReservedKDBytesError(BaseInBytes: amdhsa::RESERVED1_OFFSET, WidthInBytes: 20);
2434 }
2435 return true;
2436
2437 case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
2438 FourByteBuffer = DE.getU32(C&: Cursor);
2439 return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2440
2441 case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
2442 FourByteBuffer = DE.getU32(C&: Cursor);
2443 return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2444
2445 case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
2446 FourByteBuffer = DE.getU32(C&: Cursor);
2447 return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2448
2449 case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
2450 using namespace amdhsa;
2451 TwoByteBuffer = DE.getU16(C&: Cursor);
2452
2453 if (!hasArchitectedFlatScratch())
2454 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2455 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2456 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2457 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2458 PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2459 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2460 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2461 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2462 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2463 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2464 if (!hasArchitectedFlatScratch())
2465 PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2466 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2467 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2468 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2469
2470 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2471 return createReservedKDBitsError(Mask: KERNEL_CODE_PROPERTY_RESERVED0,
2472 BaseBytes: amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2473
2474 // Reserved for GFX9
2475 if (isGFX9() &&
2476 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2477 return createReservedKDBitsError(
2478 Mask: KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2479 BaseBytes: amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, Msg: "must be zero on gfx9");
2480 }
2481 if (isGFX10Plus()) {
2482 PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2483 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2484 }
2485
2486 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
2487 PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2488 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2489
2490 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2491 return createReservedKDBitsError(Mask: KERNEL_CODE_PROPERTY_RESERVED1,
2492 BaseBytes: amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2493 }
2494
2495 return true;
2496
2497 case amdhsa::KERNARG_PRELOAD_OFFSET:
2498 using namespace amdhsa;
2499 TwoByteBuffer = DE.getU16(C&: Cursor);
2500 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2501 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2502 KERNARG_PRELOAD_SPEC_LENGTH);
2503 }
2504
2505 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2506 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2507 KERNARG_PRELOAD_SPEC_OFFSET);
2508 }
2509 return true;
2510
2511 case amdhsa::RESERVED3_OFFSET:
2512 // 4 bytes from here are reserved, must be 0.
2513 ReservedBytes = DE.getBytes(C&: Cursor, Length: 4);
2514 for (int I = 0; I < 4; ++I) {
2515 if (ReservedBytes[I] != 0)
2516 return createReservedKDBytesError(BaseInBytes: amdhsa::RESERVED3_OFFSET, WidthInBytes: 4);
2517 }
2518 return true;
2519
2520 default:
2521 llvm_unreachable("Unhandled index. Case statements cover everything.");
2522 return true;
2523 }
2524#undef PRINT_DIRECTIVE
2525}
2526
2527Expected<bool> AMDGPUDisassembler::decodeKernelDescriptor(
2528 StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2529
  // CP microcode requires the kernel descriptor to be 64-byte aligned.
2531 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2532 return createStringError(EC: std::errc::invalid_argument,
2533 Fmt: "kernel descriptor must be 64-byte aligned");
2534
  // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
  // requires us to know the setting of .amdhsa_wavefront_size32 in order to
  // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
  // order. Work around this by first looking up .amdhsa_wavefront_size32 here
  // when required.
2540 if (isGFX10Plus()) {
2541 uint16_t KernelCodeProperties =
2542 support::endian::read16(P: &Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
2543 E: llvm::endianness::little);
2544 EnableWavefrontSize32 =
2545 AMDHSA_BITS_GET(KernelCodeProperties,
2546 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2547 }
2548
2549 std::string Kd;
2550 raw_string_ostream KdStream(Kd);
2551 KdStream << ".amdhsa_kernel " << KdName << '\n';
2552
2553 DataExtractor::Cursor C(0);
2554 while (C && C.tell() < Bytes.size()) {
2555 Expected<bool> Res = decodeKernelDescriptorDirective(Cursor&: C, Bytes, KdStream);
2556
2557 cantFail(Err: C.takeError());
2558
2559 if (!Res)
2560 return Res;
2561 }
2562 KdStream << ".end_amdhsa_kernel\n";
2563 outs() << KdStream.str();
2564 return true;
2565}
2566
2567Expected<bool> AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
2568 uint64_t &Size,
2569 ArrayRef<uint8_t> Bytes,
2570 uint64_t Address) const {
  // Right now only the kernel descriptor needs to be handled; all other
  // symbols are ignored for target-specific handling.
  // TODO:
  // Fix the spurious symbol issue for AMDGPU kernels. It exists for both Code
  // Object V2 and V3 when symbols are marked protected.
2576
2577 // amd_kernel_code_t for Code Object V2.
2578 if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2579 Size = 256;
2580 return createStringError(EC: std::errc::invalid_argument,
2581 Fmt: "code object v2 is not supported");
2582 }
2583
2584 // Code Object V3 kernel descriptors.
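  // These are STT_OBJECT symbols named "<kernel>.kd"; the ".kd" suffix is
  // dropped to recover the kernel name used in the emitted .amdhsa_kernel
  // directive.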
2585 StringRef Name = Symbol.Name;
2586 if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(Suffix: StringRef(".kd"))) {
2587 Size = 64; // Size = 64 regardless of success or failure.
2588 return decodeKernelDescriptor(KdName: Name.drop_back(N: 3), Bytes, KdAddress: Address);
2589 }
2590
2591 return false;
2592}
2593
2594const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
2595 int64_t Val) {
2596 MCContext &Ctx = getContext();
2597 MCSymbol *Sym = Ctx.getOrCreateSymbol(Name: Id);
  // Note: only set the value on a new symbol, in case a disassembler has
  // already been initialized in this context.
2600 if (!Sym->isVariable()) {
2601 Sym->setVariableValue(MCConstantExpr::create(Value: Val, Ctx));
2602 } else {
2603 int64_t Res = ~Val;
2604 bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
2605 if (!Valid || Res != Val)
2606 Ctx.reportWarning(L: SMLoc(), Msg: "unsupported redefinition of " + Id);
2607 }
2608 return MCSymbolRefExpr::create(Symbol: Sym, Ctx);
2609}
2610
2611//===----------------------------------------------------------------------===//
2612// AMDGPUSymbolizer
2613//===----------------------------------------------------------------------===//
2614
// Try to find a symbol name for the specified label.
2616bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
2617 MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2618 uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2619 uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2620
2621 if (!IsBranch) {
2622 return false;
2623 }
2624
2625 auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2626 if (!Symbols)
2627 return false;
2628
2629 auto Result = llvm::find_if(Range&: *Symbols, P: [Value](const SymbolInfoTy &Val) {
2630 return Val.Addr == static_cast<uint64_t>(Value) &&
2631 Val.Type == ELF::STT_NOTYPE;
2632 });
2633 if (Result != Symbols->end()) {
2634 auto *Sym = Ctx.getOrCreateSymbol(Name: Result->Name);
2635 const auto *Add = MCSymbolRefExpr::create(Symbol: Sym, Ctx);
2636 Inst.addOperand(Op: MCOperand::createExpr(Val: Add));
2637 return true;
2638 }
  // Add to the list of referenced addresses so the caller can synthesize a
  // label.
2640 ReferencedAddresses.push_back(x: static_cast<uint64_t>(Value));
2641 return false;
2642}
2643
2644void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
2645 int64_t Value,
2646 uint64_t Address) {
2647 llvm_unreachable("unimplemented");
2648}
2649
2650//===----------------------------------------------------------------------===//
2651// Initialization
2652//===----------------------------------------------------------------------===//
2653
2654static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
2655 LLVMOpInfoCallback /*GetOpInfo*/,
2656 LLVMSymbolLookupCallback /*SymbolLookUp*/,
2657 void *DisInfo,
2658 MCContext *Ctx,
2659 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2660 return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2661}
2662
2663static MCDisassembler *createAMDGPUDisassembler(const Target &T,
2664 const MCSubtargetInfo &STI,
2665 MCContext &Ctx) {
2666 return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2667}
2668
2669extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
2670LLVMInitializeAMDGPUDisassembler() {
2671 TargetRegistry::RegisterMCDisassembler(T&: getTheGCNTarget(),
2672 Fn: createAMDGPUDisassembler);
2673 TargetRegistry::RegisterMCSymbolizer(T&: getTheGCNTarget(),
2674 Fn: createAMDGPUSymbolizer);
2675}
2676