| 1 | //===-- SIDefines.h - SI Helper Macros ----------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | /// \file |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | |
| 10 | #ifndef LLVM_LIB_TARGET_AMDGPU_SIDEFINES_H |
| 11 | #define LLVM_LIB_TARGET_AMDGPU_SIDEFINES_H |
| 12 | |
| 13 | #include "llvm/MC/MCInst.h" |
| 14 | #include "llvm/MC/MCInstrDesc.h" |
| 15 | #include "llvm/MC/MCInstrInfo.h" |
| 16 | #include "llvm/Support/AMDGPUAddrSpace.h" |
| 17 | |
| 18 | namespace llvm { |
| 19 | |
// Register-class flag bits. This needs to be kept in sync with the field bits
// in SIRegisterClass.
enum SIRCFlags : uint8_t {
  // Width, in bits, of the low field covered by RegTupleAlignUnitsMask.
  RegTupleAlignUnitsWidth = 2,

  // Bit positions of the register-kind flags. They are numbered consecutively
  // starting right above the RegTupleAlignUnits field.
  HasVGPRBit = RegTupleAlignUnitsWidth,
  HasAGPRBit,
  HasSGPRbit,

  // Single-bit masks built from the positions above.
  HasVGPR = 1 << HasVGPRBit,
  HasAGPR = 1 << HasAGPRBit,
  HasSGPR = 1 << HasSGPRbit,

  // Multi-bit masks: the low RegTupleAlignUnitsWidth bits, and the union of
  // the three register-kind bits.
  RegTupleAlignUnitsMask = (1 << RegTupleAlignUnitsWidth) - 1,
  RegKindMask = HasVGPR | HasAGPR | HasSGPR
}; // enum SIRCFlags
| 34 | |
namespace SIEncodingFamily {
// Encoding-family indices. These must be kept in sync with the
// SIEncodingFamily class in SIInstrInfo.td and with the columns of the
// getMCOpcodeGen table, so every value is spelled out explicitly.
enum {
  SI = 0,
  VI = 1,
  SDWA = 2,
  SDWA9 = 3,
  GFX80 = 4,
  GFX9 = 5,
  GFX10 = 6,
  SDWA10 = 7,
  GFX90A = 8,
  GFX940 = 9,
  GFX11 = 10,
  GFX1170 = 11,
  GFX12 = 12,
  GFX1250 = 13,
  GFX13 = 14,
};
} // namespace SIEncodingFamily
| 56 | |
| 57 | namespace SIInstrFlags { |
// TSFlags bit assignments. This needs to be kept in sync with the field bits
// in InstSI. Every flag is a single bit of a 64-bit word; bits 11, 27 and 31
// are not assigned in this enum.
enum : uint64_t {
  // Low bits - basic encoding information.
  SALU = UINT64_C(1) << 0,
  VALU = UINT64_C(1) << 1,

  // SALU instruction formats.
  SOP1 = UINT64_C(1) << 2,
  SOP2 = UINT64_C(1) << 3,
  SOPC = UINT64_C(1) << 4,
  SOPK = UINT64_C(1) << 5,
  SOPP = UINT64_C(1) << 6,

  // VALU instruction formats.
  VOP1 = UINT64_C(1) << 7,
  VOP2 = UINT64_C(1) << 8,
  VOPC = UINT64_C(1) << 9,

  // TODO: Should this be split into VOP3 a and b?
  VOP3 = UINT64_C(1) << 10,
  VOP3P = UINT64_C(1) << 12,

  VINTRP = UINT64_C(1) << 13,
  SDWA = UINT64_C(1) << 14,
  DPP = UINT64_C(1) << 15,
  TRANS = UINT64_C(1) << 16,

  // Memory instruction formats.
  MUBUF = UINT64_C(1) << 17,
  MTBUF = UINT64_C(1) << 18,
  SMRD = UINT64_C(1) << 19,
  MIMG = UINT64_C(1) << 20,
  VIMAGE = UINT64_C(1) << 21,
  VSAMPLE = UINT64_C(1) << 22,
  EXP = UINT64_C(1) << 23,
  FLAT = UINT64_C(1) << 24,
  DS = UINT64_C(1) << 25,

  // Combined SGPR/VGPR Spill bit.
  // Logic to separate them out is done in isSGPRSpill and isVGPRSpill.
  Spill = UINT64_C(1) << 26,

  // LDSDIR instruction format.
  LDSDIR = UINT64_C(1) << 28,

  // VINTERP instruction format.
  VINTERP = UINT64_C(1) << 29,

  VOPD3 = UINT64_C(1) << 30,

  // High bits - other information.
  VM_CNT = UINT64_C(1) << 32,
  EXP_CNT = UINT64_C(1) << 33,
  LGKM_CNT = UINT64_C(1) << 34,

  WQM = UINT64_C(1) << 35,
  DisableWQM = UINT64_C(1) << 36,
  Gather4 = UINT64_C(1) << 37,

  TENSOR_CNT = UINT64_C(1) << 38,

  SCALAR_STORE = UINT64_C(1) << 39,
  FIXED_SIZE = UINT64_C(1) << 40,

  ASYNC_CNT = UINT64_C(1) << 41,

  VOP3_OPSEL = UINT64_C(1) << 42,
  maybeAtomic = UINT64_C(1) << 43,
  renamedInGFX9 = UINT64_C(1) << 44,

  // Is a clamp on FP type.
  FPClamp = UINT64_C(1) << 45,

  // Is an integer clamp.
  IntClamp = UINT64_C(1) << 46,

  // Clamps lo component of register.
  ClampLo = UINT64_C(1) << 47,

  // Clamps hi component of register.
  // ClampLo and ClampHi set for packed clamp.
  ClampHi = UINT64_C(1) << 48,

  // Is a packed VOP3P instruction.
  IsPacked = UINT64_C(1) << 49,

  // Is a D16 buffer instruction.
  D16Buf = UINT64_C(1) << 50,

  // FLAT instruction accesses FLAT_GLBL segment.
  FlatGlobal = UINT64_C(1) << 51,

  // Uses floating point double precision rounding mode.
  FPDPRounding = UINT64_C(1) << 52,

  // Instruction is FP atomic.
  FPAtomic = UINT64_C(1) << 53,

  // Is a MFMA instruction.
  IsMAI = UINT64_C(1) << 54,

  // Is a DOT instruction.
  IsDOT = UINT64_C(1) << 55,

  // FLAT instruction accesses FLAT_SCRATCH segment.
  FlatScratch = UINT64_C(1) << 56,

  // Atomic without return.
  IsAtomicNoRet = UINT64_C(1) << 57,

  // Atomic with return.
  IsAtomicRet = UINT64_C(1) << 58,

  // Is a WMMA instruction.
  IsWMMA = UINT64_C(1) << 59,

  // Whether tied sources will be read.
  TiedSourceNotRead = UINT64_C(1) << 60,

  // Is never uniform.
  IsNeverUniform = UINT64_C(1) << 61,

  // ds_gws_* instructions.
  GWS = UINT64_C(1) << 62,

  // Is a SWMMAC instruction.
  IsSWMMAC = UINT64_C(1) << 63,
};
| 186 | |
| 187 | // Predicate functions over TSFlags — the single place where raw TSFlags bit |
| 188 | // tests are written. All callers (SIInstrInfo methods, MC-layer code) go |
| 189 | // through these so that bit-layout changes require updating only this file. |
| 190 | // |
| 191 | // getTSFlags is overloaded for MCInstrDesc, (MCInstrInfo, Opcode), and |
| 192 | // (MCInstrInfo, MCInst) here; SIInstrInfo.h adds a MachineInstr overload in |
| 193 | // namespace llvm so ADL finds it when predicates are instantiated with |
| 194 | // MachineInstr. |
| 195 | |
| 196 | constexpr uint64_t getTSFlags(const MCInstrDesc &Desc) { return Desc.TSFlags; } |
| 197 | inline uint64_t getTSFlags(const MCInstrInfo &MII, unsigned Opcode) { |
| 198 | return MII.get(Opcode).TSFlags; |
| 199 | } |
| 200 | inline uint64_t getTSFlags(const MCInstrInfo &MII, const MCInst &Inst) { |
| 201 | return MII.get(Opcode: Inst.getOpcode()).TSFlags; |
| 202 | } |
| 203 | |
| 204 | template <typename... T> constexpr bool isSALU(const T &...O) { |
| 205 | return getTSFlags(O...) & SALU; |
| 206 | } |
| 207 | template <typename... T> constexpr bool isVALU(const T &...O) { |
| 208 | return getTSFlags(O...) & VALU; |
| 209 | } |
| 210 | template <typename... T> constexpr bool isSOP1(const T &...O) { |
| 211 | return getTSFlags(O...) & SOP1; |
| 212 | } |
| 213 | template <typename... T> constexpr bool isSOP2(const T &...O) { |
| 214 | return getTSFlags(O...) & SOP2; |
| 215 | } |
| 216 | template <typename... T> constexpr bool isSOPC(const T &...O) { |
| 217 | return getTSFlags(O...) & SOPC; |
| 218 | } |
| 219 | template <typename... T> constexpr bool isSOPK(const T &...O) { |
| 220 | return getTSFlags(O...) & SOPK; |
| 221 | } |
| 222 | template <typename... T> constexpr bool isSOPP(const T &...O) { |
| 223 | return getTSFlags(O...) & SOPP; |
| 224 | } |
| 225 | template <typename... T> constexpr bool isVOP1(const T &...O) { |
| 226 | return getTSFlags(O...) & VOP1; |
| 227 | } |
| 228 | template <typename... T> constexpr bool isVOP2(const T &...O) { |
| 229 | return getTSFlags(O...) & VOP2; |
| 230 | } |
| 231 | template <typename... T> constexpr bool isVOPC(const T &...O) { |
| 232 | return getTSFlags(O...) & VOPC; |
| 233 | } |
| 234 | template <typename... T> constexpr bool isVOP3(const T &...O) { |
| 235 | return getTSFlags(O...) & VOP3; |
| 236 | } |
| 237 | template <typename... T> constexpr bool isVOP3P(const T &...O) { |
| 238 | return getTSFlags(O...) & VOP3P; |
| 239 | } |
| 240 | template <typename... T> constexpr bool isVINTRP(const T &...O) { |
| 241 | return getTSFlags(O...) & VINTRP; |
| 242 | } |
| 243 | template <typename... T> constexpr bool isSDWA(const T &...O) { |
| 244 | return getTSFlags(O...) & SDWA; |
| 245 | } |
| 246 | template <typename... T> constexpr bool isDPP(const T &...O) { |
| 247 | return getTSFlags(O...) & DPP; |
| 248 | } |
| 249 | template <typename... T> constexpr bool isTRANS(const T &...O) { |
| 250 | return getTSFlags(O...) & TRANS; |
| 251 | } |
| 252 | template <typename... T> constexpr bool isMUBUF(const T &...O) { |
| 253 | return getTSFlags(O...) & MUBUF; |
| 254 | } |
| 255 | template <typename... T> constexpr bool isMTBUF(const T &...O) { |
| 256 | return getTSFlags(O...) & MTBUF; |
| 257 | } |
| 258 | template <typename... T> constexpr bool isSMRD(const T &...O) { |
| 259 | return getTSFlags(O...) & SMRD; |
| 260 | } |
| 261 | template <typename... T> constexpr bool isMIMG(const T &...O) { |
| 262 | return getTSFlags(O...) & MIMG; |
| 263 | } |
| 264 | template <typename... T> constexpr bool isVIMAGE(const T &...O) { |
| 265 | return getTSFlags(O...) & VIMAGE; |
| 266 | } |
| 267 | template <typename... T> constexpr bool isVSAMPLE(const T &...O) { |
| 268 | return getTSFlags(O...) & VSAMPLE; |
| 269 | } |
| 270 | template <typename... T> constexpr bool isEXP(const T &...O) { |
| 271 | return getTSFlags(O...) & EXP; |
| 272 | } |
| 273 | template <typename... T> constexpr bool isFLAT(const T &...O) { |
| 274 | return getTSFlags(O...) & FLAT; |
| 275 | } |
| 276 | template <typename... T> constexpr bool isDS(const T &...O) { |
| 277 | return getTSFlags(O...) & DS; |
| 278 | } |
| 279 | template <typename... T> constexpr bool isSpill(const T &...O) { |
| 280 | return getTSFlags(O...) & Spill; |
| 281 | } |
| 282 | template <typename... T> constexpr bool isLDSDIR(const T &...O) { |
| 283 | return getTSFlags(O...) & LDSDIR; |
| 284 | } |
| 285 | template <typename... T> constexpr bool isVINTERP(const T &...O) { |
| 286 | return getTSFlags(O...) & VINTERP; |
| 287 | } |
| 288 | template <typename... T> constexpr bool isWQM(const T &...O) { |
| 289 | return getTSFlags(O...) & WQM; |
| 290 | } |
| 291 | template <typename... T> constexpr bool isDisableWQM(const T &...O) { |
| 292 | return getTSFlags(O...) & DisableWQM; |
| 293 | } |
| 294 | template <typename... T> constexpr bool isGather4(const T &...O) { |
| 295 | return getTSFlags(O...) & Gather4; |
| 296 | } |
| 297 | template <typename... T> constexpr bool usesTENSOR_CNT(const T &...O) { |
| 298 | return getTSFlags(O...) & TENSOR_CNT; |
| 299 | } |
| 300 | template <typename... T> constexpr bool isScalarStore(const T &...O) { |
| 301 | return getTSFlags(O...) & SCALAR_STORE; |
| 302 | } |
| 303 | template <typename... T> constexpr bool isFixedSize(const T &...O) { |
| 304 | return getTSFlags(O...) & FIXED_SIZE; |
| 305 | } |
| 306 | template <typename... T> constexpr bool usesASYNC_CNT(const T &...O) { |
| 307 | return getTSFlags(O...) & ASYNC_CNT; |
| 308 | } |
| 309 | template <typename... T> constexpr bool hasVOP3OpSel(const T &...O) { |
| 310 | return getTSFlags(O...) & VOP3_OPSEL; |
| 311 | } |
| 312 | template <typename... T> constexpr bool isMaybeAtomic(const T &...O) { |
| 313 | return getTSFlags(O...) & maybeAtomic; |
| 314 | } |
| 315 | template <typename... T> constexpr bool hasFPClamp(const T &...O) { |
| 316 | return getTSFlags(O...) & FPClamp; |
| 317 | } |
| 318 | template <typename... T> constexpr bool hasIntClamp(const T &...O) { |
| 319 | return getTSFlags(O...) & IntClamp; |
| 320 | } |
| 321 | template <typename... T> constexpr bool hasClampLo(const T &...O) { |
| 322 | return getTSFlags(O...) & ClampLo; |
| 323 | } |
| 324 | template <typename... T> constexpr bool hasClampHi(const T &...O) { |
| 325 | return getTSFlags(O...) & ClampHi; |
| 326 | } |
| 327 | template <typename... T> constexpr bool isPacked(const T &...O) { |
| 328 | return getTSFlags(O...) & IsPacked; |
| 329 | } |
| 330 | template <typename... T> constexpr bool isD16Buf(const T &...O) { |
| 331 | return getTSFlags(O...) & D16Buf; |
| 332 | } |
| 333 | template <typename... T> constexpr bool isFlatGlobal(const T &...O) { |
| 334 | return getTSFlags(O...) & FlatGlobal; |
| 335 | } |
| 336 | template <typename... T> constexpr bool usesFPDPRounding(const T &...O) { |
| 337 | return getTSFlags(O...) & FPDPRounding; |
| 338 | } |
| 339 | template <typename... T> constexpr bool isFPAtomic(const T &...O) { |
| 340 | return getTSFlags(O...) & FPAtomic; |
| 341 | } |
| 342 | template <typename... T> constexpr bool isMAI(const T &...O) { |
| 343 | return getTSFlags(O...) & IsMAI; |
| 344 | } |
| 345 | template <typename... T> constexpr bool isDOT(const T &...O) { |
| 346 | return getTSFlags(O...) & IsDOT; |
| 347 | } |
| 348 | template <typename... T> constexpr bool isFlatScratch(const T &...O) { |
| 349 | return getTSFlags(O...) & FlatScratch; |
| 350 | } |
| 351 | template <typename... T> constexpr bool isAtomicNoRet(const T &...O) { |
| 352 | return getTSFlags(O...) & IsAtomicNoRet; |
| 353 | } |
| 354 | template <typename... T> constexpr bool isAtomicRet(const T &...O) { |
| 355 | return getTSFlags(O...) & IsAtomicRet; |
| 356 | } |
| 357 | template <typename... T> constexpr bool isWMMA(const T &...O) { |
| 358 | return getTSFlags(O...) & IsWMMA; |
| 359 | } |
| 360 | template <typename... T> constexpr bool isTiedSourceNotRead(const T &...O) { |
| 361 | return getTSFlags(O...) & TiedSourceNotRead; |
| 362 | } |
| 363 | template <typename... T> constexpr bool isNeverUniform(const T &...O) { |
| 364 | return getTSFlags(O...) & IsNeverUniform; |
| 365 | } |
| 366 | template <typename... T> constexpr bool isGWS(const T &...O) { |
| 367 | return getTSFlags(O...) & GWS; |
| 368 | } |
| 369 | template <typename... T> constexpr bool isSWMMAC(const T &...O) { |
| 370 | return getTSFlags(O...) & IsSWMMAC; |
| 371 | } |
| 372 | template <typename... T> constexpr bool usesVM_CNT(const T &...O) { |
| 373 | return getTSFlags(O...) & VM_CNT; |
| 374 | } |
| 375 | template <typename... T> constexpr bool usesLGKM_CNT(const T &...O) { |
| 376 | return getTSFlags(O...) & LGKM_CNT; |
| 377 | } |
| 378 | |
// Compound predicates, each expressed in terms of the single-flag predicates.

// Atomic of either kind: without return or with return.
template <typename... Ts> constexpr bool isAtomic(const Ts &...Args) {
  if (isAtomicNoRet(Args...))
    return true;
  return isAtomicRet(Args...);
}

// FLAT instruction flagged as FlatGlobal or FlatScratch.
template <typename... Ts>
constexpr bool isSegmentSpecificFLAT(const Ts &...Args) {
  if (isFlatGlobal(Args...))
    return true;
  return isFlatScratch(Args...);
}

// Any image-family instruction: pre-gfx11 MIMG, gfx11+ VIMAGE or VSAMPLE.
template <typename... Ts> constexpr bool isImage(const Ts &...Args) {
  if (isMIMG(Args...))
    return true;
  if (isVIMAGE(Args...))
    return true;
  return isVSAMPLE(Args...);
}

// Vector memory: buffer + image + flat.
template <typename... Ts> constexpr bool isVMEM(const Ts &...Args) {
  if (isMUBUF(Args...))
    return true;
  if (isMTBUF(Args...))
    return true;
  if (isImage(Args...))
    return true;
  return isFLAT(Args...);
}
| 394 | |
// v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
// The result is true if any of these tests are true.
enum ClassFlags : unsigned {
  // NaNs.
  S_NAN = 1 << 0, // Signaling NaN
  Q_NAN = 1 << 1, // Quiet NaN

  // Negative values.
  N_INFINITY = 1 << 2,  // Negative infinity
  N_NORMAL = 1 << 3,    // Negative normal
  N_SUBNORMAL = 1 << 4, // Negative subnormal
  N_ZERO = 1 << 5,      // Negative zero

  // Positive values.
  P_ZERO = 1 << 6,      // Positive zero
  P_SUBNORMAL = 1 << 7, // Positive subnormal
  P_NORMAL = 1 << 8,    // Positive normal
  P_INFINITY = 1 << 9   // Positive infinity
};
| 409 | } |
| 410 | |
| 411 | namespace AMDGPU { |
| 412 | |
| 413 | enum OperandType : unsigned { |
| 414 | /// Operands with register, 32-bit, or 64-bit immediate |
| 415 | OPERAND_REG_IMM_INT32 = MCOI::OPERAND_FIRST_TARGET, |
| 416 | OPERAND_REG_IMM_INT64, |
| 417 | OPERAND_REG_IMM_INT16, |
| 418 | OPERAND_REG_IMM_FP32, |
| 419 | OPERAND_REG_IMM_FP64, |
| 420 | OPERAND_REG_IMM_BF16, |
| 421 | OPERAND_REG_IMM_FP16, |
| 422 | OPERAND_REG_IMM_V2BF16, |
| 423 | OPERAND_REG_IMM_V2FP16, |
| 424 | OPERAND_REG_IMM_V2FP16_SPLAT, |
| 425 | OPERAND_REG_IMM_V2INT16, |
| 426 | OPERAND_REG_IMM_V2INT64, |
| 427 | OPERAND_REG_IMM_NOINLINE_V2FP16, |
| 428 | OPERAND_REG_IMM_V2INT32, |
| 429 | OPERAND_REG_IMM_V2FP32, |
| 430 | OPERAND_REG_IMM_V2FP64, |
| 431 | |
| 432 | /// Operands with register or inline constant |
| 433 | OPERAND_REG_INLINE_C_INT16, |
| 434 | OPERAND_REG_INLINE_C_INT32, |
| 435 | OPERAND_REG_INLINE_C_INT64, |
| 436 | OPERAND_REG_INLINE_C_BF16, |
| 437 | OPERAND_REG_INLINE_C_FP16, |
| 438 | OPERAND_REG_INLINE_C_FP32, |
| 439 | OPERAND_REG_INLINE_C_FP64, |
| 440 | OPERAND_REG_INLINE_C_V2INT16, |
| 441 | OPERAND_REG_INLINE_C_V2BF16, |
| 442 | OPERAND_REG_INLINE_C_V2FP16, |
| 443 | |
| 444 | // Operand for split barrier inline constant |
| 445 | OPERAND_INLINE_SPLIT_BARRIER_INT32, |
| 446 | |
| 447 | /// Operand with 32-bit immediate that uses the constant bus. |
| 448 | OPERAND_KIMM32, |
| 449 | OPERAND_KIMM16, |
| 450 | OPERAND_KIMM64, |
| 451 | |
| 452 | /// Operands with an AccVGPR register or inline constant |
| 453 | OPERAND_REG_INLINE_AC_INT32, |
| 454 | OPERAND_REG_INLINE_AC_FP32, |
| 455 | OPERAND_REG_INLINE_AC_FP64, |
| 456 | |
| 457 | // Operand for AV_MOV_B64_IMM_PSEUDO, which is a pair of 32-bit inline |
| 458 | // constants. Does not accept registers. |
| 459 | OPERAND_INLINE_C_AV64_PSEUDO, |
| 460 | |
| 461 | // Operand for source modifiers for VOP instructions |
| 462 | OPERAND_INPUT_MODS, |
| 463 | |
| 464 | // Operand for SDWA instructions |
| 465 | OPERAND_SDWA_VOPC_DST, |
| 466 | |
| 467 | OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32, |
| 468 | OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2FP64, |
| 469 | |
| 470 | OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16, |
| 471 | OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_FP64, |
| 472 | |
| 473 | OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT32, |
| 474 | OPERAND_REG_INLINE_AC_LAST = OPERAND_INLINE_C_AV64_PSEUDO, |
| 475 | |
| 476 | OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32, |
| 477 | OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST, |
| 478 | |
| 479 | OPERAND_KIMM_FIRST = OPERAND_KIMM32, |
| 480 | OPERAND_KIMM_LAST = OPERAND_KIMM64 |
| 481 | |
| 482 | }; |
| 483 | } |
| 484 | |
// Input operand modifiers bit-masks.
// NEG_HI reuses the ABS bit and DST_OP_SEL reuses the OP_SEL_1 bit; NEG and
// SEXT occupy distinct bits.
namespace SISrcMods {
// Source-operand modifier masks, listed in ascending bit order. Two pairs of
// names alias the same bit: NEG_HI/ABS and DST_OP_SEL/OP_SEL_1.
enum : unsigned {
  NONE = 0,
  NEG = 1 << 0,        // Floating-point negate modifier
  ABS = 1 << 1,        // Floating-point absolute modifier
  NEG_HI = ABS,        // Floating-point negate high packed component modifier.
  OP_SEL_0 = 1 << 2,
  OP_SEL_1 = 1 << 3,
  DST_OP_SEL = 1 << 3, // VOP3 dst op_sel (share mask with OP_SEL_1)
  SEXT = 1 << 4        // Integer sign-extend modifier
};
} // namespace SISrcMods
| 499 | |
namespace SIOutMods {
// Output modifier codes. These are plain enumerated values (0..3), not
// independent bit flags.
enum : unsigned {
  NONE = 0,
  MUL2 = 1,
  MUL4 = 2,
  DIV2 = 3
};
} // namespace SIOutMods
| 508 | |
namespace AMDGPU {
namespace VGPRIndexMode {

// Ids of the symbolic names; each id is also the bit position of the
// corresponding *_ENABLE mask in EncBits.
enum Id : unsigned {
  ID_SRC0 = 0,
  ID_SRC1,
  ID_SRC2,
  ID_DST,

  // Inclusive bounds of the id range.
  ID_MIN = ID_SRC0,
  ID_MAX = ID_DST
};

// Enable bits, one per Id, plus the combined mask and an UNDEF marker.
enum EncBits : unsigned {
  OFF = 0,
  SRC0_ENABLE = 1 << ID_SRC0,
  SRC1_ENABLE = 1 << ID_SRC1,
  SRC2_ENABLE = 1 << ID_SRC2,
  DST_ENABLE = 1 << ID_DST,
  ENABLE_MASK = SRC0_ENABLE | SRC1_ENABLE | SRC2_ENABLE | DST_ENABLE,
  UNDEF = 0xFFFF
};

} // namespace VGPRIndexMode
} // namespace AMDGPU
| 534 | |
namespace AMDGPUAsmVariants {
// Assembler variant ids, numbered consecutively from DEFAULT.
enum : unsigned {
  DEFAULT = 0,
  VOP3 = 1,
  SDWA = 2,
  SDWA9 = 3,
  DPP = 4,
  VOP3_DPP = 5
};
} // namespace AMDGPUAsmVariants
| 545 | |
| 546 | namespace AMDGPU { |
namespace EncValues { // Encoding values of enum9/8/7 operands

enum : unsigned {
  // Scalar registers.
  SGPR_MIN = 0,
  SGPR_MAX_SI = 101,
  SGPR_MAX_GFX10 = 105,

  // Trap temporaries.
  TTMP_VI_MIN = 112,
  TTMP_VI_MAX = 123,
  TTMP_GFX9PLUS_MIN = 108,
  TTMP_GFX9PLUS_MAX = 123,

  // Inline constants.
  INLINE_INTEGER_C_MIN = 128,
  INLINE_INTEGER_C_POSITIVE_MAX = 192, // 64
  INLINE_INTEGER_C_MAX = 208,
  INLINE_FLOATING_C_MIN = 240,
  INLINE_FLOATING_C_MAX = 248,

  // Literal constants.
  LITERAL64_CONST = 254,
  LITERAL_CONST = 255,

  // Vector registers.
  VGPR_MIN = 256,
  VGPR_MAX = 511,
  IS_VGPR = 256, // Indicates VGPR or AGPR
};

} // namespace EncValues
| 570 | |
// Register codes as defined in the TableGen's HWEncoding field.
namespace HWEncoding {
enum : unsigned {
  // Index masks: the low 10 bits, and the low 8 bits.
  REG_IDX_MASK = 0x3ff,
  LO256_REG_IDX_MASK = 0xff,

  // Flag bits located directly above the 10-bit register index.
  IS_VGPR = 1 << 10,
  IS_AGPR = 1 << 11,
  IS_HI16 = 1 << 12,
};
} // namespace HWEncoding
| 581 | |
namespace CPol {

// Cache policy bits. The first group is the pre-GFX12 layout; the remaining
// groups describe the GFX12+ layout plus compiler-only helper bits. Several
// names in the two layouts share numeric values.
enum CPol {
  GLC = 1,
  SLC = 2,
  DLC = 4,
  SCC = 16,
  SC0 = GLC,
  SC1 = SCC,
  NT = SLC,
  ALL_pregfx12 = GLC | SLC | DLC | SCC,
  SWZ_pregfx12 = 8,

  // Below are GFX12+ cache policy bits

  // Temporal hint
  TH = 0x7,      // All TH bits
  TH_RT = 0,     // regular
  TH_NT = 1,     // non-temporal
  TH_HT = 2,     // high-temporal
  TH_LU = 3,     // last use
  TH_WB = 3,     // regular (CU, SE), high-temporal with write-back (MALL)
  TH_NT_RT = 4,  // non-temporal (CU, SE), regular (MALL)
  TH_RT_NT = 5,  // regular (CU, SE), non-temporal (MALL)
  TH_NT_HT = 6,  // non-temporal (CU, SE), high-temporal (MALL)
  TH_NT_WB = 7,  // non-temporal (CU, SE), high-temporal with write-back (MALL)
  TH_BYPASS = 3, // only to be used with scope = 3

  TH_RESERVED = 7, // unused value for load insts

  // Bits of TH for atomics
  TH_ATOMIC_RETURN = GLC, // Returning vs non-returning
  TH_ATOMIC_NT = SLC,     // Non-temporal vs regular
  TH_ATOMIC_CASCADE = 4,  // Cascading vs regular

  // Scope
  SCOPE_SHIFT = 3,
  SCOPE_MASK = 0x3,
  SCOPE = SCOPE_MASK << SCOPE_SHIFT, // All Scope bits
  SCOPE_CU = 0 << SCOPE_SHIFT,
  SCOPE_SE = 1 << SCOPE_SHIFT,
  SCOPE_DEV = 2 << SCOPE_SHIFT,
  SCOPE_SYS = 3 << SCOPE_SHIFT,

  NV = 1 << 5, // Non-volatile bit

  SWZ = 1 << 6, // Swizzle bit

  SCAL = 1 << 11, // Scale offset bit

  ALL = TH | SCOPE | NV,

  // Helper bits
  TH_TYPE_LOAD = 1 << 7,    // TH_LOAD policy
  TH_TYPE_STORE = 1 << 8,   // TH_STORE policy
  TH_TYPE_ATOMIC = 1 << 9,  // TH_ATOMIC policy
  TH_REAL_BYPASS = 1 << 10, // is TH=3 bypass policy or not

  // Volatile (used to preserve/signal operation volatility for buffer
  // operations; not a real instruction bit)
  VOLATILE = 1 << 31,
  // The set of "cache policy" bits used for compiler features that
  // do not correspond to hardware features.
  VIRTUAL_BITS = VOLATILE,
};

} // namespace CPol
| 649 | |
namespace SendMsg { // Encoding of SIMM16 used in s_sendmsg* insns.

// Message ID, width(4) [3:0]. ID_MASK_GFX11Plus_ below covers 8 bits, and the
// ID_RTN_* values need more than 4 bits. Some numeric ids are shared by
// different messages on different generations.
enum Id {
  ID_INTERRUPT = 1,

  ID_GS_PreGFX11 = 2,      // replaced in GFX11
  ID_GS_DONE_PreGFX11 = 3, // replaced in GFX11

  ID_HS_TESSFACTOR_GFX11Plus = 2, // reused in GFX11
  ID_DEALLOC_VGPRS_GFX11Plus = 3, // reused in GFX11

  ID_SAVEWAVE = 4,           // added in GFX8, removed in GFX11
  ID_STALL_WAVE_GEN = 5,     // added in GFX9, removed in GFX12
  ID_HALT_WAVES = 6,         // added in GFX9, removed in GFX12
  ID_ORDERED_PS_DONE = 7,    // added in GFX9, removed in GFX11
  ID_EARLY_PRIM_DEALLOC = 8, // added in GFX9, removed in GFX10
  ID_GS_ALLOC_REQ = 9,       // added in GFX9
  ID_GET_DOORBELL = 10,      // added in GFX9, removed in GFX11
  ID_GET_DDID = 11,          // added in GFX10, removed in GFX11
  ID_SYSMSG = 15,

  ID_RTN_GET_DOORBELL = 128,
  ID_RTN_GET_DDID = 129,
  ID_RTN_GET_TMA = 130,
  ID_RTN_GET_REALTIME = 131,
  ID_RTN_SAVE_WAVE = 132,
  ID_RTN_GET_TBA = 133,
  ID_RTN_GET_TBA_TO_PC = 134,
  ID_RTN_GET_SE_AID_ID = 135,

  ID_RTN_GET_CLUSTER_BARRIER_STATE = 136, // added in GFX1250
  ID_RTN_SAVE_WAVE_HAS_TDM = 152,         // added in GFX1250

  ID_MASK_PreGFX11_ = 0xF,
  ID_MASK_GFX11Plus_ = 0xFF
};

// Both GS and SYS operation IDs. The two families reuse the same numeric
// values.
enum Op {
  OP_SHIFT_ = 4,
  OP_NONE_ = 0,
  // Bits used for operation encoding
  OP_WIDTH_ = 3,
  OP_MASK_ = (((1 << OP_WIDTH_) - 1) << OP_SHIFT_),
  // GS operations are encoded in bits 5:4
  OP_GS_NOP = 0,
  OP_GS_CUT = 1,
  OP_GS_EMIT = 2,
  OP_GS_EMIT_CUT = 3,
  OP_GS_FIRST_ = OP_GS_NOP,
  // SYS operations are encoded in bits 6:4
  OP_SYS_ECC_ERR_INTERRUPT = 1,
  OP_SYS_REG_RD = 2,
  OP_SYS_HOST_TRAP_ACK = 3,
  OP_SYS_TTRACE_PC = 4,
  OP_SYS_FIRST_ = OP_SYS_ECC_ERR_INTERRUPT,
};

// Stream ID, (2) [9:8].
enum StreamId : unsigned {
  STREAM_ID_NONE_ = 0,
  STREAM_ID_DEFAULT_ = 0,
  STREAM_ID_LAST_ = 4,
  STREAM_ID_FIRST_ = STREAM_ID_DEFAULT_,
  STREAM_ID_SHIFT_ = 8,
  STREAM_ID_WIDTH_ = 2,
  STREAM_ID_MASK_ = (((1 << STREAM_ID_WIDTH_) - 1) << STREAM_ID_SHIFT_)
};

} // namespace SendMsg
| 718 | |
namespace WaitEvent { // Encoding of SIMM16 used in s_wait_event

// Single-bit event selectors.
enum Id {
  DONT_WAIT_EXPORT_READY = 1 << 0, // Only used in gfx11
  EXPORT_READY = 1 << 1,           // gfx12+
};

} // namespace WaitEvent
| 726 | |
namespace Hwreg { // Encoding of SIMM16 used in s_setreg/getreg* insns.

// HwRegCode, (6) [5:0]. Several numeric codes are shared by different
// registers on different generations; the groups below are labelled
// accordingly.
enum Id {
  ID_MODE = 1,
  ID_STATUS = 2,
  ID_TRAPSTS = 3,
  ID_HW_ID = 4,
  ID_GPR_ALLOC = 5,
  ID_LDS_ALLOC = 6,
  ID_IB_STS = 7,
  ID_PERF_SNAPSHOT_DATA_gfx12 = 10,
  ID_PERF_SNAPSHOT_PC_LO_gfx12 = 11,
  ID_PERF_SNAPSHOT_PC_HI_gfx12 = 12,
  ID_MEM_BASES = 15,
  ID_TBA_LO = 16,
  ID_TBA_HI = 17,
  ID_TMA_LO = 18,
  ID_TMA_HI = 19,
  ID_FLAT_SCR_LO = 20,
  ID_FLAT_SCR_HI = 21,
  ID_XNACK_MASK = 22,
  ID_HW_ID1 = 23,
  ID_HW_ID2 = 24,
  ID_POPS_PACKER = 25,
  ID_SCHED_MODE = 26,
  ID_PERF_SNAPSHOT_DATA_gfx11 = 27,
  ID_IB_STS2 = 28,
  ID_SHADER_CYCLES = 29,
  ID_SHADER_CYCLES_HI = 30,
  ID_DVGPR_ALLOC_LO = 31,
  ID_DVGPR_ALLOC_HI = 32,

  // Register numbers reused in GFX11
  ID_PERF_SNAPSHOT_PC_LO_gfx11 = 18,
  ID_PERF_SNAPSHOT_PC_HI_gfx11 = 19,

  // Register numbers reused in GFX12+
  ID_STATE_PRIV = 4,
  ID_PERF_SNAPSHOT_DATA1 = 15,
  ID_PERF_SNAPSHOT_DATA2 = 16,
  ID_EXCP_FLAG_PRIV = 17,
  ID_EXCP_FLAG_USER = 18,
  ID_TRAP_CTRL = 19,

  // GFX94* specific registers
  ID_XCC_ID = 20,
  ID_SQ_PERF_SNAPSHOT_DATA = 21,
  ID_SQ_PERF_SNAPSHOT_DATA1 = 22,
  ID_SQ_PERF_SNAPSHOT_PC_LO = 23,
  ID_SQ_PERF_SNAPSHOT_PC_HI = 24,

  // GFX1250
  ID_XNACK_STATE_PRIV = 33,
  ID_XNACK_MASK_gfx1250 = 34,
};

// Offset, (5) [10:6]
enum Offset : unsigned {
  OFFSET_MEM_VIOL = 8,
  OFFSET_ME_ID = 8, // in HW_ID2
};

// Field masks within the MODE register. The GFX1250 group reuses bit
// positions that the EXCP_EN fields occupy in the group above it.
enum ModeRegisterMasks : uint32_t {
  FP_ROUND_MASK = 0xf << 0,  // Bits 0..3
  FP_DENORM_MASK = 0xf << 4, // Bits 4..7
  DX10_CLAMP_MASK = 1 << 8,
  IEEE_MODE_MASK = 1 << 9,
  LOD_CLAMP_MASK = 1 << 10,
  DEBUG_MASK = 1 << 11,

  // EXCP_EN fields.
  EXCP_EN_INVALID_MASK = 1 << 12,
  EXCP_EN_INPUT_DENORMAL_MASK = 1 << 13,
  EXCP_EN_FLOAT_DIV0_MASK = 1 << 14,
  EXCP_EN_OVERFLOW_MASK = 1 << 15,
  EXCP_EN_UNDERFLOW_MASK = 1 << 16,
  EXCP_EN_INEXACT_MASK = 1 << 17,
  EXCP_EN_INT_DIV0_MASK = 1 << 18,

  GPR_IDX_EN_MASK = 1 << 27,
  VSKIP_MASK = 1 << 28,
  CSP_MASK = 0x7u << 29, // Bits 29..31

  // GFX1250
  DST_VGPR_MSB = 0x3 << 12,
  SRC0_VGPR_MSB = 0x3 << 14,
  SRC1_VGPR_MSB = 0x3 << 16,
  SRC2_VGPR_MSB = 0x3 << 18,
  VGPR_MSB_MASK = 0xff << 12, // Bits 12..19

  REPLAY_MODE = 1 << 25,
  FLAT_SCRATCH_IS_NV = 1 << 26,
};

} // namespace Hwreg
| 821 | |
namespace MTBUFFormat {

// Data format codes, numbered consecutively from DFMT_INVALID, followed by the
// range bounds, sentinel/default values and the field's shift and mask.
enum DataFormat : int64_t {
  DFMT_INVALID = 0,
  DFMT_8,
  DFMT_16,
  DFMT_8_8,
  DFMT_32,
  DFMT_16_16,
  DFMT_10_11_11,
  DFMT_11_11_10,
  DFMT_10_10_10_2,
  DFMT_2_10_10_10,
  DFMT_8_8_8_8,
  DFMT_32_32,
  DFMT_16_16_16_16,
  DFMT_32_32_32,
  DFMT_32_32_32_32,
  DFMT_RESERVED_15,

  DFMT_MIN = DFMT_INVALID,
  DFMT_MAX = DFMT_RESERVED_15,

  DFMT_UNDEF = -1,
  DFMT_DEFAULT = DFMT_8,

  DFMT_SHIFT = 0,
  DFMT_MASK = 0xF
};

// Numeric format codes, numbered consecutively from NFMT_UNORM, followed by
// the range bounds, sentinel/default values and the field's shift and mask.
enum NumFormat : int64_t {
  NFMT_UNORM = 0,
  NFMT_SNORM,
  NFMT_USCALED,
  NFMT_SSCALED,
  NFMT_UINT,
  NFMT_SINT,
  NFMT_RESERVED_6,                  // VI and GFX9
  NFMT_SNORM_OGL = NFMT_RESERVED_6, // SI and CI only
  NFMT_FLOAT,

  NFMT_MIN = NFMT_UNORM,
  NFMT_MAX = NFMT_FLOAT,

  NFMT_UNDEF = -1,
  NFMT_DEFAULT = NFMT_UNORM,

  NFMT_SHIFT = 4,
  NFMT_MASK = 7
};

// Combined DFMT/NFMT encoding: each field is masked and shifted into place,
// then the two are OR-ed together.
enum MergedFormat : int64_t {
  DFMT_NFMT_UNDEF = -1,
  DFMT_NFMT_DEFAULT = ((DFMT_DEFAULT & DFMT_MASK) << DFMT_SHIFT) |
                      ((NFMT_DEFAULT & NFMT_MASK) << NFMT_SHIFT),

  DFMT_NFMT_MASK = (DFMT_MASK << DFMT_SHIFT) | (NFMT_MASK << NFMT_SHIFT),

  DFMT_NFMT_MAX = DFMT_NFMT_MASK
};

// Values common to the unified format encodings.
enum UnifiedFormatCommon : int64_t {
  UFMT_MAX = 127,
  UFMT_UNDEF = -1,
  UFMT_DEFAULT = 1
};

} // namespace MTBUFFormat
| 891 | |
// Unified format table in the UfmtGFX10 namespace. Enumerators are numbered
// consecutively from UFMT_INVALID (0); the explicit value on the first entry
// of each group only spells out that numbering.
namespace UfmtGFX10 {
enum UnifiedFormat : int64_t {
  UFMT_INVALID = 0,

  // 1..6
  UFMT_8_UNORM = 1,
  UFMT_8_SNORM,
  UFMT_8_USCALED,
  UFMT_8_SSCALED,
  UFMT_8_UINT,
  UFMT_8_SINT,

  // 7..13
  UFMT_16_UNORM = 7,
  UFMT_16_SNORM,
  UFMT_16_USCALED,
  UFMT_16_SSCALED,
  UFMT_16_UINT,
  UFMT_16_SINT,
  UFMT_16_FLOAT,

  // 14..19
  UFMT_8_8_UNORM = 14,
  UFMT_8_8_SNORM,
  UFMT_8_8_USCALED,
  UFMT_8_8_SSCALED,
  UFMT_8_8_UINT,
  UFMT_8_8_SINT,

  // 20..22
  UFMT_32_UINT = 20,
  UFMT_32_SINT,
  UFMT_32_FLOAT,

  // 23..29
  UFMT_16_16_UNORM = 23,
  UFMT_16_16_SNORM,
  UFMT_16_16_USCALED,
  UFMT_16_16_SSCALED,
  UFMT_16_16_UINT,
  UFMT_16_16_SINT,
  UFMT_16_16_FLOAT,

  // 30..36
  UFMT_10_11_11_UNORM = 30,
  UFMT_10_11_11_SNORM,
  UFMT_10_11_11_USCALED,
  UFMT_10_11_11_SSCALED,
  UFMT_10_11_11_UINT,
  UFMT_10_11_11_SINT,
  UFMT_10_11_11_FLOAT,

  // 37..43
  UFMT_11_11_10_UNORM = 37,
  UFMT_11_11_10_SNORM,
  UFMT_11_11_10_USCALED,
  UFMT_11_11_10_SSCALED,
  UFMT_11_11_10_UINT,
  UFMT_11_11_10_SINT,
  UFMT_11_11_10_FLOAT,

  // 44..49
  UFMT_10_10_10_2_UNORM = 44,
  UFMT_10_10_10_2_SNORM,
  UFMT_10_10_10_2_USCALED,
  UFMT_10_10_10_2_SSCALED,
  UFMT_10_10_10_2_UINT,
  UFMT_10_10_10_2_SINT,

  // 50..55
  UFMT_2_10_10_10_UNORM = 50,
  UFMT_2_10_10_10_SNORM,
  UFMT_2_10_10_10_USCALED,
  UFMT_2_10_10_10_SSCALED,
  UFMT_2_10_10_10_UINT,
  UFMT_2_10_10_10_SINT,

  // 56..61
  UFMT_8_8_8_8_UNORM = 56,
  UFMT_8_8_8_8_SNORM,
  UFMT_8_8_8_8_USCALED,
  UFMT_8_8_8_8_SSCALED,
  UFMT_8_8_8_8_UINT,
  UFMT_8_8_8_8_SINT,

  // 62..64
  UFMT_32_32_UINT = 62,
  UFMT_32_32_SINT,
  UFMT_32_32_FLOAT,

  // 65..71
  UFMT_16_16_16_16_UNORM = 65,
  UFMT_16_16_16_16_SNORM,
  UFMT_16_16_16_16_USCALED,
  UFMT_16_16_16_16_SSCALED,
  UFMT_16_16_16_16_UINT,
  UFMT_16_16_16_16_SINT,
  UFMT_16_16_16_16_FLOAT,

  // 72..77
  UFMT_32_32_32_UINT = 72,
  UFMT_32_32_32_SINT,
  UFMT_32_32_32_FLOAT,
  UFMT_32_32_32_32_UINT,
  UFMT_32_32_32_32_SINT,
  UFMT_32_32_32_32_FLOAT,

  // Inclusive bounds of the table.
  UFMT_FIRST = UFMT_INVALID,
  UFMT_LAST = UFMT_32_32_32_32_FLOAT,
};

} // namespace UfmtGFX10
| 991 | |
// Unified format table in the UfmtGFX11 namespace. Enumerators are numbered
// consecutively from UFMT_INVALID (0); the explicit value on the first entry
// of each group only spells out that numbering.
namespace UfmtGFX11 {
enum UnifiedFormat : int64_t {
  UFMT_INVALID = 0,

  // 1..6
  UFMT_8_UNORM = 1,
  UFMT_8_SNORM,
  UFMT_8_USCALED,
  UFMT_8_SSCALED,
  UFMT_8_UINT,
  UFMT_8_SINT,

  // 7..13
  UFMT_16_UNORM = 7,
  UFMT_16_SNORM,
  UFMT_16_USCALED,
  UFMT_16_SSCALED,
  UFMT_16_UINT,
  UFMT_16_SINT,
  UFMT_16_FLOAT,

  // 14..19
  UFMT_8_8_UNORM = 14,
  UFMT_8_8_SNORM,
  UFMT_8_8_USCALED,
  UFMT_8_8_SSCALED,
  UFMT_8_8_UINT,
  UFMT_8_8_SINT,

  // 20..22
  UFMT_32_UINT = 20,
  UFMT_32_SINT,
  UFMT_32_FLOAT,

  // 23..29
  UFMT_16_16_UNORM = 23,
  UFMT_16_16_SNORM,
  UFMT_16_16_USCALED,
  UFMT_16_16_SSCALED,
  UFMT_16_16_UINT,
  UFMT_16_16_SINT,
  UFMT_16_16_FLOAT,

  // 30 and 31: only the FLOAT variants are listed for these two layouts.
  UFMT_10_11_11_FLOAT = 30,

  UFMT_11_11_10_FLOAT = 31,

  // 32..35: no USCALED/SSCALED entries in this group.
  UFMT_10_10_10_2_UNORM = 32,
  UFMT_10_10_10_2_SNORM,
  UFMT_10_10_10_2_UINT,
  UFMT_10_10_10_2_SINT,

  // 36..41
  UFMT_2_10_10_10_UNORM = 36,
  UFMT_2_10_10_10_SNORM,
  UFMT_2_10_10_10_USCALED,
  UFMT_2_10_10_10_SSCALED,
  UFMT_2_10_10_10_UINT,
  UFMT_2_10_10_10_SINT,

  // 42..47
  UFMT_8_8_8_8_UNORM = 42,
  UFMT_8_8_8_8_SNORM,
  UFMT_8_8_8_8_USCALED,
  UFMT_8_8_8_8_SSCALED,
  UFMT_8_8_8_8_UINT,
  UFMT_8_8_8_8_SINT,

  // 48..50
  UFMT_32_32_UINT = 48,
  UFMT_32_32_SINT,
  UFMT_32_32_FLOAT,

  // 51..57
  UFMT_16_16_16_16_UNORM = 51,
  UFMT_16_16_16_16_SNORM,
  UFMT_16_16_16_16_USCALED,
  UFMT_16_16_16_16_SSCALED,
  UFMT_16_16_16_16_UINT,
  UFMT_16_16_16_16_SINT,
  UFMT_16_16_16_16_FLOAT,

  // 58..63
  UFMT_32_32_32_UINT = 58,
  UFMT_32_32_32_SINT,
  UFMT_32_32_32_FLOAT,
  UFMT_32_32_32_32_UINT,
  UFMT_32_32_32_32_SINT,
  UFMT_32_32_32_32_FLOAT,

  // Inclusive bounds of the table.
  UFMT_FIRST = UFMT_INVALID,
  UFMT_LAST = UFMT_32_32_32_32_FLOAT,
};

} // namespace UfmtGFX11
| 1077 | |
namespace Swizzle { // Encoding of swizzle macro used in ds_swizzle_b32.

// Identifiers of the symbolic swizzle names.
enum Id : unsigned {
  ID_QUAD_PERM    = 0,
  ID_BITMASK_PERM = 1,
  ID_SWAP         = 2,
  ID_REVERSE      = 3,
  ID_BROADCAST    = 4,
  ID_FFT          = 5,
  ID_ROTATE       = 6
};

// clang-format off
enum EncBits : unsigned {

  // ---- swizzle mode encodings ----

  QUAD_PERM_ENC         = 0x8000,
  QUAD_PERM_ENC_MASK    = 0xFF00,

  BITMASK_PERM_ENC      = 0x0000,
  BITMASK_PERM_ENC_MASK = 0x8000,

  FFT_MODE_ENC          = 0xE000,

  ROTATE_MODE_ENC       = 0xC000,
  FFT_ROTATE_MODE_MASK  = 0xF000,

  ROTATE_MODE_LO        = 0xC000,
  FFT_MODE_LO           = 0xE000,

  // ---- QUAD_PERM encodings ----

  LANE_MASK             = 0x3,
  LANE_MAX              = LANE_MASK,
  LANE_SHIFT            = 2,
  LANE_NUM              = 4,

  // ---- BITMASK_PERM encodings ----

  BITMASK_MASK          = 0x1F,
  BITMASK_MAX           = BITMASK_MASK,
  BITMASK_WIDTH         = 5,

  BITMASK_AND_SHIFT     = 0,
  BITMASK_OR_SHIFT      = 5,
  BITMASK_XOR_SHIFT     = 10,

  // ---- FFT encodings ----

  FFT_SWIZZLE_MASK      = 0x1F,
  FFT_SWIZZLE_MAX       = 0x1F,

  // ---- ROTATE encodings ----

  ROTATE_MAX_SIZE       = 0x1F,
  ROTATE_DIR_SHIFT      = 10,  // bit position of rotate direction
  ROTATE_DIR_MASK       = 0x1,
  ROTATE_SIZE_SHIFT     = 5,   // bit position of rotate size
  ROTATE_SIZE_MASK      = ROTATE_MAX_SIZE,
};
// clang-format on

} // namespace Swizzle
| 1141 | |
namespace SDWA {

// Byte / word / dword selector values.
enum SdwaSel : unsigned {
  BYTE_0 = 0,
  BYTE_1 = 1,
  BYTE_2 = 2,
  BYTE_3 = 3,
  WORD_0 = 4,
  WORD_1 = 5,
  DWORD  = 6,
};

// Values of the dst_unused selector.
enum DstUnused : unsigned {
  UNUSED_PAD      = 0,
  UNUSED_SEXT     = 1,
  UNUSED_PRESERVE = 2,
};

// Masks and operand-encoding ranges for the SDWA9 encoding.
enum SDWA9EncValues : unsigned {
  // Bit masks.
  SRC_SGPR_MASK      = 0x100,
  SRC_VGPR_MASK      = 0xFF,
  VOPC_DST_VCC_MASK  = 0x80,
  VOPC_DST_SGPR_MASK = 0x7F,

  // Inclusive source operand ranges.
  SRC_VGPR_MIN       = 0,
  SRC_VGPR_MAX       = 255,
  SRC_SGPR_MIN       = 256,
  SRC_SGPR_MAX_SI    = 357,
  SRC_SGPR_MAX_GFX10 = 361,
  SRC_TTMP_MIN       = 364,
  SRC_TTMP_MAX       = 379,
};

} // namespace SDWA
| 1176 | |
namespace DPP {

// clang-format off
// dpp_ctrl encodings. Several names deliberately alias the same value
// (for example DPP_UNUSED1 / ROW_SHL0, or ROW_NEWBCAST_* / ROW_SHARE_*).
enum DppCtrl : unsigned {
  // Quad permutations: 0x00..0xFF.
  QUAD_PERM_FIRST    = 0,
  QUAD_PERM_ID       = 0xE4, // identity permutation
  QUAD_PERM_LAST     = 0xFF,

  // Row shift left: 0x101..0x10F.
  DPP_UNUSED1        = 0x100,
  ROW_SHL0           = 0x100,
  ROW_SHL_FIRST      = 0x101,
  ROW_SHL_LAST       = 0x10F,

  // Row shift right: 0x111..0x11F.
  DPP_UNUSED2        = 0x110,
  ROW_SHR0           = 0x110,
  ROW_SHR_FIRST      = 0x111,
  ROW_SHR_LAST       = 0x11F,

  // Row rotate right: 0x121..0x12F.
  DPP_UNUSED3        = 0x120,
  ROW_ROR0           = 0x120,
  ROW_ROR_FIRST      = 0x121,
  ROW_ROR_LAST       = 0x12F,

  // Wave-level controls, separated by unused ranges.
  WAVE_SHL1          = 0x130,
  DPP_UNUSED4_FIRST  = 0x131,
  DPP_UNUSED4_LAST   = 0x133,
  WAVE_ROL1          = 0x134,
  DPP_UNUSED5_FIRST  = 0x135,
  DPP_UNUSED5_LAST   = 0x137,
  WAVE_SHR1          = 0x138,
  DPP_UNUSED6_FIRST  = 0x139,
  DPP_UNUSED6_LAST   = 0x13B,
  WAVE_ROR1          = 0x13C,
  DPP_UNUSED7_FIRST  = 0x13D,
  DPP_UNUSED7_LAST   = 0x13F,

  // Mirrors and broadcasts.
  ROW_MIRROR         = 0x140,
  ROW_HALF_MIRROR    = 0x141,
  BCAST15            = 0x142,
  BCAST31            = 0x143,
  DPP_UNUSED8_FIRST  = 0x144,
  DPP_UNUSED8_LAST   = 0x14F,

  // Row broadcast / share: 0x150..0x15F.
  ROW_NEWBCAST_FIRST = 0x150,
  ROW_NEWBCAST_LAST  = 0x15F,
  ROW_SHARE0         = 0x150,
  ROW_SHARE_FIRST    = 0x150,
  ROW_SHARE_LAST     = 0x15F,

  // Row xmask: 0x160..0x16F.
  ROW_XMASK0         = 0x160,
  ROW_XMASK_FIRST    = 0x160,
  ROW_XMASK_LAST     = 0x16F,

  DPP_LAST           = ROW_XMASK_LAST
};
// clang-format on

// FI mode values for DPP and DPP8.
enum DppFiMode {
  DPP_FI_0  = 0,
  DPP_FI_1  = 1,
  DPP8_FI_0 = 0xE9,
  DPP8_FI_1 = 0xEA,
};

} // namespace DPP
| 1234 | |
namespace Exp {

// Export target encodings, followed by the largest index allowed for each
// target family.
enum Target : unsigned {
  ET_MRT0                   = 0,
  ET_MRT7                   = 7,
  ET_MRTZ                   = 8,
  ET_NULL                   = 9,       // Pre-GFX11
  ET_POS0                   = 12,
  ET_POS3                   = 15,
  ET_POS4                   = 16,      // GFX10+
  ET_POS_LAST               = ET_POS4, // Highest pos used on any subtarget
  ET_PRIM                   = 20,      // GFX10+
  ET_DUAL_SRC_BLEND0        = 21,      // GFX11+
  ET_DUAL_SRC_BLEND1        = 22,      // GFX11+
  ET_PARAM0                 = 32,      // Pre-GFX11
  ET_PARAM31                = 63,      // Pre-GFX11

  // Per-family maximum index.
  ET_NULL_MAX_IDX           = 0,
  ET_MRTZ_MAX_IDX           = 0,
  ET_PRIM_MAX_IDX           = 0,
  ET_MRT_MAX_IDX            = 7,
  ET_POS_MAX_IDX            = 4,
  ET_DUAL_SRC_BLEND_MAX_IDX = 1,
  ET_PARAM_MAX_IDX          = 31,

  ET_INVALID                = 255,
};

} // namespace Exp
| 1264 | |
namespace WMMA {

// Matrix element format selector.
enum MatrixFMT : unsigned {
  MATRIX_FMT_FP8 = 0,
  MATRIX_FMT_BF8 = 1,
  MATRIX_FMT_FP6 = 2,
  MATRIX_FMT_BF6 = 3,
  MATRIX_FMT_FP4 = 4
};

// Matrix scale row selector.
enum MatrixScale : unsigned {
  MATRIX_SCALE_ROW0 = 0,
  MATRIX_SCALE_ROW1 = 1,
};

// Matrix scale format selector.
enum MatrixScaleFmt : unsigned {
  MATRIX_SCALE_FMT_E8   = 0,
  MATRIX_SCALE_FMT_E5M3 = 1,
  MATRIX_SCALE_FMT_E4M3 = 2
};

} // namespace WMMA
| 1285 | |
namespace VOP3PEncoding {

// Single-bit masks for the three op_sel_hi bits. Note that bit 2 sits at
// position 14, far from bits 0 and 1 (positions 59 and 60).
enum OpSel : uint64_t {
  OP_SEL_HI_0 = UINT64_C(1) << 59, // bit 59
  OP_SEL_HI_1 = UINT64_C(1) << 60, // bit 60
  OP_SEL_HI_2 = UINT64_C(1) << 14, // bit 14
};

} // namespace VOP3PEncoding
| 1295 | |
namespace ImplicitArg {

// Implicit kernel argument offset for code object version 5.
enum Offset_COV5 : unsigned {
  HOSTCALL_PTR_OFFSET       = 80,
  MULTIGRID_SYNC_ARG_OFFSET = 88,
  HEAP_PTR_OFFSET           = 96,

  DEFAULT_QUEUE_OFFSET      = 104,
  COMPLETION_ACTION_OFFSET  = 112,

  PRIVATE_BASE_OFFSET       = 192,
  SHARED_BASE_OFFSET        = 196,
  QUEUE_PTR_OFFSET          = 200,
};

} // namespace ImplicitArg
| 1312 | |
namespace MFMAScaleFormats {

// Enum value used in cbsz/blgp for F8F6F4 MFMA operations to select the
// matrix format.
enum MFMAScaleFormats {
  FP8_E4M3 = 0,
  FP8_E5M2 = 1,
  FP6_E2M3 = 2,
  FP6_E3M2 = 3,
  FP4_E2M1 = 4
};

} // namespace MFMAScaleFormats
| 1324 | |
namespace VirtRegFlag {

// Virtual register flags used for various target specific handlings during
// codegen. Each flag is a distinct bit of a uint8_t.
enum Register_Flag : uint8_t {
  // Register operand in a whole-wave mode operation.
  WWM_REG = 1 << 0, // bit 0
};

} // namespace VirtRegFlag
| 1334 | |
| 1335 | } // namespace AMDGPU |
| 1336 | |
namespace AMDGPU {
namespace Barrier {

// Barrier identifiers: the special barriers have negative values, the named
// barriers span NAMED_BARRIER_FIRST..NAMED_BARRIER_LAST.
enum Type {
  CLUSTER_TRAP        = -4,
  CLUSTER             = -3,
  TRAP                = -2,
  WORKGROUP           = -1,
  NAMED_BARRIER_FIRST = 1,
  NAMED_BARRIER_LAST  = 16,
};

// Barrier scope values.
enum {
  BARRIER_SCOPE_WORKGROUP = 0,
};

} // namespace Barrier
} // namespace AMDGPU
| 1355 | |
| 1356 | // clang-format off |
| 1357 | |
// SPI_SHADER_PGM_RSRC1_PS. S_* masks a value and shifts it into its field,
// G_* extracts the field again, C_* is the mask with the field's bits cleared.
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS                                0x00B028
#define   S_00B028_VGPRS(x)                                           (((x) & 0x3F) << 0)
#define   S_00B028_SGPRS(x)                                           (((x) & 0x0F) << 6)
#define   S_00B028_MEM_ORDERED(x)                                     (((x) & 0x1) << 25)
#define   G_00B028_MEM_ORDERED(x)                                     (((x) >> 25) & 0x1)
#define   C_00B028_MEM_ORDERED                                        0xFDFFFFFF
| 1364 | |
// SPI_SHADER_PGM_RSRC2_PS.
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS                                0x00B02C
// EXTRA_LDS_SIZE field setter: keeps the low 8 bits of x and places them at
// bits 15:8. A #define must name the macro; `#define (x) ...` is ill-formed.
#define   S_00B02C_EXTRA_LDS_SIZE(x)                                  (((x) & 0xFF) << 8)
// SPI_SHADER_PGM_RSRC1_{VS,GS,ES,HS,LS}. S_* sets a field, G_* reads it back,
// C_* is the mask with the field's bit cleared.
#define R_00B128_SPI_SHADER_PGM_RSRC1_VS                                0x00B128
#define   S_00B128_MEM_ORDERED(x)                                     (((x) & 0x1) << 27)
#define   G_00B128_MEM_ORDERED(x)                                     (((x) >> 27) & 0x1)
#define   C_00B128_MEM_ORDERED                                        0xF7FFFFFF

#define R_00B228_SPI_SHADER_PGM_RSRC1_GS                                0x00B228
#define   S_00B228_WGP_MODE(x)                                        (((x) & 0x1) << 27)
#define   G_00B228_WGP_MODE(x)                                        (((x) >> 27) & 0x1)
#define   C_00B228_WGP_MODE                                           0xF7FFFFFF
#define   S_00B228_MEM_ORDERED(x)                                     (((x) & 0x1) << 25)
#define   G_00B228_MEM_ORDERED(x)                                     (((x) >> 25) & 0x1)
#define   C_00B228_MEM_ORDERED                                        0xFDFFFFFF

#define R_00B328_SPI_SHADER_PGM_RSRC1_ES                                0x00B328
#define R_00B428_SPI_SHADER_PGM_RSRC1_HS                                0x00B428
#define   S_00B428_WGP_MODE(x)                                        (((x) & 0x1) << 26)
#define   G_00B428_WGP_MODE(x)                                        (((x) >> 26) & 0x1)
#define   C_00B428_WGP_MODE                                           0xFBFFFFFF
#define   S_00B428_MEM_ORDERED(x)                                     (((x) & 0x1) << 24)
#define   G_00B428_MEM_ORDERED(x)                                     (((x) >> 24) & 0x1)
#define   C_00B428_MEM_ORDERED                                        0xFEFFFFFF

#define R_00B528_SPI_SHADER_PGM_RSRC1_LS                                0x00B528
| 1390 | |
// COMPUTE_PGM_RSRC2. For every field: S_* masks a value and shifts it into
// place, G_* extracts it from a register value, C_* clears it when AND'ed.
#define R_00B84C_COMPUTE_PGM_RSRC2                                      0x00B84C
#define   S_00B84C_SCRATCH_EN(x)                                      (((x) & 0x1) << 0)
#define   G_00B84C_SCRATCH_EN(x)                                      (((x) >> 0) & 0x1)
#define   C_00B84C_SCRATCH_EN                                         0xFFFFFFFE
#define   S_00B84C_USER_SGPR(x)                                       (((x) & 0x1F) << 1)
#define   G_00B84C_USER_SGPR(x)                                       (((x) >> 1) & 0x1F)
#define   C_00B84C_USER_SGPR                                          0xFFFFFFC1
#define   S_00B84C_TRAP_HANDLER(x)                                    (((x) & 0x1) << 6)
#define   G_00B84C_TRAP_HANDLER(x)                                    (((x) >> 6) & 0x1)
#define   C_00B84C_TRAP_HANDLER                                       0xFFFFFFBF
#define   S_00B84C_TGID_X_EN(x)                                       (((x) & 0x1) << 7)
#define   G_00B84C_TGID_X_EN(x)                                       (((x) >> 7) & 0x1)
#define   C_00B84C_TGID_X_EN                                          0xFFFFFF7F
#define   S_00B84C_TGID_Y_EN(x)                                       (((x) & 0x1) << 8)
#define   G_00B84C_TGID_Y_EN(x)                                       (((x) >> 8) & 0x1)
#define   C_00B84C_TGID_Y_EN                                          0xFFFFFEFF
#define   S_00B84C_TGID_Z_EN(x)                                       (((x) & 0x1) << 9)
#define   G_00B84C_TGID_Z_EN(x)                                       (((x) >> 9) & 0x1)
#define   C_00B84C_TGID_Z_EN                                          0xFFFFFDFF
#define   S_00B84C_TG_SIZE_EN(x)                                      (((x) & 0x1) << 10)
#define   G_00B84C_TG_SIZE_EN(x)                                      (((x) >> 10) & 0x1)
#define   C_00B84C_TG_SIZE_EN                                         0xFFFFFBFF
#define   S_00B84C_TIDIG_COMP_CNT(x)                                  (((x) & 0x03) << 11)
#define   G_00B84C_TIDIG_COMP_CNT(x)                                  (((x) >> 11) & 0x03)
#define   C_00B84C_TIDIG_COMP_CNT                                     0xFFFFE7FF
/* CIK */
#define   S_00B84C_EXCP_EN_MSB(x)                                     (((x) & 0x03) << 13)
#define   G_00B84C_EXCP_EN_MSB(x)                                     (((x) >> 13) & 0x03)
#define   C_00B84C_EXCP_EN_MSB                                        0xFFFF9FFF
/*     */
#define   S_00B84C_LDS_SIZE(x)                                        (((x) & 0x1FF) << 15)
#define   G_00B84C_LDS_SIZE(x)                                        (((x) >> 15) & 0x1FF)
#define   C_00B84C_LDS_SIZE                                           0xFF007FFF
#define   S_00B84C_EXCP_EN(x)                                         (((x) & 0x7F) << 24)
#define   G_00B84C_EXCP_EN(x)                                         (((x) >> 24) & 0x7F)
#define   C_00B84C_EXCP_EN                                            0x80FFFFFF

// GFX1250 variant of USER_SGPR: the field is one bit wider (6 bits).
#define   S_00B84C_USER_SGPR_GFX1250(x)                               (((x) & 0x3F) << 1)
#define   G_00B84C_USER_SGPR_GFX1250(x)                               (((x) >> 1) & 0x3F)
#define   C_00B84C_USER_SGPR_GFX1250                                  0xFFFFFF81
| 1431 | |
// SPI_PS_INPUT_ENA / SPI_PS_INPUT_ADDR.
#define R_0286CC_SPI_PS_INPUT_ENA                                       0x0286CC
#define R_0286D0_SPI_PS_INPUT_ADDR                                      0x0286D0
| 1434 | |
// COMPUTE_PGM_RSRC1. For every field: S_* masks a value and shifts it into
// place, G_* extracts it from a register value, C_* clears it when AND'ed.
// DX10_CLAMP and RR_WG_MODE are both defined at bit 21.
#define R_00B848_COMPUTE_PGM_RSRC1                                      0x00B848
#define   S_00B848_VGPRS(x)                                           (((x) & 0x3F) << 0)
#define   G_00B848_VGPRS(x)                                           (((x) >> 0) & 0x3F)
#define   C_00B848_VGPRS                                              0xFFFFFFC0
#define   S_00B848_SGPRS(x)                                           (((x) & 0x0F) << 6)
#define   G_00B848_SGPRS(x)                                           (((x) >> 6) & 0x0F)
#define   C_00B848_SGPRS                                              0xFFFFFC3F
#define   S_00B848_PRIORITY(x)                                        (((x) & 0x03) << 10)
#define   G_00B848_PRIORITY(x)                                        (((x) >> 10) & 0x03)
#define   C_00B848_PRIORITY                                           0xFFFFF3FF
#define   S_00B848_FLOAT_MODE(x)                                      (((x) & 0xFF) << 12)
#define   G_00B848_FLOAT_MODE(x)                                      (((x) >> 12) & 0xFF)
#define   C_00B848_FLOAT_MODE                                         0xFFF00FFF
#define   S_00B848_PRIV(x)                                            (((x) & 0x1) << 20)
#define   G_00B848_PRIV(x)                                            (((x) >> 20) & 0x1)
#define   C_00B848_PRIV                                               0xFFEFFFFF
#define   S_00B848_DX10_CLAMP(x)                                      (((x) & 0x1) << 21)
#define   G_00B848_DX10_CLAMP(x)                                      (((x) >> 21) & 0x1)
#define   C_00B848_DX10_CLAMP                                         0xFFDFFFFF
#define   S_00B848_RR_WG_MODE(x)                                      (((x) & 0x1) << 21)
#define   G_00B848_RR_WG_MODE(x)                                      (((x) >> 21) & 0x1)
#define   C_00B848_RR_WG_MODE                                         0xFFDFFFFF
#define   S_00B848_DEBUG_MODE(x)                                      (((x) & 0x1) << 22)
#define   G_00B848_DEBUG_MODE(x)                                      (((x) >> 22) & 0x1)
#define   C_00B848_DEBUG_MODE                                         0xFFBFFFFF
#define   S_00B848_IEEE_MODE(x)                                       (((x) & 0x1) << 23)
#define   G_00B848_IEEE_MODE(x)                                       (((x) >> 23) & 0x1)
#define   C_00B848_IEEE_MODE                                          0xFF7FFFFF
#define   S_00B848_WGP_MODE(x)                                        (((x) & 0x1) << 29)
#define   G_00B848_WGP_MODE(x)                                        (((x) >> 29) & 0x1)
#define   C_00B848_WGP_MODE                                           0xDFFFFFFF
#define   S_00B848_MEM_ORDERED(x)                                     (((x) & 0x1) << 30)
#define   G_00B848_MEM_ORDERED(x)                                     (((x) >> 30) & 0x1)
#define   C_00B848_MEM_ORDERED                                        0xBFFFFFFF
#define   S_00B848_FWD_PROGRESS(x)                                    (((x) & 0x1) << 31)
#define   G_00B848_FWD_PROGRESS(x)                                    (((x) >> 31) & 0x1)
#define   C_00B848_FWD_PROGRESS                                       0x7FFFFFFF
| 1472 | |
// Helpers for setting FLOAT_MODE

// Rounding mode values.
#define FP_ROUND_ROUND_TO_NEAREST 0
#define FP_ROUND_ROUND_TO_INF     1
#define FP_ROUND_ROUND_TO_NEGINF  2
#define FP_ROUND_ROUND_TO_ZERO    3

// Bits 3:0 control rounding mode. 1:0 control single precision, 3:2 double
// precision.
#define FP_ROUND_MODE_SP(x) ((x) & 0x3)
#define FP_ROUND_MODE_DP(x) (((x) & 0x3) << 2)

// Denormal handling values.
#define FP_DENORM_FLUSH_IN_FLUSH_OUT 0
#define FP_DENORM_FLUSH_OUT          1
#define FP_DENORM_FLUSH_IN           2
#define FP_DENORM_FLUSH_NONE         3


// Bits 7:4 control denormal handling. 5:4 control single precision, 7:6 double
// precision.
#define FP_DENORM_MODE_SP(x) (((x) & 0x3) << 4)
#define FP_DENORM_MODE_DP(x) (((x) & 0x3) << 6)
| 1494 | |
// COMPUTE_TMPRING_SIZE / SPI_TMPRING_SIZE. WAVESIZE always starts at bit 12;
// its width is 13 bits (PreGFX11), 15 bits (GFX11) or 18 bits (GFX12Plus).
#define R_00B860_COMPUTE_TMPRING_SIZE                                   0x00B860
#define   S_00B860_WAVESIZE_PreGFX11(x)                               (((x) & 0x1FFF) << 12)
#define   S_00B860_WAVESIZE_GFX11(x)                                  (((x) & 0x7FFF) << 12)
#define   S_00B860_WAVESIZE_GFX12Plus(x)                              (((x) & 0x3FFFF) << 12)

#define R_0286E8_SPI_TMPRING_SIZE                                       0x0286E8
#define   S_0286E8_WAVESIZE_PreGFX11(x)                               (((x) & 0x1FFF) << 12)
#define   S_0286E8_WAVESIZE_GFX11(x)                                  (((x) & 0x7FFF) << 12)
#define   S_0286E8_WAVESIZE_GFX12Plus(x)                              (((x) & 0x3FFFF) << 12)
| 1504 | |
// Per-stage W32_EN single-bit setters and the registers they belong to.
#define R_028B54_VGT_SHADER_STAGES_EN                                   0x028B54
#define   S_028B54_HS_W32_EN(x)                                       (((x) & 0x1) << 21)
#define   S_028B54_GS_W32_EN(x)                                       (((x) & 0x1) << 22)
#define   S_028B54_VS_W32_EN(x)                                       (((x) & 0x1) << 23)
#define R_0286D8_SPI_PS_IN_CONTROL                                      0x0286D8
#define   S_0286D8_PS_W32_EN(x)                                       (((x) & 0x1) << 15)
#define R_00B800_COMPUTE_DISPATCH_INITIATOR                             0x00B800
#define   S_00B800_CS_W32_EN(x)                                       (((x) & 0x1) << 15)
| 1513 | |
// Identifiers for the spilled SGPR / VGPR counts.
#define R_SPILLED_SGPRS                                                 0x4
#define R_SPILLED_VGPRS                                                 0x8
| 1516 | |
| 1517 | // clang-format on |
| 1518 | |
| 1519 | } // End namespace llvm |
| 1520 | |
| 1521 | #endif |
| 1522 | |