| 1 | //===-- SIDefines.h - SI Helper Macros ----------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | /// \file |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | |
| 10 | #ifndef LLVM_LIB_TARGET_AMDGPU_SIDEFINES_H |
| 11 | #define LLVM_LIB_TARGET_AMDGPU_SIDEFINES_H |
| 12 | |
| 13 | #include "llvm/MC/MCInstrDesc.h" |
| 14 | |
| 15 | namespace llvm { |
| 16 | |
// Flag bits attached to a register class. This needs to be kept in sync with
// the field bits in SIRegisterClass.
enum SIRCFlags : uint8_t {
  // The low RegTupleAlignUnitsWidth bits are selected by
  // RegTupleAlignUnitsMask.
  RegTupleAlignUnitsWidth = 2,
  RegTupleAlignUnitsMask = (1 << RegTupleAlignUnitsWidth) - 1,

  // Bit positions of the register-kind flags. They start directly above the
  // bits covered by RegTupleAlignUnitsMask and are consecutive.
  HasVGPRBit = RegTupleAlignUnitsWidth,
  HasAGPRBit = HasVGPRBit + 1,
  HasSGPRbit = HasAGPRBit + 1,

  // One single-bit mask per register kind, followed by their union.
  HasVGPR = 1 << HasVGPRBit,
  HasAGPR = 1 << HasAGPRBit,
  HasSGPR = 1 << HasSGPRbit,
  RegKindMask = HasVGPR | HasAGPR | HasSGPR
}; // enum SIRCFlags
| 31 | |
// Numeric identifiers for the instruction encoding families.
namespace SIEncodingFamily {
// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
// and the columns of the getMCOpcodeGen table.
// Every value is spelled out explicitly (0..13, no gaps), which keeps each
// number visible when cross-checking against those two places.
enum {
  SI = 0,
  VI = 1,
  SDWA = 2,
  SDWA9 = 3,
  GFX80 = 4,
  GFX9 = 5,
  GFX10 = 6,
  SDWA10 = 7,
  GFX90A = 8,
  GFX940 = 9,
  GFX11 = 10,
  GFX12 = 11,
  GFX1250 = 12,
  GFX13 = 13,
};
} // namespace SIEncodingFamily
| 52 | |
namespace SIInstrFlags {
// Per-instruction flag bits. This needs to be kept in sync with the field bits
// in InstSI.
//
// Every flag is written as UINT64_C(1) << N so that all shifts are evaluated
// in 64-bit arithmetic, matching the enum's underlying type. Bits 11, 27 and
// 31 are not assigned in this enum.
enum : uint64_t {
  // Low bits - basic encoding information.
  SALU = UINT64_C(1) << 0,
  VALU = UINT64_C(1) << 1,

  // SALU instruction formats.
  SOP1 = UINT64_C(1) << 2,
  SOP2 = UINT64_C(1) << 3,
  SOPC = UINT64_C(1) << 4,
  SOPK = UINT64_C(1) << 5,
  SOPP = UINT64_C(1) << 6,

  // VALU instruction formats.
  VOP1 = UINT64_C(1) << 7,
  VOP2 = UINT64_C(1) << 8,
  VOPC = UINT64_C(1) << 9,

  // TODO: Should this be split into VOP3 a and b?
  VOP3 = UINT64_C(1) << 10,
  VOP3P = UINT64_C(1) << 12,

  VINTRP = UINT64_C(1) << 13,
  SDWA = UINT64_C(1) << 14,
  DPP = UINT64_C(1) << 15,
  TRANS = UINT64_C(1) << 16,

  // Memory instruction formats.
  MUBUF = UINT64_C(1) << 17,
  MTBUF = UINT64_C(1) << 18,
  SMRD = UINT64_C(1) << 19,
  MIMG = UINT64_C(1) << 20,
  VIMAGE = UINT64_C(1) << 21,
  VSAMPLE = UINT64_C(1) << 22,
  EXP = UINT64_C(1) << 23,
  FLAT = UINT64_C(1) << 24,
  DS = UINT64_C(1) << 25,

  // Combined SGPR/VGPR Spill bit
  // Logic to separate them out is done in isSGPRSpill and isVGPRSpill
  Spill = UINT64_C(1) << 26,

  // LDSDIR instruction format.
  LDSDIR = UINT64_C(1) << 28,

  // VINTERP instruction format.
  VINTERP = UINT64_C(1) << 29,

  VOPD3 = UINT64_C(1) << 30,

  // High bits - other information.
  VM_CNT = UINT64_C(1) << 32,
  EXP_CNT = UINT64_C(1) << 33,
  LGKM_CNT = UINT64_C(1) << 34,

  WQM = UINT64_C(1) << 35,
  DisableWQM = UINT64_C(1) << 36,
  Gather4 = UINT64_C(1) << 37,

  TENSOR_CNT = UINT64_C(1) << 38,

  SCALAR_STORE = UINT64_C(1) << 39,
  FIXED_SIZE = UINT64_C(1) << 40,

  ASYNC_CNT = UINT64_C(1) << 41,

  VOP3_OPSEL = UINT64_C(1) << 42,
  maybeAtomic = UINT64_C(1) << 43,
  renamedInGFX9 = UINT64_C(1) << 44,

  // Is a clamp on FP type.
  FPClamp = UINT64_C(1) << 45,

  // Is an integer clamp
  IntClamp = UINT64_C(1) << 46,

  // Clamps lo component of register.
  ClampLo = UINT64_C(1) << 47,

  // Clamps hi component of register.
  // ClampLo and ClampHi set for packed clamp.
  ClampHi = UINT64_C(1) << 48,

  // Is a packed VOP3P instruction.
  IsPacked = UINT64_C(1) << 49,

  // Is a D16 buffer instruction.
  D16Buf = UINT64_C(1) << 50,

  // FLAT instruction accesses FLAT_GLBL segment.
  FlatGlobal = UINT64_C(1) << 51,

  // Uses floating point double precision rounding mode
  FPDPRounding = UINT64_C(1) << 52,

  // Instruction is FP atomic.
  FPAtomic = UINT64_C(1) << 53,

  // Is a MFMA instruction.
  IsMAI = UINT64_C(1) << 54,

  // Is a DOT instruction.
  IsDOT = UINT64_C(1) << 55,

  // FLAT instruction accesses FLAT_SCRATCH segment.
  FlatScratch = UINT64_C(1) << 56,

  // Atomic without return.
  IsAtomicNoRet = UINT64_C(1) << 57,

  // Atomic with return.
  IsAtomicRet = UINT64_C(1) << 58,

  // Is a WMMA instruction.
  IsWMMA = UINT64_C(1) << 59,

  // Whether tied sources will be read.
  TiedSourceNotRead = UINT64_C(1) << 60,

  // Is never uniform.
  IsNeverUniform = UINT64_C(1) << 61,

  // ds_gws_* instructions.
  GWS = UINT64_C(1) << 62,

  // Is a SWMMAC instruction.
  IsSWMMAC = UINT64_C(1) << 63,
};

// v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
// The result is true if any of these tests are true.
// The ten flags occupy bits 0..9, one bit per floating-point class.
enum ClassFlags : unsigned {
  S_NAN = 1 << 0,       // Signaling NaN
  Q_NAN = 1 << 1,       // Quiet NaN
  N_INFINITY = 1 << 2,  // Negative infinity
  N_NORMAL = 1 << 3,    // Negative normal
  N_SUBNORMAL = 1 << 4, // Negative subnormal
  N_ZERO = 1 << 5,      // Negative zero
  P_ZERO = 1 << 6,      // Positive zero
  P_SUBNORMAL = 1 << 7, // Positive subnormal
  P_NORMAL = 1 << 8,    // Positive normal
  P_INFINITY = 1 << 9   // Positive infinity
};
} // namespace SIInstrFlags
| 198 | |
| 199 | namespace AMDGPU { |
| 200 | enum OperandType : unsigned { |
| 201 | /// Operands with register, 32-bit, or 64-bit immediate |
| 202 | OPERAND_REG_IMM_INT32 = MCOI::OPERAND_FIRST_TARGET, |
| 203 | OPERAND_REG_IMM_INT64, |
| 204 | OPERAND_REG_IMM_INT16, |
| 205 | OPERAND_REG_IMM_FP32, |
| 206 | OPERAND_REG_IMM_FP64, |
| 207 | OPERAND_REG_IMM_BF16, |
| 208 | OPERAND_REG_IMM_FP16, |
| 209 | OPERAND_REG_IMM_V2BF16, |
| 210 | OPERAND_REG_IMM_V2FP16, |
| 211 | OPERAND_REG_IMM_V2FP16_SPLAT, |
| 212 | OPERAND_REG_IMM_V2INT16, |
| 213 | OPERAND_REG_IMM_NOINLINE_V2FP16, |
| 214 | OPERAND_REG_IMM_V2INT32, |
| 215 | OPERAND_REG_IMM_V2FP32, |
| 216 | |
| 217 | /// Operands with register or inline constant |
| 218 | OPERAND_REG_INLINE_C_INT16, |
| 219 | OPERAND_REG_INLINE_C_INT32, |
| 220 | OPERAND_REG_INLINE_C_INT64, |
| 221 | OPERAND_REG_INLINE_C_BF16, |
| 222 | OPERAND_REG_INLINE_C_FP16, |
| 223 | OPERAND_REG_INLINE_C_FP32, |
| 224 | OPERAND_REG_INLINE_C_FP64, |
| 225 | OPERAND_REG_INLINE_C_V2INT16, |
| 226 | OPERAND_REG_INLINE_C_V2BF16, |
| 227 | OPERAND_REG_INLINE_C_V2FP16, |
| 228 | |
| 229 | // Operand for split barrier inline constant |
| 230 | OPERAND_INLINE_SPLIT_BARRIER_INT32, |
| 231 | |
| 232 | /// Operand with 32-bit immediate that uses the constant bus. |
| 233 | OPERAND_KIMM32, |
| 234 | OPERAND_KIMM16, |
| 235 | OPERAND_KIMM64, |
| 236 | |
| 237 | /// Operands with an AccVGPR register or inline constant |
| 238 | OPERAND_REG_INLINE_AC_INT32, |
| 239 | OPERAND_REG_INLINE_AC_FP32, |
| 240 | OPERAND_REG_INLINE_AC_FP64, |
| 241 | |
| 242 | // Operand for AV_MOV_B64_IMM_PSEUDO, which is a pair of 32-bit inline |
| 243 | // constants. Does not accept registers. |
| 244 | OPERAND_INLINE_C_AV64_PSEUDO, |
| 245 | |
| 246 | // Operand for source modifiers for VOP instructions |
| 247 | OPERAND_INPUT_MODS, |
| 248 | |
| 249 | // Operand for SDWA instructions |
| 250 | OPERAND_SDWA_VOPC_DST, |
| 251 | |
| 252 | OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32, |
| 253 | OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2FP32, |
| 254 | |
| 255 | OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16, |
| 256 | OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_FP64, |
| 257 | |
| 258 | OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT32, |
| 259 | OPERAND_REG_INLINE_AC_LAST = OPERAND_INLINE_C_AV64_PSEUDO, |
| 260 | |
| 261 | OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32, |
| 262 | OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST, |
| 263 | |
| 264 | OPERAND_KIMM_FIRST = OPERAND_KIMM32, |
| 265 | OPERAND_KIMM_LAST = OPERAND_KIMM64 |
| 266 | |
| 267 | }; |
| 268 | } |
| 269 | |
// Bit-masks for input (source) operand modifiers.
// NEG and SEXT can't be set simultaneously.
// NOTE(review): this comment used to say that NEG and SEXT share the same
// bit-mask, but the values below place them on different bits (bit 0 and
// bit 4). Confirm which of the two is intended.
namespace SISrcMods {
enum : unsigned {
  NONE = 0,

  // Distinct modifier bits, listed in bit order.
  NEG = 1 << 0,      // Floating-point negate modifier
  ABS = 1 << 1,      // Floating-point absolute modifier
  OP_SEL_0 = 1 << 2,
  OP_SEL_1 = 1 << 3,
  SEXT = 1 << 4,     // Integer sign-extend modifier

  // Names that reuse one of the bits above.
  NEG_HI = ABS,         // Floating-point negate high packed component modifier.
  DST_OP_SEL = OP_SEL_1 // VOP3 dst op_sel (share mask with OP_SEL_1)
};
} // namespace SISrcMods
| 284 | |
// Values of the output modifier, numbered 0..3.
// NOTE(review): the names suggest "multiply result by 2 / by 4" and "divide
// result by 2" - confirm against the ISA documentation.
namespace SIOutMods {
enum : unsigned {
  NONE = 0,
  MUL2 = 1,
  MUL4 = 2,
  DIV2 = 3
};
} // namespace SIOutMods
| 293 | |
namespace AMDGPU {
namespace VGPRIndexMode {

// Ids of the symbolic names. Each id is also the bit position of the matching
// *_ENABLE flag in EncBits.
enum Id : unsigned {
  ID_SRC0 = 0,
  ID_SRC1 = 1,
  ID_SRC2 = 2,
  ID_DST = 3,

  // Inclusive bounds of the id range.
  ID_MIN = ID_SRC0,
  ID_MAX = ID_DST
};

// Encoded enable bits, one per Id, plus two special values.
enum EncBits : unsigned {
  SRC0_ENABLE = 1 << ID_SRC0,
  SRC1_ENABLE = 1 << ID_SRC1,
  SRC2_ENABLE = 1 << ID_SRC2,
  DST_ENABLE = 1 << ID_DST,

  // No bit set / every enable bit set.
  OFF = 0,
  ENABLE_MASK = DST_ENABLE | SRC2_ENABLE | SRC1_ENABLE | SRC0_ENABLE,

  // Lies outside ENABLE_MASK, so it cannot be confused with a combination of
  // enable bits.
  UNDEF = 0xFFFF
};

} // namespace VGPRIndexMode
} // namespace AMDGPU
| 319 | |
// Numeric ids of the assembler variants, numbered 0..5 without gaps.
// NOTE(review): presumably these must match the variant numbering used by the
// TableGen asm-parser definitions - confirm before renumbering.
namespace AMDGPUAsmVariants {
  enum : unsigned {
    DEFAULT = 0,
    VOP3 = 1,
    SDWA = 2,
    SDWA9 = 3,
    DPP = 4,
    VOP3_DPP = 5
  };
} // namespace AMDGPUAsmVariants
| 330 | |
| 331 | namespace AMDGPU { |
namespace EncValues { // Encoding values of enum9/8/7 operands

// Boundaries of the value ranges inside the operand encoding. The *_MIN and
// *_MAX pairs are inclusive bounds.
enum : unsigned {
  SGPR_MIN = 0,
  SGPR_MAX_SI = 101,
  SGPR_MAX_GFX10 = 105,
  // The two TTMP ranges end at the same value but start at different ones.
  TTMP_VI_MIN = 112,
  TTMP_VI_MAX = 123,
  TTMP_GFX9PLUS_MIN = 108,
  TTMP_GFX9PLUS_MAX = 123,
  INLINE_INTEGER_C_MIN = 128,
  INLINE_INTEGER_C_POSITIVE_MAX = 192, // 64 (= 192 - INLINE_INTEGER_C_MIN)
  INLINE_INTEGER_C_MAX = 208,
  INLINE_FLOATING_C_MIN = 240,
  INLINE_FLOATING_C_MAX = 248,
  LITERAL64_CONST = 254,
  LITERAL_CONST = 255,
  VGPR_MIN = 256,
  VGPR_MAX = 511,
  // Same value as VGPR_MIN, i.e. bit 8 of the encoding.
  IS_VGPR = 256, // Indicates VGPR or AGPR
};

} // namespace EncValues
| 355 | |
// Register codes as defined in the TableGen's HWEncoding field.
namespace HWEncoding {
enum : unsigned {
  // Register-index masks: the low 8 bits, and the full low 10 bits.
  LO256_REG_IDX_MASK = (1 << 8) - 1,
  REG_IDX_MASK = (1 << 10) - 1,

  // Single-bit flags located directly above the 10-bit register index.
  IS_VGPR = 1 << 10,
  IS_AGPR = 1 << 11,
  IS_HI16 = 1 << 12,
};
} // namespace HWEncoding
| 366 | |
// Cache-policy bits. The enum holds two layouts side by side: the bits suffixed
// or grouped as pre-GFX12 at the top, and the GFX12+ TH / scope fields below.
// Several names deliberately share a numeric value.
namespace CPol {

enum CPol {
  GLC = 1,
  SLC = 2,
  DLC = 4,
  SCC = 16,
  // Alternative names for GLC, SCC and SLC.
  SC0 = GLC,
  SC1 = SCC,
  NT = SLC,
  ALL_pregfx12 = GLC | SLC | DLC | SCC,
  SWZ_pregfx12 = 8,

  // Below are GFX12+ cache policy bits

  // Temporal hint
  // TH is the 3-bit field mask; the TH_* names are values of that field.
  // TH_LU, TH_WB and TH_BYPASS share the value 3, and TH_NT_WB and
  // TH_RESERVED share the value 7.
  TH = 0x7,      // All TH bits
  TH_RT = 0,     // regular
  TH_NT = 1,     // non-temporal
  TH_HT = 2,     // high-temporal
  TH_LU = 3,     // last use
  TH_WB = 3,     // regular (CU, SE), high-temporal with write-back (MALL)
  TH_NT_RT = 4,  // non-temporal (CU, SE), regular (MALL)
  TH_RT_NT = 5,  // regular (CU, SE), non-temporal (MALL)
  TH_NT_HT = 6,  // non-temporal (CU, SE), high-temporal (MALL)
  TH_NT_WB = 7,  // non-temporal (CU, SE), high-temporal with write-back (MALL)
  TH_BYPASS = 3, // only to be used with scope = 3

  TH_RESERVED = 7, // unused value for load insts

  // Bits of TH for atomics
  TH_ATOMIC_RETURN = GLC, // Returning vs non-returning
  TH_ATOMIC_NT = SLC,     // Non-temporal vs regular
  TH_ATOMIC_CASCADE = 4,  // Cascading vs regular

  // Scope
  // A 2-bit field located at bits 3..4 (SCOPE_MASK shifted by SCOPE_SHIFT).
  SCOPE_SHIFT = 3,
  SCOPE_MASK = 0x3,
  SCOPE = SCOPE_MASK << SCOPE_SHIFT, // All Scope bits
  SCOPE_CU = 0 << SCOPE_SHIFT,
  SCOPE_SE = 1 << SCOPE_SHIFT,
  SCOPE_DEV = 2 << SCOPE_SHIFT,
  SCOPE_SYS = 3 << SCOPE_SHIFT,

  NV = 1 << 5, // Non-volatile bit

  SWZ = 1 << 6, // Swizzle bit

  SCAL = 1 << 11, // Scale offset bit

  // Note that ALL covers TH, SCOPE and NV only; SWZ and SCAL are not part of
  // it.
  ALL = TH | SCOPE | NV,

  // Helper bits
  TH_TYPE_LOAD = 1 << 7,    // TH_LOAD policy
  TH_TYPE_STORE = 1 << 8,   // TH_STORE policy
  TH_TYPE_ATOMIC = 1 << 9,  // TH_ATOMIC policy
  TH_REAL_BYPASS = 1 << 10, // is TH=3 bypass policy or not

  // Volatile (used to preserve/signal operation volatility for buffer
  // operations not a real instruction bit)
  // NOTE(review): `1 << 31` shifts into the sign bit when int is 32 bits wide,
  // and this enum has no fixed underlying type - confirm that users of this
  // value expect that.
  VOLATILE = 1 << 31,
  // The set of "cache policy" bits used for compiler features that
  // do not correspond to hardware features.
  VIRTUAL_BITS = VOLATILE,
};

} // namespace CPol
| 434 | |
namespace SendMsg { // Encoding of SIMM16 used in s_sendmsg* insns.

// Message ids. Several numeric values carry more than one name because, per
// the per-line notes, an id was removed in one generation and reused in a
// later one.
enum Id { // Message ID, width(4) [3:0].
  ID_INTERRUPT = 1,

  ID_GS_PreGFX11 = 2,      // replaced in GFX11
  ID_GS_DONE_PreGFX11 = 3, // replaced in GFX11

  ID_HS_TESSFACTOR_GFX11Plus = 2, // reused in GFX11
  ID_DEALLOC_VGPRS_GFX11Plus = 3, // reused in GFX11

  ID_SAVEWAVE = 4,           // added in GFX8, removed in GFX11
  ID_STALL_WAVE_GEN = 5,     // added in GFX9, removed in GFX12
  ID_HALT_WAVES = 6,         // added in GFX9, removed in GFX12
  ID_ORDERED_PS_DONE = 7,    // added in GFX9, removed in GFX11
  ID_EARLY_PRIM_DEALLOC = 8, // added in GFX9, removed in GFX10
  ID_GS_ALLOC_REQ = 9,       // added in GFX9
  ID_GET_DOORBELL = 10,      // added in GFX9, removed in GFX11
  ID_SAVEWAVE_HAS_TDM = 10,  // added in GFX1250
  ID_GET_DDID = 11,          // added in GFX10, removed in GFX11
  ID_SYSMSG = 15,

  // The ID_RTN_* ids start at 128 and therefore do not fit in the 4-bit
  // ID_MASK_PreGFX11_; they need the 8-bit ID_MASK_GFX11Plus_.
  ID_RTN_GET_DOORBELL = 128,
  ID_RTN_GET_DDID = 129,
  ID_RTN_GET_TMA = 130,
  ID_RTN_GET_REALTIME = 131,
  ID_RTN_SAVE_WAVE = 132,
  ID_RTN_GET_TBA = 133,
  ID_RTN_GET_TBA_TO_PC = 134,
  ID_RTN_GET_SE_AID_ID = 135,

  ID_RTN_GET_CLUSTER_BARRIER_STATE = 136, // added in GFX1250

  ID_MASK_PreGFX11_ = 0xF,
  ID_MASK_GFX11Plus_ = 0xFF
};

// Operation ids. GS and SYS operations live in the same enum and reuse the
// same numeric values; which set applies is not decided here.
enum Op { // Both GS and SYS operation IDs.
  OP_SHIFT_ = 4,
  OP_NONE_ = 0,
  // Bits used for operation encoding
  OP_WIDTH_ = 3,
  OP_MASK_ = (((1 << OP_WIDTH_) - 1) << OP_SHIFT_),
  // GS operations are encoded in bits 5:4
  OP_GS_NOP = 0,
  OP_GS_CUT = 1,
  OP_GS_EMIT = 2,
  OP_GS_EMIT_CUT = 3,
  OP_GS_FIRST_ = OP_GS_NOP,
  // SYS operations are encoded in bits 6:4
  OP_SYS_ECC_ERR_INTERRUPT = 1,
  OP_SYS_REG_RD = 2,
  OP_SYS_HOST_TRAP_ACK = 3,
  OP_SYS_TTRACE_PC = 4,
  OP_SYS_FIRST_ = OP_SYS_ECC_ERR_INTERRUPT,
};

// Stream id field.
// NOTE(review): STREAM_ID_LAST_ is 4 although a 2-bit field holds 0..3, so it
// looks like an exclusive upper bound - confirm against its users.
enum StreamId : unsigned { // Stream ID, (2) [9:8].
  STREAM_ID_NONE_ = 0,
  STREAM_ID_DEFAULT_ = 0,
  STREAM_ID_LAST_ = 4,
  STREAM_ID_FIRST_ = STREAM_ID_DEFAULT_,
  STREAM_ID_SHIFT_ = 8,
  STREAM_ID_WIDTH_ = 2,
  STREAM_ID_MASK_ = (((1 << STREAM_ID_WIDTH_) - 1) << STREAM_ID_SHIFT_)
};

} // namespace SendMsg
| 503 | |
namespace Hwreg { // Encoding of SIMM16 used in s_setreg/getreg* insns.

// Hardware register ids. The first group is listed in ascending order; the
// groups after it give additional names to numbers that are already used
// above, as the group comments explain.
enum Id { // HwRegCode, (6) [5:0]
  ID_MODE = 1,
  ID_STATUS = 2,
  ID_TRAPSTS = 3,
  ID_HW_ID = 4,
  ID_GPR_ALLOC = 5,
  ID_LDS_ALLOC = 6,
  ID_IB_STS = 7,
  ID_PERF_SNAPSHOT_DATA_gfx12 = 10,
  ID_PERF_SNAPSHOT_PC_LO_gfx12 = 11,
  ID_PERF_SNAPSHOT_PC_HI_gfx12 = 12,
  ID_MEM_BASES = 15,
  ID_TBA_LO = 16,
  ID_TBA_HI = 17,
  ID_TMA_LO = 18,
  ID_TMA_HI = 19,
  ID_FLAT_SCR_LO = 20,
  ID_FLAT_SCR_HI = 21,
  ID_XNACK_MASK = 22,
  ID_HW_ID1 = 23,
  ID_HW_ID2 = 24,
  ID_POPS_PACKER = 25,
  ID_SCHED_MODE = 26,
  ID_PERF_SNAPSHOT_DATA_gfx11 = 27,
  ID_IB_STS2 = 28,
  ID_SHADER_CYCLES = 29,
  ID_SHADER_CYCLES_HI = 30,
  ID_DVGPR_ALLOC_LO = 31,
  ID_DVGPR_ALLOC_HI = 32,

  // Register numbers reused in GFX11
  ID_PERF_SNAPSHOT_PC_LO_gfx11 = 18,
  ID_PERF_SNAPSHOT_PC_HI_gfx11 = 19,

  // Register numbers reused in GFX12+
  ID_STATE_PRIV = 4,
  ID_PERF_SNAPSHOT_DATA1 = 15,
  ID_PERF_SNAPSHOT_DATA2 = 16,
  ID_EXCP_FLAG_PRIV = 17,
  ID_EXCP_FLAG_USER = 18,
  ID_TRAP_CTRL = 19,

  // GFX94* specific registers
  ID_XCC_ID = 20,
  ID_SQ_PERF_SNAPSHOT_DATA = 21,
  ID_SQ_PERF_SNAPSHOT_DATA1 = 22,
  ID_SQ_PERF_SNAPSHOT_PC_LO = 23,
  ID_SQ_PERF_SNAPSHOT_PC_HI = 24,

  // GFX1250
  ID_XNACK_STATE_PRIV = 33,
  ID_XNACK_MASK_gfx1250 = 34,
};

// Named bit offsets; both names currently have the value 8.
enum Offset : unsigned { // Offset, (5) [10:6]
  OFFSET_MEM_VIOL = 8,
  OFFSET_ME_ID = 8, // in HW_ID2
};

// Bit masks for fields of the MODE register. The GFX1250 *_VGPR_MSB fields
// occupy bits 12..19 and therefore overlap the EXCP_EN_* bits (12..18) listed
// before them; the two sets belong to different layouts of the same register.
enum ModeRegisterMasks : uint32_t {
  FP_ROUND_MASK = 0xf << 0,  // Bits 0..3
  FP_DENORM_MASK = 0xf << 4, // Bits 4..7
  DX10_CLAMP_MASK = 1 << 8,
  IEEE_MODE_MASK = 1 << 9,
  LOD_CLAMP_MASK = 1 << 10,
  DEBUG_MASK = 1 << 11,

  // EXCP_EN fields.
  EXCP_EN_INVALID_MASK = 1 << 12,
  EXCP_EN_INPUT_DENORMAL_MASK = 1 << 13,
  EXCP_EN_FLOAT_DIV0_MASK = 1 << 14,
  EXCP_EN_OVERFLOW_MASK = 1 << 15,
  EXCP_EN_UNDERFLOW_MASK = 1 << 16,
  EXCP_EN_INEXACT_MASK = 1 << 17,
  EXCP_EN_INT_DIV0_MASK = 1 << 18,

  GPR_IDX_EN_MASK = 1 << 27,
  VSKIP_MASK = 1 << 28,
  // The unsigned literal keeps the shift into bit 31 in unsigned arithmetic.
  CSP_MASK = 0x7u << 29, // Bits 29..31

  // GFX1250
  DST_VGPR_MSB = 0x3 << 12,
  SRC0_VGPR_MSB = 0x3 << 14,
  SRC1_VGPR_MSB = 0x3 << 16,
  SRC2_VGPR_MSB = 0x3 << 18,
  VGPR_MSB_MASK = 0xff << 12, // Bits 12..19

  REPLAY_MODE = 1 << 25,
  FLAT_SCRATCH_IS_NV = 1 << 26,
};

} // namespace Hwreg
| 598 | |
// Format encodings: a 4-bit data format (DFMT) and a 3-bit numeric format
// (NFMT) that are packed into one merged value, plus common limits for the
// unified format (UFMT) numbering.
namespace MTBUFFormat {

// Data format values 0..15, followed by range bounds, the "undefined"
// sentinel, the default, and the field position inside the merged value.
enum DataFormat : int64_t {
  DFMT_INVALID = 0,
  DFMT_8,
  DFMT_16,
  DFMT_8_8,
  DFMT_32,
  DFMT_16_16,
  DFMT_10_11_11,
  DFMT_11_11_10,
  DFMT_10_10_10_2,
  DFMT_2_10_10_10,
  DFMT_8_8_8_8,
  DFMT_32_32,
  DFMT_16_16_16_16,
  DFMT_32_32_32,
  DFMT_32_32_32_32,
  DFMT_RESERVED_15,

  DFMT_MIN = DFMT_INVALID,
  DFMT_MAX = DFMT_RESERVED_15,

  DFMT_UNDEF = -1,
  DFMT_DEFAULT = DFMT_8,

  DFMT_SHIFT = 0,
  DFMT_MASK = 0xF
};

// Numeric format values 0..7. Value 6 has two names; NFMT_FLOAT continues the
// sequence after the alias and is therefore 7.
enum NumFormat : int64_t {
  NFMT_UNORM = 0,
  NFMT_SNORM,
  NFMT_USCALED,
  NFMT_SSCALED,
  NFMT_UINT,
  NFMT_SINT,
  NFMT_RESERVED_6,                  // VI and GFX9
  NFMT_SNORM_OGL = NFMT_RESERVED_6, // SI and CI only
  NFMT_FLOAT,

  NFMT_MIN = NFMT_UNORM,
  NFMT_MAX = NFMT_FLOAT,

  NFMT_UNDEF = -1,
  NFMT_DEFAULT = NFMT_UNORM,

  NFMT_SHIFT = 4,
  NFMT_MASK = 7
};

// Merged value: DFMT in bits 0..3 and NFMT in bits 4..6, as given by the
// *_SHIFT and *_MASK constants above.
enum MergedFormat : int64_t {
  DFMT_NFMT_UNDEF = -1,
  DFMT_NFMT_DEFAULT = ((DFMT_DEFAULT & DFMT_MASK) << DFMT_SHIFT) |
                      ((NFMT_DEFAULT & NFMT_MASK) << NFMT_SHIFT),

  // All bits that belong to either field.
  DFMT_NFMT_MASK = (DFMT_MASK << DFMT_SHIFT) | (NFMT_MASK << NFMT_SHIFT),

  DFMT_NFMT_MAX = DFMT_NFMT_MASK
};

// Limits and sentinels shared by the unified-format numberings.
enum UnifiedFormatCommon : int64_t {
  UFMT_MAX = 127,
  UFMT_UNDEF = -1,
  UFMT_DEFAULT = 1
};

} // namespace MTBUFFormat
| 668 | |
namespace UfmtGFX10 {
// Unified format numbering in the UfmtGFX10 namespace. Values are consecutive
// from 0 to 77; the first enumerator of every group carries its value
// explicitly so that a number can be located without counting lines.
enum UnifiedFormat : int64_t {
  UFMT_INVALID = 0,

  // 8: 1..6
  UFMT_8_UNORM = 1,
  UFMT_8_SNORM,
  UFMT_8_USCALED,
  UFMT_8_SSCALED,
  UFMT_8_UINT,
  UFMT_8_SINT,

  // 16: 7..13
  UFMT_16_UNORM = 7,
  UFMT_16_SNORM,
  UFMT_16_USCALED,
  UFMT_16_SSCALED,
  UFMT_16_UINT,
  UFMT_16_SINT,
  UFMT_16_FLOAT,

  // 8_8: 14..19
  UFMT_8_8_UNORM = 14,
  UFMT_8_8_SNORM,
  UFMT_8_8_USCALED,
  UFMT_8_8_SSCALED,
  UFMT_8_8_UINT,
  UFMT_8_8_SINT,

  // 32: 20..22
  UFMT_32_UINT = 20,
  UFMT_32_SINT,
  UFMT_32_FLOAT,

  // 16_16: 23..29
  UFMT_16_16_UNORM = 23,
  UFMT_16_16_SNORM,
  UFMT_16_16_USCALED,
  UFMT_16_16_SSCALED,
  UFMT_16_16_UINT,
  UFMT_16_16_SINT,
  UFMT_16_16_FLOAT,

  // 10_11_11: 30..36
  UFMT_10_11_11_UNORM = 30,
  UFMT_10_11_11_SNORM,
  UFMT_10_11_11_USCALED,
  UFMT_10_11_11_SSCALED,
  UFMT_10_11_11_UINT,
  UFMT_10_11_11_SINT,
  UFMT_10_11_11_FLOAT,

  // 11_11_10: 37..43
  UFMT_11_11_10_UNORM = 37,
  UFMT_11_11_10_SNORM,
  UFMT_11_11_10_USCALED,
  UFMT_11_11_10_SSCALED,
  UFMT_11_11_10_UINT,
  UFMT_11_11_10_SINT,
  UFMT_11_11_10_FLOAT,

  // 10_10_10_2: 44..49
  UFMT_10_10_10_2_UNORM = 44,
  UFMT_10_10_10_2_SNORM,
  UFMT_10_10_10_2_USCALED,
  UFMT_10_10_10_2_SSCALED,
  UFMT_10_10_10_2_UINT,
  UFMT_10_10_10_2_SINT,

  // 2_10_10_10: 50..55
  UFMT_2_10_10_10_UNORM = 50,
  UFMT_2_10_10_10_SNORM,
  UFMT_2_10_10_10_USCALED,
  UFMT_2_10_10_10_SSCALED,
  UFMT_2_10_10_10_UINT,
  UFMT_2_10_10_10_SINT,

  // 8_8_8_8: 56..61
  UFMT_8_8_8_8_UNORM = 56,
  UFMT_8_8_8_8_SNORM,
  UFMT_8_8_8_8_USCALED,
  UFMT_8_8_8_8_SSCALED,
  UFMT_8_8_8_8_UINT,
  UFMT_8_8_8_8_SINT,

  // 32_32: 62..64
  UFMT_32_32_UINT = 62,
  UFMT_32_32_SINT,
  UFMT_32_32_FLOAT,

  // 16_16_16_16: 65..71
  UFMT_16_16_16_16_UNORM = 65,
  UFMT_16_16_16_16_SNORM,
  UFMT_16_16_16_16_USCALED,
  UFMT_16_16_16_16_SSCALED,
  UFMT_16_16_16_16_UINT,
  UFMT_16_16_16_16_SINT,
  UFMT_16_16_16_16_FLOAT,

  // 32_32_32: 72..74
  UFMT_32_32_32_UINT = 72,
  UFMT_32_32_32_SINT,
  UFMT_32_32_32_FLOAT,

  // 32_32_32_32: 75..77
  UFMT_32_32_32_32_UINT = 75,
  UFMT_32_32_32_32_SINT,
  UFMT_32_32_32_32_FLOAT,

  // Inclusive bounds of the numbering.
  UFMT_FIRST = UFMT_INVALID,
  UFMT_LAST = UFMT_32_32_32_32_FLOAT,
};

} // namespace UfmtGFX10
| 768 | |
namespace UfmtGFX11 {
// Unified format numbering in the UfmtGFX11 namespace. Values are consecutive
// from 0 to 63; the first enumerator of every group carries its value
// explicitly so that a number can be located without counting lines.
enum UnifiedFormat : int64_t {
  UFMT_INVALID = 0,

  // 8: 1..6
  UFMT_8_UNORM = 1,
  UFMT_8_SNORM,
  UFMT_8_USCALED,
  UFMT_8_SSCALED,
  UFMT_8_UINT,
  UFMT_8_SINT,

  // 16: 7..13
  UFMT_16_UNORM = 7,
  UFMT_16_SNORM,
  UFMT_16_USCALED,
  UFMT_16_SSCALED,
  UFMT_16_UINT,
  UFMT_16_SINT,
  UFMT_16_FLOAT,

  // 8_8: 14..19
  UFMT_8_8_UNORM = 14,
  UFMT_8_8_SNORM,
  UFMT_8_8_USCALED,
  UFMT_8_8_SSCALED,
  UFMT_8_8_UINT,
  UFMT_8_8_SINT,

  // 32: 20..22
  UFMT_32_UINT = 20,
  UFMT_32_SINT,
  UFMT_32_FLOAT,

  // 16_16: 23..29
  UFMT_16_16_UNORM = 23,
  UFMT_16_16_SNORM,
  UFMT_16_16_USCALED,
  UFMT_16_16_SSCALED,
  UFMT_16_16_UINT,
  UFMT_16_16_SINT,
  UFMT_16_16_FLOAT,

  // 10_11_11 and 11_11_10 each have a single FLOAT entry here: 30 and 31
  UFMT_10_11_11_FLOAT = 30,

  UFMT_11_11_10_FLOAT = 31,

  // 10_10_10_2: 32..35
  UFMT_10_10_10_2_UNORM = 32,
  UFMT_10_10_10_2_SNORM,
  UFMT_10_10_10_2_UINT,
  UFMT_10_10_10_2_SINT,

  // 2_10_10_10: 36..41
  UFMT_2_10_10_10_UNORM = 36,
  UFMT_2_10_10_10_SNORM,
  UFMT_2_10_10_10_USCALED,
  UFMT_2_10_10_10_SSCALED,
  UFMT_2_10_10_10_UINT,
  UFMT_2_10_10_10_SINT,

  // 8_8_8_8: 42..47
  UFMT_8_8_8_8_UNORM = 42,
  UFMT_8_8_8_8_SNORM,
  UFMT_8_8_8_8_USCALED,
  UFMT_8_8_8_8_SSCALED,
  UFMT_8_8_8_8_UINT,
  UFMT_8_8_8_8_SINT,

  // 32_32: 48..50
  UFMT_32_32_UINT = 48,
  UFMT_32_32_SINT,
  UFMT_32_32_FLOAT,

  // 16_16_16_16: 51..57
  UFMT_16_16_16_16_UNORM = 51,
  UFMT_16_16_16_16_SNORM,
  UFMT_16_16_16_16_USCALED,
  UFMT_16_16_16_16_SSCALED,
  UFMT_16_16_16_16_UINT,
  UFMT_16_16_16_16_SINT,
  UFMT_16_16_16_16_FLOAT,

  // 32_32_32: 58..60
  UFMT_32_32_32_UINT = 58,
  UFMT_32_32_32_SINT,
  UFMT_32_32_32_FLOAT,

  // 32_32_32_32: 61..63
  UFMT_32_32_32_32_UINT = 61,
  UFMT_32_32_32_32_SINT,
  UFMT_32_32_32_32_FLOAT,

  // Inclusive bounds of the numbering.
  UFMT_FIRST = UFMT_INVALID,
  UFMT_LAST = UFMT_32_32_32_32_FLOAT,
};

} // namespace UfmtGFX11
| 854 | |
namespace Swizzle { // Encoding of swizzle macro used in ds_swizzle_b32.

// Ids of the symbolic swizzle modes, numbered consecutively from 0.
enum Id : unsigned { // id of symbolic names
  ID_QUAD_PERM = 0,
  ID_BITMASK_PERM,
  ID_SWAP,
  ID_REVERSE,
  ID_BROADCAST,
  ID_FFT,
  ID_ROTATE
};

// Bit patterns, masks, shifts and limits used to build or recognize the
// encoded swizzle value. Several names share a numeric value.
// clang-format off
enum EncBits : unsigned {

  // swizzle mode encodings
  // Each *_ENC value is the pattern expected under the mask listed after it.

  QUAD_PERM_ENC         = 0x8000,
  QUAD_PERM_ENC_MASK    = 0xFF00,

  BITMASK_PERM_ENC      = 0x0000,
  BITMASK_PERM_ENC_MASK = 0x8000,

  FFT_MODE_ENC          = 0xE000,

  ROTATE_MODE_ENC       = 0xC000,
  FFT_ROTATE_MODE_MASK  = 0xF000,

  // Same values as ROTATE_MODE_ENC and FFT_MODE_ENC respectively.
  ROTATE_MODE_LO        = 0xC000,
  FFT_MODE_LO           = 0xE000,

  // QUAD_PERM encodings

  LANE_MASK             = 0x3,
  LANE_MAX              = LANE_MASK,
  LANE_SHIFT            = 2,
  LANE_NUM              = 4,

  // BITMASK_PERM encodings

  BITMASK_MASK          = 0x1F,
  BITMASK_MAX           = BITMASK_MASK,
  BITMASK_WIDTH         = 5,

  // The three shifts are multiples of BITMASK_WIDTH (0, 5, 10).
  BITMASK_AND_SHIFT     = 0,
  BITMASK_OR_SHIFT      = 5,
  BITMASK_XOR_SHIFT     = 10,

  // FFT encodings

  FFT_SWIZZLE_MASK      = 0x1F,
  FFT_SWIZZLE_MAX       = 0x1F,

  // ROTATE encodings
  ROTATE_MAX_SIZE       = 0x1F,
  ROTATE_DIR_SHIFT      = 10, // bit position of rotate direction
  ROTATE_DIR_MASK       = 0x1,
  ROTATE_SIZE_SHIFT     = 5, // bit position of rotate size
  ROTATE_SIZE_MASK      = ROTATE_MAX_SIZE,
};
// clang-format on

} // namespace Swizzle
| 918 | |
// Constants for SDWA operands: selector values, dst_unused values, and the
// SDWA9 source / VOPC-destination encoding ranges.
namespace SDWA {

// Selector values 0..6.
enum SdwaSel : unsigned {
  BYTE_0 = 0,
  BYTE_1 = 1,
  BYTE_2 = 2,
  BYTE_3 = 3,
  WORD_0 = 4,
  WORD_1 = 5,
  DWORD = 6,
};

// Values 0..2 for the handling of unused destination bits.
enum DstUnused : unsigned {
  UNUSED_PAD = 0,
  UNUSED_SEXT = 1,
  UNUSED_PRESERVE = 2,
};

// Masks and inclusive value ranges of the SDWA9 encoding. The VGPR range
// occupies 0..255 and the SGPR range starts at 256, which is the same value as
// SRC_SGPR_MASK (bit 8).
enum SDWA9EncValues : unsigned {
  SRC_SGPR_MASK = 0x100,
  SRC_VGPR_MASK = 0xFF,
  VOPC_DST_VCC_MASK = 0x80,
  VOPC_DST_SGPR_MASK = 0x7F,

  SRC_VGPR_MIN = 0,
  SRC_VGPR_MAX = 255,
  SRC_SGPR_MIN = 256,
  SRC_SGPR_MAX_SI = 357,
  SRC_SGPR_MAX_GFX10 = 361,
  SRC_TTMP_MIN = 364,
  SRC_TTMP_MAX = 379,
};

} // namespace SDWA
| 953 | |
// Constants for DPP operands: dpp_ctrl values and fetch-inactive (FI) values.
namespace DPP {

// dpp_ctrl values, listed in ascending order. *_FIRST / *_LAST pairs are
// inclusive ranges. A few values carry two names: each DPP_UNUSED1..3 entry
// equals the ROW_*0 entry that follows it, and the ROW_NEWBCAST range equals
// the ROW_SHARE range.
// clang-format off
enum DppCtrl : unsigned {
  QUAD_PERM_FIRST   = 0,
  QUAD_PERM_ID      = 0xE4, // identity permutation
  QUAD_PERM_LAST    = 0xFF,
  DPP_UNUSED1       = 0x100,
  ROW_SHL0          = 0x100,
  ROW_SHL_FIRST     = 0x101,
  ROW_SHL_LAST      = 0x10F,
  DPP_UNUSED2       = 0x110,
  ROW_SHR0          = 0x110,
  ROW_SHR_FIRST     = 0x111,
  ROW_SHR_LAST      = 0x11F,
  DPP_UNUSED3       = 0x120,
  ROW_ROR0          = 0x120,
  ROW_ROR_FIRST     = 0x121,
  ROW_ROR_LAST      = 0x12F,
  WAVE_SHL1         = 0x130,
  DPP_UNUSED4_FIRST = 0x131,
  DPP_UNUSED4_LAST  = 0x133,
  WAVE_ROL1         = 0x134,
  DPP_UNUSED5_FIRST = 0x135,
  DPP_UNUSED5_LAST  = 0x137,
  WAVE_SHR1         = 0x138,
  DPP_UNUSED6_FIRST = 0x139,
  DPP_UNUSED6_LAST  = 0x13B,
  WAVE_ROR1         = 0x13C,
  DPP_UNUSED7_FIRST = 0x13D,
  DPP_UNUSED7_LAST  = 0x13F,
  ROW_MIRROR        = 0x140,
  ROW_HALF_MIRROR   = 0x141,
  BCAST15           = 0x142,
  BCAST31           = 0x143,
  DPP_UNUSED8_FIRST = 0x144,
  DPP_UNUSED8_LAST  = 0x14F,
  ROW_NEWBCAST_FIRST= 0x150,
  ROW_NEWBCAST_LAST = 0x15F,
  ROW_SHARE0        = 0x150,
  ROW_SHARE_FIRST   = 0x150,
  ROW_SHARE_LAST    = 0x15F,
  ROW_XMASK0        = 0x160,
  ROW_XMASK_FIRST   = 0x160,
  ROW_XMASK_LAST    = 0x16F,
  DPP_LAST          = ROW_XMASK_LAST
};
// clang-format on

// FI values. The DPP_FI_* pair uses 0/1 while the DPP8_FI_* pair uses
// 0xE9/0xEA.
enum DppFiMode {
  DPP_FI_0  = 0,
  DPP_FI_1  = 1,
  DPP8_FI_0 = 0xE9,
  DPP8_FI_1 = 0xEA,
};

} // namespace DPP
| 1011 | |
// Export target numbers and the largest index available per target kind.
namespace Exp {

enum Target : unsigned {
  // Target numbers. For ranged kinds only the end points are named.
  ET_MRT0 = 0,
  ET_MRT7 = 7,
  ET_MRTZ = 8,
  ET_NULL = 9,             // Pre-GFX11
  ET_POS0 = 12,
  ET_POS3 = 15,
  ET_POS4 = 16,            // GFX10+
  ET_POS_LAST = ET_POS4,   // Highest pos used on any subtarget
  ET_PRIM = 20,            // GFX10+
  ET_DUAL_SRC_BLEND0 = 21, // GFX11+
  ET_DUAL_SRC_BLEND1 = 22, // GFX11+
  ET_PARAM0 = 32,          // Pre-GFX11
  ET_PARAM31 = 63,         // Pre-GFX11

  // Largest index per kind. Kinds with a single target have index 0 only.
  ET_NULL_MAX_IDX = 0,
  ET_MRTZ_MAX_IDX = 0,
  ET_PRIM_MAX_IDX = 0,
  // For ranged kinds this is the distance between the last and first target.
  ET_MRT_MAX_IDX = ET_MRT7 - ET_MRT0,
  ET_POS_MAX_IDX = ET_POS_LAST - ET_POS0,
  ET_DUAL_SRC_BLEND_MAX_IDX = ET_DUAL_SRC_BLEND1 - ET_DUAL_SRC_BLEND0,
  ET_PARAM_MAX_IDX = ET_PARAM31 - ET_PARAM0,

  ET_INVALID = 255,
};

} // namespace Exp
| 1041 | |
// Operand values used with WMMA instructions: matrix format, matrix scale and
// matrix scale format selectors.
namespace WMMA {
// Matrix format selector, values 0..4.
// NOTE(review): the numeric values equal those of MFMAScaleFormats elsewhere
// in this file (0..4 in the same FP8/BF8/FP6/BF6/FP4 order) - confirm whether
// the two are meant to stay aligned.
enum MatrixFMT : unsigned {
  MATRIX_FMT_FP8 = 0,
  MATRIX_FMT_BF8 = 1,
  MATRIX_FMT_FP6 = 2,
  MATRIX_FMT_BF6 = 3,
  MATRIX_FMT_FP4 = 4
};

// Matrix scale selector, values 0..1.
enum MatrixScale : unsigned {
  MATRIX_SCALE_ROW0 = 0,
  MATRIX_SCALE_ROW1 = 1,
};

// Matrix scale format selector, values 0..2.
enum MatrixScaleFmt : unsigned {
  MATRIX_SCALE_FMT_E8 = 0,
  MATRIX_SCALE_FMT_E5M3 = 1,
  MATRIX_SCALE_FMT_E4M3 = 2
};
} // namespace WMMA
| 1062 | |
namespace VOP3PEncoding {

// Single-bit masks within a 64-bit value for the three OP_SEL_HI bits.
// Note that OP_SEL_HI_2 is bit 14, not adjacent to OP_SEL_HI_0/1 (bits 59/60).
enum OpSel : uint64_t {
  OP_SEL_HI_0 = uint64_t{1} << 59,
  OP_SEL_HI_1 = uint64_t{1} << 60,
  OP_SEL_HI_2 = uint64_t{1} << 14,
};

} // namespace VOP3PEncoding
| 1072 | |
namespace ImplicitArg {

// Offsets of the implicit kernel arguments for code object version 5.
enum Offset_COV5 : unsigned {
  // Contiguous group at offsets 80..112, spaced 8 apart.
  HOSTCALL_PTR_OFFSET = 80,
  MULTIGRID_SYNC_ARG_OFFSET = 88,
  HEAP_PTR_OFFSET = 96,
  DEFAULT_QUEUE_OFFSET = 104,
  COMPLETION_ACTION_OFFSET = 112,

  // Second group starting at offset 192.
  PRIVATE_BASE_OFFSET = 192,
  SHARED_BASE_OFFSET = 196,
  QUEUE_PTR_OFFSET = 200,
};

} // namespace ImplicitArg
| 1089 | |
namespace MFMAScaleFormats {

// Matrix format selectors for F8F6F4 MFMA operations; these are the values
// placed in cbsz/blgp.
enum MFMAScaleFormats {
  FP8_E4M3 = 0,
  FP8_E5M2 = 1,
  FP6_E2M3 = 2,
  FP6_E3M2 = 3,
  FP4_E2M1 = 4,
};

} // namespace MFMAScaleFormats
| 1101 | |
namespace VirtRegFlag {

// Flags attached to virtual registers for target-specific handling during
// codegen. Each enumerator is a distinct bit of an 8-bit value.
enum Register_Flag : uint8_t {
  // Register operand in a whole-wave mode operation.
  WWM_REG = 0x01, // bit 0
};

} // namespace VirtRegFlag
| 1111 | |
| 1112 | } // namespace AMDGPU |
| 1113 | |
namespace AMDGPU {
namespace Barrier {

// Barrier identifiers. The special barriers use negative values, while the
// named barriers span NAMED_BARRIER_FIRST..NAMED_BARRIER_LAST (1..16).
enum Type {
  WORKGROUP = -1,
  TRAP = -2,
  CLUSTER = -3,
  CLUSTER_TRAP = -4,

  NAMED_BARRIER_FIRST = 1,
  NAMED_BARRIER_LAST = 16,
};

// Barrier scope values.
enum {
  BARRIER_SCOPE_WORKGROUP = 0,
};

} // namespace Barrier
} // namespace AMDGPU
| 1132 | |
| 1133 | // clang-format off |
| 1134 | |
// SPI_SHADER_PGM_RSRC1_PS. S_* masks a value and shifts it into its field,
// G_* extracts the field again, C_* is the inverted mask of the field.
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS 0x00B028
#define   S_00B028_VGPRS(x) (((x) & 0x3F) << 0)
#define   S_00B028_SGPRS(x) (((x) & 0x0F) << 6)
#define   S_00B028_MEM_ORDERED(x) (((x) & 0x1) << 25)
#define   G_00B028_MEM_ORDERED(x) (((x) >> 25) & 0x1)
#define   C_00B028_MEM_ORDERED 0xFDFFFFFF
| 1141 | |
// SPI_SHADER_PGM_RSRC2_PS. S_00B02C_EXTRA_LDS_SIZE masks a value to 8 bits and
// shifts it into bits 15:8.
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS 0x00B02C
#define   S_00B02C_EXTRA_LDS_SIZE(x) (((x) & 0xFF) << 8)
// SPI_SHADER_PGM_RSRC1_VS. MEM_ORDERED is the single bit 27.
#define R_00B128_SPI_SHADER_PGM_RSRC1_VS 0x00B128
#define   S_00B128_MEM_ORDERED(x) (((x) & 0x1) << 27)
#define   G_00B128_MEM_ORDERED(x) (((x) >> 27) & 0x1)
#define   C_00B128_MEM_ORDERED 0xF7FFFFFF
| 1148 | |
// SPI_SHADER_PGM_RSRC1_GS. WGP_MODE is bit 27 and MEM_ORDERED is bit 25.
#define R_00B228_SPI_SHADER_PGM_RSRC1_GS 0x00B228
#define   S_00B228_WGP_MODE(x) (((x) & 0x1) << 27)
#define   G_00B228_WGP_MODE(x) (((x) >> 27) & 0x1)
#define   C_00B228_WGP_MODE 0xF7FFFFFF
#define   S_00B228_MEM_ORDERED(x) (((x) & 0x1) << 25)
#define   G_00B228_MEM_ORDERED(x) (((x) >> 25) & 0x1)
#define   C_00B228_MEM_ORDERED 0xFDFFFFFF
| 1156 | |
// SPI_SHADER_PGM_RSRC1_ES: no field helpers are defined here.
#define R_00B328_SPI_SHADER_PGM_RSRC1_ES 0x00B328

// SPI_SHADER_PGM_RSRC1_HS. WGP_MODE is bit 26 and MEM_ORDERED is bit 24.
#define R_00B428_SPI_SHADER_PGM_RSRC1_HS 0x00B428
#define   S_00B428_WGP_MODE(x) (((x) & 0x1) << 26)
#define   G_00B428_WGP_MODE(x) (((x) >> 26) & 0x1)
#define   C_00B428_WGP_MODE 0xFBFFFFFF
#define   S_00B428_MEM_ORDERED(x) (((x) & 0x1) << 24)
#define   G_00B428_MEM_ORDERED(x) (((x) >> 24) & 0x1)
#define   C_00B428_MEM_ORDERED 0xFEFFFFFF

// SPI_SHADER_PGM_RSRC1_LS: no field helpers are defined here.
#define R_00B528_SPI_SHADER_PGM_RSRC1_LS 0x00B528
| 1167 | |
// COMPUTE_PGM_RSRC2. For every field, S_* masks a value and shifts it into
// position, G_* extracts it again, and C_* is the inverted mask of the field.
#define R_00B84C_COMPUTE_PGM_RSRC2 0x00B84C
// Bit 0.
#define   S_00B84C_SCRATCH_EN(x) (((x) & 0x1) << 0)
#define   G_00B84C_SCRATCH_EN(x) (((x) >> 0) & 0x1)
#define   C_00B84C_SCRATCH_EN 0xFFFFFFFE
// Bits 5:1.
#define   S_00B84C_USER_SGPR(x) (((x) & 0x1F) << 1)
#define   G_00B84C_USER_SGPR(x) (((x) >> 1) & 0x1F)
#define   C_00B84C_USER_SGPR 0xFFFFFFC1
// Bit 6.
#define   S_00B84C_TRAP_HANDLER(x) (((x) & 0x1) << 6)
#define   G_00B84C_TRAP_HANDLER(x) (((x) >> 6) & 0x1)
#define   C_00B84C_TRAP_HANDLER 0xFFFFFFBF
// Bit 7.
#define   S_00B84C_TGID_X_EN(x) (((x) & 0x1) << 7)
#define   G_00B84C_TGID_X_EN(x) (((x) >> 7) & 0x1)
#define   C_00B84C_TGID_X_EN 0xFFFFFF7F
// Bit 8.
#define   S_00B84C_TGID_Y_EN(x) (((x) & 0x1) << 8)
#define   G_00B84C_TGID_Y_EN(x) (((x) >> 8) & 0x1)
#define   C_00B84C_TGID_Y_EN 0xFFFFFEFF
// Bit 9.
#define   S_00B84C_TGID_Z_EN(x) (((x) & 0x1) << 9)
#define   G_00B84C_TGID_Z_EN(x) (((x) >> 9) & 0x1)
#define   C_00B84C_TGID_Z_EN 0xFFFFFDFF
// Bit 10.
#define   S_00B84C_TG_SIZE_EN(x) (((x) & 0x1) << 10)
#define   G_00B84C_TG_SIZE_EN(x) (((x) >> 10) & 0x1)
#define   C_00B84C_TG_SIZE_EN 0xFFFFFBFF
// Bits 12:11.
#define   S_00B84C_TIDIG_COMP_CNT(x) (((x) & 0x03) << 11)
#define   G_00B84C_TIDIG_COMP_CNT(x) (((x) >> 11) & 0x03)
#define   C_00B84C_TIDIG_COMP_CNT 0xFFFFE7FF
/* CIK */
// Bits 14:13.
#define   S_00B84C_EXCP_EN_MSB(x) (((x) & 0x03) << 13)
#define   G_00B84C_EXCP_EN_MSB(x) (((x) >> 13) & 0x03)
#define   C_00B84C_EXCP_EN_MSB 0xFFFF9FFF
/* */
// Bits 23:15.
#define   S_00B84C_LDS_SIZE(x) (((x) & 0x1FF) << 15)
#define   G_00B84C_LDS_SIZE(x) (((x) >> 15) & 0x1FF)
#define   C_00B84C_LDS_SIZE 0xFF007FFF
// Bits 30:24.
#define   S_00B84C_EXCP_EN(x) (((x) & 0x7F) << 24)
#define   G_00B84C_EXCP_EN(x) (((x) >> 24) & 0x7F)
#define   C_00B84C_EXCP_EN 0x80FFFFFF
| 1204 | |
// SPI_PS_INPUT_ENA and SPI_PS_INPUT_ADDR; no field helpers are defined here.
#define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC
#define R_0286D0_SPI_PS_INPUT_ADDR 0x0286D0
| 1207 | |
// COMPUTE_PGM_RSRC1. For every field, S_* masks a value and shifts it into
// position, G_* extracts it again, and C_* is the inverted mask of the field.
#define R_00B848_COMPUTE_PGM_RSRC1 0x00B848
// Bits 5:0.
#define   S_00B848_VGPRS(x) (((x) & 0x3F) << 0)
#define   G_00B848_VGPRS(x) (((x) >> 0) & 0x3F)
#define   C_00B848_VGPRS 0xFFFFFFC0
// Bits 9:6.
#define   S_00B848_SGPRS(x) (((x) & 0x0F) << 6)
#define   G_00B848_SGPRS(x) (((x) >> 6) & 0x0F)
#define   C_00B848_SGPRS 0xFFFFFC3F
// Bits 11:10.
#define   S_00B848_PRIORITY(x) (((x) & 0x03) << 10)
#define   G_00B848_PRIORITY(x) (((x) >> 10) & 0x03)
#define   C_00B848_PRIORITY 0xFFFFF3FF
// Bits 19:12.
#define   S_00B848_FLOAT_MODE(x) (((x) & 0xFF) << 12)
#define   G_00B848_FLOAT_MODE(x) (((x) >> 12) & 0xFF)
#define   C_00B848_FLOAT_MODE 0xFFF00FFF
// Bit 20.
#define   S_00B848_PRIV(x) (((x) & 0x1) << 20)
#define   G_00B848_PRIV(x) (((x) >> 20) & 0x1)
#define   C_00B848_PRIV 0xFFEFFFFF
// Bit 21.
#define   S_00B848_DX10_CLAMP(x) (((x) & 0x1) << 21)
#define   G_00B848_DX10_CLAMP(x) (((x) >> 21) & 0x1)
#define   C_00B848_DX10_CLAMP 0xFFDFFFFF
// Bit 21 as well: RR_WG_MODE occupies the same bit as DX10_CLAMP.
#define   S_00B848_RR_WG_MODE(x) (((x) & 0x1) << 21)
#define   G_00B848_RR_WG_MODE(x) (((x) >> 21) & 0x1)
#define   C_00B848_RR_WG_MODE 0xFFDFFFFF
// Bit 22.
#define   S_00B848_DEBUG_MODE(x) (((x) & 0x1) << 22)
#define   G_00B848_DEBUG_MODE(x) (((x) >> 22) & 0x1)
#define   C_00B848_DEBUG_MODE 0xFFBFFFFF
// Bit 23.
#define   S_00B848_IEEE_MODE(x) (((x) & 0x1) << 23)
#define   G_00B848_IEEE_MODE(x) (((x) >> 23) & 0x1)
#define   C_00B848_IEEE_MODE 0xFF7FFFFF
// Bit 29.
#define   S_00B848_WGP_MODE(x) (((x) & 0x1) << 29)
#define   G_00B848_WGP_MODE(x) (((x) >> 29) & 0x1)
#define   C_00B848_WGP_MODE 0xDFFFFFFF
// Bit 30.
#define   S_00B848_MEM_ORDERED(x) (((x) & 0x1) << 30)
#define   G_00B848_MEM_ORDERED(x) (((x) >> 30) & 0x1)
#define   C_00B848_MEM_ORDERED 0xBFFFFFFF
// Bit 31.
#define   S_00B848_FWD_PROGRESS(x) (((x) & 0x1) << 31)
#define   G_00B848_FWD_PROGRESS(x) (((x) >> 31) & 0x1)
#define   C_00B848_FWD_PROGRESS 0x7FFFFFFF
| 1245 | |
// Helpers for building a FLOAT_MODE value.

// Rounding mode selectors.
#define FP_ROUND_ROUND_TO_NEAREST 0
#define FP_ROUND_ROUND_TO_INF 1
#define FP_ROUND_ROUND_TO_NEGINF 2
#define FP_ROUND_ROUND_TO_ZERO 3

// Rounding mode lives in bits 3:0: bits 1:0 are single precision and bits 3:2
// are double precision.
#define FP_ROUND_MODE_SP(x) ((x) & 0x3)
#define FP_ROUND_MODE_DP(x) (((x) & 0x3) << 2)

// Denormal handling selectors.
#define FP_DENORM_FLUSH_IN_FLUSH_OUT 0
#define FP_DENORM_FLUSH_OUT 1
#define FP_DENORM_FLUSH_IN 2
#define FP_DENORM_FLUSH_NONE 3

// Denormal handling lives in bits 7:4: bits 5:4 are single precision and bits
// 7:6 are double precision.
#define FP_DENORM_MODE_SP(x) (((x) & 0x3) << 4)
#define FP_DENORM_MODE_DP(x) (((x) & 0x3) << 6)
| 1267 | |
// COMPUTE_TMPRING_SIZE. WAVESIZE starts at bit 12; the field is 13 bits wide
// in the PreGFX11 variant, 15 bits in GFX11 and 18 bits in GFX12Plus.
#define R_00B860_COMPUTE_TMPRING_SIZE 0x00B860
#define   S_00B860_WAVESIZE_PreGFX11(x) (((x) & 0x1FFF) << 12)
#define   S_00B860_WAVESIZE_GFX11(x) (((x) & 0x7FFF) << 12)
#define   S_00B860_WAVESIZE_GFX12Plus(x) (((x) & 0x3FFFF) << 12)

// SPI_TMPRING_SIZE. Same WAVESIZE field layouts as COMPUTE_TMPRING_SIZE.
#define R_0286E8_SPI_TMPRING_SIZE 0x0286E8
#define   S_0286E8_WAVESIZE_PreGFX11(x) (((x) & 0x1FFF) << 12)
#define   S_0286E8_WAVESIZE_GFX11(x) (((x) & 0x7FFF) << 12)
#define   S_0286E8_WAVESIZE_GFX12Plus(x) (((x) & 0x3FFFF) << 12)
| 1277 | |
// *_W32_EN single-bit helpers, grouped under the register they belong to.

// VGT_SHADER_STAGES_EN: HS is bit 21, GS is bit 22, VS is bit 23.
#define R_028B54_VGT_SHADER_STAGES_EN 0x028B54
#define   S_028B54_HS_W32_EN(x) (((x) & 0x1) << 21)
#define   S_028B54_GS_W32_EN(x) (((x) & 0x1) << 22)
#define   S_028B54_VS_W32_EN(x) (((x) & 0x1) << 23)

// SPI_PS_IN_CONTROL: PS is bit 15.
#define R_0286D8_SPI_PS_IN_CONTROL 0x0286D8
#define   S_0286D8_PS_W32_EN(x) (((x) & 0x1) << 15)

// COMPUTE_DISPATCH_INITIATOR: CS is bit 15.
#define R_00B800_COMPUTE_DISPATCH_INITIATOR 0x00B800
#define   S_00B800_CS_W32_EN(x) (((x) & 0x1) << 15)
| 1286 | |
// Identifiers for the spilled SGPR and spilled VGPR entries.
#define R_SPILLED_SGPRS 0x4
#define R_SPILLED_VGPRS 0x8
| 1289 | |
| 1290 | // clang-format on |
| 1291 | |
| 1292 | } // End namespace llvm |
| 1293 | |
| 1294 | #endif |
| 1295 | |