| 1 | //===-- SIDefines.h - SI Helper Macros ----------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | /// \file |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | |
| 10 | #ifndef LLVM_LIB_TARGET_AMDGPU_SIDEFINES_H |
| 11 | #define LLVM_LIB_TARGET_AMDGPU_SIDEFINES_H |
| 12 | |
| 13 | #include "llvm/MC/MCInstrDesc.h" |
| 14 | |
| 15 | namespace llvm { |
| 16 | |
// Register-class flag layout. Keep in sync with the field bits in
// SIRegisterClass.
enum SIRCFlags : uint8_t {
  // Width in bits of the tuple-alignment field held in the low bits; the
  // register-kind bits are placed directly above it.
  RegTupleAlignUnitsWidth = 2,

  // Bit positions of the register-kind flags.
  HasVGPRBit = RegTupleAlignUnitsWidth,
  HasAGPRBit = HasVGPRBit + 1,
  HasSGPRbit = HasAGPRBit + 1,

  // Single-bit masks built from the positions above.
  HasVGPR = 1 << HasVGPRBit,
  HasAGPR = 1 << HasAGPRBit,
  HasSGPR = 1 << HasSGPRbit,

  // Field masks.
  RegTupleAlignUnitsMask = (1 << RegTupleAlignUnitsWidth) - 1,
  RegKindMask = HasVGPR | HasAGPR | HasSGPR
}; // enum SIRCFlags
| 31 | |
namespace SIEncodingFamily {
// Encoding-family identifiers. The numbering must match both the
// SIEncodingFamily class in SIInstrInfo.td and the column order of the
// getMCOpcodeGen table.
enum {
  SI      = 0,
  VI      = 1,
  SDWA    = 2,
  SDWA9   = 3,
  GFX80   = 4,
  GFX9    = 5,
  GFX10   = 6,
  SDWA10  = 7,
  GFX90A  = 8,
  GFX940  = 9,
  GFX11   = 10,
  GFX12   = 11,
  GFX1250 = 12,
};
} // namespace SIEncodingFamily
| 51 | |
namespace SIInstrFlags {
// Per-instruction flag bits. Keep in sync with the field bits in InstSI.
// Each enumerator is a single bit of a 64-bit word; bits 11, 27, 30 and 31
// are not assigned in this enum.
enum : uint64_t {
  // Low bits - basic encoding information.
  SALU = UINT64_C(1) << 0,
  VALU = UINT64_C(1) << 1,

  // SALU instruction formats.
  SOP1 = UINT64_C(1) << 2,
  SOP2 = UINT64_C(1) << 3,
  SOPC = UINT64_C(1) << 4,
  SOPK = UINT64_C(1) << 5,
  SOPP = UINT64_C(1) << 6,

  // VALU instruction formats.
  VOP1 = UINT64_C(1) << 7,
  VOP2 = UINT64_C(1) << 8,
  VOPC = UINT64_C(1) << 9,

  // TODO: Should this be split into VOP3 a and b?
  VOP3 = UINT64_C(1) << 10,
  VOP3P = UINT64_C(1) << 12,

  VINTRP = UINT64_C(1) << 13,
  SDWA = UINT64_C(1) << 14,
  DPP = UINT64_C(1) << 15,
  TRANS = UINT64_C(1) << 16,

  // Memory instruction formats.
  MUBUF = UINT64_C(1) << 17,
  MTBUF = UINT64_C(1) << 18,
  SMRD = UINT64_C(1) << 19,
  MIMG = UINT64_C(1) << 20,
  VIMAGE = UINT64_C(1) << 21,
  VSAMPLE = UINT64_C(1) << 22,
  EXP = UINT64_C(1) << 23,
  FLAT = UINT64_C(1) << 24,
  DS = UINT64_C(1) << 25,

  // Combined SGPR/VGPR spill bit. Telling the two apart is done in
  // isSGPRSpill and isVGPRSpill.
  Spill = UINT64_C(1) << 26,

  // LDSDIR instruction format.
  LDSDIR = UINT64_C(1) << 28,

  // VINTERP instruction format.
  VINTERP = UINT64_C(1) << 29,

  // High bits - other information.
  VM_CNT = UINT64_C(1) << 32,
  EXP_CNT = UINT64_C(1) << 33,
  LGKM_CNT = UINT64_C(1) << 34,

  WQM = UINT64_C(1) << 35,
  DisableWQM = UINT64_C(1) << 36,
  Gather4 = UINT64_C(1) << 37,

  TENSOR_CNT = UINT64_C(1) << 38,

  SCALAR_STORE = UINT64_C(1) << 39,
  FIXED_SIZE = UINT64_C(1) << 40,

  ASYNC_CNT = UINT64_C(1) << 41,

  VOP3_OPSEL = UINT64_C(1) << 42,
  maybeAtomic = UINT64_C(1) << 43,
  renamedInGFX9 = UINT64_C(1) << 44,

  // Is a clamp on FP type.
  FPClamp = UINT64_C(1) << 45,

  // Is an integer clamp.
  IntClamp = UINT64_C(1) << 46,

  // Clamps lo component of register.
  ClampLo = UINT64_C(1) << 47,

  // Clamps hi component of register.
  // Both ClampLo and ClampHi are set for a packed clamp.
  ClampHi = UINT64_C(1) << 48,

  // Is a packed VOP3P instruction.
  IsPacked = UINT64_C(1) << 49,

  // Is a D16 buffer instruction.
  D16Buf = UINT64_C(1) << 50,

  // FLAT instruction accesses FLAT_GLBL segment.
  FlatGlobal = UINT64_C(1) << 51,

  // Uses floating point double precision rounding mode.
  FPDPRounding = UINT64_C(1) << 52,

  // Instruction is FP atomic.
  FPAtomic = UINT64_C(1) << 53,

  // Is a MFMA instruction.
  IsMAI = UINT64_C(1) << 54,

  // Is a DOT instruction.
  IsDOT = UINT64_C(1) << 55,

  // FLAT instruction accesses FLAT_SCRATCH segment.
  FlatScratch = UINT64_C(1) << 56,

  // Atomic without return.
  IsAtomicNoRet = UINT64_C(1) << 57,

  // Atomic with return.
  IsAtomicRet = UINT64_C(1) << 58,

  // Is a WMMA instruction.
  IsWMMA = UINT64_C(1) << 59,

  // Whether tied sources will be read.
  TiedSourceNotRead = UINT64_C(1) << 60,

  // Is never uniform.
  IsNeverUniform = UINT64_C(1) << 61,

  // ds_gws_* instructions.
  GWS = UINT64_C(1) << 62,

  // Is a SWMMAC instruction.
  IsSWMMAC = UINT64_C(1) << 63,
};

// v_cmp_class_* etc. take a 10-bit mask selecting which class tests to run.
// The result is true if any of the selected tests is true.
enum ClassFlags : unsigned {
  S_NAN       = 1 << 0, // Signaling NaN
  Q_NAN       = 1 << 1, // Quiet NaN
  N_INFINITY  = 1 << 2, // Negative infinity
  N_NORMAL    = 1 << 3, // Negative normal
  N_SUBNORMAL = 1 << 4, // Negative subnormal
  N_ZERO      = 1 << 5, // Negative zero
  P_ZERO      = 1 << 6, // Positive zero
  P_SUBNORMAL = 1 << 7, // Positive subnormal
  P_NORMAL    = 1 << 8, // Positive normal
  P_INFINITY  = 1 << 9  // Positive infinity
};
} // namespace SIInstrFlags
| 195 | |
| 196 | namespace AMDGPU { |
| 197 | enum OperandType : unsigned { |
| 198 | /// Operands with register or 32-bit immediate |
| 199 | OPERAND_REG_IMM_INT32 = MCOI::OPERAND_FIRST_TARGET, |
| 200 | OPERAND_REG_IMM_INT64, |
| 201 | OPERAND_REG_IMM_INT16, |
| 202 | OPERAND_REG_IMM_FP32, |
| 203 | OPERAND_REG_IMM_FP64, |
| 204 | OPERAND_REG_IMM_BF16, |
| 205 | OPERAND_REG_IMM_FP16, |
| 206 | OPERAND_REG_IMM_V2BF16, |
| 207 | OPERAND_REG_IMM_V2FP16, |
| 208 | OPERAND_REG_IMM_V2INT16, |
| 209 | OPERAND_REG_IMM_V2INT32, |
| 210 | OPERAND_REG_IMM_V2FP32, |
| 211 | |
| 212 | /// Operands with register or inline constant |
| 213 | OPERAND_REG_INLINE_C_INT16, |
| 214 | OPERAND_REG_INLINE_C_INT32, |
| 215 | OPERAND_REG_INLINE_C_INT64, |
| 216 | OPERAND_REG_INLINE_C_BF16, |
| 217 | OPERAND_REG_INLINE_C_FP16, |
| 218 | OPERAND_REG_INLINE_C_FP32, |
| 219 | OPERAND_REG_INLINE_C_FP64, |
| 220 | OPERAND_REG_INLINE_C_V2INT16, |
| 221 | OPERAND_REG_INLINE_C_V2BF16, |
| 222 | OPERAND_REG_INLINE_C_V2FP16, |
| 223 | |
| 224 | // Operand for split barrier inline constant |
| 225 | OPERAND_INLINE_SPLIT_BARRIER_INT32, |
| 226 | |
| 227 | /// Operand with 32-bit immediate that uses the constant bus. |
| 228 | OPERAND_KIMM32, |
| 229 | OPERAND_KIMM16, |
| 230 | |
| 231 | /// Operands with an AccVGPR register or inline constant |
| 232 | OPERAND_REG_INLINE_AC_INT32, |
| 233 | OPERAND_REG_INLINE_AC_FP32, |
| 234 | OPERAND_REG_INLINE_AC_FP64, |
| 235 | |
| 236 | // Operand for source modifiers for VOP instructions |
| 237 | OPERAND_INPUT_MODS, |
| 238 | |
| 239 | // Operand for SDWA instructions |
| 240 | OPERAND_SDWA_VOPC_DST, |
| 241 | |
| 242 | OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32, |
| 243 | OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2FP32, |
| 244 | |
| 245 | OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16, |
| 246 | OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_FP64, |
| 247 | |
| 248 | OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT32, |
| 249 | OPERAND_REG_INLINE_AC_LAST = OPERAND_REG_INLINE_AC_FP64, |
| 250 | |
| 251 | OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32, |
| 252 | OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST, |
| 253 | |
| 254 | OPERAND_KIMM_FIRST = OPERAND_KIMM32, |
| 255 | OPERAND_KIMM_LAST = OPERAND_KIMM16 |
| 256 | |
| 257 | }; |
| 258 | } |
| 259 | |
// Bit-masks for input operand modifiers.
// NEG and SEXT occupy the same bit because they can't be set simultaneously.
namespace SISrcMods {
enum : unsigned {
  NONE       = 0,
  NEG        = 1 << 0,   // Floating-point negate modifier
  ABS        = 1 << 1,   // Floating-point absolute modifier
  SEXT       = NEG,      // Integer sign-extend modifier (same bit as NEG)
  NEG_HI     = ABS,      // Floating-point negate high packed component modifier.
  OP_SEL_0   = 1 << 2,
  OP_SEL_1   = 1 << 3,
  DST_OP_SEL = OP_SEL_1  // VOP3 dst op_sel (share mask with OP_SEL_1)
};
} // namespace SISrcMods
| 274 | |
// Output modifier selector values.
namespace SIOutMods {
enum : unsigned {
  NONE = 0,
  MUL2 = 1,
  MUL4 = 2,
  DIV2 = 3
};
} // namespace SIOutMods
| 283 | |
namespace AMDGPU {
namespace VGPRIndexMode {

// Ids of the symbolic operand names.
enum Id : unsigned {
  ID_SRC0 = 0,
  ID_SRC1,
  ID_SRC2,
  ID_DST,

  ID_MIN = ID_SRC0,
  ID_MAX = ID_DST
};

// Enable bits: each operand's bit index is its Id value.
enum EncBits : unsigned {
  OFF         = 0,
  SRC0_ENABLE = 1 << ID_SRC0,
  SRC1_ENABLE = 1 << ID_SRC1,
  SRC2_ENABLE = 1 << ID_SRC2,
  DST_ENABLE  = 1 << ID_DST,
  ENABLE_MASK = SRC0_ENABLE | SRC1_ENABLE | SRC2_ENABLE | DST_ENABLE,
  UNDEF       = 0xFFFF
};

} // namespace VGPRIndexMode
} // namespace AMDGPU
| 309 | |
// Assembler variant identifiers.
namespace AMDGPUAsmVariants {
enum : unsigned {
  DEFAULT  = 0,
  VOP3     = 1,
  SDWA     = 2,
  SDWA9    = 3,
  DPP      = 4,
  VOP3_DPP = 5
};
} // namespace AMDGPUAsmVariants
| 320 | |
| 321 | namespace AMDGPU { |
namespace EncValues { // Encoding values of enum9/8/7 operands

enum : unsigned {
  // Scalar registers.
  SGPR_MIN = 0,
  SGPR_MAX_SI = 101,
  SGPR_MAX_GFX10 = 105,

  // Trap temporaries.
  TTMP_VI_MIN = 112,
  TTMP_VI_MAX = 123,
  TTMP_GFX9PLUS_MIN = 108,
  TTMP_GFX9PLUS_MAX = 123,

  // Inline constants.
  INLINE_INTEGER_C_MIN = 128,
  INLINE_INTEGER_C_POSITIVE_MAX = 192, // 64
  INLINE_INTEGER_C_MAX = 208,
  INLINE_FLOATING_C_MIN = 240,
  INLINE_FLOATING_C_MAX = 248,
  LITERAL_CONST = 255,

  // Vector registers.
  VGPR_MIN = 256,
  VGPR_MAX = 511,
  IS_VGPR = 256, // Indicates VGPR or AGPR
};

} // namespace EncValues
| 344 | |
// Register codes as defined in the TableGen's HWEncoding field: an index in
// the low eight bits followed by single-bit flags.
namespace HWEncoding {
enum : unsigned {
  REG_IDX_MASK = 0xff,
  IS_VGPR      = 1 << 8,
  IS_AGPR      = 1 << 9,
  IS_HI16      = 1 << 10,
};
} // namespace HWEncoding
| 354 | |
namespace CPol {

// Cache-policy bits. The first group holds the pre-GFX12 bits; the GFX12+
// encoding below reuses some of the same bit positions.
enum CPol {
  GLC = 1,
  SLC = 2,
  DLC = 4,
  SCC = 16,
  SC0 = GLC,
  SC1 = SCC,
  NT = SLC,
  ALL_pregfx12 = GLC | SLC | DLC | SCC,
  SWZ_pregfx12 = 8,

  // Below are GFX12+ cache policy bits

  // Temporal hint
  TH = 0x7,      // All TH bits
  TH_RT = 0,     // regular
  TH_NT = 1,     // non-temporal
  TH_HT = 2,     // high-temporal
  TH_LU = 3,     // last use
  TH_WB = 3,     // regular (CU, SE), high-temporal with write-back (MALL)
  TH_NT_RT = 4,  // non-temporal (CU, SE), regular (MALL)
  TH_RT_NT = 5,  // regular (CU, SE), non-temporal (MALL)
  TH_NT_HT = 6,  // non-temporal (CU, SE), high-temporal (MALL)
  TH_NT_WB = 7,  // non-temporal (CU, SE), high-temporal with write-back (MALL)
  TH_BYPASS = 3, // only to be used with scope = 3

  TH_RESERVED = 7, // unused value for load insts

  // Bits of TH for atomics
  TH_ATOMIC_RETURN = GLC, // Returning vs non-returning
  TH_ATOMIC_NT = SLC,     // Non-temporal vs regular
  TH_ATOMIC_CASCADE = 4,  // Cascading vs regular

  // Scope
  SCOPE = 0x3 << 3, // All Scope bits
  SCOPE_CU = 0 << 3,
  SCOPE_SE = 1 << 3,
  SCOPE_DEV = 2 << 3,
  SCOPE_SYS = 3 << 3,

  SWZ = 1 << 6, // Swizzle bit

  ALL = TH | SCOPE,

  // Helper bits
  TH_TYPE_LOAD = 1 << 7,    // TH_LOAD policy
  TH_TYPE_STORE = 1 << 8,   // TH_STORE policy
  TH_TYPE_ATOMIC = 1 << 9,  // TH_ATOMIC policy
  TH_REAL_BYPASS = 1 << 10, // is TH=3 bypass policy or not

  // Volatile: preserves/signals operation volatility for buffer operations.
  // It is not a real instruction bit.
  VOLATILE = 1 << 31,
};

} // namespace CPol
| 413 | |
namespace SendMsg { // Encoding of SIMM16 used in s_sendmsg* insns.

// Message ID, width(4) [3:0]. Some numeric values are shared between
// generations; the per-entry comments say when each one applies.
enum Id {
  ID_INTERRUPT = 1,

  ID_GS_PreGFX11 = 2,      // replaced in GFX11
  ID_GS_DONE_PreGFX11 = 3, // replaced in GFX11

  ID_HS_TESSFACTOR_GFX11Plus = 2, // reused in GFX11
  ID_DEALLOC_VGPRS_GFX11Plus = 3, // reused in GFX11

  ID_SAVEWAVE = 4,           // added in GFX8, removed in GFX11
  ID_STALL_WAVE_GEN = 5,     // added in GFX9, removed in GFX12
  ID_HALT_WAVES = 6,         // added in GFX9, removed in GFX12
  ID_ORDERED_PS_DONE = 7,    // added in GFX9, removed in GFX11
  ID_EARLY_PRIM_DEALLOC = 8, // added in GFX9, removed in GFX10
  ID_GS_ALLOC_REQ = 9,       // added in GFX9
  ID_GET_DOORBELL = 10,      // added in GFX9, removed in GFX11
  ID_GET_DDID = 11,          // added in GFX10, removed in GFX11
  ID_SYSMSG = 15,

  ID_RTN_GET_DOORBELL = 128,
  ID_RTN_GET_DDID = 129,
  ID_RTN_GET_TMA = 130,
  ID_RTN_GET_REALTIME = 131,
  ID_RTN_SAVE_WAVE = 132,
  ID_RTN_GET_TBA = 133,
  ID_RTN_GET_TBA_TO_PC = 134,
  ID_RTN_GET_SE_AID_ID = 135,

  ID_MASK_PreGFX11_ = 0xF,
  ID_MASK_GFX11Plus_ = 0xFF
};

// Operation IDs for both GS and SYS messages.
enum Op {
  OP_SHIFT_ = 4,
  OP_NONE_ = 0,
  // Bits used for operation encoding
  OP_WIDTH_ = 3,
  OP_MASK_ = (((1 << OP_WIDTH_) - 1) << OP_SHIFT_),

  // GS operations are encoded in bits 5:4
  OP_GS_NOP = 0,
  OP_GS_CUT = 1,
  OP_GS_EMIT = 2,
  OP_GS_EMIT_CUT = 3,
  OP_GS_FIRST_ = OP_GS_NOP,

  // SYS operations are encoded in bits 6:4
  OP_SYS_ECC_ERR_INTERRUPT = 1,
  OP_SYS_REG_RD = 2,
  OP_SYS_HOST_TRAP_ACK = 3,
  OP_SYS_TTRACE_PC = 4,
  OP_SYS_FIRST_ = OP_SYS_ECC_ERR_INTERRUPT,
};

// Stream ID, (2) [9:8].
enum StreamId : unsigned {
  STREAM_ID_NONE_ = 0,
  STREAM_ID_DEFAULT_ = 0,
  STREAM_ID_LAST_ = 4,
  STREAM_ID_FIRST_ = STREAM_ID_DEFAULT_,
  STREAM_ID_SHIFT_ = 8,
  STREAM_ID_WIDTH_ = 2,
  STREAM_ID_MASK_ = (((1 << STREAM_ID_WIDTH_) - 1) << STREAM_ID_SHIFT_)
};

} // namespace SendMsg
| 479 | |
namespace Hwreg { // Encoding of SIMM16 used in s_setreg/getreg* insns.

// HwRegCode, (6) [5:0]. Several numbers are assigned to more than one name;
// the group comments say which targets each set belongs to.
enum Id {
  ID_MODE = 1,
  ID_STATUS = 2,
  ID_TRAPSTS = 3,
  ID_HW_ID = 4,
  ID_GPR_ALLOC = 5,
  ID_LDS_ALLOC = 6,
  ID_IB_STS = 7,
  ID_PERF_SNAPSHOT_DATA_gfx12 = 10,
  ID_PERF_SNAPSHOT_PC_LO_gfx12 = 11,
  ID_PERF_SNAPSHOT_PC_HI_gfx12 = 12,
  ID_MEM_BASES = 15,
  ID_TBA_LO = 16,
  ID_TBA_HI = 17,
  ID_TMA_LO = 18,
  ID_TMA_HI = 19,
  ID_FLAT_SCR_LO = 20,
  ID_FLAT_SCR_HI = 21,
  ID_XNACK_MASK = 22,
  ID_HW_ID1 = 23,
  ID_HW_ID2 = 24,
  ID_POPS_PACKER = 25,
  ID_PERF_SNAPSHOT_DATA_gfx11 = 27,
  ID_SHADER_CYCLES = 29,
  ID_SHADER_CYCLES_HI = 30,
  ID_DVGPR_ALLOC_LO = 31,
  ID_DVGPR_ALLOC_HI = 32,

  // Register numbers reused in GFX11
  ID_PERF_SNAPSHOT_PC_LO_gfx11 = 18,
  ID_PERF_SNAPSHOT_PC_HI_gfx11 = 19,

  // Register numbers reused in GFX12+
  ID_STATE_PRIV = 4,
  ID_PERF_SNAPSHOT_DATA1 = 15,
  ID_PERF_SNAPSHOT_DATA2 = 16,
  ID_EXCP_FLAG_PRIV = 17,
  ID_EXCP_FLAG_USER = 18,
  ID_TRAP_CTRL = 19,

  // GFX94* specific registers
  ID_XCC_ID = 20,
  ID_SQ_PERF_SNAPSHOT_DATA = 21,
  ID_SQ_PERF_SNAPSHOT_DATA1 = 22,
  ID_SQ_PERF_SNAPSHOT_PC_LO = 23,
  ID_SQ_PERF_SNAPSHOT_PC_HI = 24,
};

// Offset, (5) [10:6]
enum Offset : unsigned {
  OFFSET_MEM_VIOL = 8,
  OFFSET_ME_ID = 8, // in HW_ID2
};

// Field masks within the 32-bit mode register value.
enum ModeRegisterMasks : uint32_t {
  FP_ROUND_MASK = 0xf << 0,  // Bits 0..3
  FP_DENORM_MASK = 0xf << 4, // Bits 4..7
  DX10_CLAMP_MASK = 1 << 8,
  IEEE_MODE_MASK = 1 << 9,
  LOD_CLAMP_MASK = 1 << 10,
  DEBUG_MASK = 1 << 11,

  // EXCP_EN fields.
  EXCP_EN_INVALID_MASK = 1 << 12,
  EXCP_EN_INPUT_DENORMAL_MASK = 1 << 13,
  EXCP_EN_FLOAT_DIV0_MASK = 1 << 14,
  EXCP_EN_OVERFLOW_MASK = 1 << 15,
  EXCP_EN_UNDERFLOW_MASK = 1 << 16,
  EXCP_EN_INEXACT_MASK = 1 << 17,
  EXCP_EN_INT_DIV0_MASK = 1 << 18,

  GPR_IDX_EN_MASK = 1 << 27,
  VSKIP_MASK = 1 << 28,
  CSP_MASK = 0x7u << 29 // Bits 29..31
};

} // namespace Hwreg
| 558 | |
namespace MTBUFFormat {

// Data format codes, numbered consecutively from 0 (DFMT_INVALID) to 15
// (DFMT_RESERVED_15), plus the field's shift and mask.
enum DataFormat : int64_t {
  DFMT_INVALID = 0,
  DFMT_8,
  DFMT_16,
  DFMT_8_8,
  DFMT_32,
  DFMT_16_16,
  DFMT_10_11_11,
  DFMT_11_11_10,
  DFMT_10_10_10_2,
  DFMT_2_10_10_10,
  DFMT_8_8_8_8,
  DFMT_32_32,
  DFMT_16_16_16_16,
  DFMT_32_32_32,
  DFMT_32_32_32_32,
  DFMT_RESERVED_15,

  DFMT_MIN = DFMT_INVALID,
  DFMT_MAX = DFMT_RESERVED_15,

  DFMT_UNDEF = -1,
  DFMT_DEFAULT = DFMT_8,

  DFMT_SHIFT = 0,
  DFMT_MASK = 0xF
};

// Numeric format codes, numbered consecutively from 0 to 7, plus the field's
// shift and mask.
enum NumFormat : int64_t {
  NFMT_UNORM = 0,
  NFMT_SNORM,
  NFMT_USCALED,
  NFMT_SSCALED,
  NFMT_UINT,
  NFMT_SINT,
  NFMT_RESERVED_6,                  // VI and GFX9
  NFMT_SNORM_OGL = NFMT_RESERVED_6, // SI and CI only
  NFMT_FLOAT,

  NFMT_MIN = NFMT_UNORM,
  NFMT_MAX = NFMT_FLOAT,

  NFMT_UNDEF = -1,
  NFMT_DEFAULT = NFMT_UNORM,

  NFMT_SHIFT = 4,
  NFMT_MASK = 7
};

// Combined value: the data format and numeric format fields, each masked and
// moved to its own shift position, OR-ed together.
enum MergedFormat : int64_t {
  DFMT_NFMT_UNDEF = -1,
  DFMT_NFMT_DEFAULT = ((DFMT_DEFAULT & DFMT_MASK) << DFMT_SHIFT) |
                      ((NFMT_DEFAULT & NFMT_MASK) << NFMT_SHIFT),

  DFMT_NFMT_MASK = (DFMT_MASK << DFMT_SHIFT) | (NFMT_MASK << NFMT_SHIFT),

  DFMT_NFMT_MAX = DFMT_NFMT_MASK
};

// Constants common to the unified-format encodings.
enum UnifiedFormatCommon : int64_t {
  UFMT_MAX = 127,
  UFMT_UNDEF = -1,
  UFMT_DEFAULT = 1
};

} // namespace MTBUFFormat
| 628 | |
namespace UfmtGFX10 {
// Unified format codes in the UfmtGFX10 numbering. Values run consecutively
// from 0 through 77; the first member of every group carries its value
// explicitly so each group's position can be read off directly.
enum UnifiedFormat : int64_t {
  UFMT_INVALID = 0,

  UFMT_8_UNORM = 1,
  UFMT_8_SNORM,
  UFMT_8_USCALED,
  UFMT_8_SSCALED,
  UFMT_8_UINT,
  UFMT_8_SINT,

  UFMT_16_UNORM = 7,
  UFMT_16_SNORM,
  UFMT_16_USCALED,
  UFMT_16_SSCALED,
  UFMT_16_UINT,
  UFMT_16_SINT,
  UFMT_16_FLOAT,

  UFMT_8_8_UNORM = 14,
  UFMT_8_8_SNORM,
  UFMT_8_8_USCALED,
  UFMT_8_8_SSCALED,
  UFMT_8_8_UINT,
  UFMT_8_8_SINT,

  UFMT_32_UINT = 20,
  UFMT_32_SINT,
  UFMT_32_FLOAT,

  UFMT_16_16_UNORM = 23,
  UFMT_16_16_SNORM,
  UFMT_16_16_USCALED,
  UFMT_16_16_SSCALED,
  UFMT_16_16_UINT,
  UFMT_16_16_SINT,
  UFMT_16_16_FLOAT,

  UFMT_10_11_11_UNORM = 30,
  UFMT_10_11_11_SNORM,
  UFMT_10_11_11_USCALED,
  UFMT_10_11_11_SSCALED,
  UFMT_10_11_11_UINT,
  UFMT_10_11_11_SINT,
  UFMT_10_11_11_FLOAT,

  UFMT_11_11_10_UNORM = 37,
  UFMT_11_11_10_SNORM,
  UFMT_11_11_10_USCALED,
  UFMT_11_11_10_SSCALED,
  UFMT_11_11_10_UINT,
  UFMT_11_11_10_SINT,
  UFMT_11_11_10_FLOAT,

  UFMT_10_10_10_2_UNORM = 44,
  UFMT_10_10_10_2_SNORM,
  UFMT_10_10_10_2_USCALED,
  UFMT_10_10_10_2_SSCALED,
  UFMT_10_10_10_2_UINT,
  UFMT_10_10_10_2_SINT,

  UFMT_2_10_10_10_UNORM = 50,
  UFMT_2_10_10_10_SNORM,
  UFMT_2_10_10_10_USCALED,
  UFMT_2_10_10_10_SSCALED,
  UFMT_2_10_10_10_UINT,
  UFMT_2_10_10_10_SINT,

  UFMT_8_8_8_8_UNORM = 56,
  UFMT_8_8_8_8_SNORM,
  UFMT_8_8_8_8_USCALED,
  UFMT_8_8_8_8_SSCALED,
  UFMT_8_8_8_8_UINT,
  UFMT_8_8_8_8_SINT,

  UFMT_32_32_UINT = 62,
  UFMT_32_32_SINT,
  UFMT_32_32_FLOAT,

  UFMT_16_16_16_16_UNORM = 65,
  UFMT_16_16_16_16_SNORM,
  UFMT_16_16_16_16_USCALED,
  UFMT_16_16_16_16_SSCALED,
  UFMT_16_16_16_16_UINT,
  UFMT_16_16_16_16_SINT,
  UFMT_16_16_16_16_FLOAT,

  UFMT_32_32_32_UINT = 72,
  UFMT_32_32_32_SINT,
  UFMT_32_32_32_FLOAT,

  UFMT_32_32_32_32_UINT = 75,
  UFMT_32_32_32_32_SINT,
  UFMT_32_32_32_32_FLOAT,

  UFMT_FIRST = UFMT_INVALID,
  UFMT_LAST = UFMT_32_32_32_32_FLOAT,
};

} // namespace UfmtGFX10
| 728 | |
namespace UfmtGFX11 {
// Unified format codes in the UfmtGFX11 numbering. Values run consecutively
// from 0 through 63; the first member of every group carries its value
// explicitly so each group's position can be read off directly.
enum UnifiedFormat : int64_t {
  UFMT_INVALID = 0,

  UFMT_8_UNORM = 1,
  UFMT_8_SNORM,
  UFMT_8_USCALED,
  UFMT_8_SSCALED,
  UFMT_8_UINT,
  UFMT_8_SINT,

  UFMT_16_UNORM = 7,
  UFMT_16_SNORM,
  UFMT_16_USCALED,
  UFMT_16_SSCALED,
  UFMT_16_UINT,
  UFMT_16_SINT,
  UFMT_16_FLOAT,

  UFMT_8_8_UNORM = 14,
  UFMT_8_8_SNORM,
  UFMT_8_8_USCALED,
  UFMT_8_8_SSCALED,
  UFMT_8_8_UINT,
  UFMT_8_8_SINT,

  UFMT_32_UINT = 20,
  UFMT_32_SINT,
  UFMT_32_FLOAT,

  UFMT_16_16_UNORM = 23,
  UFMT_16_16_SNORM,
  UFMT_16_16_USCALED,
  UFMT_16_16_SSCALED,
  UFMT_16_16_UINT,
  UFMT_16_16_SINT,
  UFMT_16_16_FLOAT,

  UFMT_10_11_11_FLOAT = 30,

  UFMT_11_11_10_FLOAT = 31,

  UFMT_10_10_10_2_UNORM = 32,
  UFMT_10_10_10_2_SNORM,
  UFMT_10_10_10_2_UINT,
  UFMT_10_10_10_2_SINT,

  UFMT_2_10_10_10_UNORM = 36,
  UFMT_2_10_10_10_SNORM,
  UFMT_2_10_10_10_USCALED,
  UFMT_2_10_10_10_SSCALED,
  UFMT_2_10_10_10_UINT,
  UFMT_2_10_10_10_SINT,

  UFMT_8_8_8_8_UNORM = 42,
  UFMT_8_8_8_8_SNORM,
  UFMT_8_8_8_8_USCALED,
  UFMT_8_8_8_8_SSCALED,
  UFMT_8_8_8_8_UINT,
  UFMT_8_8_8_8_SINT,

  UFMT_32_32_UINT = 48,
  UFMT_32_32_SINT,
  UFMT_32_32_FLOAT,

  UFMT_16_16_16_16_UNORM = 51,
  UFMT_16_16_16_16_SNORM,
  UFMT_16_16_16_16_USCALED,
  UFMT_16_16_16_16_SSCALED,
  UFMT_16_16_16_16_UINT,
  UFMT_16_16_16_16_SINT,
  UFMT_16_16_16_16_FLOAT,

  UFMT_32_32_32_UINT = 58,
  UFMT_32_32_32_SINT,
  UFMT_32_32_32_FLOAT,

  UFMT_32_32_32_32_UINT = 61,
  UFMT_32_32_32_32_SINT,
  UFMT_32_32_32_32_FLOAT,

  UFMT_FIRST = UFMT_INVALID,
  UFMT_LAST = UFMT_32_32_32_32_FLOAT,
};

} // namespace UfmtGFX11
| 814 | |
namespace Swizzle { // Encoding of swizzle macro used in ds_swizzle_b32.

// Ids of the symbolic swizzle names, numbered consecutively from 0.
enum Id : unsigned {
  ID_QUAD_PERM = 0,
  ID_BITMASK_PERM,
  ID_SWAP,
  ID_REVERSE,
  ID_BROADCAST,
  ID_FFT,
  ID_ROTATE
};

// clang-format off
enum EncBits : unsigned {

  // swizzle mode encodings

  QUAD_PERM_ENC         = 0x8000,
  QUAD_PERM_ENC_MASK    = 0xFF00,

  BITMASK_PERM_ENC      = 0x0000,
  BITMASK_PERM_ENC_MASK = 0x8000,

  FFT_MODE_ENC          = 0xE000,

  ROTATE_MODE_ENC       = 0xC000,
  FFT_ROTATE_MODE_MASK  = 0xF000,

  ROTATE_MODE_LO        = 0xC000,
  FFT_MODE_LO           = 0xE000,

  // QUAD_PERM encodings

  LANE_MASK             = 0x3,
  LANE_MAX              = LANE_MASK,
  LANE_SHIFT            = 2,
  LANE_NUM              = 4,

  // BITMASK_PERM encodings

  BITMASK_MASK          = 0x1F,
  BITMASK_MAX           = BITMASK_MASK,
  BITMASK_WIDTH         = 5,

  BITMASK_AND_SHIFT     = 0,
  BITMASK_OR_SHIFT      = 5,
  BITMASK_XOR_SHIFT     = 10,

  // FFT encodings

  FFT_SWIZZLE_MASK      = 0x1F,
  FFT_SWIZZLE_MAX       = 0x1F,

  // ROTATE encodings

  ROTATE_MAX_SIZE       = 0x1F,
  ROTATE_DIR_SHIFT      = 10, // bit position of rotate direction
  ROTATE_DIR_MASK       = 0x1,
  ROTATE_SIZE_SHIFT     = 5,  // bit position of rotate size
  ROTATE_SIZE_MASK      = ROTATE_MAX_SIZE,
};
// clang-format on

} // namespace Swizzle
| 878 | |
namespace SDWA {

// Byte / word / dword selector values.
enum SdwaSel : unsigned {
  BYTE_0 = 0,
  BYTE_1 = 1,
  BYTE_2 = 2,
  BYTE_3 = 3,
  WORD_0 = 4,
  WORD_1 = 5,
  DWORD  = 6,
};

// Selector values for the destination-unused field.
enum DstUnused : unsigned {
  UNUSED_PAD      = 0,
  UNUSED_SEXT     = 1,
  UNUSED_PRESERVE = 2,
};

// SDWA9 operand encoding masks and value ranges.
enum SDWA9EncValues : unsigned {
  SRC_SGPR_MASK      = 0x100,
  SRC_VGPR_MASK      = 0xFF,
  VOPC_DST_VCC_MASK  = 0x80,
  VOPC_DST_SGPR_MASK = 0x7F,

  SRC_VGPR_MIN       = 0,
  SRC_VGPR_MAX       = 255,
  SRC_SGPR_MIN       = 256,
  SRC_SGPR_MAX_SI    = 357,
  SRC_SGPR_MAX_GFX10 = 361,
  SRC_TTMP_MIN       = 364,
  SRC_TTMP_MAX       = 379,
};

} // namespace SDWA
| 913 | |
namespace DPP {

// clang-format off
// DPP control values. *_FIRST / *_LAST pairs delimit inclusive ranges; the
// DPP_UNUSED* names mark values inside the numbering that have no operation
// assigned here.
enum DppCtrl : unsigned {
  QUAD_PERM_FIRST    = 0,
  QUAD_PERM_ID       = 0xE4, // identity permutation
  QUAD_PERM_LAST     = 0xFF,
  DPP_UNUSED1        = 0x100,
  ROW_SHL0           = 0x100,
  ROW_SHL_FIRST      = 0x101,
  ROW_SHL_LAST       = 0x10F,
  DPP_UNUSED2        = 0x110,
  ROW_SHR0           = 0x110,
  ROW_SHR_FIRST      = 0x111,
  ROW_SHR_LAST       = 0x11F,
  DPP_UNUSED3        = 0x120,
  ROW_ROR0           = 0x120,
  ROW_ROR_FIRST      = 0x121,
  ROW_ROR_LAST       = 0x12F,
  WAVE_SHL1          = 0x130,
  DPP_UNUSED4_FIRST  = 0x131,
  DPP_UNUSED4_LAST   = 0x133,
  WAVE_ROL1          = 0x134,
  DPP_UNUSED5_FIRST  = 0x135,
  DPP_UNUSED5_LAST   = 0x137,
  WAVE_SHR1          = 0x138,
  DPP_UNUSED6_FIRST  = 0x139,
  DPP_UNUSED6_LAST   = 0x13B,
  WAVE_ROR1          = 0x13C,
  DPP_UNUSED7_FIRST  = 0x13D,
  DPP_UNUSED7_LAST   = 0x13F,
  ROW_MIRROR         = 0x140,
  ROW_HALF_MIRROR    = 0x141,
  BCAST15            = 0x142,
  BCAST31            = 0x143,
  DPP_UNUSED8_FIRST  = 0x144,
  DPP_UNUSED8_LAST   = 0x14F,
  ROW_NEWBCAST_FIRST = 0x150,
  ROW_NEWBCAST_LAST  = 0x15F,
  ROW_SHARE0         = 0x150,
  ROW_SHARE_FIRST    = 0x150,
  ROW_SHARE_LAST     = 0x15F,
  ROW_XMASK0         = 0x160,
  ROW_XMASK_FIRST    = 0x160,
  ROW_XMASK_LAST     = 0x16F,
  DPP_LAST           = ROW_XMASK_LAST
};
// clang-format on

// FI mode values for DPP and DPP8.
enum DppFiMode {
  DPP_FI_0  = 0,
  DPP_FI_1  = 1,
  DPP8_FI_0 = 0xE9,
  DPP8_FI_1 = 0xEA,
};

} // namespace DPP
| 971 | |
namespace Exp {

// Export target numbers, followed by the highest index allowed for each
// target family.
enum Target : unsigned {
  ET_MRT0 = 0,
  ET_MRT7 = 7,
  ET_MRTZ = 8,
  ET_NULL = 9,             // Pre-GFX11
  ET_POS0 = 12,
  ET_POS3 = 15,
  ET_POS4 = 16,            // GFX10+
  ET_POS_LAST = ET_POS4,   // Highest pos used on any subtarget
  ET_PRIM = 20,            // GFX10+
  ET_DUAL_SRC_BLEND0 = 21, // GFX11+
  ET_DUAL_SRC_BLEND1 = 22, // GFX11+
  ET_PARAM0 = 32,          // Pre-GFX11
  ET_PARAM31 = 63,         // Pre-GFX11

  // Per-family maximum index.
  ET_NULL_MAX_IDX = 0,
  ET_MRTZ_MAX_IDX = 0,
  ET_PRIM_MAX_IDX = 0,
  ET_MRT_MAX_IDX = 7,
  ET_POS_MAX_IDX = 4,
  ET_DUAL_SRC_BLEND_MAX_IDX = 1,
  ET_PARAM_MAX_IDX = 31,

  ET_INVALID = 255,
};

} // namespace Exp
| 1001 | |
namespace VOP3PEncoding {

// Single-bit masks for the three op_sel_hi bits within a 64-bit word. Note
// that bit 2 lives at position 14 while bits 0 and 1 are at 59 and 60.
enum OpSel : uint64_t {
  OP_SEL_HI_0 = UINT64_C(1) << 59,
  OP_SEL_HI_1 = UINT64_C(1) << 60,
  OP_SEL_HI_2 = UINT64_C(1) << 14,
};

} // namespace VOP3PEncoding
| 1011 | |
namespace ImplicitArg {
// Offsets of implicit kernel arguments for code object version 5.
enum Offset_COV5 : unsigned {
  HOSTCALL_PTR_OFFSET       = 80,
  MULTIGRID_SYNC_ARG_OFFSET = 88,
  HEAP_PTR_OFFSET           = 96,

  DEFAULT_QUEUE_OFFSET      = 104,
  COMPLETION_ACTION_OFFSET  = 112,

  PRIVATE_BASE_OFFSET       = 192,
  SHARED_BASE_OFFSET        = 196,
  QUEUE_PTR_OFFSET          = 200,
};

} // namespace ImplicitArg
| 1028 | |
namespace MFMAScaleFormats {
// Values used in cbsz/blgp for F8F6F4 MFMA operations to select the matrix
// format.
enum MFMAScaleFormats {
  FP8_E4M3 = 0,
  FP8_E5M2 = 1,
  FP6_E2M3 = 2,
  FP6_E3M2 = 3,
  FP4_E2M1 = 4
};
} // namespace MFMAScaleFormats
| 1040 | |
namespace VirtRegFlag {
// Flags attached to virtual registers for target-specific handling during
// codegen.
enum Register_Flag : uint8_t {
  // Register operand in a whole-wave mode operation.
  WWM_REG = 1 << 0,
};

} // namespace VirtRegFlag
| 1050 | |
| 1051 | } // namespace AMDGPU |
| 1052 | |
namespace AMDGPU {
namespace Barrier {

// Barrier type identifiers; both are negative values.
enum Type {
  TRAP = -2,
  WORKGROUP = -1
};

// Barrier scope identifiers.
enum {
  BARRIER_SCOPE_WORKGROUP = 0,
};

} // namespace Barrier
} // namespace AMDGPU
| 1064 | |
| 1065 | // clang-format off |
| 1066 | |
// SPI_SHADER_PGM_RSRC1_PS: register constant and field helpers.
// S_* masks a value and shifts it into the field, G_* extracts the field from
// a register value, C_* is the 32-bit constant with the field's bits cleared.
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS   0x00B028
#define   S_00B028_VGPRS(x)                (((x) & 0x3F) << 0)   // bits 5:0
#define   S_00B028_SGPRS(x)                (((x) & 0xF) << 6)    // bits 9:6
#define   S_00B028_MEM_ORDERED(x)          (((x) & 0x1) << 25)   // bit 25
#define   G_00B028_MEM_ORDERED(x)          (((x) >> 25) & 0x1)
#define   C_00B028_MEM_ORDERED             0xFDFFFFFF
| 1073 | |
// SPI_SHADER_PGM_RSRC2_PS: register constant and the setter for its
// EXTRA_LDS_SIZE field (an 8-bit value placed at bits 15:8).
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS   0x00B02C
#define   S_00B02C_EXTRA_LDS_SIZE(x)       (((x) & 0xFF) << 8)
// SPI_SHADER_PGM_RSRC1_VS: register constant plus set (S_), get (G_) and
// clear-mask (C_) helpers for the one-bit MEM_ORDERED field at bit 27.
#define R_00B128_SPI_SHADER_PGM_RSRC1_VS   0x00B128
#define   S_00B128_MEM_ORDERED(x)          (((x) & 0x1) << 27)
#define   G_00B128_MEM_ORDERED(x)          (((x) >> 27) & 0x1)
#define   C_00B128_MEM_ORDERED             0xF7FFFFFF
| 1080 | |
// SPI_SHADER_PGM_RSRC1_GS: register constant plus set (S_), get (G_) and
// clear-mask (C_) helpers for two one-bit fields.
#define R_00B228_SPI_SHADER_PGM_RSRC1_GS   0x00B228
// WGP_MODE: bit 27.
#define   S_00B228_WGP_MODE(x)             (((x) & 0x1) << 27)
#define   G_00B228_WGP_MODE(x)             (((x) >> 27) & 0x1)
#define   C_00B228_WGP_MODE                0xF7FFFFFF
// MEM_ORDERED: bit 25.
#define   S_00B228_MEM_ORDERED(x)          (((x) & 0x1) << 25)
#define   G_00B228_MEM_ORDERED(x)          (((x) >> 25) & 0x1)
#define   C_00B228_MEM_ORDERED             0xFDFFFFFF
| 1088 | |
// SPI_SHADER_PGM_RSRC1 register constants for the ES, HS and LS variants.
// Of the three, only HS has field helpers in this group.
#define R_00B328_SPI_SHADER_PGM_RSRC1_ES   0x00B328

#define R_00B428_SPI_SHADER_PGM_RSRC1_HS   0x00B428
// WGP_MODE: bit 26.
#define   S_00B428_WGP_MODE(x)             (((x) & 0x1) << 26)
#define   G_00B428_WGP_MODE(x)             (((x) >> 26) & 0x1)
#define   C_00B428_WGP_MODE                0xFBFFFFFF
// MEM_ORDERED: bit 24.
#define   S_00B428_MEM_ORDERED(x)          (((x) & 0x1) << 24)
#define   G_00B428_MEM_ORDERED(x)          (((x) >> 24) & 0x1)
#define   C_00B428_MEM_ORDERED             0xFEFFFFFF

#define R_00B528_SPI_SHADER_PGM_RSRC1_LS   0x00B528
| 1099 | |
// COMPUTE_PGM_RSRC2: register constant and per-field helpers. For each field,
// S_* masks a value and shifts it into place, G_* extracts the field from a
// register value, and C_* is the 32-bit constant with that field cleared.
#define R_00B84C_COMPUTE_PGM_RSRC2         0x00B84C
// SCRATCH_EN: bit 0.
#define   S_00B84C_SCRATCH_EN(x)           (((x) & 0x1) << 0)
#define   G_00B84C_SCRATCH_EN(x)           (((x) >> 0) & 0x1)
#define   C_00B84C_SCRATCH_EN              0xFFFFFFFE
// USER_SGPR: bits 5:1.
#define   S_00B84C_USER_SGPR(x)            (((x) & 0x1F) << 1)
#define   G_00B84C_USER_SGPR(x)            (((x) >> 1) & 0x1F)
#define   C_00B84C_USER_SGPR               0xFFFFFFC1
// TRAP_HANDLER: bit 6.
#define   S_00B84C_TRAP_HANDLER(x)         (((x) & 0x1) << 6)
#define   G_00B84C_TRAP_HANDLER(x)         (((x) >> 6) & 0x1)
#define   C_00B84C_TRAP_HANDLER            0xFFFFFFBF
// TGID_X_EN: bit 7.
#define   S_00B84C_TGID_X_EN(x)            (((x) & 0x1) << 7)
#define   G_00B84C_TGID_X_EN(x)            (((x) >> 7) & 0x1)
#define   C_00B84C_TGID_X_EN               0xFFFFFF7F
// TGID_Y_EN: bit 8.
#define   S_00B84C_TGID_Y_EN(x)            (((x) & 0x1) << 8)
#define   G_00B84C_TGID_Y_EN(x)            (((x) >> 8) & 0x1)
#define   C_00B84C_TGID_Y_EN               0xFFFFFEFF
// TGID_Z_EN: bit 9.
#define   S_00B84C_TGID_Z_EN(x)            (((x) & 0x1) << 9)
#define   G_00B84C_TGID_Z_EN(x)            (((x) >> 9) & 0x1)
#define   C_00B84C_TGID_Z_EN               0xFFFFFDFF
// TG_SIZE_EN: bit 10.
#define   S_00B84C_TG_SIZE_EN(x)           (((x) & 0x1) << 10)
#define   G_00B84C_TG_SIZE_EN(x)           (((x) >> 10) & 0x1)
#define   C_00B84C_TG_SIZE_EN              0xFFFFFBFF
// TIDIG_COMP_CNT: bits 12:11.
#define   S_00B84C_TIDIG_COMP_CNT(x)       (((x) & 0x3) << 11)
#define   G_00B84C_TIDIG_COMP_CNT(x)       (((x) >> 11) & 0x3)
#define   C_00B84C_TIDIG_COMP_CNT          0xFFFFE7FF
/* CIK */
// EXCP_EN_MSB: bits 14:13.
#define   S_00B84C_EXCP_EN_MSB(x)          (((x) & 0x3) << 13)
#define   G_00B84C_EXCP_EN_MSB(x)          (((x) >> 13) & 0x3)
#define   C_00B84C_EXCP_EN_MSB             0xFFFF9FFF
/* */
// LDS_SIZE: bits 23:15.
#define   S_00B84C_LDS_SIZE(x)             (((x) & 0x1FF) << 15)
#define   G_00B84C_LDS_SIZE(x)             (((x) >> 15) & 0x1FF)
#define   C_00B84C_LDS_SIZE                0xFF007FFF
// EXCP_EN: bits 30:24.
#define   S_00B84C_EXCP_EN(x)              (((x) & 0x7F) << 24)
#define   G_00B84C_EXCP_EN(x)              (((x) >> 24) & 0x7F)
#define   C_00B84C_EXCP_EN                 0x80FFFFFF
| 1136 | |
// Register constants for SPI_PS_INPUT_ENA and SPI_PS_INPUT_ADDR.
#define R_0286CC_SPI_PS_INPUT_ENA          0x0286CC
#define R_0286D0_SPI_PS_INPUT_ADDR         0x0286D0
| 1139 | |
// COMPUTE_PGM_RSRC1: register constant and per-field helpers. For each field,
// S_* masks a value and shifts it into place, G_* extracts the field from a
// register value, and C_* is the 32-bit constant with that field cleared.
#define R_00B848_COMPUTE_PGM_RSRC1         0x00B848
// VGPRS: bits 5:0.
#define   S_00B848_VGPRS(x)                (((x) & 0x3F) << 0)
#define   G_00B848_VGPRS(x)                (((x) >> 0) & 0x3F)
#define   C_00B848_VGPRS                   0xFFFFFFC0
// SGPRS: bits 9:6.
#define   S_00B848_SGPRS(x)                (((x) & 0xF) << 6)
#define   G_00B848_SGPRS(x)                (((x) >> 6) & 0xF)
#define   C_00B848_SGPRS                   0xFFFFFC3F
// PRIORITY: bits 11:10.
#define   S_00B848_PRIORITY(x)             (((x) & 0x3) << 10)
#define   G_00B848_PRIORITY(x)             (((x) >> 10) & 0x3)
#define   C_00B848_PRIORITY                0xFFFFF3FF
// FLOAT_MODE: bits 19:12.
#define   S_00B848_FLOAT_MODE(x)           (((x) & 0xFF) << 12)
#define   G_00B848_FLOAT_MODE(x)           (((x) >> 12) & 0xFF)
#define   C_00B848_FLOAT_MODE              0xFFF00FFF
// PRIV: bit 20.
#define   S_00B848_PRIV(x)                 (((x) & 0x1) << 20)
#define   G_00B848_PRIV(x)                 (((x) >> 20) & 0x1)
#define   C_00B848_PRIV                    0xFFEFFFFF
// DX10_CLAMP: bit 21.
#define   S_00B848_DX10_CLAMP(x)           (((x) & 0x1) << 21)
#define   G_00B848_DX10_CLAMP(x)           (((x) >> 21) & 0x1)
#define   C_00B848_DX10_CLAMP              0xFFDFFFFF
// RR_WG_MODE: bit 21 (same bit position as DX10_CLAMP).
#define   S_00B848_RR_WG_MODE(x)           (((x) & 0x1) << 21)
#define   G_00B848_RR_WG_MODE(x)           (((x) >> 21) & 0x1)
#define   C_00B848_RR_WG_MODE              0xFFDFFFFF
// DEBUG_MODE: bit 22.
#define   S_00B848_DEBUG_MODE(x)           (((x) & 0x1) << 22)
#define   G_00B848_DEBUG_MODE(x)           (((x) >> 22) & 0x1)
#define   C_00B848_DEBUG_MODE              0xFFBFFFFF
// IEEE_MODE: bit 23.
#define   S_00B848_IEEE_MODE(x)            (((x) & 0x1) << 23)
#define   G_00B848_IEEE_MODE(x)            (((x) >> 23) & 0x1)
#define   C_00B848_IEEE_MODE               0xFF7FFFFF
// WGP_MODE: bit 29.
#define   S_00B848_WGP_MODE(x)             (((x) & 0x1) << 29)
#define   G_00B848_WGP_MODE(x)             (((x) >> 29) & 0x1)
#define   C_00B848_WGP_MODE                0xDFFFFFFF
// MEM_ORDERED: bit 30.
#define   S_00B848_MEM_ORDERED(x)          (((x) & 0x1) << 30)
#define   G_00B848_MEM_ORDERED(x)          (((x) >> 30) & 0x1)
#define   C_00B848_MEM_ORDERED             0xBFFFFFFF
// FWD_PROGRESS: bit 31.
#define   S_00B848_FWD_PROGRESS(x)         (((x) & 0x1) << 31)
#define   G_00B848_FWD_PROGRESS(x)         (((x) >> 31) & 0x1)
#define   C_00B848_FWD_PROGRESS            0x7FFFFFFF
| 1177 | |
// Helpers for composing a FLOAT_MODE value.

// Two-bit rounding-mode encodings.
#define FP_ROUND_ROUND_TO_NEAREST      0
#define FP_ROUND_ROUND_TO_INF          1
#define FP_ROUND_ROUND_TO_NEGINF       2
#define FP_ROUND_ROUND_TO_ZERO         3

// Bits 3:0 hold the rounding mode: 1:0 for single precision, 3:2 for double
// precision.
#define FP_ROUND_MODE_SP(x)            ((x) & 0x3)
#define FP_ROUND_MODE_DP(x)            (((x) & 0x3) << 2)

// Two-bit denormal-handling encodings.
#define FP_DENORM_FLUSH_IN_FLUSH_OUT   0
#define FP_DENORM_FLUSH_OUT            1
#define FP_DENORM_FLUSH_IN             2
#define FP_DENORM_FLUSH_NONE           3

// Bits 7:4 hold the denormal mode: 5:4 for single precision, 7:6 for double
// precision.
#define FP_DENORM_MODE_SP(x)           (((x) & 0x3) << 4)
#define FP_DENORM_MODE_DP(x)           (((x) & 0x3) << 6)
| 1199 | |
// COMPUTE_TMPRING_SIZE: WAVESIZE setters. All three shift the value to bit 12;
// they differ only in mask width (13, 15 and 18 bits respectively).
#define R_00B860_COMPUTE_TMPRING_SIZE          0x00B860
#define   S_00B860_WAVESIZE_PreGFX11(x)        (((x) & 0x1FFF) << 12)
#define   S_00B860_WAVESIZE_GFX11(x)           (((x) & 0x7FFF) << 12)
#define   S_00B860_WAVESIZE_GFX12Plus(x)       (((x) & 0x3FFFF) << 12)

// SPI_TMPRING_SIZE: WAVESIZE setters with the same masks and shift as above.
#define R_0286E8_SPI_TMPRING_SIZE              0x0286E8
#define   S_0286E8_WAVESIZE_PreGFX11(x)        (((x) & 0x1FFF) << 12)
#define   S_0286E8_WAVESIZE_GFX11(x)           (((x) & 0x7FFF) << 12)
#define   S_0286E8_WAVESIZE_GFX12Plus(x)       (((x) & 0x3FFFF) << 12)
| 1209 | |
// One-bit *_W32_EN setters and the constants of the registers they belong to.
#define R_028B54_VGT_SHADER_STAGES_EN          0x028B54
#define   S_028B54_HS_W32_EN(x)                (((x) & 0x1) << 21)   // bit 21
#define   S_028B54_GS_W32_EN(x)                (((x) & 0x1) << 22)   // bit 22
#define   S_028B54_VS_W32_EN(x)                (((x) & 0x1) << 23)   // bit 23
#define R_0286D8_SPI_PS_IN_CONTROL             0x0286D8
#define   S_0286D8_PS_W32_EN(x)                (((x) & 0x1) << 15)   // bit 15
#define R_00B800_COMPUTE_DISPATCH_INITIATOR    0x00B800
#define   S_00B800_CS_W32_EN(x)                (((x) & 0x1) << 15)   // bit 15
| 1218 | |
// Constants for the spilled-SGPR and spilled-VGPR entries.
#define R_SPILLED_SGPRS                        0x4
#define R_SPILLED_VGPRS                        0x8
| 1221 | |
| 1222 | // clang-format on |
| 1223 | |
| 1224 | } // End namespace llvm |
| 1225 | |
| 1226 | #endif |
| 1227 | |