1 | //===-- SIDefines.h - SI Helper Macros ----------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | /// \file |
8 | //===----------------------------------------------------------------------===// |
9 | |
10 | #ifndef LLVM_LIB_TARGET_AMDGPU_SIDEFINES_H |
11 | #define LLVM_LIB_TARGET_AMDGPU_SIDEFINES_H |
12 | |
13 | #include "llvm/MC/MCInstrDesc.h" |
14 | |
15 | namespace llvm { |
16 | |
17 | // This needs to be kept in sync with the field bits in SIRegisterClass. |
/// Layout of the flag byte of an SI register class.
/// This needs to be kept in sync with the field bits in SIRegisterClass.
enum SIRCFlags : uint8_t {
  // The RegTupleAlignUnits field occupies the lowest bits.
  RegTupleAlignUnitsWidth = 2,
  RegTupleAlignUnitsMask = (1 << RegTupleAlignUnitsWidth) - 1,

  // Bit positions of the register-kind flags; they start directly above the
  // RegTupleAlignUnits field.
  HasVGPRBit = RegTupleAlignUnitsWidth,
  HasAGPRBit = HasVGPRBit + 1,
  HasSGPRbit = HasAGPRBit + 1,

  // Single-bit masks built from the positions above.
  HasVGPR = 1 << HasVGPRBit,
  HasAGPR = 1 << HasAGPRBit,
  HasSGPR = 1 << HasSGPRbit,

  // All register-kind bits together.
  RegKindMask = HasVGPR | HasAGPR | HasSGPR
}; // enum SIRCFlags
31 | |
namespace SIEncodingFamily {
// Encoding family identifiers.
//
// The numeric values must be kept in sync with the SIEncodingFamily class in
// SIInstrInfo.td and with the columns of the getMCOpcodeGen table, which is
// why each one is written out explicitly.
enum {
  SI = 0,
  VI = 1,
  SDWA = 2,
  SDWA9 = 3,
  GFX80 = 4,
  GFX9 = 5,
  GFX10 = 6,
  SDWA10 = 7,
  GFX90A = 8,
  GFX940 = 9,
  GFX11 = 10,
  GFX12 = 11,
  GFX1250 = 12,
};
} // namespace SIEncodingFamily
51 | |
namespace SIInstrFlags {
// Instruction flag bits. This needs to be kept in sync with the field bits in
// InstSI. Every flag is a single bit of a 64-bit word; bits 11, 27, 30 and 31
// are not assigned in this enum.
enum : uint64_t {
  // Low bits - basic encoding information.
  SALU = UINT64_C(1) << 0,
  VALU = UINT64_C(1) << 1,

  // SALU instruction formats.
  SOP1 = UINT64_C(1) << 2,
  SOP2 = UINT64_C(1) << 3,
  SOPC = UINT64_C(1) << 4,
  SOPK = UINT64_C(1) << 5,
  SOPP = UINT64_C(1) << 6,

  // VALU instruction formats.
  VOP1 = UINT64_C(1) << 7,
  VOP2 = UINT64_C(1) << 8,
  VOPC = UINT64_C(1) << 9,

  // TODO: Should this be split into VOP3 a and b?
  VOP3 = UINT64_C(1) << 10,
  VOP3P = UINT64_C(1) << 12,

  VINTRP = UINT64_C(1) << 13,
  SDWA = UINT64_C(1) << 14,
  DPP = UINT64_C(1) << 15,
  TRANS = UINT64_C(1) << 16,

  // Memory instruction formats.
  MUBUF = UINT64_C(1) << 17,
  MTBUF = UINT64_C(1) << 18,
  SMRD = UINT64_C(1) << 19,
  MIMG = UINT64_C(1) << 20,
  VIMAGE = UINT64_C(1) << 21,
  VSAMPLE = UINT64_C(1) << 22,
  EXP = UINT64_C(1) << 23,
  FLAT = UINT64_C(1) << 24,
  DS = UINT64_C(1) << 25,

  // One bit shared by SGPR and VGPR spills; isSGPRSpill and isVGPRSpill
  // contain the logic that tells the two apart.
  Spill = UINT64_C(1) << 26,

  // LDSDIR instruction format.
  LDSDIR = UINT64_C(1) << 28,

  // VINTERP instruction format.
  VINTERP = UINT64_C(1) << 29,

  // High bits - other information.
  VM_CNT = UINT64_C(1) << 32,
  EXP_CNT = UINT64_C(1) << 33,
  LGKM_CNT = UINT64_C(1) << 34,

  WQM = UINT64_C(1) << 35,
  DisableWQM = UINT64_C(1) << 36,
  Gather4 = UINT64_C(1) << 37,

  TENSOR_CNT = UINT64_C(1) << 38,

  SCALAR_STORE = UINT64_C(1) << 39,
  FIXED_SIZE = UINT64_C(1) << 40,

  ASYNC_CNT = UINT64_C(1) << 41,

  VOP3_OPSEL = UINT64_C(1) << 42,
  maybeAtomic = UINT64_C(1) << 43,
  renamedInGFX9 = UINT64_C(1) << 44,

  // Clamp on an FP type.
  FPClamp = UINT64_C(1) << 45,

  // Integer clamp.
  IntClamp = UINT64_C(1) << 46,

  // Clamps the lo component of the register.
  ClampLo = UINT64_C(1) << 47,

  // Clamps the hi component of the register.
  // A packed clamp sets both ClampLo and ClampHi.
  ClampHi = UINT64_C(1) << 48,

  // Packed VOP3P instruction.
  IsPacked = UINT64_C(1) << 49,

  // D16 buffer instruction.
  D16Buf = UINT64_C(1) << 50,

  // FLAT instruction that accesses the FLAT_GLBL segment.
  FlatGlobal = UINT64_C(1) << 51,

  // Uses the floating point double precision rounding mode.
  FPDPRounding = UINT64_C(1) << 52,

  // FP atomic instruction.
  FPAtomic = UINT64_C(1) << 53,

  // MFMA instruction.
  IsMAI = UINT64_C(1) << 54,

  // DOT instruction.
  IsDOT = UINT64_C(1) << 55,

  // FLAT instruction that accesses the FLAT_SCRATCH segment.
  FlatScratch = UINT64_C(1) << 56,

  // Atomic without return.
  IsAtomicNoRet = UINT64_C(1) << 57,

  // Atomic with return.
  IsAtomicRet = UINT64_C(1) << 58,

  // WMMA instruction.
  IsWMMA = UINT64_C(1) << 59,

  // Whether tied sources will be read.
  TiedSourceNotRead = UINT64_C(1) << 60,

  // Never uniform.
  IsNeverUniform = UINT64_C(1) << 61,

  // ds_gws_* instructions.
  GWS = UINT64_C(1) << 62,

  // SWMMAC instruction.
  IsSWMMAC = UINT64_C(1) << 63,
};

// v_cmp_class_* etc. take a 10-bit mask selecting which tests are performed.
// The result is true if any of the selected tests is true.
enum ClassFlags : unsigned {
  S_NAN = 0x001,       // bit 0: Signaling NaN
  Q_NAN = 0x002,       // bit 1: Quiet NaN
  N_INFINITY = 0x004,  // bit 2: Negative infinity
  N_NORMAL = 0x008,    // bit 3: Negative normal
  N_SUBNORMAL = 0x010, // bit 4: Negative subnormal
  N_ZERO = 0x020,      // bit 5: Negative zero
  P_ZERO = 0x040,      // bit 6: Positive zero
  P_SUBNORMAL = 0x080, // bit 7: Positive subnormal
  P_NORMAL = 0x100,    // bit 8: Positive normal
  P_INFINITY = 0x200   // bit 9: Positive infinity
};
} // namespace SIInstrFlags
195 | |
196 | namespace AMDGPU { |
197 | enum OperandType : unsigned { |
198 | /// Operands with register or 32-bit immediate |
199 | OPERAND_REG_IMM_INT32 = MCOI::OPERAND_FIRST_TARGET, |
200 | OPERAND_REG_IMM_INT64, |
201 | OPERAND_REG_IMM_INT16, |
202 | OPERAND_REG_IMM_FP32, |
203 | OPERAND_REG_IMM_FP64, |
204 | OPERAND_REG_IMM_BF16, |
205 | OPERAND_REG_IMM_FP16, |
206 | OPERAND_REG_IMM_V2BF16, |
207 | OPERAND_REG_IMM_V2FP16, |
208 | OPERAND_REG_IMM_V2INT16, |
209 | OPERAND_REG_IMM_V2INT32, |
210 | OPERAND_REG_IMM_V2FP32, |
211 | |
212 | /// Operands with register or inline constant |
213 | OPERAND_REG_INLINE_C_INT16, |
214 | OPERAND_REG_INLINE_C_INT32, |
215 | OPERAND_REG_INLINE_C_INT64, |
216 | OPERAND_REG_INLINE_C_BF16, |
217 | OPERAND_REG_INLINE_C_FP16, |
218 | OPERAND_REG_INLINE_C_FP32, |
219 | OPERAND_REG_INLINE_C_FP64, |
220 | OPERAND_REG_INLINE_C_V2INT16, |
221 | OPERAND_REG_INLINE_C_V2BF16, |
222 | OPERAND_REG_INLINE_C_V2FP16, |
223 | |
224 | // Operand for split barrier inline constant |
225 | OPERAND_INLINE_SPLIT_BARRIER_INT32, |
226 | |
227 | /// Operand with 32-bit immediate that uses the constant bus. |
228 | OPERAND_KIMM32, |
229 | OPERAND_KIMM16, |
230 | |
231 | /// Operands with an AccVGPR register or inline constant |
232 | OPERAND_REG_INLINE_AC_INT32, |
233 | OPERAND_REG_INLINE_AC_FP32, |
234 | OPERAND_REG_INLINE_AC_FP64, |
235 | |
236 | // Operand for source modifiers for VOP instructions |
237 | OPERAND_INPUT_MODS, |
238 | |
239 | // Operand for SDWA instructions |
240 | OPERAND_SDWA_VOPC_DST, |
241 | |
242 | OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32, |
243 | OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2FP32, |
244 | |
245 | OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16, |
246 | OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_FP64, |
247 | |
248 | OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT32, |
249 | OPERAND_REG_INLINE_AC_LAST = OPERAND_REG_INLINE_AC_FP64, |
250 | |
251 | OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32, |
252 | OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST, |
253 | |
254 | OPERAND_KIMM_FIRST = OPERAND_KIMM32, |
255 | OPERAND_KIMM_LAST = OPERAND_KIMM16 |
256 | |
257 | }; |
258 | } |
259 | |
// Bit masks of the input operand modifiers.
// NEG and SEXT use the same bit because they can't be set simultaneously.
namespace SISrcMods {
enum : unsigned {
  NONE = 0,
  NEG = 0x1,             // Floating-point negate modifier
  ABS = 0x2,             // Floating-point absolute modifier
  SEXT = NEG,            // Integer sign-extend modifier (same bit as NEG)
  NEG_HI = ABS,          // Floating-point negate high packed component modifier.
  OP_SEL_0 = 0x4,
  OP_SEL_1 = 0x8,
  DST_OP_SEL = OP_SEL_1  // VOP3 dst op_sel (shares its mask with OP_SEL_1)
};
} // namespace SISrcMods
274 | |
namespace SIOutMods {
// Output modifier selector values.
enum : unsigned {
  NONE = 0,
  MUL2 = 1,
  MUL4 = 2,
  DIV2 = 3
};
} // namespace SIOutMods
283 | |
namespace AMDGPU {
namespace VGPRIndexMode {

// Ids of the symbolic names. Each id is also used as the bit position of the
// matching *_ENABLE flag in EncBits.
enum Id : unsigned {
  ID_SRC0 = 0,
  ID_SRC1 = 1,
  ID_SRC2 = 2,
  ID_DST = 3,

  // Inclusive bounds of the id range.
  ID_MIN = ID_SRC0,
  ID_MAX = ID_DST
};

// Encoded enable bits, one per Id.
enum EncBits : unsigned {
  OFF = 0,
  SRC0_ENABLE = 1 << ID_SRC0,
  SRC1_ENABLE = 1 << ID_SRC1,
  SRC2_ENABLE = 1 << ID_SRC2,
  DST_ENABLE = 1 << ID_DST,
  // Every enable bit at once.
  ENABLE_MASK = SRC0_ENABLE | SRC1_ENABLE | SRC2_ENABLE | DST_ENABLE,
  // Lies outside ENABLE_MASK.
  UNDEF = 0xFFFF
};

} // namespace VGPRIndexMode
} // namespace AMDGPU
309 | |
namespace AMDGPUAsmVariants {
// Assembler variant identifiers.
enum : unsigned {
  DEFAULT = 0,
  VOP3 = 1,
  SDWA = 2,
  SDWA9 = 3,
  DPP = 4,
  VOP3_DPP = 5
};
} // namespace AMDGPUAsmVariants
320 | |
321 | namespace AMDGPU { |
namespace EncValues { // Encoding values of enum9/8/7 operands

enum : unsigned {
  // SGPR range; the upper bound has an SI and a GFX10 variant.
  SGPR_MIN = 0,
  SGPR_MAX_SI = 101,
  SGPR_MAX_GFX10 = 105,

  // TTMP range; the lower bound has a VI and a GFX9+ variant.
  TTMP_VI_MIN = 112,
  TTMP_VI_MAX = 123,
  TTMP_GFX9PLUS_MIN = 108,
  TTMP_GFX9PLUS_MAX = 123,

  // Inline integer constants.
  INLINE_INTEGER_C_MIN = 128,
  INLINE_INTEGER_C_POSITIVE_MAX = 192, // 64
  INLINE_INTEGER_C_MAX = 208,

  // Inline floating-point constants.
  INLINE_FLOATING_C_MIN = 240,
  INLINE_FLOATING_C_MAX = 248,

  LITERAL_CONST = 255,

  // VGPR range.
  VGPR_MIN = 256,
  VGPR_MAX = 511,
  IS_VGPR = 256, // Indicates VGPR or AGPR
};

} // namespace EncValues
344 | |
// Register codes as defined in the TableGen's HWEncoding field.
namespace HWEncoding {
enum : unsigned {
  REG_IDX_MASK = 0xff, // bits 0..7
  IS_VGPR = 0x100,     // bit 8
  IS_AGPR = 0x200,     // bit 9
  IS_HI16 = 0x400,     // bit 10
};
} // namespace HWEncoding
354 | |
namespace CPol {

// Cache policy bits. The first group is the pre-GFX12 encoding; the rest is
// the GFX12+ encoding plus helper bits.
enum CPol {
  GLC = 1,
  SLC = 2,
  DLC = 4,
  SCC = 16,
  // Other names for the bits above.
  SC0 = GLC,
  SC1 = SCC,
  NT = SLC,
  ALL_pregfx12 = GLC | SLC | DLC | SCC,
  SWZ_pregfx12 = 8,

  // Below are GFX12+ cache policy bits

  // Temporal hint
  TH = 0x7,      // All TH bits
  TH_RT = 0,     // regular
  TH_NT = 1,     // non-temporal
  TH_HT = 2,     // high-temporal
  TH_LU = 3,     // last use
  TH_WB = 3,     // regular (CU, SE), high-temporal with write-back (MALL)
  TH_NT_RT = 4,  // non-temporal (CU, SE), regular (MALL)
  TH_RT_NT = 5,  // regular (CU, SE), non-temporal (MALL)
  TH_NT_HT = 6,  // non-temporal (CU, SE), high-temporal (MALL)
  TH_NT_WB = 7,  // non-temporal (CU, SE), high-temporal with write-back (MALL)
  TH_BYPASS = 3, // only to be used with scope = 3

  TH_RESERVED = 7, // unused value for load insts

  // Bits of TH for atomics
  TH_ATOMIC_RETURN = GLC, // Returning vs non-returning
  TH_ATOMIC_NT = SLC,     // Non-temporal vs regular
  TH_ATOMIC_CASCADE = 4,  // Cascading vs regular

  // Scope: a two-bit field starting at bit 3.
  SCOPE = 0x3 << 3, // All Scope bits
  SCOPE_CU = 0 << 3,
  SCOPE_SE = 1 << 3,
  SCOPE_DEV = 2 << 3,
  SCOPE_SYS = 3 << 3,

  SWZ = 0x40, // Swizzle bit (bit 6)

  ALL = TH | SCOPE,

  // Helper bits
  TH_TYPE_LOAD = 0x080,   // bit 7: TH_LOAD policy
  TH_TYPE_STORE = 0x100,  // bit 8: TH_STORE policy
  TH_TYPE_ATOMIC = 0x200, // bit 9: TH_ATOMIC policy
  TH_REAL_BYPASS = 0x400, // bit 10: is TH=3 bypass policy or not

  // Volatile: used to preserve/signal operation volatility for buffer
  // operations; it is not a real instruction bit. The initializer is kept as
  // a signed int shift so the enum's underlying type is unaffected.
  VOLATILE = 1 << 31,
};

} // namespace CPol
413 | |
namespace SendMsg { // Encoding of SIMM16 used in s_sendmsg* insns.

// Message ID, width(4) [3:0]. Several numbers are shared by messages that
// exist on different hardware generations; the trailing comments record when
// each message was added, removed or replaced.
enum Id {
  ID_INTERRUPT = 1,

  ID_GS_PreGFX11 = 2,      // replaced in GFX11
  ID_GS_DONE_PreGFX11 = 3, // replaced in GFX11

  ID_HS_TESSFACTOR_GFX11Plus = 2, // reused in GFX11
  ID_DEALLOC_VGPRS_GFX11Plus = 3, // reused in GFX11

  ID_SAVEWAVE = 4,           // added in GFX8, removed in GFX11
  ID_STALL_WAVE_GEN = 5,     // added in GFX9, removed in GFX12
  ID_HALT_WAVES = 6,         // added in GFX9, removed in GFX12
  ID_ORDERED_PS_DONE = 7,    // added in GFX9, removed in GFX11
  ID_EARLY_PRIM_DEALLOC = 8, // added in GFX9, removed in GFX10
  ID_GS_ALLOC_REQ = 9,       // added in GFX9
  ID_GET_DOORBELL = 10,      // added in GFX9, removed in GFX11
  ID_GET_DDID = 11,          // added in GFX10, removed in GFX11
  ID_SYSMSG = 15,

  ID_RTN_GET_DOORBELL = 128,
  ID_RTN_GET_DDID = 129,
  ID_RTN_GET_TMA = 130,
  ID_RTN_GET_REALTIME = 131,
  ID_RTN_SAVE_WAVE = 132,
  ID_RTN_GET_TBA = 133,
  ID_RTN_GET_TBA_TO_PC = 134,
  ID_RTN_GET_SE_AID_ID = 135,

  // Masks that extract the message id.
  ID_MASK_PreGFX11_ = 0xF,
  ID_MASK_GFX11Plus_ = 0xFF
};

// Both GS and SYS operation IDs.
enum Op {
  OP_NONE_ = 0,

  // Position and width of the bits used for operation encoding.
  OP_SHIFT_ = 4,
  OP_WIDTH_ = 3,
  OP_MASK_ = ((1 << OP_WIDTH_) - 1) << OP_SHIFT_,

  // GS operations are encoded in bits 5:4
  OP_GS_NOP = 0,
  OP_GS_CUT = 1,
  OP_GS_EMIT = 2,
  OP_GS_EMIT_CUT = 3,
  OP_GS_FIRST_ = OP_GS_NOP,

  // SYS operations are encoded in bits 6:4
  OP_SYS_ECC_ERR_INTERRUPT = 1,
  OP_SYS_REG_RD = 2,
  OP_SYS_HOST_TRAP_ACK = 3,
  OP_SYS_TTRACE_PC = 4,
  OP_SYS_FIRST_ = OP_SYS_ECC_ERR_INTERRUPT,
};

// Stream ID, (2) [9:8].
enum StreamId : unsigned {
  STREAM_ID_NONE_ = 0,
  STREAM_ID_DEFAULT_ = 0,
  STREAM_ID_FIRST_ = STREAM_ID_DEFAULT_,
  STREAM_ID_LAST_ = 4,

  // Position, width and mask of the stream id field.
  STREAM_ID_SHIFT_ = 8,
  STREAM_ID_WIDTH_ = 2,
  STREAM_ID_MASK_ = ((1 << STREAM_ID_WIDTH_) - 1) << STREAM_ID_SHIFT_
};

} // namespace SendMsg
479 | |
namespace Hwreg { // Encoding of SIMM16 used in s_setreg/getreg* insns.

// HwRegCode, (6) [5:0]. Register numbers are reused across hardware
// generations, so several enumerators share a value.
enum Id {
  ID_MODE = 1,
  ID_STATUS = 2,
  ID_TRAPSTS = 3,
  ID_HW_ID = 4,
  ID_GPR_ALLOC = 5,
  ID_LDS_ALLOC = 6,
  ID_IB_STS = 7,
  ID_PERF_SNAPSHOT_DATA_gfx12 = 10,
  ID_PERF_SNAPSHOT_PC_LO_gfx12 = 11,
  ID_PERF_SNAPSHOT_PC_HI_gfx12 = 12,
  ID_MEM_BASES = 15,
  ID_TBA_LO = 16,
  ID_TBA_HI = 17,
  ID_TMA_LO = 18,
  ID_TMA_HI = 19,
  ID_FLAT_SCR_LO = 20,
  ID_FLAT_SCR_HI = 21,
  ID_XNACK_MASK = 22,
  ID_HW_ID1 = 23,
  ID_HW_ID2 = 24,
  ID_POPS_PACKER = 25,
  ID_PERF_SNAPSHOT_DATA_gfx11 = 27,
  ID_SHADER_CYCLES = 29,
  ID_SHADER_CYCLES_HI = 30,
  ID_DVGPR_ALLOC_LO = 31,
  ID_DVGPR_ALLOC_HI = 32,

  // Register numbers reused in GFX11
  ID_PERF_SNAPSHOT_PC_LO_gfx11 = 18,
  ID_PERF_SNAPSHOT_PC_HI_gfx11 = 19,

  // Register numbers reused in GFX12+
  ID_STATE_PRIV = 4,
  ID_PERF_SNAPSHOT_DATA1 = 15,
  ID_PERF_SNAPSHOT_DATA2 = 16,
  ID_EXCP_FLAG_PRIV = 17,
  ID_EXCP_FLAG_USER = 18,
  ID_TRAP_CTRL = 19,

  // GFX94* specific registers
  ID_XCC_ID = 20,
  ID_SQ_PERF_SNAPSHOT_DATA = 21,
  ID_SQ_PERF_SNAPSHOT_DATA1 = 22,
  ID_SQ_PERF_SNAPSHOT_PC_LO = 23,
  ID_SQ_PERF_SNAPSHOT_PC_HI = 24,
};

// Offset, (5) [10:6]
enum Offset : unsigned {
  OFFSET_MEM_VIOL = 8,
  OFFSET_ME_ID = 8, // in HW_ID2
};

// Field masks of the MODE register.
enum ModeRegisterMasks : uint32_t {
  FP_ROUND_MASK = 0xfu << 0,  // Bits 0..3
  FP_DENORM_MASK = 0xfu << 4, // Bits 4..7
  DX10_CLAMP_MASK = 1u << 8,
  IEEE_MODE_MASK = 1u << 9,
  LOD_CLAMP_MASK = 1u << 10,
  DEBUG_MASK = 1u << 11,

  // EXCP_EN fields.
  EXCP_EN_INVALID_MASK = 1u << 12,
  EXCP_EN_INPUT_DENORMAL_MASK = 1u << 13,
  EXCP_EN_FLOAT_DIV0_MASK = 1u << 14,
  EXCP_EN_OVERFLOW_MASK = 1u << 15,
  EXCP_EN_UNDERFLOW_MASK = 1u << 16,
  EXCP_EN_INEXACT_MASK = 1u << 17,
  EXCP_EN_INT_DIV0_MASK = 1u << 18,

  GPR_IDX_EN_MASK = 1u << 27,
  VSKIP_MASK = 1u << 28,
  CSP_MASK = 0x7u << 29 // Bits 29..31
};

} // namespace Hwreg
558 | |
namespace MTBUFFormat {

// Data format codes, together with the position (DFMT_SHIFT) and width
// (DFMT_MASK) of the data format field inside a merged format value.
enum DataFormat : int64_t {
  DFMT_INVALID = 0,
  DFMT_8 = 1,
  DFMT_16 = 2,
  DFMT_8_8 = 3,
  DFMT_32 = 4,
  DFMT_16_16 = 5,
  DFMT_10_11_11 = 6,
  DFMT_11_11_10 = 7,
  DFMT_10_10_10_2 = 8,
  DFMT_2_10_10_10 = 9,
  DFMT_8_8_8_8 = 10,
  DFMT_32_32 = 11,
  DFMT_16_16_16_16 = 12,
  DFMT_32_32_32 = 13,
  DFMT_32_32_32_32 = 14,
  DFMT_RESERVED_15 = 15,

  // Inclusive bounds of the valid codes.
  DFMT_MIN = DFMT_INVALID,
  DFMT_MAX = DFMT_RESERVED_15,

  DFMT_UNDEF = -1,
  DFMT_DEFAULT = DFMT_8,

  DFMT_SHIFT = 0,
  DFMT_MASK = 0xF
};

// Numeric format codes, together with the position (NFMT_SHIFT) and width
// (NFMT_MASK) of the numeric format field inside a merged format value.
enum NumFormat : int64_t {
  NFMT_UNORM = 0,
  NFMT_SNORM = 1,
  NFMT_USCALED = 2,
  NFMT_SSCALED = 3,
  NFMT_UINT = 4,
  NFMT_SINT = 5,
  NFMT_RESERVED_6 = 6,              // VI and GFX9
  NFMT_SNORM_OGL = NFMT_RESERVED_6, // SI and CI only
  NFMT_FLOAT = 7,

  // Inclusive bounds of the valid codes.
  NFMT_MIN = NFMT_UNORM,
  NFMT_MAX = NFMT_FLOAT,

  NFMT_UNDEF = -1,
  NFMT_DEFAULT = NFMT_UNORM,

  NFMT_SHIFT = 4,
  NFMT_MASK = 7
};

// A merged value packs a DataFormat and a NumFormat, each masked and shifted
// into its own field.
enum MergedFormat : int64_t {
  DFMT_NFMT_UNDEF = -1,

  // Both default formats packed into one value.
  DFMT_NFMT_DEFAULT = ((DFMT_DEFAULT & DFMT_MASK) << DFMT_SHIFT) |
                      ((NFMT_DEFAULT & NFMT_MASK) << NFMT_SHIFT),

  // All bits used by either field.
  DFMT_NFMT_MASK = (DFMT_MASK << DFMT_SHIFT) | (NFMT_MASK << NFMT_SHIFT),

  DFMT_NFMT_MAX = DFMT_NFMT_MASK
};

// Constants shared by the unified format enums.
enum UnifiedFormatCommon : int64_t {
  UFMT_MAX = 127,
  UFMT_UNDEF = -1,
  UFMT_DEFAULT = 1
};

} // namespace MTBUFFormat
628 | |
namespace UfmtGFX10 {
// Unified buffer format codes. The codes are consecutive; the first member of
// each group is numbered explicitly so the groups are easy to locate, and the
// remaining members continue from it implicitly.
enum UnifiedFormat : int64_t {
  UFMT_INVALID = 0,

  // 8
  UFMT_8_UNORM = 1,
  UFMT_8_SNORM,
  UFMT_8_USCALED,
  UFMT_8_SSCALED,
  UFMT_8_UINT,
  UFMT_8_SINT,

  // 16
  UFMT_16_UNORM = 7,
  UFMT_16_SNORM,
  UFMT_16_USCALED,
  UFMT_16_SSCALED,
  UFMT_16_UINT,
  UFMT_16_SINT,
  UFMT_16_FLOAT,

  // 8_8
  UFMT_8_8_UNORM = 14,
  UFMT_8_8_SNORM,
  UFMT_8_8_USCALED,
  UFMT_8_8_SSCALED,
  UFMT_8_8_UINT,
  UFMT_8_8_SINT,

  // 32
  UFMT_32_UINT = 20,
  UFMT_32_SINT,
  UFMT_32_FLOAT,

  // 16_16
  UFMT_16_16_UNORM = 23,
  UFMT_16_16_SNORM,
  UFMT_16_16_USCALED,
  UFMT_16_16_SSCALED,
  UFMT_16_16_UINT,
  UFMT_16_16_SINT,
  UFMT_16_16_FLOAT,

  // 10_11_11
  UFMT_10_11_11_UNORM = 30,
  UFMT_10_11_11_SNORM,
  UFMT_10_11_11_USCALED,
  UFMT_10_11_11_SSCALED,
  UFMT_10_11_11_UINT,
  UFMT_10_11_11_SINT,
  UFMT_10_11_11_FLOAT,

  // 11_11_10
  UFMT_11_11_10_UNORM = 37,
  UFMT_11_11_10_SNORM,
  UFMT_11_11_10_USCALED,
  UFMT_11_11_10_SSCALED,
  UFMT_11_11_10_UINT,
  UFMT_11_11_10_SINT,
  UFMT_11_11_10_FLOAT,

  // 10_10_10_2
  UFMT_10_10_10_2_UNORM = 44,
  UFMT_10_10_10_2_SNORM,
  UFMT_10_10_10_2_USCALED,
  UFMT_10_10_10_2_SSCALED,
  UFMT_10_10_10_2_UINT,
  UFMT_10_10_10_2_SINT,

  // 2_10_10_10
  UFMT_2_10_10_10_UNORM = 50,
  UFMT_2_10_10_10_SNORM,
  UFMT_2_10_10_10_USCALED,
  UFMT_2_10_10_10_SSCALED,
  UFMT_2_10_10_10_UINT,
  UFMT_2_10_10_10_SINT,

  // 8_8_8_8
  UFMT_8_8_8_8_UNORM = 56,
  UFMT_8_8_8_8_SNORM,
  UFMT_8_8_8_8_USCALED,
  UFMT_8_8_8_8_SSCALED,
  UFMT_8_8_8_8_UINT,
  UFMT_8_8_8_8_SINT,

  // 32_32
  UFMT_32_32_UINT = 62,
  UFMT_32_32_SINT,
  UFMT_32_32_FLOAT,

  // 16_16_16_16
  UFMT_16_16_16_16_UNORM = 65,
  UFMT_16_16_16_16_SNORM,
  UFMT_16_16_16_16_USCALED,
  UFMT_16_16_16_16_SSCALED,
  UFMT_16_16_16_16_UINT,
  UFMT_16_16_16_16_SINT,
  UFMT_16_16_16_16_FLOAT,

  // 32_32_32
  UFMT_32_32_32_UINT = 72,
  UFMT_32_32_32_SINT,
  UFMT_32_32_32_FLOAT,

  // 32_32_32_32
  UFMT_32_32_32_32_UINT = 75,
  UFMT_32_32_32_32_SINT,
  UFMT_32_32_32_32_FLOAT,

  // Inclusive bounds of the code range.
  UFMT_FIRST = UFMT_INVALID,
  UFMT_LAST = UFMT_32_32_32_32_FLOAT,
};

} // namespace UfmtGFX10
728 | |
namespace UfmtGFX11 {
// Unified buffer format codes. The codes are consecutive; the first member of
// each group is numbered explicitly so the groups are easy to locate, and the
// remaining members continue from it implicitly.
enum UnifiedFormat : int64_t {
  UFMT_INVALID = 0,

  // 8
  UFMT_8_UNORM = 1,
  UFMT_8_SNORM,
  UFMT_8_USCALED,
  UFMT_8_SSCALED,
  UFMT_8_UINT,
  UFMT_8_SINT,

  // 16
  UFMT_16_UNORM = 7,
  UFMT_16_SNORM,
  UFMT_16_USCALED,
  UFMT_16_SSCALED,
  UFMT_16_UINT,
  UFMT_16_SINT,
  UFMT_16_FLOAT,

  // 8_8
  UFMT_8_8_UNORM = 14,
  UFMT_8_8_SNORM,
  UFMT_8_8_USCALED,
  UFMT_8_8_SSCALED,
  UFMT_8_8_UINT,
  UFMT_8_8_SINT,

  // 32
  UFMT_32_UINT = 20,
  UFMT_32_SINT,
  UFMT_32_FLOAT,

  // 16_16
  UFMT_16_16_UNORM = 23,
  UFMT_16_16_SNORM,
  UFMT_16_16_USCALED,
  UFMT_16_16_SSCALED,
  UFMT_16_16_UINT,
  UFMT_16_16_SINT,
  UFMT_16_16_FLOAT,

  // 10_11_11 and 11_11_10 each have a single FLOAT entry in this enum.
  UFMT_10_11_11_FLOAT = 30,

  UFMT_11_11_10_FLOAT = 31,

  // 10_10_10_2
  UFMT_10_10_10_2_UNORM = 32,
  UFMT_10_10_10_2_SNORM,
  UFMT_10_10_10_2_UINT,
  UFMT_10_10_10_2_SINT,

  // 2_10_10_10
  UFMT_2_10_10_10_UNORM = 36,
  UFMT_2_10_10_10_SNORM,
  UFMT_2_10_10_10_USCALED,
  UFMT_2_10_10_10_SSCALED,
  UFMT_2_10_10_10_UINT,
  UFMT_2_10_10_10_SINT,

  // 8_8_8_8
  UFMT_8_8_8_8_UNORM = 42,
  UFMT_8_8_8_8_SNORM,
  UFMT_8_8_8_8_USCALED,
  UFMT_8_8_8_8_SSCALED,
  UFMT_8_8_8_8_UINT,
  UFMT_8_8_8_8_SINT,

  // 32_32
  UFMT_32_32_UINT = 48,
  UFMT_32_32_SINT,
  UFMT_32_32_FLOAT,

  // 16_16_16_16
  UFMT_16_16_16_16_UNORM = 51,
  UFMT_16_16_16_16_SNORM,
  UFMT_16_16_16_16_USCALED,
  UFMT_16_16_16_16_SSCALED,
  UFMT_16_16_16_16_UINT,
  UFMT_16_16_16_16_SINT,
  UFMT_16_16_16_16_FLOAT,

  // 32_32_32
  UFMT_32_32_32_UINT = 58,
  UFMT_32_32_32_SINT,
  UFMT_32_32_32_FLOAT,

  // 32_32_32_32
  UFMT_32_32_32_32_UINT = 61,
  UFMT_32_32_32_32_SINT,
  UFMT_32_32_32_32_FLOAT,

  // Inclusive bounds of the code range.
  UFMT_FIRST = UFMT_INVALID,
  UFMT_LAST = UFMT_32_32_32_32_FLOAT,
};

} // namespace UfmtGFX11
814 | |
namespace Swizzle { // Encoding of swizzle macro used in ds_swizzle_b32.

// Ids of the symbolic names.
enum Id : unsigned {
  ID_QUAD_PERM = 0,
  ID_BITMASK_PERM = 1,
  ID_SWAP = 2,
  ID_REVERSE = 3,
  ID_BROADCAST = 4,
  ID_FFT = 5,
  ID_ROTATE = 6
};

// clang-format off
enum EncBits : unsigned {

  // Swizzle mode encodings: each *_ENC value is paired with the mask that
  // selects the bits it is matched against.

  QUAD_PERM_ENC         = 0x8000,
  QUAD_PERM_ENC_MASK    = 0xFF00,

  BITMASK_PERM_ENC      = 0x0000,
  BITMASK_PERM_ENC_MASK = 0x8000,

  FFT_MODE_ENC          = 0xE000,

  ROTATE_MODE_ENC       = 0xC000,
  FFT_ROTATE_MODE_MASK  = 0xF000,

  ROTATE_MODE_LO        = 0xC000,
  FFT_MODE_LO           = 0xE000,

  // QUAD_PERM encodings

  LANE_MASK             = 0x3,
  LANE_MAX              = LANE_MASK,
  LANE_SHIFT            = 2,
  LANE_NUM              = 4,

  // BITMASK_PERM encodings

  BITMASK_MASK          = 0x1F,
  BITMASK_MAX           = BITMASK_MASK,
  BITMASK_WIDTH         = 5,

  BITMASK_AND_SHIFT     = 0,
  BITMASK_OR_SHIFT      = 5,
  BITMASK_XOR_SHIFT     = 10,

  // FFT encodings

  FFT_SWIZZLE_MASK      = 0x1F,
  FFT_SWIZZLE_MAX       = 0x1F,

  // ROTATE encodings

  ROTATE_MAX_SIZE       = 0x1F,
  ROTATE_DIR_SHIFT      = 10, // bit position of rotate direction
  ROTATE_DIR_MASK       = 0x1,
  ROTATE_SIZE_SHIFT     = 5,  // bit position of rotate size
  ROTATE_SIZE_MASK      = ROTATE_MAX_SIZE,
};
// clang-format on

} // namespace Swizzle
878 | |
namespace SDWA {

// Selector values: four bytes, two words, or the whole dword.
enum SdwaSel : unsigned {
  BYTE_0 = 0,
  BYTE_1 = 1,
  BYTE_2 = 2,
  BYTE_3 = 3,
  WORD_0 = 4,
  WORD_1 = 5,
  DWORD = 6,
};

// dst_unused values.
enum DstUnused : unsigned {
  UNUSED_PAD = 0,
  UNUSED_SEXT = 1,
  UNUSED_PRESERVE = 2,
};

// Encoding values for SDWA9 operands.
enum SDWA9EncValues : unsigned {
  // Masks.
  SRC_SGPR_MASK = 0x100,
  SRC_VGPR_MASK = 0xFF,
  VOPC_DST_VCC_MASK = 0x80,
  VOPC_DST_SGPR_MASK = 0x7F,

  // Inclusive source encoding ranges.
  SRC_VGPR_MIN = 0,
  SRC_VGPR_MAX = 255,
  SRC_SGPR_MIN = 256,
  SRC_SGPR_MAX_SI = 357,
  SRC_SGPR_MAX_GFX10 = 361,
  SRC_TTMP_MIN = 364,
  SRC_TTMP_MAX = 379,
};

} // namespace SDWA
913 | |
namespace DPP {

// clang-format off
// dpp_ctrl encoding values. The DPP_UNUSED* entries mark values that are
// labelled unused; several of them share a value with a *0 enumerator.
enum DppCtrl : unsigned {
  // Quad permutations.
  QUAD_PERM_FIRST   = 0,
  QUAD_PERM_ID      = 0xE4, // identity permutation
  QUAD_PERM_LAST    = 0xFF,

  // Row shift left.
  DPP_UNUSED1       = 0x100,
  ROW_SHL0          = 0x100,
  ROW_SHL_FIRST     = 0x101,
  ROW_SHL_LAST      = 0x10F,

  // Row shift right.
  DPP_UNUSED2       = 0x110,
  ROW_SHR0          = 0x110,
  ROW_SHR_FIRST     = 0x111,
  ROW_SHR_LAST      = 0x11F,

  // Row rotate right.
  DPP_UNUSED3       = 0x120,
  ROW_ROR0          = 0x120,
  ROW_ROR_FIRST     = 0x121,
  ROW_ROR_LAST      = 0x12F,

  // Wave shifts and rotates, each followed by a run of unused values.
  WAVE_SHL1         = 0x130,
  DPP_UNUSED4_FIRST = 0x131,
  DPP_UNUSED4_LAST  = 0x133,
  WAVE_ROL1         = 0x134,
  DPP_UNUSED5_FIRST = 0x135,
  DPP_UNUSED5_LAST  = 0x137,
  WAVE_SHR1         = 0x138,
  DPP_UNUSED6_FIRST = 0x139,
  DPP_UNUSED6_LAST  = 0x13B,
  WAVE_ROR1         = 0x13C,
  DPP_UNUSED7_FIRST = 0x13D,
  DPP_UNUSED7_LAST  = 0x13F,

  // Mirrors and broadcasts.
  ROW_MIRROR        = 0x140,
  ROW_HALF_MIRROR   = 0x141,
  BCAST15           = 0x142,
  BCAST31           = 0x143,
  DPP_UNUSED8_FIRST = 0x144,
  DPP_UNUSED8_LAST  = 0x14F,

  // ROW_NEWBCAST and ROW_SHARE cover the same value range.
  ROW_NEWBCAST_FIRST= 0x150,
  ROW_NEWBCAST_LAST = 0x15F,
  ROW_SHARE0        = 0x150,
  ROW_SHARE_FIRST   = 0x150,
  ROW_SHARE_LAST    = 0x15F,

  ROW_XMASK0        = 0x160,
  ROW_XMASK_FIRST   = 0x160,
  ROW_XMASK_LAST    = 0x16F,

  DPP_LAST          = ROW_XMASK_LAST
};
// clang-format on

// Fetch-inactive (FI) mode values, for DPP and for DPP8.
enum DppFiMode {
  DPP_FI_0 = 0,
  DPP_FI_1 = 1,
  DPP8_FI_0 = 0xE9,
  DPP8_FI_1 = 0xEA,
};

} // namespace DPP
971 | |
namespace Exp {

// Export target encodings, followed by the largest index accepted for each
// kind of target.
enum Target : unsigned {
  ET_MRT0 = 0,
  ET_MRT7 = 7,
  ET_MRTZ = 8,
  ET_NULL = 9,             // Pre-GFX11
  ET_POS0 = 12,
  ET_POS3 = 15,
  ET_POS4 = 16,            // GFX10+
  ET_POS_LAST = ET_POS4,   // Highest pos used on any subtarget
  ET_PRIM = 20,            // GFX10+
  ET_DUAL_SRC_BLEND0 = 21, // GFX11+
  ET_DUAL_SRC_BLEND1 = 22, // GFX11+
  ET_PARAM0 = 32,          // Pre-GFX11
  ET_PARAM31 = 63,         // Pre-GFX11

  // Maximum index per target kind.
  ET_NULL_MAX_IDX = 0,
  ET_MRTZ_MAX_IDX = 0,
  ET_PRIM_MAX_IDX = 0,
  ET_MRT_MAX_IDX = 7,
  ET_POS_MAX_IDX = 4,
  ET_DUAL_SRC_BLEND_MAX_IDX = 1,
  ET_PARAM_MAX_IDX = 31,

  ET_INVALID = 255,
};

} // namespace Exp
1001 | |
namespace VOP3PEncoding {

// Single-bit masks of the three op_sel_hi bits in a 64-bit word. Note that
// OP_SEL_HI_2 is not adjacent to the other two.
enum OpSel : uint64_t {
  OP_SEL_HI_0 = UINT64_C(1) << 59,
  OP_SEL_HI_1 = UINT64_C(1) << 60,
  OP_SEL_HI_2 = UINT64_C(1) << 14,
};

} // namespace VOP3PEncoding
1011 | |
namespace ImplicitArg {
// Implicit kernel argument offsets for code object version 5.
enum Offset_COV5 : unsigned {
  HOSTCALL_PTR_OFFSET = 80,
  MULTIGRID_SYNC_ARG_OFFSET = 88,
  HEAP_PTR_OFFSET = 96,

  DEFAULT_QUEUE_OFFSET = 104,
  COMPLETION_ACTION_OFFSET = 112,

  PRIVATE_BASE_OFFSET = 192,
  SHARED_BASE_OFFSET = 196,
  QUEUE_PTR_OFFSET = 200,
};

} // namespace ImplicitArg
1028 | |
namespace MFMAScaleFormats {
// Values used in cbsz/blgp of F8F6F4 MFMA operations to select the matrix
// format.
enum MFMAScaleFormats {
  FP8_E4M3 = 0,
  FP8_E5M2 = 1,
  FP6_E2M3 = 2,
  FP6_E3M2 = 3,
  FP4_E2M1 = 4
};
} // namespace MFMAScaleFormats
1040 | |
namespace VirtRegFlag {
// Flags attached to virtual registers; they drive various target specific
// handling during codegen.
enum Register_Flag : uint8_t {
  // Register operand in a whole-wave mode operation.
  WWM_REG = 0x01, // bit 0
};

} // namespace VirtRegFlag
1050 | |
1051 | } // namespace AMDGPU |
1052 | |
namespace AMDGPU {
namespace Barrier {

// Barrier types; both values are negative.
enum Type {
  TRAP = -2,
  WORKGROUP = -1
};

// Barrier scopes.
enum {
  BARRIER_SCOPE_WORKGROUP = 0,
};

} // namespace Barrier
} // namespace AMDGPU
1064 | |
1065 | // clang-format off |
1066 | |
// SPI_SHADER_PGM_RSRC1_PS. For each field, S_* masks a value and shifts it
// into place, G_* extracts the field from a register value, and C_* is the
// 32-bit mask with the field's bits cleared.
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS                                0x00B028
#define   S_00B028_VGPRS(x)                                           (((x) & 0x3F) << 0)
#define   S_00B028_SGPRS(x)                                           (((x) & 0x0F) << 6)
#define   S_00B028_MEM_ORDERED(x)                                     (((x) & 0x1) << 25)
#define   G_00B028_MEM_ORDERED(x)                                     (((x) >> 25) & 0x1)
#define   C_00B028_MEM_ORDERED                                        0xFDFFFFFF
1073 | |
// SPI_SHADER_PGM_RSRC2_PS. S_00B02C_EXTRA_LDS_SIZE places the low 8 bits of
// its argument into bits [15:8] of the register value. The macro name had
// been lost (leaving `#define (x) ...`, which does not preprocess); it is
// restored following the S_<register>_<FIELD> pattern used by the neighbouring
// macros.
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS                                0x00B02C
#define   S_00B02C_EXTRA_LDS_SIZE(x)                                  (((x) & 0xFF) << 8)
// SPI_SHADER_PGM_RSRC1_VS. MEM_ORDERED is a single bit at position 27:
// S_* sets it, G_* reads it, C_* is the mask that clears it.
#define R_00B128_SPI_SHADER_PGM_RSRC1_VS                                0x00B128
#define   S_00B128_MEM_ORDERED(x)                                     (((x) & 0x1) << 27)
#define   G_00B128_MEM_ORDERED(x)                                     (((x) >> 27) & 0x1)
#define   C_00B128_MEM_ORDERED                                        0xF7FFFFFF
1080 | |
// SPI_SHADER_PGM_RSRC1_GS. WGP_MODE is bit 27 and MEM_ORDERED is bit 25; for
// each, S_* sets the bit, G_* reads it and C_* is the mask that clears it.
#define R_00B228_SPI_SHADER_PGM_RSRC1_GS                                0x00B228
#define   S_00B228_WGP_MODE(x)                                        (((x) & 0x1) << 27)
#define   G_00B228_WGP_MODE(x)                                        (((x) >> 27) & 0x1)
#define   C_00B228_WGP_MODE                                           0xF7FFFFFF
#define   S_00B228_MEM_ORDERED(x)                                     (((x) & 0x1) << 25)
#define   G_00B228_MEM_ORDERED(x)                                     (((x) >> 25) & 0x1)
#define   C_00B228_MEM_ORDERED                                        0xFDFFFFFF
1088 | |
// SPI_SHADER_PGM_RSRC1_ES / _HS / _LS. Only the HS register has field macros
// here: WGP_MODE is bit 26 and MEM_ORDERED is bit 24 (S_* sets, G_* reads,
// C_* is the clearing mask).
#define R_00B328_SPI_SHADER_PGM_RSRC1_ES                                0x00B328
#define R_00B428_SPI_SHADER_PGM_RSRC1_HS                                0x00B428
#define   S_00B428_WGP_MODE(x)                                        (((x) & 0x1) << 26)
#define   G_00B428_WGP_MODE(x)                                        (((x) >> 26) & 0x1)
#define   C_00B428_WGP_MODE                                           0xFBFFFFFF
#define   S_00B428_MEM_ORDERED(x)                                     (((x) & 0x1) << 24)
#define   G_00B428_MEM_ORDERED(x)                                     (((x) >> 24) & 0x1)
#define   C_00B428_MEM_ORDERED                                        0xFEFFFFFF

#define R_00B528_SPI_SHADER_PGM_RSRC1_LS                                0x00B528
1099 | |
// COMPUTE_PGM_RSRC2. Each field has three macros: S_* masks a value and shifts
// it into the field, G_* extracts the field from a register value, and C_* is
// the 32-bit mask with the field's bits cleared.
#define R_00B84C_COMPUTE_PGM_RSRC2                                      0x00B84C
#define   S_00B84C_SCRATCH_EN(x)                                      (((x) & 0x1) << 0)
#define   G_00B84C_SCRATCH_EN(x)                                      (((x) >> 0) & 0x1)
#define   C_00B84C_SCRATCH_EN                                         0xFFFFFFFE
#define   S_00B84C_USER_SGPR(x)                                       (((x) & 0x1F) << 1)
#define   G_00B84C_USER_SGPR(x)                                       (((x) >> 1) & 0x1F)
#define   C_00B84C_USER_SGPR                                          0xFFFFFFC1
#define   S_00B84C_TRAP_HANDLER(x)                                    (((x) & 0x1) << 6)
#define   G_00B84C_TRAP_HANDLER(x)                                    (((x) >> 6) & 0x1)
#define   C_00B84C_TRAP_HANDLER                                       0xFFFFFFBF
#define   S_00B84C_TGID_X_EN(x)                                       (((x) & 0x1) << 7)
#define   G_00B84C_TGID_X_EN(x)                                       (((x) >> 7) & 0x1)
#define   C_00B84C_TGID_X_EN                                          0xFFFFFF7F
#define   S_00B84C_TGID_Y_EN(x)                                       (((x) & 0x1) << 8)
#define   G_00B84C_TGID_Y_EN(x)                                       (((x) >> 8) & 0x1)
#define   C_00B84C_TGID_Y_EN                                          0xFFFFFEFF
#define   S_00B84C_TGID_Z_EN(x)                                       (((x) & 0x1) << 9)
#define   G_00B84C_TGID_Z_EN(x)                                       (((x) >> 9) & 0x1)
#define   C_00B84C_TGID_Z_EN                                          0xFFFFFDFF
#define   S_00B84C_TG_SIZE_EN(x)                                      (((x) & 0x1) << 10)
#define   G_00B84C_TG_SIZE_EN(x)                                      (((x) >> 10) & 0x1)
#define   C_00B84C_TG_SIZE_EN                                         0xFFFFFBFF
#define   S_00B84C_TIDIG_COMP_CNT(x)                                  (((x) & 0x03) << 11)
#define   G_00B84C_TIDIG_COMP_CNT(x)                                  (((x) >> 11) & 0x03)
#define   C_00B84C_TIDIG_COMP_CNT                                     0xFFFFE7FF
/* CIK */
#define   S_00B84C_EXCP_EN_MSB(x)                                     (((x) & 0x03) << 13)
#define   G_00B84C_EXCP_EN_MSB(x)                                     (((x) >> 13) & 0x03)
#define   C_00B84C_EXCP_EN_MSB                                        0xFFFF9FFF
/*     */
#define   S_00B84C_LDS_SIZE(x)                                        (((x) & 0x1FF) << 15)
#define   G_00B84C_LDS_SIZE(x)                                        (((x) >> 15) & 0x1FF)
#define   C_00B84C_LDS_SIZE                                           0xFF007FFF
#define   S_00B84C_EXCP_EN(x)                                         (((x) & 0x7F) << 24)
#define   G_00B84C_EXCP_EN(x)                                         (((x) >> 24) & 0x7F)
#define   C_00B84C_EXCP_EN                                            0x80FFFFFF
1136 | |
// SPI_PS_INPUT_ENA / SPI_PS_INPUT_ADDR register constants (no field helpers
// are defined for them here).
#define R_0286CC_SPI_PS_INPUT_ENA             0x0286CC
#define R_0286D0_SPI_PS_INPUT_ADDR            0x0286D0
1139 | |
// COMPUTE_PGM_RSRC1 register constant and its field helpers.
//   S_* : mask a value to the field width and shift it into position.
//   G_* : shift a register word right and mask out the field.
//   C_* : 32-bit mask with only that field's bits cleared.
#define R_00B848_COMPUTE_PGM_RSRC1            0x00B848
#define   S_00B848_VGPRS(x)                   (((x) & 0x3F) << 0)
#define   G_00B848_VGPRS(x)                   (((x) >> 0) & 0x3F)
#define   C_00B848_VGPRS                      0xFFFFFFC0
#define   S_00B848_SGPRS(x)                   (((x) & 0x0F) << 6)
#define   G_00B848_SGPRS(x)                   (((x) >> 6) & 0x0F)
#define   C_00B848_SGPRS                      0xFFFFFC3F
#define   S_00B848_PRIORITY(x)                (((x) & 0x03) << 10)
#define   G_00B848_PRIORITY(x)                (((x) >> 10) & 0x03)
#define   C_00B848_PRIORITY                   0xFFFFF3FF
#define   S_00B848_FLOAT_MODE(x)              (((x) & 0xFF) << 12)
#define   G_00B848_FLOAT_MODE(x)              (((x) >> 12) & 0xFF)
#define   C_00B848_FLOAT_MODE                 0xFFF00FFF
#define   S_00B848_PRIV(x)                    (((x) & 0x1) << 20)
#define   G_00B848_PRIV(x)                    (((x) >> 20) & 0x1)
#define   C_00B848_PRIV                       0xFFEFFFFF
// DX10_CLAMP and RR_WG_MODE are both defined on bit 21.
// NOTE(review): presumably the bit's meaning differs by hardware generation;
// confirm against the callers before using both on the same target.
#define   S_00B848_DX10_CLAMP(x)              (((x) & 0x1) << 21)
#define   G_00B848_DX10_CLAMP(x)              (((x) >> 21) & 0x1)
#define   C_00B848_DX10_CLAMP                 0xFFDFFFFF
#define   S_00B848_RR_WG_MODE(x)              (((x) & 0x1) << 21)
#define   G_00B848_RR_WG_MODE(x)              (((x) >> 21) & 0x1)
#define   C_00B848_RR_WG_MODE                 0xFFDFFFFF
#define   S_00B848_DEBUG_MODE(x)              (((x) & 0x1) << 22)
#define   G_00B848_DEBUG_MODE(x)              (((x) >> 22) & 0x1)
#define   C_00B848_DEBUG_MODE                 0xFFBFFFFF
#define   S_00B848_IEEE_MODE(x)               (((x) & 0x1) << 23)
#define   G_00B848_IEEE_MODE(x)               (((x) >> 23) & 0x1)
#define   C_00B848_IEEE_MODE                  0xFF7FFFFF
#define   S_00B848_WGP_MODE(x)                (((x) & 0x1) << 29)
#define   G_00B848_WGP_MODE(x)                (((x) >> 29) & 0x1)
#define   C_00B848_WGP_MODE                   0xDFFFFFFF
#define   S_00B848_MEM_ORDERED(x)             (((x) & 0x1) << 30)
#define   G_00B848_MEM_ORDERED(x)             (((x) >> 30) & 0x1)
#define   C_00B848_MEM_ORDERED                0xBFFFFFFF
#define   S_00B848_FWD_PROGRESS(x)            (((x) & 0x1) << 31)
#define   G_00B848_FWD_PROGRESS(x)            (((x) >> 31) & 0x1)
#define   C_00B848_FWD_PROGRESS               0x7FFFFFFF
1177 | |
// Helpers for setting FLOAT_MODE
#define FP_ROUND_ROUND_TO_NEAREST 0
#define FP_ROUND_ROUND_TO_INF 1
#define FP_ROUND_ROUND_TO_NEGINF 2
#define FP_ROUND_ROUND_TO_ZERO 3

// Bits 3:0 control rounding mode. 1:0 control single precision, 3:2 double
// precision.
#define FP_ROUND_MODE_SP(x) ((x) & 0x3)
#define FP_ROUND_MODE_DP(x) (((x) & 0x3) << 2)

#define FP_DENORM_FLUSH_IN_FLUSH_OUT 0
#define FP_DENORM_FLUSH_OUT 1
#define FP_DENORM_FLUSH_IN 2
#define FP_DENORM_FLUSH_NONE 3


// Bits 7:4 control denormal handling. 5:4 control single precision, 7:6 double
// precision.
#define FP_DENORM_MODE_SP(x) (((x) & 0x3) << 4)
#define FP_DENORM_MODE_DP(x) (((x) & 0x3) << 6)
1199 | |
// COMPUTE_TMPRING_SIZE register constant. The WAVESIZE field always starts at
// bit 12; the three setters differ only in field width (13, 15 and 18 bits),
// selected by the suffix in the macro name.
#define R_00B860_COMPUTE_TMPRING_SIZE         0x00B860
#define   S_00B860_WAVESIZE_PreGFX11(x)       (((x) & 0x1FFF) << 12)
#define   S_00B860_WAVESIZE_GFX11(x)          (((x) & 0x7FFF) << 12)
#define   S_00B860_WAVESIZE_GFX12Plus(x)      (((x) & 0x3FFFF) << 12)
1204 | |
// SPI_TMPRING_SIZE register constant. The WAVESIZE field always starts at
// bit 12; the three setters differ only in field width (13, 15 and 18 bits),
// selected by the suffix in the macro name.
#define R_0286E8_SPI_TMPRING_SIZE             0x0286E8
#define   S_0286E8_WAVESIZE_PreGFX11(x)       (((x) & 0x1FFF) << 12)
#define   S_0286E8_WAVESIZE_GFX11(x)          (((x) & 0x7FFF) << 12)
#define   S_0286E8_WAVESIZE_GFX12Plus(x)      (((x) & 0x3FFFF) << 12)
1209 | |
// Register constants carrying the per-stage *_W32_EN single-bit fields:
// HS/GS/VS live in VGT_SHADER_STAGES_EN (bits 21/22/23), PS in
// SPI_PS_IN_CONTROL (bit 15) and CS in COMPUTE_DISPATCH_INITIATOR (bit 15).
#define R_028B54_VGT_SHADER_STAGES_EN         0x028B54
#define   S_028B54_HS_W32_EN(x)               (((x) & 0x1) << 21)
#define   S_028B54_GS_W32_EN(x)               (((x) & 0x1) << 22)
#define   S_028B54_VS_W32_EN(x)               (((x) & 0x1) << 23)
#define R_0286D8_SPI_PS_IN_CONTROL            0x0286D8
#define   S_0286D8_PS_W32_EN(x)               (((x) & 0x1) << 15)
#define R_00B800_COMPUTE_DISPATCH_INITIATOR   0x00B800
#define   S_00B800_CS_W32_EN(x)               (((x) & 0x1) << 15)
1218 | |
// Constants identifying the spilled-SGPR and spilled-VGPR counts.
// NOTE(review): unlike the other R_* names these carry no address in the
// identifier; presumably they are pseudo-register keys rather than hardware
// registers -- confirm against the code that emits them.
#define R_SPILLED_SGPRS                       0x4
#define R_SPILLED_VGPRS                       0x8
1221 | |
1222 | // clang-format on |
1223 | |
1224 | } // End namespace llvm |
1225 | |
1226 | #endif |
1227 | |