1 | //===-- SIDefines.h - SI Helper Macros ----------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | /// \file |
8 | //===----------------------------------------------------------------------===// |
9 | |
10 | #ifndef LLVM_LIB_TARGET_AMDGPU_SIDEFINES_H |
11 | #define LLVM_LIB_TARGET_AMDGPU_SIDEFINES_H |
12 | |
13 | #include "llvm/MC/MCInstrDesc.h" |
14 | |
15 | namespace llvm { |
16 | |
// Register-class flag layout. This needs to be kept in sync with the field
// bits in SIRegisterClass.
enum SIRCFlags : uint8_t {
  // Width of the low bit-field covered by RegTupleAlignUnitsMask.
  RegTupleAlignUnitsWidth = 2,

  // Bit positions of the register-kind flags; they start directly above the
  // RegTupleAlignUnits field.
  HasVGPRBit = RegTupleAlignUnitsWidth,
  HasAGPRBit = HasVGPRBit + 1,
  HasSGPRbit = HasAGPRBit + 1,

  // Single-bit masks derived from the positions above.
  HasVGPR = 1 << HasVGPRBit,
  HasAGPR = 1 << HasAGPRBit,
  HasSGPR = 1 << HasSGPRbit,

  // Combined masks.
  RegTupleAlignUnitsMask = (1 << RegTupleAlignUnitsWidth) - 1,
  RegKindMask = HasVGPR | HasAGPR | HasSGPR
}; // enum SIRCFlags
31 | |
namespace SIEncodingFamily {
// Encoding family identifiers, numbered consecutively from zero. This must be
// kept in sync with the SIEncodingFamily class in SIInstrInfo.td and the
// columns of the getMCOpcodeGen table.
enum {
  SI = 0,
  VI,     // 1
  SDWA,   // 2
  SDWA9,  // 3
  GFX80,  // 4
  GFX9,   // 5
  GFX10,  // 6
  SDWA10, // 7
  GFX90A, // 8
  GFX940, // 9
  GFX11,  // 10
  GFX12,  // 11
};
} // namespace SIEncodingFamily
50 | |
namespace SIInstrFlags {
// Instruction flag bits; every enumerator is a single bit of a 64-bit word.
// This needs to be kept in sync with the field bits in InstSI.
enum : uint64_t {
  // Low bits - basic encoding information.
  SALU = UINT64_C(1) << 0,
  VALU = UINT64_C(1) << 1,

  // SALU instruction formats.
  SOP1 = UINT64_C(1) << 2,
  SOP2 = UINT64_C(1) << 3,
  SOPC = UINT64_C(1) << 4,
  SOPK = UINT64_C(1) << 5,
  SOPP = UINT64_C(1) << 6,

  // VALU instruction formats.
  VOP1 = UINT64_C(1) << 7,
  VOP2 = UINT64_C(1) << 8,
  VOPC = UINT64_C(1) << 9,

  // TODO: Should this be split into VOP3 a and b?
  VOP3 = UINT64_C(1) << 10,
  // Note: bit 11 is not assigned by any enumerator in this list.
  VOP3P = UINT64_C(1) << 12,

  VINTRP = UINT64_C(1) << 13,
  SDWA = UINT64_C(1) << 14,
  DPP = UINT64_C(1) << 15,
  TRANS = UINT64_C(1) << 16,

  // Memory instruction formats.
  MUBUF = UINT64_C(1) << 17,
  MTBUF = UINT64_C(1) << 18,
  SMRD = UINT64_C(1) << 19,
  MIMG = UINT64_C(1) << 20,
  VIMAGE = UINT64_C(1) << 21,
  VSAMPLE = UINT64_C(1) << 22,
  EXP = UINT64_C(1) << 23,
  FLAT = UINT64_C(1) << 24,
  DS = UINT64_C(1) << 25,

  // Combined SGPR/VGPR Spill bit.
  // Logic to separate them out is done in isSGPRSpill and isVGPRSpill.
  Spill = UINT64_C(1) << 26,

  // LDSDIR instruction format.
  LDSDIR = UINT64_C(1) << 28,

  // VINTERP instruction format.
  VINTERP = UINT64_C(1) << 29,

  // High bits - other information.
  VM_CNT = UINT64_C(1) << 32,
  EXP_CNT = UINT64_C(1) << 33,
  LGKM_CNT = UINT64_C(1) << 34,

  WQM = UINT64_C(1) << 35,
  DisableWQM = UINT64_C(1) << 36,
  Gather4 = UINT64_C(1) << 37,

  // Reserved, must be 0.
  Reserved0 = UINT64_C(1) << 38,

  SCALAR_STORE = UINT64_C(1) << 39,
  FIXED_SIZE = UINT64_C(1) << 40,

  // Reserved, must be 0.
  Reserved1 = UINT64_C(1) << 41,

  VOP3_OPSEL = UINT64_C(1) << 42,
  maybeAtomic = UINT64_C(1) << 43,
  renamedInGFX9 = UINT64_C(1) << 44,

  // Is a clamp on FP type.
  FPClamp = UINT64_C(1) << 45,

  // Is an integer clamp.
  IntClamp = UINT64_C(1) << 46,

  // Clamps lo component of register.
  ClampLo = UINT64_C(1) << 47,

  // Clamps hi component of register.
  // ClampLo and ClampHi set for packed clamp.
  ClampHi = UINT64_C(1) << 48,

  // Is a packed VOP3P instruction.
  IsPacked = UINT64_C(1) << 49,

  // Is a D16 buffer instruction.
  D16Buf = UINT64_C(1) << 50,

  // FLAT instruction accesses FLAT_GLBL segment.
  FlatGlobal = UINT64_C(1) << 51,

  // Uses floating point double precision rounding mode.
  FPDPRounding = UINT64_C(1) << 52,

  // Instruction is FP atomic.
  FPAtomic = UINT64_C(1) << 53,

  // Is a MFMA instruction.
  IsMAI = UINT64_C(1) << 54,

  // Is a DOT instruction.
  IsDOT = UINT64_C(1) << 55,

  // FLAT instruction accesses FLAT_SCRATCH segment.
  FlatScratch = UINT64_C(1) << 56,

  // Atomic without return.
  IsAtomicNoRet = UINT64_C(1) << 57,

  // Atomic with return.
  IsAtomicRet = UINT64_C(1) << 58,

  // Is a WMMA instruction.
  IsWMMA = UINT64_C(1) << 59,

  // Whether tied sources will be read.
  TiedSourceNotRead = UINT64_C(1) << 60,

  // Is never uniform.
  IsNeverUniform = UINT64_C(1) << 61,

  // ds_gws_* instructions.
  GWS = UINT64_C(1) << 62,

  // Is a SWMMAC instruction.
  IsSWMMAC = UINT64_C(1) << 63,
};

// v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
// The result is true if any of these tests are true.
enum ClassFlags : unsigned {
  S_NAN = 0x001,       // Signaling NaN
  Q_NAN = 0x002,       // Quiet NaN
  N_INFINITY = 0x004,  // Negative infinity
  N_NORMAL = 0x008,    // Negative normal
  N_SUBNORMAL = 0x010, // Negative subnormal
  N_ZERO = 0x020,      // Negative zero
  P_ZERO = 0x040,      // Positive zero
  P_SUBNORMAL = 0x080, // Positive subnormal
  P_NORMAL = 0x100,    // Positive normal
  P_INFINITY = 0x200   // Positive infinity
};
} // namespace SIInstrFlags
196 | |
197 | namespace AMDGPU { |
198 | enum OperandType : unsigned { |
199 | /// Operands with register or 32-bit immediate |
200 | OPERAND_REG_IMM_INT32 = MCOI::OPERAND_FIRST_TARGET, |
201 | OPERAND_REG_IMM_INT64, |
202 | OPERAND_REG_IMM_INT16, |
203 | OPERAND_REG_IMM_FP32, |
204 | OPERAND_REG_IMM_FP64, |
205 | OPERAND_REG_IMM_BF16, |
206 | OPERAND_REG_IMM_FP16, |
207 | OPERAND_REG_IMM_BF16_DEFERRED, |
208 | OPERAND_REG_IMM_FP16_DEFERRED, |
209 | OPERAND_REG_IMM_FP32_DEFERRED, |
210 | OPERAND_REG_IMM_V2BF16, |
211 | OPERAND_REG_IMM_V2FP16, |
212 | OPERAND_REG_IMM_V2INT16, |
213 | OPERAND_REG_IMM_V2INT32, |
214 | OPERAND_REG_IMM_V2FP32, |
215 | |
216 | /// Operands with register or inline constant |
217 | OPERAND_REG_INLINE_C_INT16, |
218 | OPERAND_REG_INLINE_C_INT32, |
219 | OPERAND_REG_INLINE_C_INT64, |
220 | OPERAND_REG_INLINE_C_BF16, |
221 | OPERAND_REG_INLINE_C_FP16, |
222 | OPERAND_REG_INLINE_C_FP32, |
223 | OPERAND_REG_INLINE_C_FP64, |
224 | OPERAND_REG_INLINE_C_V2INT16, |
225 | OPERAND_REG_INLINE_C_V2BF16, |
226 | OPERAND_REG_INLINE_C_V2FP16, |
227 | OPERAND_REG_INLINE_C_V2INT32, |
228 | OPERAND_REG_INLINE_C_V2FP32, |
229 | |
230 | // Operand for split barrier inline constant |
231 | OPERAND_INLINE_SPLIT_BARRIER_INT32, |
232 | |
233 | /// Operand with 32-bit immediate that uses the constant bus. |
234 | OPERAND_KIMM32, |
235 | OPERAND_KIMM16, |
236 | |
237 | /// Operands with an AccVGPR register or inline constant |
238 | OPERAND_REG_INLINE_AC_INT16, |
239 | OPERAND_REG_INLINE_AC_INT32, |
240 | OPERAND_REG_INLINE_AC_BF16, |
241 | OPERAND_REG_INLINE_AC_FP16, |
242 | OPERAND_REG_INLINE_AC_FP32, |
243 | OPERAND_REG_INLINE_AC_FP64, |
244 | OPERAND_REG_INLINE_AC_V2INT16, |
245 | OPERAND_REG_INLINE_AC_V2BF16, |
246 | OPERAND_REG_INLINE_AC_V2FP16, |
247 | OPERAND_REG_INLINE_AC_V2INT32, |
248 | OPERAND_REG_INLINE_AC_V2FP32, |
249 | |
250 | // Operand for source modifiers for VOP instructions |
251 | OPERAND_INPUT_MODS, |
252 | |
253 | // Operand for SDWA instructions |
254 | OPERAND_SDWA_VOPC_DST, |
255 | |
256 | OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32, |
257 | OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2FP32, |
258 | |
259 | OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16, |
260 | OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_V2FP32, |
261 | |
262 | OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT16, |
263 | OPERAND_REG_INLINE_AC_LAST = OPERAND_REG_INLINE_AC_V2FP32, |
264 | |
265 | OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32, |
266 | OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST, |
267 | |
268 | OPERAND_KIMM_FIRST = OPERAND_KIMM32, |
269 | OPERAND_KIMM_LAST = OPERAND_KIMM16 |
270 | |
271 | }; |
272 | |
// Operand semantics identifiers, numbered consecutively from zero.
// Should be in sync with the OperandSemantics defined in SIRegisterInfo.td
enum OperandSemantics : unsigned {
  INT = 0,
  FP16, // 1
  BF16, // 2
  FP32, // 3
  FP64, // 4
};
281 | } |
282 | |
// Bit-masks for input operand modifiers.
// NEG and SEXT share the same bit-mask because they can't be set
// simultaneously.
namespace SISrcMods {
enum : unsigned {
  NONE = 0x0,
  NEG = 0x1,       // Floating-point negate modifier
  ABS = 0x2,       // Floating-point absolute modifier
  SEXT = 0x1,      // Integer sign-extend modifier (same bit as NEG)
  NEG_HI = ABS,    // Floating-point negate high packed component modifier.
  OP_SEL_0 = 0x4,
  OP_SEL_1 = 0x8,
  DST_OP_SEL = 0x8 // VOP3 dst op_sel (share mask with OP_SEL_1)
};
} // namespace SISrcMods
297 | |
namespace SIOutMods {
// Output modifier values, numbered consecutively from zero.
enum : unsigned {
  NONE = 0,
  MUL2, // 1
  MUL4, // 2
  DIV2  // 3
};
} // namespace SIOutMods
306 | |
namespace AMDGPU {
namespace VGPRIndexMode {

// Ids of the symbolic names. Each id is also the bit position of the matching
// *_ENABLE flag in EncBits.
enum Id : unsigned {
  ID_SRC0 = 0,
  ID_SRC1 = 1,
  ID_SRC2 = 2,
  ID_DST = 3,

  ID_MIN = ID_SRC0,
  ID_MAX = ID_DST
};

// Encoded enable bits, one per Id, plus the combined mask and an UNDEF marker.
enum EncBits : unsigned {
  OFF = 0,
  SRC0_ENABLE = 1 << ID_SRC0,
  SRC1_ENABLE = 1 << ID_SRC1,
  SRC2_ENABLE = 1 << ID_SRC2,
  DST_ENABLE = 1 << ID_DST,
  // All enable bits from ID_MIN through ID_MAX.
  ENABLE_MASK = DST_ENABLE | SRC2_ENABLE | SRC1_ENABLE | SRC0_ENABLE,
  UNDEF = 0xFFFF
};

} // namespace VGPRIndexMode
} // namespace AMDGPU
332 | |
namespace AMDGPUAsmVariants {
// Assembler variant identifiers, numbered consecutively from zero.
enum : unsigned {
  DEFAULT = 0,
  VOP3,    // 1
  SDWA,    // 2
  SDWA9,   // 3
  DPP,     // 4
  VOP3_DPP // 5
};
} // namespace AMDGPUAsmVariants
343 | |
344 | namespace AMDGPU { |
namespace EncValues { // Encoding values of enum9/8/7 operands

enum : unsigned {
  // Scalar register range.
  SGPR_MIN = 0,
  SGPR_MAX_SI = 101,
  SGPR_MAX_GFX10 = 105,

  // TTMP register ranges.
  TTMP_VI_MIN = 112,
  TTMP_VI_MAX = 123,
  TTMP_GFX9PLUS_MIN = 108,
  TTMP_GFX9PLUS_MAX = 123,

  // Inline integer constants.
  INLINE_INTEGER_C_MIN = 128,
  INLINE_INTEGER_C_POSITIVE_MAX = INLINE_INTEGER_C_MIN + 64, // 192 ("64")
  INLINE_INTEGER_C_MAX = 208,

  // Inline floating-point constants.
  INLINE_FLOATING_C_MIN = 240,
  INLINE_FLOATING_C_MAX = 248,

  LITERAL_CONST = 255,

  // Vector register range.
  VGPR_MIN = 256,
  VGPR_MAX = 511,
  IS_VGPR = 256, // Indicates VGPR or AGPR
};

} // namespace EncValues
367 | |
// Register codes as defined in the TableGen's HWEncoding field.
namespace HWEncoding {
enum : unsigned {
  REG_IDX_MASK = 0x0FF,    // Low eight bits.
  IS_VGPR_OR_AGPR = 0x100, // Bit 8.
  IS_HI = 0x200,           // Bit 9: high 16-bit register.
};
} // namespace HWEncoding
376 | |
namespace CPol {

// Cache policy bits. Several enumerators deliberately share a value; the
// GFX12+ section below reuses the low bits declared in the first group.
enum CPol {
  GLC = 1 << 0,
  SLC = 1 << 1,
  DLC = 1 << 2,
  SCC = 1 << 4,
  SC0 = GLC,
  SC1 = SCC,
  NT = SLC,
  ALL_pregfx12 = GLC | SLC | DLC | SCC,
  SWZ_pregfx12 = 1 << 3,

  // Below are GFX12+ cache policy bits

  // Temporal hint
  TH = 0x7,      // All TH bits
  TH_RT = 0,     // regular
  TH_NT = 1,     // non-temporal
  TH_HT = 2,     // high-temporal
  TH_LU = 3,     // last use
  TH_RT_WB = 3,  // regular (CU, SE), high-temporal with write-back (MALL)
  TH_NT_RT = 4,  // non-temporal (CU, SE), regular (MALL)
  TH_RT_NT = 5,  // regular (CU, SE), non-temporal (MALL)
  TH_NT_HT = 6,  // non-temporal (CU, SE), high-temporal (MALL)
  TH_NT_WB = 7,  // non-temporal (CU, SE), high-temporal with write-back (MALL)
  TH_BYPASS = 3, // only to be used with scope = 3

  TH_RESERVED = 7, // unused value for load insts

  // Bits of TH for atomics
  TH_ATOMIC_RETURN = GLC, // Returning vs non-returning
  TH_ATOMIC_NT = SLC,     // Non-temporal vs regular
  TH_ATOMIC_CASCADE = 4,  // Cascading vs regular

  // Scope (a two-bit field starting at bit 3)
  SCOPE = 0x3 << 3, // All Scope bits
  SCOPE_CU = 0 << 3,
  SCOPE_SE = 1 << 3,
  SCOPE_DEV = 2 << 3,
  SCOPE_SYS = 3 << 3,

  SWZ = 1 << 6, // Swizzle bit

  ALL = TH | SCOPE,

  // Helper bits
  TH_TYPE_LOAD = 1 << 7,    // TH_LOAD policy
  TH_TYPE_STORE = 1 << 8,   // TH_STORE policy
  TH_TYPE_ATOMIC = 1 << 9,  // TH_ATOMIC policy
  TH_REAL_BYPASS = 1 << 10, // is TH=3 bypass policy or not

  // Volatile (used to preserve/signal operation volatility for buffer
  // operations not a real instruction bit)
  VOLATILE = 1 << 31,
};

} // namespace CPol
435 | |
namespace SendMsg { // Encoding of SIMM16 used in s_sendmsg* insns.

// Message ID, width(4) [3:0]. Some numeric values are shared between
// enumerators whose names carry different generation suffixes.
enum Id {
  ID_INTERRUPT = 1,

  ID_GS_PreGFX11 = 2,      // replaced in GFX11
  ID_GS_DONE_PreGFX11 = 3, // replaced in GFX11

  ID_HS_TESSFACTOR_GFX11Plus = 2, // reused in GFX11
  ID_DEALLOC_VGPRS_GFX11Plus = 3, // reused in GFX11

  ID_SAVEWAVE = 4,           // added in GFX8, removed in GFX11
  ID_STALL_WAVE_GEN = 5,     // added in GFX9, removed in GFX12
  ID_HALT_WAVES = 6,         // added in GFX9, removed in GFX12
  ID_ORDERED_PS_DONE = 7,    // added in GFX9, removed in GFX11
  ID_EARLY_PRIM_DEALLOC = 8, // added in GFX9, removed in GFX10
  ID_GS_ALLOC_REQ = 9,       // added in GFX9
  ID_GET_DOORBELL = 10,      // added in GFX9, removed in GFX11
  ID_GET_DDID = 11,          // added in GFX10, removed in GFX11
  ID_SYSMSG = 15,

  // RTN_* ids occupy 128..135.
  ID_RTN_GET_DOORBELL = 128,
  ID_RTN_GET_DDID = 129,
  ID_RTN_GET_TMA = 130,
  ID_RTN_GET_REALTIME = 131,
  ID_RTN_SAVE_WAVE = 132,
  ID_RTN_GET_TBA = 133,
  ID_RTN_GET_TBA_TO_PC = 134,
  ID_RTN_GET_SE_AID_ID = 135,

  ID_MASK_PreGFX11_ = 0xF,
  ID_MASK_GFX11Plus_ = 0xFF
};

// Both GS and SYS operation IDs.
enum Op {
  OP_NONE_ = 0,

  // Bits used for operation encoding
  OP_SHIFT_ = 4,
  OP_WIDTH_ = 3,
  OP_MASK_ = ((1 << OP_WIDTH_) - 1) << OP_SHIFT_,

  // GS operations are encoded in bits 5:4
  OP_GS_NOP = 0,
  OP_GS_CUT = 1,
  OP_GS_EMIT = 2,
  OP_GS_EMIT_CUT = 3,
  OP_GS_FIRST_ = OP_GS_NOP,

  // SYS operations are encoded in bits 6:4
  OP_SYS_ECC_ERR_INTERRUPT = 1,
  OP_SYS_REG_RD = 2,
  OP_SYS_HOST_TRAP_ACK = 3,
  OP_SYS_TTRACE_PC = 4,
  OP_SYS_FIRST_ = OP_SYS_ECC_ERR_INTERRUPT,
};

// Stream ID, (2) [9:8].
enum StreamId : unsigned {
  STREAM_ID_NONE_ = 0,
  STREAM_ID_DEFAULT_ = 0,
  STREAM_ID_LAST_ = 4,
  STREAM_ID_FIRST_ = STREAM_ID_DEFAULT_,

  // Field position within SIMM16.
  STREAM_ID_SHIFT_ = 8,
  STREAM_ID_WIDTH_ = 2,
  STREAM_ID_MASK_ = ((1 << STREAM_ID_WIDTH_) - 1) << STREAM_ID_SHIFT_
};

} // namespace SendMsg
501 | |
namespace Hwreg { // Encoding of SIMM16 used in s_setreg/getreg* insns.

// HwRegCode, (6) [5:0]. Several numeric values are shared between enumerators;
// the sections below group the reused numbers.
enum Id {
  ID_MODE = 1,
  ID_STATUS = 2,
  ID_TRAPSTS = 3,
  ID_HW_ID = 4,
  ID_GPR_ALLOC = 5,
  ID_LDS_ALLOC = 6,
  ID_IB_STS = 7,
  ID_PERF_SNAPSHOT_DATA_gfx12 = 10,
  ID_PERF_SNAPSHOT_PC_LO_gfx12 = 11,
  ID_PERF_SNAPSHOT_PC_HI_gfx12 = 12,
  ID_MEM_BASES = 15,
  ID_TBA_LO = 16,
  ID_TBA_HI = 17,
  ID_TMA_LO = 18,
  ID_TMA_HI = 19,
  ID_FLAT_SCR_LO = 20,
  ID_FLAT_SCR_HI = 21,
  ID_XNACK_MASK = 22,
  ID_HW_ID1 = 23,
  ID_HW_ID2 = 24,
  ID_POPS_PACKER = 25,
  ID_PERF_SNAPSHOT_DATA_gfx11 = 27,
  ID_SHADER_CYCLES = 29,
  ID_SHADER_CYCLES_HI = 30,
  ID_DVGPR_ALLOC_LO = 31,
  ID_DVGPR_ALLOC_HI = 32,

  // Register numbers reused in GFX11
  ID_PERF_SNAPSHOT_PC_LO_gfx11 = 18,
  ID_PERF_SNAPSHOT_PC_HI_gfx11 = 19,

  // Register numbers reused in GFX12+
  ID_STATE_PRIV = 4,
  ID_PERF_SNAPSHOT_DATA1 = 15,
  ID_PERF_SNAPSHOT_DATA2 = 16,
  ID_EXCP_FLAG_PRIV = 17,
  ID_EXCP_FLAG_USER = 18,
  ID_TRAP_CTRL = 19,

  // GFX940 specific registers
  ID_XCC_ID = 20,
  ID_SQ_PERF_SNAPSHOT_DATA = 21,
  ID_SQ_PERF_SNAPSHOT_DATA1 = 22,
  ID_SQ_PERF_SNAPSHOT_PC_LO = 23,
  ID_SQ_PERF_SNAPSHOT_PC_HI = 24,
};

// Offset, (5) [10:6]
enum Offset : unsigned {
  OFFSET_MEM_VIOL = 8,
};

// Bit masks for the fields listed below.
enum ModeRegisterMasks : uint32_t {
  FP_ROUND_MASK = 0xf << 0,  // Bits 0..3
  FP_DENORM_MASK = 0xf << 4, // Bits 4..7
  DX10_CLAMP_MASK = 1 << 8,
  IEEE_MODE_MASK = 1 << 9,
  LOD_CLAMP_MASK = 1 << 10,
  DEBUG_MASK = 1 << 11,

  // EXCP_EN fields (bits 12..18).
  EXCP_EN_INVALID_MASK = 1 << 12,
  EXCP_EN_INPUT_DENORMAL_MASK = 1 << 13,
  EXCP_EN_FLOAT_DIV0_MASK = 1 << 14,
  EXCP_EN_OVERFLOW_MASK = 1 << 15,
  EXCP_EN_UNDERFLOW_MASK = 1 << 16,
  EXCP_EN_INEXACT_MASK = 1 << 17,
  EXCP_EN_INT_DIV0_MASK = 1 << 18,

  GPR_IDX_EN_MASK = 1 << 27,
  VSKIP_MASK = 1 << 28,
  CSP_MASK = 0x7u << 29 // Bits 29..31
};

} // namespace Hwreg
579 | |
namespace MTBUFFormat {

// Data format codes 0..15, followed by range aliases, the undefined marker,
// the default, and the field's shift and mask.
enum DataFormat : int64_t {
  DFMT_INVALID = 0,
  DFMT_8 = 1,
  DFMT_16 = 2,
  DFMT_8_8 = 3,
  DFMT_32 = 4,
  DFMT_16_16 = 5,
  DFMT_10_11_11 = 6,
  DFMT_11_11_10 = 7,
  DFMT_10_10_10_2 = 8,
  DFMT_2_10_10_10 = 9,
  DFMT_8_8_8_8 = 10,
  DFMT_32_32 = 11,
  DFMT_16_16_16_16 = 12,
  DFMT_32_32_32 = 13,
  DFMT_32_32_32_32 = 14,
  DFMT_RESERVED_15 = 15,

  DFMT_MIN = DFMT_INVALID,
  DFMT_MAX = DFMT_RESERVED_15,

  DFMT_UNDEF = -1,
  DFMT_DEFAULT = DFMT_8,

  DFMT_SHIFT = 0,
  DFMT_MASK = 0xF
};

// Numeric format codes 0..7, followed by range aliases, the undefined marker,
// the default, and the field's shift and mask.
enum NumFormat : int64_t {
  NFMT_UNORM = 0,
  NFMT_SNORM = 1,
  NFMT_USCALED = 2,
  NFMT_SSCALED = 3,
  NFMT_UINT = 4,
  NFMT_SINT = 5,
  NFMT_RESERVED_6 = 6,              // VI and GFX9
  NFMT_SNORM_OGL = NFMT_RESERVED_6, // SI and CI only
  NFMT_FLOAT = 7,

  NFMT_MIN = NFMT_UNORM,
  NFMT_MAX = NFMT_FLOAT,

  NFMT_UNDEF = -1,
  NFMT_DEFAULT = NFMT_UNORM,

  NFMT_SHIFT = 4,
  NFMT_MASK = 7
};

// Combined value: the data format field OR-ed with the numeric format field,
// each masked and shifted to its position.
enum MergedFormat : int64_t {
  DFMT_NFMT_UNDEF = -1,
  DFMT_NFMT_DEFAULT = ((DFMT_DEFAULT & DFMT_MASK) << DFMT_SHIFT) |
                      ((NFMT_DEFAULT & NFMT_MASK) << NFMT_SHIFT),

  DFMT_NFMT_MASK = (DFMT_MASK << DFMT_SHIFT) | (NFMT_MASK << NFMT_SHIFT),

  DFMT_NFMT_MAX = DFMT_NFMT_MASK
};

// Constants shared by the unified format enums.
enum UnifiedFormatCommon : int64_t {
  UFMT_MAX = 127,
  UFMT_UNDEF = -1,
  UFMT_DEFAULT = 1
};

} // namespace MTBUFFormat
649 | |
namespace UfmtGFX10 {
// Unified format codes, numbered consecutively from UFMT_INVALID (0) through
// UFMT_32_32_32_32_FLOAT (77). The first entry of every group is pinned to its
// value so that the numbering can be checked at a glance.
enum UnifiedFormat : int64_t {
  UFMT_INVALID = 0,

  // 1..6
  UFMT_8_UNORM = 1,
  UFMT_8_SNORM,
  UFMT_8_USCALED,
  UFMT_8_SSCALED,
  UFMT_8_UINT,
  UFMT_8_SINT,

  // 7..13
  UFMT_16_UNORM = 7,
  UFMT_16_SNORM,
  UFMT_16_USCALED,
  UFMT_16_SSCALED,
  UFMT_16_UINT,
  UFMT_16_SINT,
  UFMT_16_FLOAT,

  // 14..19
  UFMT_8_8_UNORM = 14,
  UFMT_8_8_SNORM,
  UFMT_8_8_USCALED,
  UFMT_8_8_SSCALED,
  UFMT_8_8_UINT,
  UFMT_8_8_SINT,

  // 20..22
  UFMT_32_UINT = 20,
  UFMT_32_SINT,
  UFMT_32_FLOAT,

  // 23..29
  UFMT_16_16_UNORM = 23,
  UFMT_16_16_SNORM,
  UFMT_16_16_USCALED,
  UFMT_16_16_SSCALED,
  UFMT_16_16_UINT,
  UFMT_16_16_SINT,
  UFMT_16_16_FLOAT,

  // 30..36
  UFMT_10_11_11_UNORM = 30,
  UFMT_10_11_11_SNORM,
  UFMT_10_11_11_USCALED,
  UFMT_10_11_11_SSCALED,
  UFMT_10_11_11_UINT,
  UFMT_10_11_11_SINT,
  UFMT_10_11_11_FLOAT,

  // 37..43
  UFMT_11_11_10_UNORM = 37,
  UFMT_11_11_10_SNORM,
  UFMT_11_11_10_USCALED,
  UFMT_11_11_10_SSCALED,
  UFMT_11_11_10_UINT,
  UFMT_11_11_10_SINT,
  UFMT_11_11_10_FLOAT,

  // 44..49
  UFMT_10_10_10_2_UNORM = 44,
  UFMT_10_10_10_2_SNORM,
  UFMT_10_10_10_2_USCALED,
  UFMT_10_10_10_2_SSCALED,
  UFMT_10_10_10_2_UINT,
  UFMT_10_10_10_2_SINT,

  // 50..55
  UFMT_2_10_10_10_UNORM = 50,
  UFMT_2_10_10_10_SNORM,
  UFMT_2_10_10_10_USCALED,
  UFMT_2_10_10_10_SSCALED,
  UFMT_2_10_10_10_UINT,
  UFMT_2_10_10_10_SINT,

  // 56..61
  UFMT_8_8_8_8_UNORM = 56,
  UFMT_8_8_8_8_SNORM,
  UFMT_8_8_8_8_USCALED,
  UFMT_8_8_8_8_SSCALED,
  UFMT_8_8_8_8_UINT,
  UFMT_8_8_8_8_SINT,

  // 62..64
  UFMT_32_32_UINT = 62,
  UFMT_32_32_SINT,
  UFMT_32_32_FLOAT,

  // 65..71
  UFMT_16_16_16_16_UNORM = 65,
  UFMT_16_16_16_16_SNORM,
  UFMT_16_16_16_16_USCALED,
  UFMT_16_16_16_16_SSCALED,
  UFMT_16_16_16_16_UINT,
  UFMT_16_16_16_16_SINT,
  UFMT_16_16_16_16_FLOAT,

  // 72..74
  UFMT_32_32_32_UINT = 72,
  UFMT_32_32_32_SINT,
  UFMT_32_32_32_FLOAT,

  // 75..77
  UFMT_32_32_32_32_UINT = 75,
  UFMT_32_32_32_32_SINT,
  UFMT_32_32_32_32_FLOAT,

  UFMT_FIRST = UFMT_INVALID,
  UFMT_LAST = UFMT_32_32_32_32_FLOAT,
};

} // namespace UfmtGFX10
749 | |
namespace UfmtGFX11 {
// Unified format codes, numbered consecutively from UFMT_INVALID (0) through
// UFMT_32_32_32_32_FLOAT (63). The first entry of every group is pinned to its
// value so that the numbering can be checked at a glance.
enum UnifiedFormat : int64_t {
  UFMT_INVALID = 0,

  // 1..6
  UFMT_8_UNORM = 1,
  UFMT_8_SNORM,
  UFMT_8_USCALED,
  UFMT_8_SSCALED,
  UFMT_8_UINT,
  UFMT_8_SINT,

  // 7..13
  UFMT_16_UNORM = 7,
  UFMT_16_SNORM,
  UFMT_16_USCALED,
  UFMT_16_SSCALED,
  UFMT_16_UINT,
  UFMT_16_SINT,
  UFMT_16_FLOAT,

  // 14..19
  UFMT_8_8_UNORM = 14,
  UFMT_8_8_SNORM,
  UFMT_8_8_USCALED,
  UFMT_8_8_SSCALED,
  UFMT_8_8_UINT,
  UFMT_8_8_SINT,

  // 20..22
  UFMT_32_UINT = 20,
  UFMT_32_SINT,
  UFMT_32_FLOAT,

  // 23..29
  UFMT_16_16_UNORM = 23,
  UFMT_16_16_SNORM,
  UFMT_16_16_USCALED,
  UFMT_16_16_SSCALED,
  UFMT_16_16_UINT,
  UFMT_16_16_SINT,
  UFMT_16_16_FLOAT,

  // 30
  UFMT_10_11_11_FLOAT = 30,

  // 31
  UFMT_11_11_10_FLOAT = 31,

  // 32..35
  UFMT_10_10_10_2_UNORM = 32,
  UFMT_10_10_10_2_SNORM,
  UFMT_10_10_10_2_UINT,
  UFMT_10_10_10_2_SINT,

  // 36..41
  UFMT_2_10_10_10_UNORM = 36,
  UFMT_2_10_10_10_SNORM,
  UFMT_2_10_10_10_USCALED,
  UFMT_2_10_10_10_SSCALED,
  UFMT_2_10_10_10_UINT,
  UFMT_2_10_10_10_SINT,

  // 42..47
  UFMT_8_8_8_8_UNORM = 42,
  UFMT_8_8_8_8_SNORM,
  UFMT_8_8_8_8_USCALED,
  UFMT_8_8_8_8_SSCALED,
  UFMT_8_8_8_8_UINT,
  UFMT_8_8_8_8_SINT,

  // 48..50
  UFMT_32_32_UINT = 48,
  UFMT_32_32_SINT,
  UFMT_32_32_FLOAT,

  // 51..57
  UFMT_16_16_16_16_UNORM = 51,
  UFMT_16_16_16_16_SNORM,
  UFMT_16_16_16_16_USCALED,
  UFMT_16_16_16_16_SSCALED,
  UFMT_16_16_16_16_UINT,
  UFMT_16_16_16_16_SINT,
  UFMT_16_16_16_16_FLOAT,

  // 58..60
  UFMT_32_32_32_UINT = 58,
  UFMT_32_32_32_SINT,
  UFMT_32_32_32_FLOAT,

  // 61..63
  UFMT_32_32_32_32_UINT = 61,
  UFMT_32_32_32_32_SINT,
  UFMT_32_32_32_32_FLOAT,

  UFMT_FIRST = UFMT_INVALID,
  UFMT_LAST = UFMT_32_32_32_32_FLOAT,
};

} // namespace UfmtGFX11
835 | |
namespace Swizzle { // Encoding of swizzle macro used in ds_swizzle_b32.

// Ids of the symbolic names.
enum Id : unsigned {
  ID_QUAD_PERM = 0,
  ID_BITMASK_PERM = 1,
  ID_SWAP = 2,
  ID_REVERSE = 3,
  ID_BROADCAST = 4
};

enum EncBits : unsigned {

  // Swizzle mode encodings: each mode has an encoding value and the mask
  // under which that value is compared.

  QUAD_PERM_ENC = 0x8000,
  QUAD_PERM_ENC_MASK = 0xFF00,

  BITMASK_PERM_ENC = 0x0000,
  BITMASK_PERM_ENC_MASK = 0x8000,

  // QUAD_PERM encodings

  LANE_MASK = 0x3,
  LANE_MAX = LANE_MASK,
  LANE_SHIFT = 2,
  LANE_NUM = 4,

  // BITMASK_PERM encodings

  BITMASK_MASK = 0x1F,
  BITMASK_MAX = BITMASK_MASK,
  BITMASK_WIDTH = 5,

  // Shifts of the and/or/xor masks; consecutive multiples of BITMASK_WIDTH.
  BITMASK_AND_SHIFT = 0 * BITMASK_WIDTH,
  BITMASK_OR_SHIFT = 1 * BITMASK_WIDTH,
  BITMASK_XOR_SHIFT = 2 * BITMASK_WIDTH
};

} // namespace Swizzle
875 | |
namespace SDWA {

// Selector values, numbered consecutively from zero.
enum SdwaSel : unsigned {
  BYTE_0 = 0,
  BYTE_1, // 1
  BYTE_2, // 2
  BYTE_3, // 3
  WORD_0, // 4
  WORD_1, // 5
  DWORD,  // 6
};

// Values for the unused part of the destination, numbered from zero.
enum DstUnused : unsigned {
  UNUSED_PAD = 0,
  UNUSED_SEXT,     // 1
  UNUSED_PRESERVE, // 2
};

enum SDWA9EncValues : unsigned {
  // Masks.
  SRC_SGPR_MASK = 0x100,
  SRC_VGPR_MASK = 0xFF,
  VOPC_DST_VCC_MASK = 0x80,
  VOPC_DST_SGPR_MASK = 0x7F,

  // Source operand ranges. The SGPR and TTMP bounds are written as offsets
  // from SRC_SGPR_MIN (256).
  SRC_VGPR_MIN = 0,
  SRC_VGPR_MAX = 255,
  SRC_SGPR_MIN = 256,
  SRC_SGPR_MAX_SI = SRC_SGPR_MIN + 101,    // 357
  SRC_SGPR_MAX_GFX10 = SRC_SGPR_MIN + 105, // 361
  SRC_TTMP_MIN = SRC_SGPR_MIN + 108,       // 364
  SRC_TTMP_MAX = SRC_SGPR_MIN + 123,       // 379
};

} // namespace SDWA
910 | |
namespace DPP {

// clang-format off
// DPP control values. Enumerators that share a value are written as aliases of
// the first name declared with that value.
enum DppCtrl : unsigned {
  QUAD_PERM_FIRST   = 0x000,
  QUAD_PERM_ID      = 0x0E4, // identity permutation
  QUAD_PERM_LAST    = 0x0FF,
  DPP_UNUSED1       = 0x100,
  ROW_SHL0          = DPP_UNUSED1,
  ROW_SHL_FIRST     = 0x101,
  ROW_SHL_LAST      = 0x10F,
  DPP_UNUSED2       = 0x110,
  ROW_SHR0          = DPP_UNUSED2,
  ROW_SHR_FIRST     = 0x111,
  ROW_SHR_LAST      = 0x11F,
  DPP_UNUSED3       = 0x120,
  ROW_ROR0          = DPP_UNUSED3,
  ROW_ROR_FIRST     = 0x121,
  ROW_ROR_LAST      = 0x12F,
  WAVE_SHL1         = 0x130,
  DPP_UNUSED4_FIRST = 0x131,
  DPP_UNUSED4_LAST  = 0x133,
  WAVE_ROL1         = 0x134,
  DPP_UNUSED5_FIRST = 0x135,
  DPP_UNUSED5_LAST  = 0x137,
  WAVE_SHR1         = 0x138,
  DPP_UNUSED6_FIRST = 0x139,
  DPP_UNUSED6_LAST  = 0x13B,
  WAVE_ROR1         = 0x13C,
  DPP_UNUSED7_FIRST = 0x13D,
  DPP_UNUSED7_LAST  = 0x13F,
  ROW_MIRROR        = 0x140,
  ROW_HALF_MIRROR   = 0x141,
  BCAST15           = 0x142,
  BCAST31           = 0x143,
  DPP_UNUSED8_FIRST = 0x144,
  DPP_UNUSED8_LAST  = 0x14F,
  ROW_NEWBCAST_FIRST= 0x150,
  ROW_NEWBCAST_LAST = 0x15F,
  ROW_SHARE0        = ROW_NEWBCAST_FIRST,
  ROW_SHARE_FIRST   = ROW_NEWBCAST_FIRST,
  ROW_SHARE_LAST    = ROW_NEWBCAST_LAST,
  ROW_XMASK0        = 0x160,
  ROW_XMASK_FIRST   = ROW_XMASK0,
  ROW_XMASK_LAST    = 0x16F,
  DPP_LAST          = ROW_XMASK_LAST
};
// clang-format on

// FI mode values: two plain values and two DPP8 values.
enum DppFiMode {
  DPP_FI_0 = 0,
  DPP_FI_1 = 1,
  DPP8_FI_0 = 0xE9,
  DPP8_FI_1 = 0xEA,
};

} // namespace DPP
968 | |
namespace Exp {

// Export target numbers, followed by the highest index of each target group
// and an invalid marker.
enum Target : unsigned {
  ET_MRT0 = 0,
  ET_MRT7 = 7,
  ET_MRTZ = 8,
  ET_NULL = 9,             // Pre-GFX11
  ET_POS0 = 12,
  ET_POS3 = 15,
  ET_POS4 = 16,            // GFX10+
  ET_POS_LAST = ET_POS4,   // Highest pos used on any subtarget
  ET_PRIM = 20,            // GFX10+
  ET_DUAL_SRC_BLEND0 = 21, // GFX11+
  ET_DUAL_SRC_BLEND1 = 22, // GFX11+
  ET_PARAM0 = 32,          // Pre-GFX11
  ET_PARAM31 = 63,         // Pre-GFX11

  // Maximum index within each group; each equals the distance between the
  // group's first and last target above (0 for the single-entry groups).
  ET_NULL_MAX_IDX = 0,
  ET_MRTZ_MAX_IDX = 0,
  ET_PRIM_MAX_IDX = 0,
  ET_MRT_MAX_IDX = ET_MRT7 - ET_MRT0,
  ET_POS_MAX_IDX = ET_POS_LAST - ET_POS0,
  ET_DUAL_SRC_BLEND_MAX_IDX = ET_DUAL_SRC_BLEND1 - ET_DUAL_SRC_BLEND0,
  ET_PARAM_MAX_IDX = ET_PARAM31 - ET_PARAM0,

  ET_INVALID = 255,
};

} // namespace Exp
998 | |
namespace VOP3PEncoding {

// Single-bit masks within a 64-bit word. Note that OP_SEL_HI_2 (bit 14) is not
// adjacent to OP_SEL_HI_0 and OP_SEL_HI_1 (bits 59 and 60).
enum OpSel : uint64_t {
  OP_SEL_HI_0 = UINT64_C(1) << 59,
  OP_SEL_HI_1 = UINT64_C(1) << 60,
  OP_SEL_HI_2 = UINT64_C(1) << 14,
};

} // namespace VOP3PEncoding
1008 | |
namespace ImplicitArg {
// Implicit kernel argument offset for code object version 5.
enum Offset_COV5 : unsigned {
  // Offsets 80..112, spaced eight apart.
  HOSTCALL_PTR_OFFSET = 80,
  MULTIGRID_SYNC_ARG_OFFSET = HOSTCALL_PTR_OFFSET + 8, // 88
  HEAP_PTR_OFFSET = HOSTCALL_PTR_OFFSET + 16,          // 96

  DEFAULT_QUEUE_OFFSET = HOSTCALL_PTR_OFFSET + 24,     // 104
  COMPLETION_ACTION_OFFSET = HOSTCALL_PTR_OFFSET + 32, // 112

  // Offsets 192..200, spaced four apart.
  PRIVATE_BASE_OFFSET = 192,
  SHARED_BASE_OFFSET = PRIVATE_BASE_OFFSET + 4, // 196
  QUEUE_PTR_OFFSET = PRIVATE_BASE_OFFSET + 8,   // 200
};

} // namespace ImplicitArg
1025 | |
namespace VirtRegFlag {
// Virtual register flags used for various target specific handlings during
// codegen. Each flag is a distinct bit of a uint8_t.
enum Register_Flag : uint8_t {
  // Register operand in a whole-wave mode operation.
  WWM_REG = 0x1,
};

} // namespace VirtRegFlag
1035 | |
1036 | } // namespace AMDGPU |
1037 | |
namespace AMDGPU {
namespace Barrier {
// Barrier type identifiers; both values are negative.
enum Type {
  TRAP = -2,
  WORKGROUP = -1
};
} // namespace Barrier
} // namespace AMDGPU
1043 | |
1044 | // clang-format off |
1045 | |
// SPI_SHADER_PGM_RSRC1_PS register and its fields.
// S_* places a value into the field, G_* extracts the field from a register
// value, C_* is the 32-bit mask with the field's bits cleared.
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS 0x00B028
#define   S_00B028_VGPRS(x)              (((x) & 0x3F) << 0)
#define   S_00B028_SGPRS(x)              (((x) & 0x0F) << 6)
#define   S_00B028_MEM_ORDERED(x)        (((x) & 0x1) << 25)
#define   G_00B028_MEM_ORDERED(x)        (((x) >> 25) & 0x1)
#define   C_00B028_MEM_ORDERED           0xFDFFFFFF
1052 | |
// SPI_SHADER_PGM_RSRC2_PS register and its EXTRA_LDS_SIZE field setter.
// The field macro previously had no name (`#define (x) ...`), which is not a
// valid macro definition; it is named after the register prefix like the
// other S_<reg>_<FIELD> macros in this file.
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS 0x00B02C
#define   S_00B02C_EXTRA_LDS_SIZE(x)     (((x) & 0xFF) << 8)
// SPI_SHADER_PGM_RSRC1_VS register and its MEM_ORDERED field (bit 27):
// S_* sets the field, G_* extracts it, C_* is the mask with the bit cleared.
#define R_00B128_SPI_SHADER_PGM_RSRC1_VS 0x00B128
#define   S_00B128_MEM_ORDERED(x)        (((x) & 0x1) << 27)
#define   G_00B128_MEM_ORDERED(x)        (((x) >> 27) & 0x1)
#define   C_00B128_MEM_ORDERED           0xF7FFFFFF
1059 | |
// SPI_SHADER_PGM_RSRC1_GS register and its fields: WGP_MODE (bit 27) and
// MEM_ORDERED (bit 25). S_* sets a field, G_* extracts it, C_* is the mask
// with the field's bit cleared.
#define R_00B228_SPI_SHADER_PGM_RSRC1_GS 0x00B228
#define   S_00B228_WGP_MODE(x)           (((x) & 0x1) << 27)
#define   G_00B228_WGP_MODE(x)           (((x) >> 27) & 0x1)
#define   C_00B228_WGP_MODE              0xF7FFFFFF
#define   S_00B228_MEM_ORDERED(x)        (((x) & 0x1) << 25)
#define   G_00B228_MEM_ORDERED(x)        (((x) >> 25) & 0x1)
#define   C_00B228_MEM_ORDERED           0xFDFFFFFF
1067 | |
// SPI_SHADER_PGM_RSRC1_ES register (no field macros are defined for it here).
#define R_00B328_SPI_SHADER_PGM_RSRC1_ES 0x00B328

// SPI_SHADER_PGM_RSRC1_HS register and its fields: WGP_MODE (bit 26) and
// MEM_ORDERED (bit 24). S_* sets a field, G_* extracts it, C_* is the mask
// with the field's bit cleared.
#define R_00B428_SPI_SHADER_PGM_RSRC1_HS 0x00B428
#define   S_00B428_WGP_MODE(x)           (((x) & 0x1) << 26)
#define   G_00B428_WGP_MODE(x)           (((x) >> 26) & 0x1)
#define   C_00B428_WGP_MODE              0xFBFFFFFF
#define   S_00B428_MEM_ORDERED(x)        (((x) & 0x1) << 24)
#define   G_00B428_MEM_ORDERED(x)        (((x) >> 24) & 0x1)
#define   C_00B428_MEM_ORDERED           0xFEFFFFFF

// SPI_SHADER_PGM_RSRC1_LS register (no field macros are defined for it here).
#define R_00B528_SPI_SHADER_PGM_RSRC1_LS 0x00B528
1078 | |
// COMPUTE_PGM_RSRC2 register and its fields. For every field:
//   S_* masks a value and shifts it into the field's position,
//   G_* shifts a register value down and masks out the field,
//   C_* is the 32-bit mask with the field's bits cleared.
#define R_00B84C_COMPUTE_PGM_RSRC2       0x00B84C
// SCRATCH_EN: bit 0.
#define   S_00B84C_SCRATCH_EN(x)         (((x) & 0x1) << 0)
#define   G_00B84C_SCRATCH_EN(x)         (((x) >> 0) & 0x1)
#define   C_00B84C_SCRATCH_EN            0xFFFFFFFE
// USER_SGPR: bits 1..5.
#define   S_00B84C_USER_SGPR(x)          (((x) & 0x1F) << 1)
#define   G_00B84C_USER_SGPR(x)          (((x) >> 1) & 0x1F)
#define   C_00B84C_USER_SGPR             0xFFFFFFC1
// TRAP_HANDLER: bit 6.
#define   S_00B84C_TRAP_HANDLER(x)       (((x) & 0x1) << 6)
#define   G_00B84C_TRAP_HANDLER(x)       (((x) >> 6) & 0x1)
#define   C_00B84C_TRAP_HANDLER          0xFFFFFFBF
// TGID_X_EN: bit 7.
#define   S_00B84C_TGID_X_EN(x)          (((x) & 0x1) << 7)
#define   G_00B84C_TGID_X_EN(x)          (((x) >> 7) & 0x1)
#define   C_00B84C_TGID_X_EN             0xFFFFFF7F
// TGID_Y_EN: bit 8.
#define   S_00B84C_TGID_Y_EN(x)          (((x) & 0x1) << 8)
#define   G_00B84C_TGID_Y_EN(x)          (((x) >> 8) & 0x1)
#define   C_00B84C_TGID_Y_EN             0xFFFFFEFF
// TGID_Z_EN: bit 9.
#define   S_00B84C_TGID_Z_EN(x)          (((x) & 0x1) << 9)
#define   G_00B84C_TGID_Z_EN(x)          (((x) >> 9) & 0x1)
#define   C_00B84C_TGID_Z_EN             0xFFFFFDFF
// TG_SIZE_EN: bit 10.
#define   S_00B84C_TG_SIZE_EN(x)         (((x) & 0x1) << 10)
#define   G_00B84C_TG_SIZE_EN(x)         (((x) >> 10) & 0x1)
#define   C_00B84C_TG_SIZE_EN            0xFFFFFBFF
// TIDIG_COMP_CNT: bits 11..12.
#define   S_00B84C_TIDIG_COMP_CNT(x)     (((x) & 0x03) << 11)
#define   G_00B84C_TIDIG_COMP_CNT(x)     (((x) >> 11) & 0x03)
#define   C_00B84C_TIDIG_COMP_CNT        0xFFFFE7FF
/* CIK */
// EXCP_EN_MSB: bits 13..14.
#define   S_00B84C_EXCP_EN_MSB(x)        (((x) & 0x03) << 13)
#define   G_00B84C_EXCP_EN_MSB(x)        (((x) >> 13) & 0x03)
#define   C_00B84C_EXCP_EN_MSB           0xFFFF9FFF
/*     */
// LDS_SIZE: bits 15..23.
#define   S_00B84C_LDS_SIZE(x)           (((x) & 0x1FF) << 15)
#define   G_00B84C_LDS_SIZE(x)           (((x) >> 15) & 0x1FF)
#define   C_00B84C_LDS_SIZE              0xFF007FFF
// EXCP_EN: bits 24..30.
#define   S_00B84C_EXCP_EN(x)            (((x) & 0x7F) << 24)
#define   G_00B84C_EXCP_EN(x)            (((x) >> 24) & 0x7F)
#define   C_00B84C_EXCP_EN               0x80FFFFFF
1115 | |
// SPI_PS_INPUT_ENA and SPI_PS_INPUT_ADDR: register offsets only, no field
// helpers are defined for them here.
#define R_0286CC_SPI_PS_INPUT_ENA                                       0x0286CC
#define R_0286D0_SPI_PS_INPUT_ADDR                                      0x0286D0
1118 | |
// COMPUTE_PGM_RSRC1: register offset followed by per-field helpers.
// For every field FOO:
//   S_00B848_FOO(x) masks x to the field width and shifts it into position,
//   G_00B848_FOO(x) extracts the field from a full register value,
//   C_00B848_FOO    is the 32-bit mask with exactly the field's bits cleared.
#define R_00B848_COMPUTE_PGM_RSRC1                                      0x00B848
// Bits 5:0 (6 bits wide).
#define   S_00B848_VGPRS(x)                                           (((x) & 0x3F) << 0)
#define   G_00B848_VGPRS(x)                                           (((x) >> 0) & 0x3F)
#define   C_00B848_VGPRS                                              0xFFFFFFC0
// Bits 9:6 (4 bits wide).
#define   S_00B848_SGPRS(x)                                           (((x) & 0x0F) << 6)
#define   G_00B848_SGPRS(x)                                           (((x) >> 6) & 0x0F)
#define   C_00B848_SGPRS                                              0xFFFFFC3F
// Bits 11:10 (2 bits wide).
#define   S_00B848_PRIORITY(x)                                        (((x) & 0x03) << 10)
#define   G_00B848_PRIORITY(x)                                        (((x) >> 10) & 0x03)
#define   C_00B848_PRIORITY                                           0xFFFFF3FF
// Bits 19:12 (8 bits wide).
#define   S_00B848_FLOAT_MODE(x)                                      (((x) & 0xFF) << 12)
#define   G_00B848_FLOAT_MODE(x)                                      (((x) >> 12) & 0xFF)
#define   C_00B848_FLOAT_MODE                                         0xFFF00FFF
// Bit 20.
#define   S_00B848_PRIV(x)                                            (((x) & 0x1) << 20)
#define   G_00B848_PRIV(x)                                            (((x) >> 20) & 0x1)
#define   C_00B848_PRIV                                               0xFFEFFFFF
// Bit 21.
#define   S_00B848_DX10_CLAMP(x)                                      (((x) & 0x1) << 21)
#define   G_00B848_DX10_CLAMP(x)                                      (((x) >> 21) & 0x1)
#define   C_00B848_DX10_CLAMP                                         0xFFDFFFFF
// Bit 21 as well: RR_WG_MODE occupies the same bit as DX10_CLAMP.
// NOTE(review): presumably the two names apply to different hardware
// generations -- confirm before setting both.
#define   S_00B848_RR_WG_MODE(x)                                      (((x) & 0x1) << 21)
#define   G_00B848_RR_WG_MODE(x)                                      (((x) >> 21) & 0x1)
#define   C_00B848_RR_WG_MODE                                         0xFFDFFFFF
// Bit 22.
#define   S_00B848_DEBUG_MODE(x)                                      (((x) & 0x1) << 22)
#define   G_00B848_DEBUG_MODE(x)                                      (((x) >> 22) & 0x1)
#define   C_00B848_DEBUG_MODE                                         0xFFBFFFFF
// Bit 23.
#define   S_00B848_IEEE_MODE(x)                                       (((x) & 0x1) << 23)
#define   G_00B848_IEEE_MODE(x)                                       (((x) >> 23) & 0x1)
#define   C_00B848_IEEE_MODE                                          0xFF7FFFFF
// Bit 29.
#define   S_00B848_WGP_MODE(x)                                        (((x) & 0x1) << 29)
#define   G_00B848_WGP_MODE(x)                                        (((x) >> 29) & 0x1)
#define   C_00B848_WGP_MODE                                           0xDFFFFFFF
// Bit 30.
#define   S_00B848_MEM_ORDERED(x)                                     (((x) & 0x1) << 30)
#define   G_00B848_MEM_ORDERED(x)                                     (((x) >> 30) & 0x1)
#define   C_00B848_MEM_ORDERED                                        0xBFFFFFFF
// Bit 31 (the top bit of the 32-bit register value).
#define   S_00B848_FWD_PROGRESS(x)                                    (((x) & 0x1) << 31)
#define   G_00B848_FWD_PROGRESS(x)                                    (((x) >> 31) & 0x1)
#define   C_00B848_FWD_PROGRESS                                       0x7FFFFFFF
1156 | |
// Helpers for building a FLOAT_MODE value: rounding-mode part.
// Two-bit rounding-mode selectors.
#define FP_ROUND_ROUND_TO_NEAREST 0
#define FP_ROUND_ROUND_TO_INF     1
#define FP_ROUND_ROUND_TO_NEGINF  2
#define FP_ROUND_ROUND_TO_ZERO    3

// The rounding mode occupies bits 3:0 of the mode value: bits 1:0 select the
// single-precision mode and bits 3:2 the double-precision mode. Each macro
// keeps only the low two bits of x and moves them to the matching position.
#define FP_ROUND_MODE_SP(x) ((x) & 0x3)
#define FP_ROUND_MODE_DP(x) (((x) & 0x3) << 2)
1167 | |
// Two-bit denormal-handling selectors.
#define FP_DENORM_FLUSH_IN_FLUSH_OUT 0
#define FP_DENORM_FLUSH_OUT          1
#define FP_DENORM_FLUSH_IN           2
#define FP_DENORM_FLUSH_NONE         3

// Denormal handling occupies bits 7:4 of the mode value: bits 5:4 select the
// single-precision behaviour and bits 7:6 the double-precision behaviour.
// Each macro keeps only the low two bits of x and moves them into place.
#define FP_DENORM_MODE_SP(x) (((x) & 0x3) << 4)
#define FP_DENORM_MODE_DP(x) (((x) & 0x3) << 6)
1178 | |
// COMPUTE_TMPRING_SIZE: register offset and WAVESIZE setters. The field always
// starts at bit 12; the three variants differ only in how many bits of x they
// keep: 13 (PreGFX11), 15 (GFX11) or 18 (GFX12Plus).
#define R_00B860_COMPUTE_TMPRING_SIZE                                   0x00B860
#define   S_00B860_WAVESIZE_PreGFX11(x)                               (((x) & 0x1FFF) << 12)
#define   S_00B860_WAVESIZE_GFX11(x)                                  (((x) & 0x7FFF) << 12)
#define   S_00B860_WAVESIZE_GFX12Plus(x)                              (((x) & 0x3FFFF) << 12)
1183 | |
// SPI_TMPRING_SIZE: register offset and WAVESIZE setters. The field always
// starts at bit 12; the three variants differ only in how many bits of x they
// keep: 13 (PreGFX11), 15 (GFX11) or 18 (GFX12Plus).
#define R_0286E8_SPI_TMPRING_SIZE                                       0x0286E8
#define   S_0286E8_WAVESIZE_PreGFX11(x)                               (((x) & 0x1FFF) << 12)
#define   S_0286E8_WAVESIZE_GFX11(x)                                  (((x) & 0x7FFF) << 12)
#define   S_0286E8_WAVESIZE_GFX12Plus(x)                              (((x) & 0x3FFFF) << 12)
1188 | |
// One-bit *_W32_EN setters, grouped by the register that holds them.
// VGT_SHADER_STAGES_EN: HS at bit 21, GS at bit 22, VS at bit 23.
#define R_028B54_VGT_SHADER_STAGES_EN                                   0x028B54
#define   S_028B54_HS_W32_EN(x)                                       (((x) & 0x1) << 21)
#define   S_028B54_GS_W32_EN(x)                                       (((x) & 0x1) << 22)
#define   S_028B54_VS_W32_EN(x)                                       (((x) & 0x1) << 23)
// SPI_PS_IN_CONTROL: PS at bit 15.
#define R_0286D8_SPI_PS_IN_CONTROL                                      0x0286D8
#define   S_0286D8_PS_W32_EN(x)                                       (((x) & 0x1) << 15)
// COMPUTE_DISPATCH_INITIATOR: CS at bit 15.
#define R_00B800_COMPUTE_DISPATCH_INITIATOR                             0x00B800
#define   S_00B800_CS_W32_EN(x)                                       (((x) & 0x1) << 15)
1197 | |
// Identifiers for spilled SGPR / VGPR entries. Unlike the R_* macros above,
// these names carry no hardware offset.
// NOTE(review): presumably pseudo-register keys used when reporting spill
// counts -- confirm at the use sites.
#define R_SPILLED_SGPRS                                                 0x4
#define R_SPILLED_VGPRS                                                 0x8
1200 | |
1201 | // clang-format on |
1202 | |
1203 | } // End namespace llvm |
1204 | |
1205 | #endif |
1206 | |