1 | //===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "AMDGPUBaseInfo.h" |
10 | #include "AMDGPU.h" |
11 | #include "AMDGPUAsmUtils.h" |
12 | #include "AMDKernelCodeT.h" |
13 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
14 | #include "Utils/AMDKernelCodeTUtils.h" |
15 | #include "llvm/ADT/StringExtras.h" |
16 | #include "llvm/BinaryFormat/ELF.h" |
17 | #include "llvm/IR/Attributes.h" |
18 | #include "llvm/IR/Constants.h" |
19 | #include "llvm/IR/Function.h" |
20 | #include "llvm/IR/GlobalValue.h" |
21 | #include "llvm/IR/IntrinsicsAMDGPU.h" |
22 | #include "llvm/IR/IntrinsicsR600.h" |
23 | #include "llvm/IR/LLVMContext.h" |
24 | #include "llvm/MC/MCInstrInfo.h" |
25 | #include "llvm/MC/MCRegisterInfo.h" |
26 | #include "llvm/MC/MCSubtargetInfo.h" |
27 | #include "llvm/Support/CommandLine.h" |
28 | #include "llvm/TargetParser/TargetParser.h" |
29 | #include <optional> |
30 | |
31 | #define GET_INSTRINFO_NAMED_OPS |
32 | #define GET_INSTRMAP_INFO |
33 | #include "AMDGPUGenInstrInfo.inc" |
34 | |
35 | static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion( |
36 | "amdhsa-code-object-version" , llvm::cl::Hidden, |
37 | llvm::cl::init(Val: llvm::AMDGPU::AMDHSA_COV6), |
38 | llvm::cl::desc("Set default AMDHSA Code Object Version (module flag " |
39 | "or asm directive still take priority if present)" )); |
40 | |
41 | namespace { |
42 | |
43 | /// \returns Bit mask for given bit \p Shift and bit \p Width. |
44 | unsigned getBitMask(unsigned Shift, unsigned Width) { |
45 | return ((1 << Width) - 1) << Shift; |
46 | } |
47 | |
48 | /// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width. |
49 | /// |
50 | /// \returns Packed \p Dst. |
51 | unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) { |
52 | unsigned Mask = getBitMask(Shift, Width); |
53 | return ((Src << Shift) & Mask) | (Dst & ~Mask); |
54 | } |
55 | |
56 | /// Unpacks bits from \p Src for given bit \p Shift and bit \p Width. |
57 | /// |
58 | /// \returns Unpacked bits. |
59 | unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) { |
60 | return (Src & getBitMask(Shift, Width)) >> Shift; |
61 | } |
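// Illustrative round trip for the helpers above (not from the original
// source): with Shift = 4 and Width = 4, getBitMask(4, 4) == 0xF0,
// packBits(3, 0, 4, 4) == 0x30 and unpackBits(0x30, 4, 4) == 3. packBits
// leaves the bits of Dst outside the mask untouched, so successive calls can
// assemble a packed word one field at a time.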
62 | |
63 | /// \returns Vmcnt bit shift (lower bits). |
64 | unsigned getVmcntBitShiftLo(unsigned VersionMajor) { |
65 | return VersionMajor >= 11 ? 10 : 0; |
66 | } |
67 | |
68 | /// \returns Vmcnt bit width (lower bits). |
69 | unsigned getVmcntBitWidthLo(unsigned VersionMajor) { |
70 | return VersionMajor >= 11 ? 6 : 4; |
71 | } |
72 | |
73 | /// \returns Expcnt bit shift. |
74 | unsigned getExpcntBitShift(unsigned VersionMajor) { |
75 | return VersionMajor >= 11 ? 0 : 4; |
76 | } |
77 | |
78 | /// \returns Expcnt bit width. |
79 | unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; } |
80 | |
81 | /// \returns Lgkmcnt bit shift. |
82 | unsigned getLgkmcntBitShift(unsigned VersionMajor) { |
83 | return VersionMajor >= 11 ? 4 : 8; |
84 | } |
85 | |
86 | /// \returns Lgkmcnt bit width. |
87 | unsigned getLgkmcntBitWidth(unsigned VersionMajor) { |
88 | return VersionMajor >= 10 ? 6 : 4; |
89 | } |
90 | |
91 | /// \returns Vmcnt bit shift (higher bits). |
92 | unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; } |
93 | |
94 | /// \returns Vmcnt bit width (higher bits). |
95 | unsigned getVmcntBitWidthHi(unsigned VersionMajor) { |
96 | return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0; |
97 | } |
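// Reading the shift/width helpers above together: before GFX11 the s_waitcnt
// payload is vmcnt[3:0], expcnt[6:4] and lgkmcnt starting at bit 8 (4 bits
// wide before GFX10, 6 bits from GFX10), with GFX9/GFX10 keeping two extra
// vmcnt bits in [15:14]. From GFX11 the fields become expcnt[2:0],
// lgkmcnt[9:4] and vmcnt[15:10]. This is a summary of the functions above,
// not an independent statement of the hardware encoding.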
98 | |
99 | /// \returns Loadcnt bit width |
100 | unsigned getLoadcntBitWidth(unsigned VersionMajor) { |
101 | return VersionMajor >= 12 ? 6 : 0; |
102 | } |
103 | |
104 | /// \returns Samplecnt bit width. |
105 | unsigned getSamplecntBitWidth(unsigned VersionMajor) { |
106 | return VersionMajor >= 12 ? 6 : 0; |
107 | } |
108 | |
109 | /// \returns Bvhcnt bit width. |
110 | unsigned getBvhcntBitWidth(unsigned VersionMajor) { |
111 | return VersionMajor >= 12 ? 3 : 0; |
112 | } |
113 | |
114 | /// \returns Dscnt bit width. |
115 | unsigned getDscntBitWidth(unsigned VersionMajor) { |
116 | return VersionMajor >= 12 ? 6 : 0; |
117 | } |
118 | |
119 | /// \returns Dscnt bit shift in combined S_WAIT instructions. |
120 | unsigned getDscntBitShift(unsigned VersionMajor) { return 0; } |
121 | |
122 | /// \returns Storecnt or Vscnt bit width, depending on VersionMajor. |
123 | unsigned getStorecntBitWidth(unsigned VersionMajor) { |
124 | return VersionMajor >= 10 ? 6 : 0; |
125 | } |
126 | |
127 | /// \returns Kmcnt bit width. |
128 | unsigned getKmcntBitWidth(unsigned VersionMajor) { |
129 | return VersionMajor >= 12 ? 5 : 0; |
130 | } |
131 | |
132 | /// \returns Xcnt bit width. |
133 | unsigned getXcntBitWidth(unsigned VersionMajor, unsigned VersionMinor) { |
134 | return VersionMajor == 12 && VersionMinor == 5 ? 6 : 0; |
135 | } |
136 | |
137 | /// \returns shift for Loadcnt/Storecnt in combined S_WAIT instructions. |
138 | unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) { |
139 | return VersionMajor >= 12 ? 8 : 0; |
140 | } |
141 | |
142 | /// \returns VaSdst bit width |
143 | inline unsigned getVaSdstBitWidth() { return 3; } |
144 | |
145 | /// \returns VaSdst bit shift |
146 | inline unsigned getVaSdstBitShift() { return 9; } |
147 | |
148 | /// \returns VmVsrc bit width |
149 | inline unsigned getVmVsrcBitWidth() { return 3; } |
150 | |
151 | /// \returns VmVsrc bit shift |
152 | inline unsigned getVmVsrcBitShift() { return 2; } |
153 | |
154 | /// \returns VaVdst bit width |
155 | inline unsigned getVaVdstBitWidth() { return 4; } |
156 | |
157 | /// \returns VaVdst bit shift |
158 | inline unsigned getVaVdstBitShift() { return 12; } |
159 | |
160 | /// \returns VaVcc bit width |
161 | inline unsigned getVaVccBitWidth() { return 1; } |
162 | |
163 | /// \returns VaVcc bit shift |
164 | inline unsigned getVaVccBitShift() { return 1; } |
165 | |
166 | /// \returns SaSdst bit width |
167 | inline unsigned getSaSdstBitWidth() { return 1; } |
168 | |
169 | /// \returns SaSdst bit shift |
170 | inline unsigned getSaSdstBitShift() { return 0; } |
171 | |
172 | /// \returns VaSsrc width |
173 | inline unsigned getVaSsrcBitWidth() { return 1; } |
174 | |
175 | /// \returns VaSsrc bit shift |
176 | inline unsigned getVaSsrcBitShift() { return 8; } |
177 | |
178 | /// \returns HoldCnt bit shift |
179 | inline unsigned getHoldCntWidth() { return 1; } |
180 | |
181 | /// \returns HoldCnt bit shift |
182 | inline unsigned getHoldCntBitShift() { return 7; } |
183 | |
184 | } // end anonymous namespace |
185 | |
186 | namespace llvm { |
187 | |
188 | namespace AMDGPU { |
189 | |
190 | /// \returns true if the target supports signed immediate offset for SMRD |
191 | /// instructions. |
192 | bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) { |
193 | return isGFX9Plus(ST); |
194 | } |
195 | |
196 | /// \returns True if \p STI is AMDHSA. |
197 | bool isHsaAbi(const MCSubtargetInfo &STI) { |
198 | return STI.getTargetTriple().getOS() == Triple::AMDHSA; |
199 | } |
200 | |
201 | unsigned getAMDHSACodeObjectVersion(const Module &M) { |
202 | if (auto *Ver = mdconst::extract_or_null<ConstantInt>( |
203 | M.getModuleFlag("amdhsa_code_object_version"))) { |
204 | return (unsigned)Ver->getZExtValue() / 100; |
205 | } |
206 | |
207 | return getDefaultAMDHSACodeObjectVersion(); |
208 | } |
209 | |
210 | unsigned getDefaultAMDHSACodeObjectVersion() { |
211 | return DefaultAMDHSACodeObjectVersion; |
212 | } |
213 | |
214 | unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion) { |
215 | switch (ABIVersion) { |
216 | case ELF::ELFABIVERSION_AMDGPU_HSA_V4: |
217 | return 4; |
218 | case ELF::ELFABIVERSION_AMDGPU_HSA_V5: |
219 | return 5; |
220 | case ELF::ELFABIVERSION_AMDGPU_HSA_V6: |
221 | return 6; |
222 | default: |
223 | return getDefaultAMDHSACodeObjectVersion(); |
224 | } |
225 | } |
226 | |
227 | uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) { |
228 | if (T.getOS() != Triple::AMDHSA) |
229 | return 0; |
230 | |
231 | switch (CodeObjectVersion) { |
232 | case 4: |
233 | return ELF::ELFABIVERSION_AMDGPU_HSA_V4; |
234 | case 5: |
235 | return ELF::ELFABIVERSION_AMDGPU_HSA_V5; |
236 | case 6: |
237 | return ELF::ELFABIVERSION_AMDGPU_HSA_V6; |
238 | default: |
239 | report_fatal_error("Unsupported AMDHSA Code Object Version " + |
240 | Twine(CodeObjectVersion)); |
241 | } |
242 | } |
243 | |
244 | unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) { |
245 | switch (CodeObjectVersion) { |
246 | case AMDHSA_COV4: |
247 | return 48; |
248 | case AMDHSA_COV5: |
249 | case AMDHSA_COV6: |
250 | default: |
251 | return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET; |
252 | } |
253 | } |
254 | |
255 | // FIXME: All such magic numbers about the ABI should be in a |
256 | // central TD file. |
257 | unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) { |
258 | switch (CodeObjectVersion) { |
259 | case AMDHSA_COV4: |
260 | return 24; |
261 | case AMDHSA_COV5: |
262 | case AMDHSA_COV6: |
263 | default: |
264 | return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET; |
265 | } |
266 | } |
267 | |
268 | unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) { |
269 | switch (CodeObjectVersion) { |
270 | case AMDHSA_COV4: |
271 | return 32; |
272 | case AMDHSA_COV5: |
273 | case AMDHSA_COV6: |
274 | default: |
275 | return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET; |
276 | } |
277 | } |
278 | |
279 | unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) { |
280 | switch (CodeObjectVersion) { |
281 | case AMDHSA_COV4: |
282 | return 40; |
283 | case AMDHSA_COV5: |
284 | case AMDHSA_COV6: |
285 | default: |
286 | return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET; |
287 | } |
288 | } |
289 | |
290 | #define GET_MIMGBaseOpcodesTable_IMPL |
291 | #define GET_MIMGDimInfoTable_IMPL |
292 | #define GET_MIMGInfoTable_IMPL |
293 | #define GET_MIMGLZMappingTable_IMPL |
294 | #define GET_MIMGMIPMappingTable_IMPL |
295 | #define GET_MIMGBiasMappingTable_IMPL |
296 | #define GET_MIMGOffsetMappingTable_IMPL |
297 | #define GET_MIMGG16MappingTable_IMPL |
298 | #define GET_MAIInstInfoTable_IMPL |
299 | #include "AMDGPUGenSearchableTables.inc" |
300 | |
301 | int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, |
302 | unsigned VDataDwords, unsigned VAddrDwords) { |
303 | const MIMGInfo *Info = |
304 | getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding, VDataDwords, VAddrDwords); |
305 | return Info ? Info->Opcode : -1; |
306 | } |
307 | |
308 | const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) { |
309 | const MIMGInfo *Info = getMIMGInfo(Opc); |
310 | return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr; |
311 | } |
312 | |
313 | int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) { |
314 | const MIMGInfo *OrigInfo = getMIMGInfo(Opc); |
315 | const MIMGInfo *NewInfo = |
316 | getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding, |
317 | NewChannels, OrigInfo->VAddrDwords); |
318 | return NewInfo ? NewInfo->Opcode : -1; |
319 | } |
320 | |
321 | unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, |
322 | const MIMGDimInfo *Dim, bool IsA16, |
323 | bool IsG16Supported) { |
324 | unsigned AddrWords = BaseOpcode->NumExtraArgs; |
325 | unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) + |
326 | (BaseOpcode->LodOrClampOrMip ? 1 : 0); |
327 | if (IsA16) |
328 | AddrWords += divideCeil(AddrComponents, 2); |
329 | else |
330 | AddrWords += AddrComponents; |
331 | |
332 | // Note: For subtargets that support A16 but not G16, enabling A16 also |
333 | // enables 16 bit gradients. |
334 | // For subtargets that support A16 (operand) and G16 (done with a different |
335 | // instruction encoding), they are independent. |
336 | |
337 | if (BaseOpcode->Gradients) { |
338 | if ((IsA16 && !IsG16Supported) || BaseOpcode->G16) |
339 | // There are two gradients per coordinate, we pack them separately. |
340 | // For the 3d case, |
341 | // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv) |
342 | AddrWords += alignTo<2>(Dim->NumGradients / 2); |
343 | else |
344 | AddrWords += Dim->NumGradients; |
345 | } |
346 | return AddrWords; |
347 | } |
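// Worked example (illustrative only): a 2D gradient sample on a target with
// A16 but no G16 support has 2 coordinates and 4 gradient components.
// Ignoring NumExtraArgs and LOD/clamp, A16 packs the coordinates into
// divideCeil(2, 2) = 1 dword and the gradients into alignTo<2>(4 / 2) = 2
// dwords, versus 2 + 4 dwords when everything stays 32-bit.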
348 | |
349 | struct MUBUFInfo { |
350 | uint16_t Opcode; |
351 | uint16_t BaseOpcode; |
352 | uint8_t elements; |
353 | bool has_vaddr; |
354 | bool has_srsrc; |
355 | bool has_soffset; |
356 | bool IsBufferInv; |
357 | bool tfe; |
358 | }; |
359 | |
360 | struct MTBUFInfo { |
361 | uint16_t Opcode; |
362 | uint16_t BaseOpcode; |
363 | uint8_t elements; |
364 | bool has_vaddr; |
365 | bool has_srsrc; |
366 | bool has_soffset; |
367 | }; |
368 | |
369 | struct SMInfo { |
370 | uint16_t Opcode; |
371 | bool IsBuffer; |
372 | }; |
373 | |
374 | struct VOPInfo { |
375 | uint16_t Opcode; |
376 | bool IsSingle; |
377 | }; |
378 | |
379 | struct VOPC64DPPInfo { |
380 | uint16_t Opcode; |
381 | }; |
382 | |
383 | struct VOPCDPPAsmOnlyInfo { |
384 | uint16_t Opcode; |
385 | }; |
386 | |
387 | struct VOP3CDPPAsmOnlyInfo { |
388 | uint16_t Opcode; |
389 | }; |
390 | |
391 | struct VOPDComponentInfo { |
392 | uint16_t BaseVOP; |
393 | uint16_t VOPDOp; |
394 | bool CanBeVOPDX; |
395 | }; |
396 | |
397 | struct VOPDInfo { |
398 | uint16_t Opcode; |
399 | uint16_t OpX; |
400 | uint16_t OpY; |
401 | uint16_t Subtarget; |
402 | }; |
403 | |
404 | struct VOPTrue16Info { |
405 | uint16_t Opcode; |
406 | bool IsTrue16; |
407 | }; |
408 | |
409 | #define GET_FP4FP8DstByteSelTable_DECL |
410 | #define GET_FP4FP8DstByteSelTable_IMPL |
411 | |
412 | struct DPMACCInstructionInfo { |
413 | uint16_t Opcode; |
414 | bool IsDPMACCInstruction; |
415 | }; |
416 | |
417 | struct FP4FP8DstByteSelInfo { |
418 | uint16_t Opcode; |
419 | bool HasFP8DstByteSel; |
420 | bool HasFP4DstByteSel; |
421 | }; |
422 | |
423 | #define GET_MTBUFInfoTable_DECL |
424 | #define GET_MTBUFInfoTable_IMPL |
425 | #define GET_MUBUFInfoTable_DECL |
426 | #define GET_MUBUFInfoTable_IMPL |
427 | #define GET_SMInfoTable_DECL |
428 | #define GET_SMInfoTable_IMPL |
429 | #define GET_VOP1InfoTable_DECL |
430 | #define GET_VOP1InfoTable_IMPL |
431 | #define GET_VOP2InfoTable_DECL |
432 | #define GET_VOP2InfoTable_IMPL |
433 | #define GET_VOP3InfoTable_DECL |
434 | #define GET_VOP3InfoTable_IMPL |
435 | #define GET_VOPC64DPPTable_DECL |
436 | #define GET_VOPC64DPPTable_IMPL |
437 | #define GET_VOPC64DPP8Table_DECL |
438 | #define GET_VOPC64DPP8Table_IMPL |
439 | #define GET_VOPCAsmOnlyInfoTable_DECL |
440 | #define GET_VOPCAsmOnlyInfoTable_IMPL |
441 | #define GET_VOP3CAsmOnlyInfoTable_DECL |
442 | #define GET_VOP3CAsmOnlyInfoTable_IMPL |
443 | #define GET_VOPDComponentTable_DECL |
444 | #define GET_VOPDComponentTable_IMPL |
445 | #define GET_VOPDPairs_DECL |
446 | #define GET_VOPDPairs_IMPL |
447 | #define GET_VOPTrue16Table_DECL |
448 | #define GET_VOPTrue16Table_IMPL |
449 | #define GET_True16D16Table_IMPL |
450 | #define GET_WMMAOpcode2AddrMappingTable_DECL |
451 | #define GET_WMMAOpcode2AddrMappingTable_IMPL |
452 | #define GET_WMMAOpcode3AddrMappingTable_DECL |
453 | #define GET_WMMAOpcode3AddrMappingTable_IMPL |
454 | #define GET_getMFMA_F8F6F4_WithSize_DECL |
455 | #define GET_getMFMA_F8F6F4_WithSize_IMPL |
456 | #define GET_isMFMA_F8F6F4Table_IMPL |
457 | #define GET_isCvtScaleF32_F32F16ToF8F4Table_IMPL |
458 | |
459 | #include "AMDGPUGenSearchableTables.inc" |
460 | |
461 | int getMTBUFBaseOpcode(unsigned Opc) { |
462 | const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc); |
463 | return Info ? Info->BaseOpcode : -1; |
464 | } |
465 | |
466 | int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) { |
467 | const MTBUFInfo *Info = |
468 | getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements); |
469 | return Info ? Info->Opcode : -1; |
470 | } |
471 | |
472 | int getMTBUFElements(unsigned Opc) { |
473 | const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); |
474 | return Info ? Info->elements : 0; |
475 | } |
476 | |
477 | bool getMTBUFHasVAddr(unsigned Opc) { |
478 | const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); |
479 | return Info && Info->has_vaddr; |
480 | } |
481 | |
482 | bool getMTBUFHasSrsrc(unsigned Opc) { |
483 | const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); |
484 | return Info && Info->has_srsrc; |
485 | } |
486 | |
487 | bool getMTBUFHasSoffset(unsigned Opc) { |
488 | const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc); |
489 | return Info && Info->has_soffset; |
490 | } |
491 | |
492 | int getMUBUFBaseOpcode(unsigned Opc) { |
493 | const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc); |
494 | return Info ? Info->BaseOpcode : -1; |
495 | } |
496 | |
497 | int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) { |
498 | const MUBUFInfo *Info = |
499 | getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements); |
500 | return Info ? Info->Opcode : -1; |
501 | } |
502 | |
503 | int getMUBUFElements(unsigned Opc) { |
504 | const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); |
505 | return Info ? Info->elements : 0; |
506 | } |
507 | |
508 | bool getMUBUFHasVAddr(unsigned Opc) { |
509 | const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); |
510 | return Info && Info->has_vaddr; |
511 | } |
512 | |
513 | bool getMUBUFHasSrsrc(unsigned Opc) { |
514 | const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); |
515 | return Info && Info->has_srsrc; |
516 | } |
517 | |
518 | bool getMUBUFHasSoffset(unsigned Opc) { |
519 | const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); |
520 | return Info && Info->has_soffset; |
521 | } |
522 | |
523 | bool getMUBUFIsBufferInv(unsigned Opc) { |
524 | const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); |
525 | return Info && Info->IsBufferInv; |
526 | } |
527 | |
528 | bool getMUBUFTfe(unsigned Opc) { |
529 | const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); |
530 | return Info && Info->tfe; |
531 | } |
532 | |
533 | bool getSMEMIsBuffer(unsigned Opc) { |
534 | const SMInfo *Info = getSMEMOpcodeHelper(Opc); |
535 | return Info && Info->IsBuffer; |
536 | } |
537 | |
538 | bool getVOP1IsSingle(unsigned Opc) { |
539 | const VOPInfo *Info = getVOP1OpcodeHelper(Opc); |
540 | return !Info || Info->IsSingle; |
541 | } |
542 | |
543 | bool getVOP2IsSingle(unsigned Opc) { |
544 | const VOPInfo *Info = getVOP2OpcodeHelper(Opc); |
545 | return !Info || Info->IsSingle; |
546 | } |
547 | |
548 | bool getVOP3IsSingle(unsigned Opc) { |
549 | const VOPInfo *Info = getVOP3OpcodeHelper(Opc); |
550 | return !Info || Info->IsSingle; |
551 | } |
552 | |
553 | bool isVOPC64DPP(unsigned Opc) { |
554 | return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc); |
555 | } |
556 | |
557 | bool isVOPCAsmOnly(unsigned Opc) { return isVOPCAsmOnlyOpcodeHelper(Opc); } |
558 | |
559 | bool getMAIIsDGEMM(unsigned Opc) { |
560 | const MAIInstInfo *Info = getMAIInstInfoHelper(Opc); |
561 | return Info && Info->is_dgemm; |
562 | } |
563 | |
564 | bool getMAIIsGFX940XDL(unsigned Opc) { |
565 | const MAIInstInfo *Info = getMAIInstInfoHelper(Opc); |
566 | return Info && Info->is_gfx940_xdl; |
567 | } |
568 | |
569 | uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal) { |
570 | switch (EncodingVal) { |
571 | case MFMAScaleFormats::FP6_E2M3: |
572 | case MFMAScaleFormats::FP6_E3M2: |
573 | return 6; |
574 | case MFMAScaleFormats::FP4_E2M1: |
575 | return 4; |
576 | case MFMAScaleFormats::FP8_E4M3: |
577 | case MFMAScaleFormats::FP8_E5M2: |
578 | default: |
579 | return 8; |
580 | } |
581 | |
582 | llvm_unreachable("covered switch over mfma scale formats" ); |
583 | } |
584 | |
585 | const MFMA_F8F6F4_Info *getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ, |
586 | unsigned BLGP, |
587 | unsigned F8F8Opcode) { |
588 | uint8_t SrcANumRegs = mfmaScaleF8F6F4FormatToNumRegs(CBSZ); |
589 | uint8_t SrcBNumRegs = mfmaScaleF8F6F4FormatToNumRegs(BLGP); |
590 | return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode); |
591 | } |
592 | |
593 | unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) { |
594 | if (ST.hasFeature(AMDGPU::FeatureGFX12Insts)) |
595 | return SIEncodingFamily::GFX12; |
596 | if (ST.hasFeature(AMDGPU::FeatureGFX11Insts)) |
597 | return SIEncodingFamily::GFX11; |
598 | llvm_unreachable("Subtarget generation does not support VOPD!" ); |
599 | } |
600 | |
601 | CanBeVOPD getCanBeVOPD(unsigned Opc) { |
602 | const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc); |
603 | if (Info) |
604 | return {Info->CanBeVOPDX, true}; |
605 | return {false, false}; |
606 | } |
607 | |
608 | unsigned getVOPDOpcode(unsigned Opc) { |
609 | const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc); |
610 | return Info ? Info->VOPDOp : ~0u; |
611 | } |
612 | |
613 | bool isVOPD(unsigned Opc) { |
614 | return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X); |
615 | } |
616 | |
617 | bool isMAC(unsigned Opc) { |
618 | return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || |
619 | Opc == AMDGPU::V_MAC_F32_e64_gfx10 || |
620 | Opc == AMDGPU::V_MAC_F32_e64_vi || |
621 | Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || |
622 | Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || |
623 | Opc == AMDGPU::V_MAC_F16_e64_vi || |
624 | Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || |
625 | Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || |
626 | Opc == AMDGPU::V_FMAC_F32_e64_gfx11 || |
627 | Opc == AMDGPU::V_FMAC_F32_e64_gfx12 || |
628 | Opc == AMDGPU::V_FMAC_F32_e64_vi || |
629 | Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || |
630 | Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 || |
631 | Opc == AMDGPU::V_FMAC_F16_e64_gfx10 || |
632 | Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 || |
633 | Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx11 || |
634 | Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 || |
635 | Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx12 || |
636 | Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi || |
637 | Opc == AMDGPU::V_DOT2C_F32_BF16_e64_vi || |
638 | Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi || |
639 | Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi || |
640 | Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi; |
641 | } |
642 | |
643 | bool isPermlane16(unsigned Opc) { |
644 | return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || |
645 | Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 || |
646 | Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 || |
647 | Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 || |
648 | Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 || |
649 | Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 || |
650 | Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 || |
651 | Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12; |
652 | } |
653 | |
654 | bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) { |
655 | return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 || |
656 | Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 || |
657 | Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 || |
658 | Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 || |
659 | Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 || |
660 | Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 || |
661 | Opc == AMDGPU::V_CVT_PK_F32_BF8_fake16_e64_gfx12 || |
662 | Opc == AMDGPU::V_CVT_PK_F32_FP8_fake16_e64_gfx12 || |
663 | Opc == AMDGPU::V_CVT_PK_F32_BF8_t16_e64_gfx12 || |
664 | Opc == AMDGPU::V_CVT_PK_F32_FP8_t16_e64_gfx12; |
665 | } |
666 | |
667 | bool isGenericAtomic(unsigned Opc) { |
668 | return Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP || |
669 | Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD || |
670 | Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB || |
671 | Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN || |
672 | Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN || |
673 | Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX || |
674 | Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX || |
675 | Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND || |
676 | Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR || |
677 | Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR || |
678 | Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC || |
679 | Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC || |
680 | Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD || |
681 | Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN || |
682 | Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX || |
683 | Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP || |
684 | Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG; |
685 | } |
686 | |
687 | bool isAsyncStore(unsigned Opc) { |
688 | return false; // placeholder before async store implementation. |
689 | } |
690 | |
691 | bool isTensorStore(unsigned Opc) { |
692 | return Opc == TENSOR_STORE_FROM_LDS_gfx1250 || |
693 | Opc == TENSOR_STORE_FROM_LDS_D2_gfx1250; |
694 | } |
695 | |
696 | unsigned getTemporalHintType(const MCInstrDesc TID) { |
697 | if (TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)) |
698 | return CPol::TH_TYPE_ATOMIC; |
699 | unsigned Opc = TID.getOpcode(); |
700 | // Async and Tensor store should have the temporal hint type of TH_TYPE_STORE |
701 | if (TID.mayStore() && |
702 | (isAsyncStore(Opc) || isTensorStore(Opc) || !TID.mayLoad())) |
703 | return CPol::TH_TYPE_STORE; |
704 | |
705 | // This will default to returning TH_TYPE_LOAD when neither MayStore nor |
706 | // MayLoad flag is present which is the case with instructions like |
707 | // image_get_resinfo. |
708 | return CPol::TH_TYPE_LOAD; |
709 | } |
710 | |
711 | bool isTrue16Inst(unsigned Opc) { |
712 | const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc); |
713 | return Info && Info->IsTrue16; |
714 | } |
715 | |
716 | FPType getFPDstSelType(unsigned Opc) { |
717 | const FP4FP8DstByteSelInfo *Info = getFP4FP8DstByteSelHelper(Opc); |
718 | if (!Info) |
719 | return FPType::None; |
720 | if (Info->HasFP8DstByteSel) |
721 | return FPType::FP8; |
722 | if (Info->HasFP4DstByteSel) |
723 | return FPType::FP4; |
724 | |
725 | return FPType::None; |
726 | } |
727 | |
728 | unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) { |
729 | const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc); |
730 | return Info ? Info->Opcode3Addr : ~0u; |
731 | } |
732 | |
733 | unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) { |
734 | const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc); |
735 | return Info ? Info->Opcode2Addr : ~0u; |
736 | } |
737 | |
738 | // Wrapper for Tablegen'd function. enum Subtarget is not defined in any |
739 | // header files, so we need to wrap it in a function that takes unsigned |
740 | // instead. |
741 | int getMCOpcode(uint16_t Opcode, unsigned Gen) { |
742 | return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen)); |
743 | } |
744 | |
745 | int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily) { |
746 | const VOPDInfo *Info = |
747 | getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily); |
748 | return Info ? Info->Opcode : -1; |
749 | } |
750 | |
751 | std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) { |
752 | const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode); |
753 | assert(Info); |
754 | const auto *OpX = getVOPDBaseFromComponent(Info->OpX); |
755 | const auto *OpY = getVOPDBaseFromComponent(Info->OpY); |
756 | assert(OpX && OpY); |
757 | return {OpX->BaseVOP, OpY->BaseVOP}; |
758 | } |
759 | |
760 | namespace VOPD { |
761 | |
762 | ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) { |
763 | assert(OpDesc.getNumDefs() == Component::DST_NUM); |
764 | |
765 | assert(OpDesc.getOperandConstraint(Component::SRC0, MCOI::TIED_TO) == -1); |
766 | assert(OpDesc.getOperandConstraint(Component::SRC1, MCOI::TIED_TO) == -1); |
767 | auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO); |
768 | assert(TiedIdx == -1 || TiedIdx == Component::DST); |
769 | HasSrc2Acc = TiedIdx != -1; |
770 | |
771 | SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs(); |
772 | assert(SrcOperandsNum <= Component::MAX_SRC_NUM); |
773 | |
774 | auto OperandsNum = OpDesc.getNumOperands(); |
775 | unsigned CompOprIdx; |
776 | for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) { |
777 | if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) { |
778 | MandatoryLiteralIdx = CompOprIdx; |
779 | break; |
780 | } |
781 | } |
782 | } |
783 | |
784 | unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const { |
785 | assert(CompOprIdx < Component::MAX_OPR_NUM); |
786 | |
787 | if (CompOprIdx == Component::DST) |
788 | return getIndexOfDstInParsedOperands(); |
789 | |
790 | auto CompSrcIdx = CompOprIdx - Component::DST_NUM; |
791 | if (CompSrcIdx < getCompParsedSrcOperandsNum()) |
792 | return getIndexOfSrcInParsedOperands(CompSrcIdx); |
793 | |
794 | // The specified operand does not exist. |
795 | return 0; |
796 | } |
797 | |
798 | std::optional<unsigned> InstInfo::getInvalidCompOperandIndex( |
799 | std::function<unsigned(unsigned, unsigned)> GetRegIdx, bool SkipSrc) const { |
800 | |
801 | auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx); |
802 | auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx); |
803 | |
804 | const unsigned CompOprNum = |
805 | SkipSrc ? Component::DST_NUM : Component::MAX_OPR_NUM; |
806 | unsigned CompOprIdx; |
807 | for (CompOprIdx = 0; CompOprIdx < CompOprNum; ++CompOprIdx) { |
808 | unsigned BanksMasks = VOPD_VGPR_BANK_MASKS[CompOprIdx]; |
809 | if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] && |
810 | ((OpXRegs[CompOprIdx] & BanksMasks) == |
811 | (OpYRegs[CompOprIdx] & BanksMasks))) |
812 | return CompOprIdx; |
813 | } |
814 | |
815 | return {}; |
816 | } |
817 | |
818 | // Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used |
819 | // by the specified component. If an operand is unused |
820 | // or is not a VGPR, the corresponding value is 0. |
821 | // |
822 | // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index |
823 | // for the specified component and MC operand. The callback must return 0 |
824 | // if the operand is not a register or not a VGPR. |
825 | InstInfo::RegIndices InstInfo::getRegIndices( |
826 | unsigned CompIdx, |
827 | std::function<unsigned(unsigned, unsigned)> GetRegIdx) const { |
828 | assert(CompIdx < COMPONENTS_NUM); |
829 | |
830 | const auto &Comp = CompInfo[CompIdx]; |
831 | InstInfo::RegIndices RegIndices; |
832 | |
833 | RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands()); |
834 | |
835 | for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) { |
836 | unsigned CompSrcIdx = CompOprIdx - DST_NUM; |
837 | RegIndices[CompOprIdx] = |
838 | Comp.hasRegSrcOperand(CompSrcIdx) |
839 | ? GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx)) |
840 | : 0; |
841 | } |
842 | return RegIndices; |
843 | } |
844 | |
845 | } // namespace VOPD |
846 | |
847 | VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) { |
848 | return VOPD::InstInfo(OpX, OpY); |
849 | } |
850 | |
851 | VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode, |
852 | const MCInstrInfo *InstrInfo) { |
853 | auto [OpX, OpY] = getVOPDComponents(VOPDOpcode); |
854 | const auto &OpXDesc = InstrInfo->get(OpX); |
855 | const auto &OpYDesc = InstrInfo->get(OpY); |
856 | VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X); |
857 | VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo); |
858 | return VOPD::InstInfo(OpXInfo, OpYInfo); |
859 | } |
860 | |
861 | namespace IsaInfo { |
862 | |
863 | AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI) |
864 | : STI(STI), XnackSetting(TargetIDSetting::Any), |
865 | SramEccSetting(TargetIDSetting::Any) { |
866 | if (!STI.getFeatureBits().test(FeatureSupportsXNACK)) |
867 | XnackSetting = TargetIDSetting::Unsupported; |
868 | if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC)) |
869 | SramEccSetting = TargetIDSetting::Unsupported; |
870 | } |
871 | |
872 | void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) { |
873 | // Check if xnack or sramecc is explicitly enabled or disabled. In the |
874 | // absence of the target features we assume we must generate code that can run |
875 | // in any environment. |
876 | SubtargetFeatures Features(FS); |
877 | std::optional<bool> XnackRequested; |
878 | std::optional<bool> SramEccRequested; |
879 | |
880 | for (const std::string &Feature : Features.getFeatures()) { |
881 | if (Feature == "+xnack" ) |
882 | XnackRequested = true; |
883 | else if (Feature == "-xnack" ) |
884 | XnackRequested = false; |
885 | else if (Feature == "+sramecc" ) |
886 | SramEccRequested = true; |
887 | else if (Feature == "-sramecc" ) |
888 | SramEccRequested = false; |
889 | } |
890 | |
891 | bool XnackSupported = isXnackSupported(); |
892 | bool SramEccSupported = isSramEccSupported(); |
893 | |
894 | if (XnackRequested) { |
895 | if (XnackSupported) { |
896 | XnackSetting = |
897 | *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off; |
898 | } else { |
899 | // If a specific xnack setting was requested and this GPU does not support |
900 | // xnack emit a warning. Setting will remain set to "Unsupported". |
901 | if (*XnackRequested) { |
902 | errs() << "warning: xnack 'On' was requested for a processor that does " |
903 | "not support it!\n" ; |
904 | } else { |
905 | errs() << "warning: xnack 'Off' was requested for a processor that " |
906 | "does not support it!\n" ; |
907 | } |
908 | } |
909 | } |
910 | |
911 | if (SramEccRequested) { |
912 | if (SramEccSupported) { |
913 | SramEccSetting = |
914 | *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off; |
915 | } else { |
916 | // If a specific sramecc setting was requested and this GPU does not |
917 | // support sramecc emit a warning. Setting will remain set to |
918 | // "Unsupported". |
919 | if (*SramEccRequested) { |
920 | errs() << "warning: sramecc 'On' was requested for a processor that " |
921 | "does not support it!\n" ; |
922 | } else { |
923 | errs() << "warning: sramecc 'Off' was requested for a processor that " |
924 | "does not support it!\n" ; |
925 | } |
926 | } |
927 | } |
928 | } |
929 | |
930 | static TargetIDSetting |
931 | getTargetIDSettingFromFeatureString(StringRef FeatureString) { |
932 | if (FeatureString.ends_with("-")) |
933 | return TargetIDSetting::Off; |
934 | if (FeatureString.ends_with("+")) |
935 | return TargetIDSetting::On; |
936 | |
937 | llvm_unreachable("Malformed feature string" ); |
938 | } |
939 | |
940 | void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) { |
941 | SmallVector<StringRef, 3> TargetIDSplit; |
942 | TargetID.split(TargetIDSplit, ':'); |
943 | |
944 | for (const auto &FeatureString : TargetIDSplit) { |
945 | if (FeatureString.starts_with("xnack")) |
946 | XnackSetting = getTargetIDSettingFromFeatureString(FeatureString); |
947 | if (FeatureString.starts_with("sramecc")) |
948 | SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString); |
949 | } |
950 | } |
951 | |
952 | std::string AMDGPUTargetID::toString() const { |
953 | std::string StringRep; |
954 | raw_string_ostream StreamRep(StringRep); |
955 | |
956 | auto TargetTriple = STI.getTargetTriple(); |
957 | auto Version = getIsaVersion(STI.getCPU()); |
958 | |
959 | StreamRep << TargetTriple.getArchName() << '-' << TargetTriple.getVendorName() |
960 | << '-' << TargetTriple.getOSName() << '-' |
961 | << TargetTriple.getEnvironmentName() << '-'; |
962 | |
963 | std::string Processor; |
964 | // TODO: Following else statement is present here because we used various |
965 | // alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803'). |
966 | // Remove once all aliases are removed from GCNProcessors.td. |
967 | if (Version.Major >= 9) |
968 | Processor = STI.getCPU().str(); |
969 | else |
970 | Processor = (Twine("gfx" ) + Twine(Version.Major) + Twine(Version.Minor) + |
971 | Twine(Version.Stepping)) |
972 | .str(); |
973 | |
974 | std::string Features; |
975 | if (STI.getTargetTriple().getOS() == Triple::AMDHSA) { |
976 | // sramecc. |
977 | if (getSramEccSetting() == TargetIDSetting::Off) |
978 | Features += ":sramecc-" ; |
979 | else if (getSramEccSetting() == TargetIDSetting::On) |
980 | Features += ":sramecc+" ; |
981 | // xnack. |
982 | if (getXnackSetting() == TargetIDSetting::Off) |
983 | Features += ":xnack-" ; |
984 | else if (getXnackSetting() == TargetIDSetting::On) |
985 | Features += ":xnack+" ; |
986 | } |
987 | |
988 | StreamRep << Processor << Features; |
989 | |
990 | return StringRep; |
991 | } |
992 | |
993 | unsigned getWavefrontSize(const MCSubtargetInfo *STI) { |
994 | if (STI->getFeatureBits().test(FeatureWavefrontSize16)) |
995 | return 16; |
996 | if (STI->getFeatureBits().test(FeatureWavefrontSize32)) |
997 | return 32; |
998 | |
999 | return 64; |
1000 | } |
1001 | |
1002 | unsigned getLocalMemorySize(const MCSubtargetInfo *STI) { |
1003 | unsigned BytesPerCU = getAddressableLocalMemorySize(STI); |
1004 | |
1005 | // "Per CU" really means "per whatever functional block the waves of a |
1006 | // workgroup must share". So the effective local memory size is doubled in |
1007 | // WGP mode on gfx10. |
1008 | if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode)) |
1009 | BytesPerCU *= 2; |
1010 | |
1011 | return BytesPerCU; |
1012 | } |
1013 | |
1014 | unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) { |
1015 | if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize32768)) |
1016 | return 32768; |
1017 | if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize65536)) |
1018 | return 65536; |
1019 | if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize163840)) |
1020 | return 163840; |
1021 | return 0; |
1022 | } |
1023 | |
1024 | unsigned getEUsPerCU(const MCSubtargetInfo *STI) { |
1025 | // "Per CU" really means "per whatever functional block the waves of a |
1026 | // workgroup must share". For gfx10 in CU mode this is the CU, which contains |
1027 | // two SIMDs. |
1028 | if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode)) |
1029 | return 2; |
1030 | // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains |
1031 | // two CUs, so a total of four SIMDs. |
1032 | return 4; |
1033 | } |
1034 | |
1035 | unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, |
1036 | unsigned FlatWorkGroupSize) { |
1037 | assert(FlatWorkGroupSize != 0); |
1038 | if (!STI->getTargetTriple().isAMDGCN()) |
1039 | return 8; |
1040 | unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI); |
1041 | unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize); |
1042 | if (N == 1) { |
1043 | // Single-wave workgroups don't consume barrier resources. |
1044 | return MaxWaves; |
1045 | } |
1046 | |
1047 | unsigned MaxBarriers = 16; |
1048 | if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode)) |
1049 | MaxBarriers = 32; |
1050 | |
1051 | return std::min(MaxWaves / N, MaxBarriers); |
1052 | } |
1053 | |
1054 | unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) { return 1; } |
1055 | |
1056 | unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) { |
1057 | // FIXME: Need to take scratch memory into account. |
1058 | if (isGFX90A(*STI)) |
1059 | return 8; |
1060 | if (!isGFX10Plus(*STI)) |
1061 | return 10; |
1062 | return hasGFX10_3Insts(*STI) ? 16 : 20; |
1063 | } |
1064 | |
1065 | unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, |
1066 | unsigned FlatWorkGroupSize) { |
1067 | return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize), |
1068 | getEUsPerCU(STI)); |
1069 | } |
1070 | |
1071 | unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) { return 1; } |
1072 | |
1073 | unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) { |
1074 | // Some subtargets allow encoding 2048, but this isn't tested or supported. |
1075 | return 1024; |
1076 | } |
1077 | |
1078 | unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, |
1079 | unsigned FlatWorkGroupSize) { |
1080 | return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI)); |
1081 | } |
1082 | |
1083 | unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) { |
1084 | IsaVersion Version = getIsaVersion(STI->getCPU()); |
1085 | if (Version.Major >= 10) |
1086 | return getAddressableNumSGPRs(STI); |
1087 | if (Version.Major >= 8) |
1088 | return 16; |
1089 | return 8; |
1090 | } |
1091 | |
1092 | unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) { return 8; } |
1093 | |
1094 | unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) { |
1095 | IsaVersion Version = getIsaVersion(STI->getCPU()); |
1096 | if (Version.Major >= 8) |
1097 | return 800; |
1098 | return 512; |
1099 | } |
1100 | |
1101 | unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) { |
1102 | if (STI->getFeatureBits().test(FeatureSGPRInitBug)) |
1103 | return FIXED_NUM_SGPRS_FOR_INIT_BUG; |
1104 | |
1105 | IsaVersion Version = getIsaVersion(STI->getCPU()); |
1106 | if (Version.Major >= 10) |
1107 | return 106; |
1108 | if (Version.Major >= 8) |
1109 | return 102; |
1110 | return 104; |
1111 | } |
1112 | |
1113 | unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { |
1114 | assert(WavesPerEU != 0); |
1115 | |
1116 | IsaVersion Version = getIsaVersion(STI->getCPU()); |
1117 | if (Version.Major >= 10) |
1118 | return 0; |
1119 | |
1120 | if (WavesPerEU >= getMaxWavesPerEU(STI)) |
1121 | return 0; |
1122 | |
1123 | unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1); |
1124 | if (STI->getFeatureBits().test(FeatureTrapHandler)) |
1125 | MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS); |
1126 | MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1; |
1127 | return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI)); |
1128 | } |
1129 | |
1130 | unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, |
1131 | bool Addressable) { |
1132 | assert(WavesPerEU != 0); |
1133 | |
1134 | unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI); |
1135 | IsaVersion Version = getIsaVersion(STI->getCPU()); |
1136 | if (Version.Major >= 10) |
1137 | return Addressable ? AddressableNumSGPRs : 108; |
1138 | if (Version.Major >= 8 && !Addressable) |
1139 | AddressableNumSGPRs = 112; |
1140 | unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU; |
1141 | if (STI->getFeatureBits().test(FeatureTrapHandler)) |
1142 | MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS); |
1143 | MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI)); |
1144 | return std::min(MaxNumSGPRs, AddressableNumSGPRs); |
1145 | } |
1146 | |
1147 | unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, |
1148 | bool FlatScrUsed, bool XNACKUsed) { |
1149 | unsigned ExtraSGPRs = 0; |
1150 | if (VCCUsed) |
1151 | ExtraSGPRs = 2; |
1152 | |
1153 | IsaVersion Version = getIsaVersion(STI->getCPU()); |
1154 | if (Version.Major >= 10) |
1155 | return ExtraSGPRs; |
1156 | |
1157 | if (Version.Major < 8) { |
1158 | if (FlatScrUsed) |
1159 | ExtraSGPRs = 4; |
1160 | } else { |
1161 | if (XNACKUsed) |
1162 | ExtraSGPRs = 4; |
1163 | |
1164 | if (FlatScrUsed || |
1165 | STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch)) |
1166 | ExtraSGPRs = 6; |
1167 | } |
1168 | |
1169 | return ExtraSGPRs; |
1170 | } |
1171 | |
1172 | unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, |
1173 | bool FlatScrUsed) { |
1174 | return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed, |
1175 | STI->getFeatureBits().test(AMDGPU::FeatureXNACK)); |
1176 | } |
1177 | |
1178 | static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs, |
1179 | unsigned Granule) { |
1180 | return divideCeil(std::max(1u, NumRegs), Granule); |
1181 | } |
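// For example, getGranulatedNumRegisterBlocks(17, 8) == 3: seventeen
// registers occupy three 8-register granules, and the std::max(1u, ...)
// clamp makes a register count of zero still reserve one granule.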
1182 | |
1183 | unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) { |
1184 | // SGPRBlocks is actual number of SGPR blocks minus 1. |
1185 | return getGranulatedNumRegisterBlocks(NumSGPRs, getSGPREncodingGranule(STI)) - |
1186 | 1; |
1187 | } |
1188 | |
1189 | unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, |
1190 | unsigned DynamicVGPRBlockSize, |
1191 | std::optional<bool> EnableWavefrontSize32) { |
1192 | if (STI->getFeatureBits().test(FeatureGFX90AInsts)) |
1193 | return 8; |
1194 | |
1195 | if (DynamicVGPRBlockSize != 0) |
1196 | return DynamicVGPRBlockSize; |
1197 | |
1198 | // Temporarily check the subtarget feature, until we fully switch to using |
1199 | // attributes. |
1200 | if (STI->getFeatureBits().test(FeatureDynamicVGPR)) |
1201 | return STI->getFeatureBits().test(FeatureDynamicVGPRBlockSize32) ? 32 : 16; |
1202 | |
1203 | bool IsWave32 = EnableWavefrontSize32 |
1204 | ? *EnableWavefrontSize32 |
1205 | : STI->getFeatureBits().test(FeatureWavefrontSize32); |
1206 | |
1207 | if (STI->getFeatureBits().test(Feature1_5xVGPRs)) |
1208 | return IsWave32 ? 24 : 12; |
1209 | |
1210 | if (hasGFX10_3Insts(*STI)) |
1211 | return IsWave32 ? 16 : 8; |
1212 | |
1213 | return IsWave32 ? 8 : 4; |
1214 | } |
1215 | |
1216 | unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, |
1217 | std::optional<bool> EnableWavefrontSize32) { |
1218 | if (STI->getFeatureBits().test(FeatureGFX90AInsts)) |
1219 | return 8; |
1220 | |
1221 | bool IsWave32 = EnableWavefrontSize32 |
1222 | ? *EnableWavefrontSize32 |
1223 | : STI->getFeatureBits().test(FeatureWavefrontSize32); |
1224 | |
1225 | return IsWave32 ? 8 : 4; |
1226 | } |
1227 | |
1228 | unsigned getArchVGPRAllocGranule() { return 4; } |
1229 | |
1230 | unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) { |
1231 | if (STI->getFeatureBits().test(FeatureGFX90AInsts)) |
1232 | return 512; |
1233 | if (!isGFX10Plus(*STI)) |
1234 | return 256; |
1235 | bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32); |
1236 | if (STI->getFeatureBits().test(Feature1_5xVGPRs)) |
1237 | return IsWave32 ? 1536 : 768; |
1238 | return IsWave32 ? 1024 : 512; |
1239 | } |
1240 | |
1241 | unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI) { return 256; } |
1242 | |
1243 | unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI, |
1244 | unsigned DynamicVGPRBlockSize) { |
1245 | if (STI->getFeatureBits().test(FeatureGFX90AInsts)) |
1246 | return 512; |
1247 | |
1248 | // Temporarily check the subtarget feature, until we fully switch to using |
1249 | // attributes. |
1250 | if (DynamicVGPRBlockSize != 0 || |
1251 | STI->getFeatureBits().test(FeatureDynamicVGPR)) |
1252 | // On GFX12 we can allocate at most 8 blocks of VGPRs. |
1253 | return 8 * getVGPRAllocGranule(STI, DynamicVGPRBlockSize); |
1254 | return getAddressableNumArchVGPRs(STI); |
1255 | } |
1256 | |
1257 | unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI, |
1258 | unsigned NumVGPRs, |
1259 | unsigned DynamicVGPRBlockSize) { |
1260 | return getNumWavesPerEUWithNumVGPRs( |
1261 | NumVGPRs, getVGPRAllocGranule(STI, DynamicVGPRBlockSize), |
1262 | getMaxWavesPerEU(STI), getTotalNumVGPRs(STI)); |
1263 | } |
1264 | |
1265 | unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule, |
1266 | unsigned MaxWaves, |
1267 | unsigned TotalNumVGPRs) { |
1268 | if (NumVGPRs < Granule) |
1269 | return MaxWaves; |
1270 | unsigned RoundedRegs = alignTo(NumVGPRs, Granule); |
1271 | return std::min(std::max(TotalNumVGPRs / RoundedRegs, 1u), MaxWaves); |
1272 | } |
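// Illustrative numbers: with a 4-register granule, 256 total VGPRs and a
// 10-wave cap, a kernel using 84 VGPRs is already aligned to the granule and
// gets min(256 / 84, 10) = 3 waves per EU.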
1273 | |
1274 | unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves, |
1275 | AMDGPUSubtarget::Generation Gen) { |
1276 | if (Gen >= AMDGPUSubtarget::GFX10) |
1277 | return MaxWaves; |
1278 | |
1279 | if (Gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { |
1280 | if (SGPRs <= 80) |
1281 | return 10; |
1282 | if (SGPRs <= 88) |
1283 | return 9; |
1284 | if (SGPRs <= 100) |
1285 | return 8; |
1286 | return 7; |
1287 | } |
1288 | if (SGPRs <= 48) |
1289 | return 10; |
1290 | if (SGPRs <= 56) |
1291 | return 9; |
1292 | if (SGPRs <= 64) |
1293 | return 8; |
1294 | if (SGPRs <= 72) |
1295 | return 7; |
1296 | if (SGPRs <= 80) |
1297 | return 6; |
1298 | return 5; |
1299 | } |
1300 | |
1301 | unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, |
1302 | unsigned DynamicVGPRBlockSize) { |
1303 | assert(WavesPerEU != 0); |
1304 | |
1305 | unsigned MaxWavesPerEU = getMaxWavesPerEU(STI); |
1306 | if (WavesPerEU >= MaxWavesPerEU) |
1307 | return 0; |
1308 | |
1309 | unsigned TotNumVGPRs = getTotalNumVGPRs(STI); |
1310 | unsigned AddrsableNumVGPRs = |
1311 | getAddressableNumVGPRs(STI, DynamicVGPRBlockSize); |
1312 | unsigned Granule = getVGPRAllocGranule(STI, DynamicVGPRBlockSize); |
1313 | unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule); |
1314 | |
1315 | if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule)) |
1316 | return 0; |
1317 | |
1318 | unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs, |
1319 | DynamicVGPRBlockSize); |
1320 | if (WavesPerEU < MinWavesPerEU) |
1321 | return getMinNumVGPRs(STI, MinWavesPerEU, DynamicVGPRBlockSize); |
1322 | |
1323 | unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule); |
1324 | unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext); |
1325 | return std::min(MinNumVGPRs, AddrsableNumVGPRs); |
1326 | } |
1327 | |
1328 | unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, |
1329 | unsigned DynamicVGPRBlockSize) { |
1330 | assert(WavesPerEU != 0); |
1331 | |
1332 | unsigned MaxNumVGPRs = |
1333 | alignDown(getTotalNumVGPRs(STI) / WavesPerEU, |
1334 | getVGPRAllocGranule(STI, DynamicVGPRBlockSize)); |
1335 | unsigned AddressableNumVGPRs = |
1336 | getAddressableNumVGPRs(STI, DynamicVGPRBlockSize); |
1337 | return std::min(MaxNumVGPRs, AddressableNumVGPRs); |
1338 | } |
1339 | |
1340 | unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, |
1341 | std::optional<bool> EnableWavefrontSize32) { |
1342 | return getGranulatedNumRegisterBlocks( |
1343 | NumVGPRs, getVGPREncodingGranule(STI, EnableWavefrontSize32)) - |
1344 | 1; |
1345 | } |
1346 | |
1347 | unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI, |
1348 | unsigned NumVGPRs, |
1349 | unsigned DynamicVGPRBlockSize, |
1350 | std::optional<bool> EnableWavefrontSize32) { |
1351 | return getGranulatedNumRegisterBlocks( |
1352 | NumVGPRs, |
1353 | getVGPRAllocGranule(STI, DynamicVGPRBlockSize, EnableWavefrontSize32)); |
1354 | } |
1355 | } // end namespace IsaInfo |
1356 | |
1357 | void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode, |
1358 | const MCSubtargetInfo *STI) { |
1359 | IsaVersion Version = getIsaVersion(STI->getCPU()); |
1360 | KernelCode.amd_kernel_code_version_major = 1; |
1361 | KernelCode.amd_kernel_code_version_minor = 2; |
1362 | KernelCode.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU |
1363 | KernelCode.amd_machine_version_major = Version.Major; |
1364 | KernelCode.amd_machine_version_minor = Version.Minor; |
1365 | KernelCode.amd_machine_version_stepping = Version.Stepping; |
1366 | KernelCode.kernel_code_entry_byte_offset = sizeof(amd_kernel_code_t); |
1367 | if (STI->getFeatureBits().test(FeatureWavefrontSize32)) { |
1368 | KernelCode.wavefront_size = 5; |
1369 | KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32; |
1370 | } else { |
1371 | KernelCode.wavefront_size = 6; |
1372 | } |
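// Note: amd_kernel_code_t stores wavefront_size as a log2 value, so the 5
// and 6 written above correspond to wave32 and wave64 respectively.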
1373 | |
1374 | // If the code object does not support indirect functions, then the value must |
1375 | // be 0xffffffff. |
1376 | KernelCode.call_convention = -1; |
1377 | |
1378 | // These alignment values are specified in powers of two, so alignment = |
1379 | // 2^n. The minimum alignment is 2^4 = 16. |
1380 | KernelCode.kernarg_segment_alignment = 4; |
1381 | KernelCode.group_segment_alignment = 4; |
1382 | KernelCode.private_segment_alignment = 4; |
1383 | |
1384 | if (Version.Major >= 10) { |
1385 | KernelCode.compute_pgm_resource_registers |= |
1386 | S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) | |
1387 | S_00B848_MEM_ORDERED(1) | S_00B848_FWD_PROGRESS(1); |
1388 | } |
1389 | } |
1390 | |
1391 | bool isGroupSegment(const GlobalValue *GV) { |
1392 | return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; |
1393 | } |
1394 | |
1395 | bool isGlobalSegment(const GlobalValue *GV) { |
1396 | return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; |
1397 | } |
1398 | |
1399 | bool isReadOnlySegment(const GlobalValue *GV) { |
1400 | unsigned AS = GV->getAddressSpace(); |
1401 | return AS == AMDGPUAS::CONSTANT_ADDRESS || |
1402 | AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT; |
1403 | } |
1404 | |
1405 | bool shouldEmitConstantsToTextSection(const Triple &TT) { |
1406 | return TT.getArch() == Triple::r600; |
1407 | } |
1408 | |
1409 | std::pair<unsigned, unsigned> |
1410 | getIntegerPairAttribute(const Function &F, StringRef Name, |
1411 | std::pair<unsigned, unsigned> Default, |
1412 | bool OnlyFirstRequired) { |
1413 | if (auto Attr = getIntegerPairAttribute(F, Name, OnlyFirstRequired)) |
1414 | return {Attr->first, Attr->second.value_or(Default.second)}; |
1415 | return Default; |
1416 | } |
1417 | |
1418 | std::optional<std::pair<unsigned, std::optional<unsigned>>> |
1419 | getIntegerPairAttribute(const Function &F, StringRef Name, |
1420 | bool OnlyFirstRequired) { |
1421 | Attribute A = F.getFnAttribute(Name); |
1422 | if (!A.isStringAttribute()) |
1423 | return std::nullopt; |
1424 | |
1425 | LLVMContext &Ctx = F.getContext(); |
1426 | std::pair<unsigned, std::optional<unsigned>> Ints; |
1427 | std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(','); |
1428 | if (Strs.first.trim().getAsInteger(0, Ints.first)) { |
1429 | Ctx.emitError("can't parse first integer attribute " + Name); |
1430 | return std::nullopt; |
1431 | } |
1432 | unsigned Second = 0; |
1433 | if (Strs.second.trim().getAsInteger(0, Second)) { |
1434 | if (!OnlyFirstRequired || !Strs.second.trim().empty()) { |
1435 | Ctx.emitError("can't parse second integer attribute " + Name); |
1436 | return std::nullopt; |
1437 | } |
1438 | } else { |
1439 | Ints.second = Second; |
1440 | } |
1441 | |
1442 | return Ints; |
1443 | } |
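// Usage sketch (hypothetical values): an attribute string of "64,256" yields
// {64, 256}, while a bare "64" yields {64, std::nullopt} when
// OnlyFirstRequired is true and emits an error otherwise; the pair overload
// above then substitutes Default.second for the missing value. Attributes
// such as "amdgpu-flat-work-group-size" are parsed through these helpers.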
1444 | |
1445 | SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name, |
1446 | unsigned Size, |
1447 | unsigned DefaultVal) { |
1448 | std::optional<SmallVector<unsigned>> R = |
1449 | getIntegerVecAttribute(F, Name, Size); |
1450 | return R.has_value() ? *R : SmallVector<unsigned>(Size, DefaultVal); |
1451 | } |
1452 | |
1453 | std::optional<SmallVector<unsigned>> |
1454 | getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size) { |
1455 | assert(Size > 2); |
1456 | LLVMContext &Ctx = F.getContext(); |
1457 | |
1458 | Attribute A = F.getFnAttribute(Name); |
1459 | if (!A.isValid()) |
1460 | return std::nullopt; |
1461 | if (!A.isStringAttribute()) { |
1462 | Ctx.emitError(Name + " is not a string attribute"); |
1463 | return std::nullopt; |
1464 | } |
1465 | |
1466 | SmallVector<unsigned> Vals(Size); |
1467 | |
1468 | StringRef S = A.getValueAsString(); |
1469 | unsigned i = 0; |
1470 | for (; !S.empty() && i < Size; i++) { |
1471 | std::pair<StringRef, StringRef> Strs = S.split(','); |
1472 | unsigned IntVal; |
1473 | if (Strs.first.trim().getAsInteger(0, IntVal)) { |
1474 | Ctx.emitError("can't parse integer attribute " + Strs.first + " in " + |
1475 | Name); |
1476 | return std::nullopt; |
1477 | } |
1478 | Vals[i] = IntVal; |
1479 | S = Strs.second; |
1480 | } |
1481 | |
1482 | if (!S.empty() || i < Size) { |
1483 | Ctx.emitError(ErrorStr: "attribute " + Name + |
1484 | " has incorrect number of integers; expected " + |
1485 | llvm::utostr(X: Size)); |
1486 | return std::nullopt; |
1487 | } |
1488 | return Vals; |
1489 | } |
1490 | |
1491 | unsigned getVmcntBitMask(const IsaVersion &Version) { |
1492 | return (1 << (getVmcntBitWidthLo(Version.Major) + |
1493 | getVmcntBitWidthHi(Version.Major))) - |
1494 | 1; |
1495 | } |
1496 | |
1497 | unsigned getLoadcntBitMask(const IsaVersion &Version) { |
1498 | return (1 << getLoadcntBitWidth(VersionMajor: Version.Major)) - 1; |
1499 | } |
1500 | |
1501 | unsigned getSamplecntBitMask(const IsaVersion &Version) { |
1502 | return (1 << getSamplecntBitWidth(VersionMajor: Version.Major)) - 1; |
1503 | } |
1504 | |
1505 | unsigned getBvhcntBitMask(const IsaVersion &Version) { |
1506 | return (1 << getBvhcntBitWidth(VersionMajor: Version.Major)) - 1; |
1507 | } |
1508 | |
1509 | unsigned getExpcntBitMask(const IsaVersion &Version) { |
1510 | return (1 << getExpcntBitWidth(VersionMajor: Version.Major)) - 1; |
1511 | } |
1512 | |
1513 | unsigned getLgkmcntBitMask(const IsaVersion &Version) { |
1514 | return (1 << getLgkmcntBitWidth(VersionMajor: Version.Major)) - 1; |
1515 | } |
1516 | |
1517 | unsigned getDscntBitMask(const IsaVersion &Version) { |
1518 | return (1 << getDscntBitWidth(VersionMajor: Version.Major)) - 1; |
1519 | } |
1520 | |
1521 | unsigned getKmcntBitMask(const IsaVersion &Version) { |
1522 | return (1 << getKmcntBitWidth(VersionMajor: Version.Major)) - 1; |
1523 | } |
1524 | |
1525 | unsigned getXcntBitMask(const IsaVersion &Version) { |
1526 | return (1 << getXcntBitWidth(VersionMajor: Version.Major, VersionMinor: Version.Minor)) - 1; |
1527 | } |
1528 | |
1529 | unsigned getStorecntBitMask(const IsaVersion &Version) { |
1530 | return (1 << getStorecntBitWidth(VersionMajor: Version.Major)) - 1; |
1531 | } |
1532 | |
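/// \returns The mask covering every waitcnt field (vmcnt low/high, expcnt and
/// lgkmcnt) defined for the given ISA \p Version.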
1533 | unsigned getWaitcntBitMask(const IsaVersion &Version) { |
1534 | unsigned VmcntLo = getBitMask(Shift: getVmcntBitShiftLo(VersionMajor: Version.Major), |
1535 | Width: getVmcntBitWidthLo(VersionMajor: Version.Major)); |
1536 | unsigned Expcnt = getBitMask(Shift: getExpcntBitShift(VersionMajor: Version.Major), |
1537 | Width: getExpcntBitWidth(VersionMajor: Version.Major)); |
1538 | unsigned Lgkmcnt = getBitMask(Shift: getLgkmcntBitShift(VersionMajor: Version.Major), |
1539 | Width: getLgkmcntBitWidth(VersionMajor: Version.Major)); |
1540 | unsigned VmcntHi = getBitMask(Shift: getVmcntBitShiftHi(VersionMajor: Version.Major), |
1541 | Width: getVmcntBitWidthHi(VersionMajor: Version.Major)); |
1542 | return VmcntLo | Expcnt | Lgkmcnt | VmcntHi; |
1543 | } |
1544 | |
1545 | unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) { |
1546 | unsigned VmcntLo = unpackBits(Src: Waitcnt, Shift: getVmcntBitShiftLo(VersionMajor: Version.Major), |
1547 | Width: getVmcntBitWidthLo(VersionMajor: Version.Major)); |
1548 | unsigned VmcntHi = unpackBits(Src: Waitcnt, Shift: getVmcntBitShiftHi(VersionMajor: Version.Major), |
1549 | Width: getVmcntBitWidthHi(VersionMajor: Version.Major)); |
1550 | return VmcntLo | VmcntHi << getVmcntBitWidthLo(VersionMajor: Version.Major); |
1551 | } |
1552 | |
1553 | unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) { |
1554 | return unpackBits(Src: Waitcnt, Shift: getExpcntBitShift(VersionMajor: Version.Major), |
1555 | Width: getExpcntBitWidth(VersionMajor: Version.Major)); |
1556 | } |
1557 | |
1558 | unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) { |
1559 | return unpackBits(Src: Waitcnt, Shift: getLgkmcntBitShift(VersionMajor: Version.Major), |
1560 | Width: getLgkmcntBitWidth(VersionMajor: Version.Major)); |
1561 | } |
1562 | |
1563 | void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, |
1564 | unsigned &Expcnt, unsigned &Lgkmcnt) { |
1565 | Vmcnt = decodeVmcnt(Version, Waitcnt); |
1566 | Expcnt = decodeExpcnt(Version, Waitcnt); |
1567 | Lgkmcnt = decodeLgkmcnt(Version, Waitcnt); |
1568 | } |
1569 | |
1570 | Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) { |
1571 | Waitcnt Decoded; |
1572 | Decoded.LoadCnt = decodeVmcnt(Version, Waitcnt: Encoded); |
1573 | Decoded.ExpCnt = decodeExpcnt(Version, Waitcnt: Encoded); |
1574 | Decoded.DsCnt = decodeLgkmcnt(Version, Waitcnt: Encoded); |
1575 | return Decoded; |
1576 | } |
1577 | |
1578 | unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, |
1579 | unsigned Vmcnt) { |
1580 | Waitcnt = packBits(Src: Vmcnt, Dst: Waitcnt, Shift: getVmcntBitShiftLo(VersionMajor: Version.Major), |
1581 | Width: getVmcntBitWidthLo(VersionMajor: Version.Major)); |
1582 | return packBits(Src: Vmcnt >> getVmcntBitWidthLo(VersionMajor: Version.Major), Dst: Waitcnt, |
1583 | Shift: getVmcntBitShiftHi(VersionMajor: Version.Major), |
1584 | Width: getVmcntBitWidthHi(VersionMajor: Version.Major)); |
1585 | } |
1586 | |
1587 | unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, |
1588 | unsigned Expcnt) { |
1589 | return packBits(Src: Expcnt, Dst: Waitcnt, Shift: getExpcntBitShift(VersionMajor: Version.Major), |
1590 | Width: getExpcntBitWidth(VersionMajor: Version.Major)); |
1591 | } |
1592 | |
1593 | unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, |
1594 | unsigned Lgkmcnt) { |
1595 | return packBits(Src: Lgkmcnt, Dst: Waitcnt, Shift: getLgkmcntBitShift(VersionMajor: Version.Major), |
1596 | Width: getLgkmcntBitWidth(VersionMajor: Version.Major)); |
1597 | } |
1598 | |
1599 | unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, |
1600 | unsigned Expcnt, unsigned Lgkmcnt) { |
1601 | unsigned Waitcnt = getWaitcntBitMask(Version); |
1602 | Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt); |
1603 | Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt); |
1604 | Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt); |
1605 | return Waitcnt; |
1606 | } |
1607 | |
1608 | unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) { |
1609 | return encodeWaitcnt(Version, Vmcnt: Decoded.LoadCnt, Expcnt: Decoded.ExpCnt, Lgkmcnt: Decoded.DsCnt); |
1610 | } |
1611 | |
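/// \returns The combined mask of the dscnt field and either the storecnt
/// (\p IsStore) or the loadcnt field for the given ISA \p Version.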
1612 | static unsigned getCombinedCountBitMask(const IsaVersion &Version, |
1613 | bool IsStore) { |
1614 | unsigned Dscnt = getBitMask(Shift: getDscntBitShift(VersionMajor: Version.Major), |
1615 | Width: getDscntBitWidth(VersionMajor: Version.Major)); |
1616 | if (IsStore) { |
1617 | unsigned Storecnt = getBitMask(Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major), |
1618 | Width: getStorecntBitWidth(VersionMajor: Version.Major)); |
1619 | return Dscnt | Storecnt; |
1620 | } |
1621 | unsigned Loadcnt = getBitMask(Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major), |
1622 | Width: getLoadcntBitWidth(VersionMajor: Version.Major)); |
1623 | return Dscnt | Loadcnt; |
1624 | } |
1625 | |
1626 | Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt) { |
1627 | Waitcnt Decoded; |
1628 | Decoded.LoadCnt = |
1629 | unpackBits(Src: LoadcntDscnt, Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major), |
1630 | Width: getLoadcntBitWidth(VersionMajor: Version.Major)); |
1631 | Decoded.DsCnt = unpackBits(Src: LoadcntDscnt, Shift: getDscntBitShift(VersionMajor: Version.Major), |
1632 | Width: getDscntBitWidth(VersionMajor: Version.Major)); |
1633 | return Decoded; |
1634 | } |
1635 | |
1636 | Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt) { |
1637 | Waitcnt Decoded; |
1638 | Decoded.StoreCnt = |
1639 | unpackBits(Src: StorecntDscnt, Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major), |
1640 | Width: getStorecntBitWidth(VersionMajor: Version.Major)); |
1641 | Decoded.DsCnt = unpackBits(Src: StorecntDscnt, Shift: getDscntBitShift(VersionMajor: Version.Major), |
1642 | Width: getDscntBitWidth(VersionMajor: Version.Major)); |
1643 | return Decoded; |
1644 | } |
1645 | |
1646 | static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt, |
1647 | unsigned Loadcnt) { |
1648 | return packBits(Src: Loadcnt, Dst: Waitcnt, Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major), |
1649 | Width: getLoadcntBitWidth(VersionMajor: Version.Major)); |
1650 | } |
1651 | |
1652 | static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt, |
1653 | unsigned Storecnt) { |
1654 | return packBits(Src: Storecnt, Dst: Waitcnt, Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major), |
1655 | Width: getStorecntBitWidth(VersionMajor: Version.Major)); |
1656 | } |
1657 | |
1658 | static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt, |
1659 | unsigned Dscnt) { |
1660 | return packBits(Src: Dscnt, Dst: Waitcnt, Shift: getDscntBitShift(VersionMajor: Version.Major), |
1661 | Width: getDscntBitWidth(VersionMajor: Version.Major)); |
1662 | } |
1663 | |
1664 | static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt, |
1665 | unsigned Dscnt) { |
1666 | unsigned Waitcnt = getCombinedCountBitMask(Version, IsStore: false); |
1667 | Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt); |
1668 | Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt); |
1669 | return Waitcnt; |
1670 | } |
1671 | |
1672 | unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded) { |
1673 | return encodeLoadcntDscnt(Version, Loadcnt: Decoded.LoadCnt, Dscnt: Decoded.DsCnt); |
1674 | } |
1675 | |
1676 | static unsigned encodeStorecntDscnt(const IsaVersion &Version, |
1677 | unsigned Storecnt, unsigned Dscnt) { |
1678 | unsigned Waitcnt = getCombinedCountBitMask(Version, IsStore: true); |
1679 | Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt); |
1680 | Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt); |
1681 | return Waitcnt; |
1682 | } |
1683 | |
1684 | unsigned encodeStorecntDscnt(const IsaVersion &Version, |
1685 | const Waitcnt &Decoded) { |
1686 | return encodeStorecntDscnt(Version, Storecnt: Decoded.StoreCnt, Dscnt: Decoded.DsCnt); |
1687 | } |
1688 | |
1689 | //===----------------------------------------------------------------------===// |
1690 | // Custom Operand Values |
1691 | //===----------------------------------------------------------------------===// |
1692 | |
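/// \returns The encoding obtained by setting every operand in \p Opr that is
/// supported on \p STI to its default value.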
1693 | static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr, |
1694 | int Size, |
1695 | const MCSubtargetInfo &STI) { |
1696 | unsigned Enc = 0; |
1697 | for (int Idx = 0; Idx < Size; ++Idx) { |
1698 | const auto &Op = Opr[Idx]; |
1699 | if (Op.isSupported(STI)) |
1700 | Enc |= Op.encode(Val: Op.Default); |
1701 | } |
1702 | return Enc; |
1703 | } |
1704 | |
1705 | static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr, |
1706 | int Size, unsigned Code, |
1707 | bool &HasNonDefaultVal, |
1708 | const MCSubtargetInfo &STI) { |
1709 | unsigned UsedOprMask = 0; |
1710 | HasNonDefaultVal = false; |
1711 | for (int Idx = 0; Idx < Size; ++Idx) { |
1712 | const auto &Op = Opr[Idx]; |
1713 | if (!Op.isSupported(STI)) |
1714 | continue; |
1715 | UsedOprMask |= Op.getMask(); |
1716 | unsigned Val = Op.decode(Code); |
1717 | if (!Op.isValid(Val)) |
1718 | return false; |
1719 | HasNonDefaultVal |= (Val != Op.Default); |
1720 | } |
1721 | return (Code & ~UsedOprMask) == 0; |
1722 | } |
1723 | |
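/// Decodes the next operand from \p Code, starting the search at \p Idx and
/// advancing it; operands not supported on \p STI are skipped. \returns false
/// once all \p Size operands have been visited.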
1724 | static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size, |
1725 | unsigned Code, int &Idx, StringRef &Name, |
1726 | unsigned &Val, bool &IsDefault, |
1727 | const MCSubtargetInfo &STI) { |
1728 | while (Idx < Size) { |
1729 | const auto &Op = Opr[Idx++]; |
1730 | if (Op.isSupported(STI)) { |
1731 | Name = Op.Name; |
1732 | Val = Op.decode(Code); |
1733 | IsDefault = (Val == Op.Default); |
1734 | return true; |
1735 | } |
1736 | } |
1737 | |
1738 | return false; |
1739 | } |
1740 | |
1741 | static int encodeCustomOperandVal(const CustomOperandVal &Op, |
1742 | int64_t InputVal) { |
1743 | if (InputVal < 0 || InputVal > Op.Max) |
1744 | return OPR_VAL_INVALID; |
1745 | return Op.encode(Val: InputVal); |
1746 | } |
1747 | |
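/// Encodes operand \p Name with value \p InputVal, recording its field in
/// \p UsedOprMask. \returns the encoded value, or OPR_ID_UNKNOWN /
/// OPR_ID_UNSUPPORTED / OPR_ID_DUPLICATE / OPR_VAL_INVALID if the name is
/// unknown, not supported on \p STI, already encoded, or out of range.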
1748 | static int encodeCustomOperand(const CustomOperandVal *Opr, int Size, |
1749 | const StringRef Name, int64_t InputVal, |
1750 | unsigned &UsedOprMask, |
1751 | const MCSubtargetInfo &STI) { |
1752 | int InvalidId = OPR_ID_UNKNOWN; |
1753 | for (int Idx = 0; Idx < Size; ++Idx) { |
1754 | const auto &Op = Opr[Idx]; |
1755 | if (Op.Name == Name) { |
1756 | if (!Op.isSupported(STI)) { |
1757 | InvalidId = OPR_ID_UNSUPPORTED; |
1758 | continue; |
1759 | } |
1760 | auto OprMask = Op.getMask(); |
1761 | if (OprMask & UsedOprMask) |
1762 | return OPR_ID_DUPLICATE; |
1763 | UsedOprMask |= OprMask; |
1764 | return encodeCustomOperandVal(Op, InputVal); |
1765 | } |
1766 | } |
1767 | return InvalidId; |
1768 | } |
1769 | |
1770 | //===----------------------------------------------------------------------===// |
1771 | // DepCtr |
1772 | //===----------------------------------------------------------------------===// |
1773 | |
1774 | namespace DepCtr { |
1775 | |
1776 | int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) { |
1777 | static int Default = -1; |
1778 | if (Default == -1) |
1779 | Default = getDefaultCustomOperandEncoding(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, STI); |
1780 | return Default; |
1781 | } |
1782 | |
1783 | bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, |
1784 | const MCSubtargetInfo &STI) { |
1785 | return isSymbolicCustomOperandEncoding(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, Code, |
1786 | HasNonDefaultVal, STI); |
1787 | } |
1788 | |
1789 | bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, |
1790 | bool &IsDefault, const MCSubtargetInfo &STI) { |
1791 | return decodeCustomOperand(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, Code, Idx&: Id, Name, Val, |
1792 | IsDefault, STI); |
1793 | } |
1794 | |
1795 | int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, |
1796 | const MCSubtargetInfo &STI) { |
1797 | return encodeCustomOperand(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, Name, InputVal: Val, UsedOprMask, |
1798 | STI); |
1799 | } |
1800 | |
1801 | unsigned decodeFieldVmVsrc(unsigned Encoded) { |
1802 | return unpackBits(Src: Encoded, Shift: getVmVsrcBitShift(), Width: getVmVsrcBitWidth()); |
1803 | } |
1804 | |
1805 | unsigned decodeFieldVaVdst(unsigned Encoded) { |
1806 | return unpackBits(Src: Encoded, Shift: getVaVdstBitShift(), Width: getVaVdstBitWidth()); |
1807 | } |
1808 | |
1809 | unsigned decodeFieldSaSdst(unsigned Encoded) { |
1810 | return unpackBits(Src: Encoded, Shift: getSaSdstBitShift(), Width: getSaSdstBitWidth()); |
1811 | } |
1812 | |
1813 | unsigned decodeFieldVaSdst(unsigned Encoded) { |
1814 | return unpackBits(Src: Encoded, Shift: getVaSdstBitShift(), Width: getVaSdstBitWidth()); |
1815 | } |
1816 | |
1817 | unsigned decodeFieldVaVcc(unsigned Encoded) { |
1818 | return unpackBits(Src: Encoded, Shift: getVaVccBitShift(), Width: getVaVccBitWidth()); |
1819 | } |
1820 | |
1821 | unsigned decodeFieldVaSsrc(unsigned Encoded) { |
1822 | return unpackBits(Src: Encoded, Shift: getVaSsrcBitShift(), Width: getVaSsrcBitWidth()); |
1823 | } |
1824 | |
1825 | unsigned decodeFieldHoldCnt(unsigned Encoded) { |
1826 | return unpackBits(Src: Encoded, Shift: getHoldCntBitShift(), Width: getHoldCntWidth()); |
1827 | } |
1828 | |
1829 | unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) { |
1830 | return packBits(Src: VmVsrc, Dst: Encoded, Shift: getVmVsrcBitShift(), Width: getVmVsrcBitWidth()); |
1831 | } |
1832 | |
1833 | unsigned encodeFieldVmVsrc(unsigned VmVsrc) { |
1834 | return encodeFieldVmVsrc(Encoded: 0xffff, VmVsrc); |
1835 | } |
1836 | |
1837 | unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) { |
1838 | return packBits(Src: VaVdst, Dst: Encoded, Shift: getVaVdstBitShift(), Width: getVaVdstBitWidth()); |
1839 | } |
1840 | |
1841 | unsigned encodeFieldVaVdst(unsigned VaVdst) { |
1842 | return encodeFieldVaVdst(Encoded: 0xffff, VaVdst); |
1843 | } |
1844 | |
1845 | unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) { |
1846 | return packBits(Src: SaSdst, Dst: Encoded, Shift: getSaSdstBitShift(), Width: getSaSdstBitWidth()); |
1847 | } |
1848 | |
1849 | unsigned encodeFieldSaSdst(unsigned SaSdst) { |
1850 | return encodeFieldSaSdst(Encoded: 0xffff, SaSdst); |
1851 | } |
1852 | |
1853 | unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst) { |
1854 | return packBits(Src: VaSdst, Dst: Encoded, Shift: getVaSdstBitShift(), Width: getVaSdstBitWidth()); |
1855 | } |
1856 | |
1857 | unsigned encodeFieldVaSdst(unsigned VaSdst) { |
1858 | return encodeFieldVaSdst(Encoded: 0xffff, VaSdst); |
1859 | } |
1860 | |
1861 | unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc) { |
1862 | return packBits(Src: VaVcc, Dst: Encoded, Shift: getVaVccBitShift(), Width: getVaVccBitWidth()); |
1863 | } |
1864 | |
1865 | unsigned encodeFieldVaVcc(unsigned VaVcc) { |
1866 | return encodeFieldVaVcc(Encoded: 0xffff, VaVcc); |
1867 | } |
1868 | |
1869 | unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc) { |
1870 | return packBits(Src: VaSsrc, Dst: Encoded, Shift: getVaSsrcBitShift(), Width: getVaSsrcBitWidth()); |
1871 | } |
1872 | |
1873 | unsigned encodeFieldVaSsrc(unsigned VaSsrc) { |
1874 | return encodeFieldVaSsrc(Encoded: 0xffff, VaSsrc); |
1875 | } |
1876 | |
1877 | unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt) { |
1878 | return packBits(Src: HoldCnt, Dst: Encoded, Shift: getHoldCntBitShift(), Width: getHoldCntWidth()); |
1879 | } |
1880 | |
1881 | unsigned encodeFieldHoldCnt(unsigned HoldCnt) { |
1882 | return encodeFieldHoldCnt(Encoded: 0xffff, HoldCnt); |
1883 | } |
1884 | |
1885 | } // namespace DepCtr |
1886 | |
1887 | //===----------------------------------------------------------------------===// |
1888 | // exp tgt |
1889 | //===----------------------------------------------------------------------===// |
1890 | |
1891 | namespace Exp { |
1892 | |
1893 | struct ExpTgt { |
1894 | StringLiteral Name; |
1895 | unsigned Tgt; |
1896 | unsigned MaxIndex; |
1897 | }; |
1898 | |
1899 | // clang-format off |
1900 | static constexpr ExpTgt ExpTgtInfo[] = { |
1901 | {.Name: {"null" }, .Tgt: ET_NULL, .MaxIndex: ET_NULL_MAX_IDX}, |
1902 | {.Name: {"mrtz" }, .Tgt: ET_MRTZ, .MaxIndex: ET_MRTZ_MAX_IDX}, |
1903 | {.Name: {"prim" }, .Tgt: ET_PRIM, .MaxIndex: ET_PRIM_MAX_IDX}, |
1904 | {.Name: {"mrt" }, .Tgt: ET_MRT0, .MaxIndex: ET_MRT_MAX_IDX}, |
1905 | {.Name: {"pos" }, .Tgt: ET_POS0, .MaxIndex: ET_POS_MAX_IDX}, |
1906 | {.Name: {"dual_src_blend" },.Tgt: ET_DUAL_SRC_BLEND0, .MaxIndex: ET_DUAL_SRC_BLEND_MAX_IDX}, |
1907 | {.Name: {"param" }, .Tgt: ET_PARAM0, .MaxIndex: ET_PARAM_MAX_IDX}, |
1908 | }; |
1909 | // clang-format on |
1910 | |
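/// Maps export target id \p Id to its symbolic \p Name and \p Index; \p Index
/// is set to -1 for targets that take no index. \returns false if \p Id does
/// not fall into any known target range.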
1911 | bool getTgtName(unsigned Id, StringRef &Name, int &Index) { |
1912 | for (const ExpTgt &Val : ExpTgtInfo) { |
1913 | if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) { |
1914 | Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt); |
1915 | Name = Val.Name; |
1916 | return true; |
1917 | } |
1918 | } |
1919 | return false; |
1920 | } |
1921 | |
1922 | unsigned getTgtId(const StringRef Name) { |
1923 | |
1924 | for (const ExpTgt &Val : ExpTgtInfo) { |
1925 | if (Val.MaxIndex == 0 && Name == Val.Name) |
1926 | return Val.Tgt; |
1927 | |
1928 | if (Val.MaxIndex > 0 && Name.starts_with(Prefix: Val.Name)) { |
1929 | StringRef Suffix = Name.drop_front(N: Val.Name.size()); |
1930 | |
1931 | unsigned Id; |
1932 | if (Suffix.getAsInteger(Radix: 10, Result&: Id) || Id > Val.MaxIndex) |
1933 | return ET_INVALID; |
1934 | |
// Reject leading zeroes.
1936 | if (Suffix.size() > 1 && Suffix[0] == '0') |
1937 | return ET_INVALID; |
1938 | |
1939 | return Val.Tgt + Id; |
1940 | } |
1941 | } |
1942 | return ET_INVALID; |
1943 | } |
1944 | |
1945 | bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) { |
1946 | switch (Id) { |
1947 | case ET_NULL: |
1948 | return !isGFX11Plus(STI); |
1949 | case ET_POS4: |
1950 | case ET_PRIM: |
1951 | return isGFX10Plus(STI); |
1952 | case ET_DUAL_SRC_BLEND0: |
1953 | case ET_DUAL_SRC_BLEND1: |
1954 | return isGFX11Plus(STI); |
1955 | default: |
1956 | if (Id >= ET_PARAM0 && Id <= ET_PARAM31) |
1957 | return !isGFX11Plus(STI); |
1958 | return true; |
1959 | } |
1960 | } |
1961 | |
1962 | } // namespace Exp |
1963 | |
1964 | //===----------------------------------------------------------------------===// |
1965 | // MTBUF Format |
1966 | //===----------------------------------------------------------------------===// |
1967 | |
1968 | namespace MTBUFFormat { |
1969 | |
1970 | int64_t getDfmt(const StringRef Name) { |
1971 | for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) { |
1972 | if (Name == DfmtSymbolic[Id]) |
1973 | return Id; |
1974 | } |
1975 | return DFMT_UNDEF; |
1976 | } |
1977 | |
1978 | StringRef getDfmtName(unsigned Id) { |
1979 | assert(Id <= DFMT_MAX); |
1980 | return DfmtSymbolic[Id]; |
1981 | } |
1982 | |
1983 | static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) { |
1984 | if (isSI(STI) || isCI(STI)) |
1985 | return NfmtSymbolicSICI; |
1986 | if (isVI(STI) || isGFX9(STI)) |
1987 | return NfmtSymbolicVI; |
1988 | return NfmtSymbolicGFX10; |
1989 | } |
1990 | |
1991 | int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) { |
1992 | const auto *lookupTable = getNfmtLookupTable(STI); |
1993 | for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) { |
1994 | if (Name == lookupTable[Id]) |
1995 | return Id; |
1996 | } |
1997 | return NFMT_UNDEF; |
1998 | } |
1999 | |
2000 | StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) { |
2001 | assert(Id <= NFMT_MAX); |
2002 | return getNfmtLookupTable(STI)[Id]; |
2003 | } |
2004 | |
2005 | bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) { |
2006 | unsigned Dfmt; |
2007 | unsigned Nfmt; |
2008 | decodeDfmtNfmt(Format: Id, Dfmt, Nfmt); |
2009 | return isValidNfmt(Val: Nfmt, STI); |
2010 | } |
2011 | |
2012 | bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) { |
2013 | return !getNfmtName(Id, STI).empty(); |
2014 | } |
2015 | |
2016 | int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) { |
2017 | return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT); |
2018 | } |
2019 | |
2020 | void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) { |
2021 | Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK; |
2022 | Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK; |
2023 | } |
2024 | |
2025 | int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) { |
2026 | if (isGFX11Plus(STI)) { |
2027 | for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) { |
2028 | if (Name == UfmtSymbolicGFX11[Id]) |
2029 | return Id; |
2030 | } |
2031 | } else { |
2032 | for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) { |
2033 | if (Name == UfmtSymbolicGFX10[Id]) |
2034 | return Id; |
2035 | } |
2036 | } |
2037 | return UFMT_UNDEF; |
2038 | } |
2039 | |
2040 | StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) { |
2041 | if (isValidUnifiedFormat(Val: Id, STI)) |
2042 | return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id]; |
2043 | return "" ; |
2044 | } |
2045 | |
2046 | bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) { |
2047 | return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST; |
2048 | } |
2049 | |
2050 | int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, |
2051 | const MCSubtargetInfo &STI) { |
2052 | int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt); |
2053 | if (isGFX11Plus(STI)) { |
2054 | for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) { |
2055 | if (Fmt == DfmtNfmt2UFmtGFX11[Id]) |
2056 | return Id; |
2057 | } |
2058 | } else { |
2059 | for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) { |
2060 | if (Fmt == DfmtNfmt2UFmtGFX10[Id]) |
2061 | return Id; |
2062 | } |
2063 | } |
2064 | return UFMT_UNDEF; |
2065 | } |
2066 | |
2067 | bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) { |
2068 | return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX); |
2069 | } |
2070 | |
2071 | unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) { |
2072 | if (isGFX10Plus(STI)) |
2073 | return UFMT_DEFAULT; |
2074 | return DFMT_NFMT_DEFAULT; |
2075 | } |
2076 | |
2077 | } // namespace MTBUFFormat |
2078 | |
2079 | //===----------------------------------------------------------------------===// |
2080 | // SendMsg |
2081 | //===----------------------------------------------------------------------===// |
2082 | |
2083 | namespace SendMsg { |
2084 | |
2085 | static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) { |
2086 | return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_; |
2087 | } |
2088 | |
2089 | bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) { |
2090 | return (MsgId & ~(getMsgIdMask(STI))) == 0; |
2091 | } |
2092 | |
2093 | bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, |
2094 | bool Strict) { |
2095 | assert(isValidMsgId(MsgId, STI)); |
2096 | |
2097 | if (!Strict) |
2098 | return 0 <= OpId && isUInt<OP_WIDTH_>(x: OpId); |
2099 | |
2100 | if (msgRequiresOp(MsgId, STI)) { |
2101 | if (MsgId == ID_GS_PreGFX11 && OpId == OP_GS_NOP) |
2102 | return false; |
2103 | |
2104 | return !getMsgOpName(MsgId, Encoding: OpId, STI).empty(); |
2105 | } |
2106 | |
2107 | return OpId == OP_NONE_; |
2108 | } |
2109 | |
2110 | bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, |
2111 | const MCSubtargetInfo &STI, bool Strict) { |
2112 | assert(isValidMsgOp(MsgId, OpId, STI, Strict)); |
2113 | |
2114 | if (!Strict) |
2115 | return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(x: StreamId); |
2116 | |
2117 | if (!isGFX11Plus(STI)) { |
2118 | switch (MsgId) { |
2119 | case ID_GS_PreGFX11: |
2120 | return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_; |
2121 | case ID_GS_DONE_PreGFX11: |
2122 | return (OpId == OP_GS_NOP) |
2123 | ? (StreamId == STREAM_ID_NONE_) |
2124 | : (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_); |
2125 | } |
2126 | } |
2127 | return StreamId == STREAM_ID_NONE_; |
2128 | } |
2129 | |
2130 | bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) { |
2131 | return MsgId == ID_SYSMSG || |
2132 | (!isGFX11Plus(STI) && |
2133 | (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11)); |
2134 | } |
2135 | |
2136 | bool msgSupportsStream(int64_t MsgId, int64_t OpId, |
2137 | const MCSubtargetInfo &STI) { |
2138 | return !isGFX11Plus(STI) && |
2139 | (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) && |
2140 | OpId != OP_GS_NOP; |
2141 | } |
2142 | |
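/// Splits an encoded sendmsg immediate \p Val into message, operation and
/// stream ids. On GFX11+ only the message id is encoded, so \p OpId and
/// \p StreamId are returned as 0.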
2143 | void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, |
2144 | uint16_t &StreamId, const MCSubtargetInfo &STI) { |
2145 | MsgId = Val & getMsgIdMask(STI); |
2146 | if (isGFX11Plus(STI)) { |
2147 | OpId = 0; |
2148 | StreamId = 0; |
2149 | } else { |
2150 | OpId = (Val & OP_MASK_) >> OP_SHIFT_; |
2151 | StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_; |
2152 | } |
2153 | } |
2154 | |
2155 | uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId) { |
2156 | return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_); |
2157 | } |
2158 | |
2159 | } // namespace SendMsg |
2160 | |
2161 | //===----------------------------------------------------------------------===// |
2162 | // |
2163 | //===----------------------------------------------------------------------===// |
2164 | |
2165 | unsigned getInitialPSInputAddr(const Function &F) { |
2166 | return F.getFnAttributeAsParsedInteger(Kind: "InitialPSInputAddr" , Default: 0); |
2167 | } |
2168 | |
2169 | bool getHasColorExport(const Function &F) { |
// As a safe default, always respond as if PS has color exports.
2171 | return F.getFnAttributeAsParsedInteger( |
2172 | Kind: "amdgpu-color-export" , |
2173 | Default: F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0; |
2174 | } |
2175 | |
2176 | bool getHasDepthExport(const Function &F) { |
2177 | return F.getFnAttributeAsParsedInteger(Kind: "amdgpu-depth-export" , Default: 0) != 0; |
2178 | } |
2179 | |
2180 | unsigned getDynamicVGPRBlockSize(const Function &F) { |
2181 | unsigned BlockSize = |
2182 | F.getFnAttributeAsParsedInteger(Kind: "amdgpu-dynamic-vgpr-block-size" , Default: 0); |
2183 | |
2184 | if (BlockSize == 16 || BlockSize == 32) |
2185 | return BlockSize; |
2186 | |
2187 | return 0; |
2188 | } |
2189 | |
2190 | bool hasXNACK(const MCSubtargetInfo &STI) { |
2191 | return STI.hasFeature(Feature: AMDGPU::FeatureXNACK); |
2192 | } |
2193 | |
2194 | bool hasSRAMECC(const MCSubtargetInfo &STI) { |
2195 | return STI.hasFeature(Feature: AMDGPU::FeatureSRAMECC); |
2196 | } |
2197 | |
2198 | bool hasMIMG_R128(const MCSubtargetInfo &STI) { |
2199 | return STI.hasFeature(Feature: AMDGPU::FeatureMIMG_R128) && |
2200 | !STI.hasFeature(Feature: AMDGPU::FeatureR128A16); |
2201 | } |
2202 | |
2203 | bool hasA16(const MCSubtargetInfo &STI) { |
2204 | return STI.hasFeature(Feature: AMDGPU::FeatureA16); |
2205 | } |
2206 | |
2207 | bool hasG16(const MCSubtargetInfo &STI) { |
2208 | return STI.hasFeature(Feature: AMDGPU::FeatureG16); |
2209 | } |
2210 | |
2211 | bool hasPackedD16(const MCSubtargetInfo &STI) { |
2212 | return !STI.hasFeature(Feature: AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) && |
2213 | !isSI(STI); |
2214 | } |
2215 | |
2216 | bool hasGDS(const MCSubtargetInfo &STI) { |
2217 | return STI.hasFeature(Feature: AMDGPU::FeatureGDS); |
2218 | } |
2219 | |
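/// \returns The maximum NSA (non-sequential address) size for the subtarget:
/// 13 on GFX10.3, 5 on earlier GFX10 and on GFX11, 4 (with a sampler) or 5
/// (without) on GFX12+, and 0 on older targets.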
2220 | unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) { |
2221 | auto Version = getIsaVersion(GPU: STI.getCPU()); |
2222 | if (Version.Major == 10) |
2223 | return Version.Minor >= 3 ? 13 : 5; |
2224 | if (Version.Major == 11) |
2225 | return 5; |
2226 | if (Version.Major >= 12) |
2227 | return HasSampler ? 4 : 5; |
2228 | return 0; |
2229 | } |
2230 | |
2231 | unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) { return 16; } |
2232 | |
2233 | bool isSI(const MCSubtargetInfo &STI) { |
2234 | return STI.hasFeature(Feature: AMDGPU::FeatureSouthernIslands); |
2235 | } |
2236 | |
2237 | bool isCI(const MCSubtargetInfo &STI) { |
2238 | return STI.hasFeature(Feature: AMDGPU::FeatureSeaIslands); |
2239 | } |
2240 | |
2241 | bool isVI(const MCSubtargetInfo &STI) { |
2242 | return STI.hasFeature(Feature: AMDGPU::FeatureVolcanicIslands); |
2243 | } |
2244 | |
2245 | bool isGFX9(const MCSubtargetInfo &STI) { |
2246 | return STI.hasFeature(Feature: AMDGPU::FeatureGFX9); |
2247 | } |
2248 | |
2249 | bool isGFX9_GFX10(const MCSubtargetInfo &STI) { |
2250 | return isGFX9(STI) || isGFX10(STI); |
2251 | } |
2252 | |
2253 | bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI) { |
2254 | return isGFX9(STI) || isGFX10(STI) || isGFX11(STI); |
2255 | } |
2256 | |
2257 | bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) { |
2258 | return isVI(STI) || isGFX9(STI) || isGFX10(STI); |
2259 | } |
2260 | |
2261 | bool isGFX8Plus(const MCSubtargetInfo &STI) { |
2262 | return isVI(STI) || isGFX9Plus(STI); |
2263 | } |
2264 | |
2265 | bool isGFX9Plus(const MCSubtargetInfo &STI) { |
2266 | return isGFX9(STI) || isGFX10Plus(STI); |
2267 | } |
2268 | |
2269 | bool isNotGFX9Plus(const MCSubtargetInfo &STI) { return !isGFX9Plus(STI); } |
2270 | |
2271 | bool isGFX10(const MCSubtargetInfo &STI) { |
2272 | return STI.hasFeature(Feature: AMDGPU::FeatureGFX10); |
2273 | } |
2274 | |
2275 | bool isGFX10_GFX11(const MCSubtargetInfo &STI) { |
2276 | return isGFX10(STI) || isGFX11(STI); |
2277 | } |
2278 | |
2279 | bool isGFX10Plus(const MCSubtargetInfo &STI) { |
2280 | return isGFX10(STI) || isGFX11Plus(STI); |
2281 | } |
2282 | |
2283 | bool isGFX11(const MCSubtargetInfo &STI) { |
2284 | return STI.hasFeature(Feature: AMDGPU::FeatureGFX11); |
2285 | } |
2286 | |
2287 | bool isGFX11Plus(const MCSubtargetInfo &STI) { |
2288 | return isGFX11(STI) || isGFX12Plus(STI); |
2289 | } |
2290 | |
2291 | bool isGFX12(const MCSubtargetInfo &STI) { |
2292 | return STI.getFeatureBits()[AMDGPU::FeatureGFX12]; |
2293 | } |
2294 | |
2295 | bool isGFX12Plus(const MCSubtargetInfo &STI) { return isGFX12(STI); } |
2296 | |
2297 | bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); } |
2298 | |
2299 | bool isGFX1250(const MCSubtargetInfo &STI) { |
2300 | return STI.getFeatureBits()[AMDGPU::FeatureGFX1250Insts]; |
2301 | } |
2302 | |
2303 | bool isNotGFX11Plus(const MCSubtargetInfo &STI) { return !isGFX11Plus(STI); } |
2304 | |
2305 | bool isNotGFX10Plus(const MCSubtargetInfo &STI) { |
2306 | return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI); |
2307 | } |
2308 | |
2309 | bool isGFX10Before1030(const MCSubtargetInfo &STI) { |
2310 | return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI); |
2311 | } |
2312 | |
2313 | bool isGCN3Encoding(const MCSubtargetInfo &STI) { |
2314 | return STI.hasFeature(Feature: AMDGPU::FeatureGCN3Encoding); |
2315 | } |
2316 | |
2317 | bool isGFX10_AEncoding(const MCSubtargetInfo &STI) { |
2318 | return STI.hasFeature(Feature: AMDGPU::FeatureGFX10_AEncoding); |
2319 | } |
2320 | |
2321 | bool isGFX10_BEncoding(const MCSubtargetInfo &STI) { |
2322 | return STI.hasFeature(Feature: AMDGPU::FeatureGFX10_BEncoding); |
2323 | } |
2324 | |
2325 | bool hasGFX10_3Insts(const MCSubtargetInfo &STI) { |
2326 | return STI.hasFeature(Feature: AMDGPU::FeatureGFX10_3Insts); |
2327 | } |
2328 | |
2329 | bool isGFX10_3_GFX11(const MCSubtargetInfo &STI) { |
2330 | return isGFX10_BEncoding(STI) && !isGFX12Plus(STI); |
2331 | } |
2332 | |
2333 | bool isGFX90A(const MCSubtargetInfo &STI) { |
2334 | return STI.hasFeature(Feature: AMDGPU::FeatureGFX90AInsts); |
2335 | } |
2336 | |
2337 | bool isGFX940(const MCSubtargetInfo &STI) { |
2338 | return STI.hasFeature(Feature: AMDGPU::FeatureGFX940Insts); |
2339 | } |
2340 | |
2341 | bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) { |
2342 | return STI.hasFeature(Feature: AMDGPU::FeatureArchitectedFlatScratch); |
2343 | } |
2344 | |
2345 | bool hasMAIInsts(const MCSubtargetInfo &STI) { |
2346 | return STI.hasFeature(Feature: AMDGPU::FeatureMAIInsts); |
2347 | } |
2348 | |
2349 | bool hasVOPD(const MCSubtargetInfo &STI) { |
2350 | return STI.hasFeature(Feature: AMDGPU::FeatureVOPD); |
2351 | } |
2352 | |
2353 | bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI) { |
2354 | return STI.hasFeature(Feature: AMDGPU::FeatureDPPSrc1SGPR); |
2355 | } |
2356 | |
2357 | unsigned hasKernargPreload(const MCSubtargetInfo &STI) { |
2358 | return STI.hasFeature(Feature: AMDGPU::FeatureKernargPreload); |
2359 | } |
2360 | |
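/// \returns The total VGPR count for \p ArgNumVGPR VGPRs and \p ArgNumAGPR
/// AGPRs: with gfx90a instructions the VGPR count is aligned to 4 and the
/// AGPRs added on top, otherwise the larger of the two counts is returned.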
2361 | int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, |
2362 | int32_t ArgNumVGPR) { |
2363 | if (has90AInsts && ArgNumAGPR) |
2364 | return alignTo(Value: ArgNumVGPR, Align: 4) + ArgNumAGPR; |
2365 | return std::max(a: ArgNumVGPR, b: ArgNumAGPR); |
2366 | } |
2367 | |
2368 | bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI) { |
2369 | const MCRegisterClass SGPRClass = TRI->getRegClass(i: AMDGPU::SReg_32RegClassID); |
2370 | const MCRegister FirstSubReg = TRI->getSubReg(Reg, Idx: AMDGPU::sub0); |
2371 | return SGPRClass.contains(Reg: FirstSubReg != 0 ? FirstSubReg : Reg) || |
2372 | Reg == AMDGPU::SCC; |
2373 | } |
2374 | |
2375 | bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI) { |
2376 | return MRI.getEncodingValue(Reg) & AMDGPU::HWEncoding::IS_HI16; |
2377 | } |
2378 | |
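// MAP_REG2REG maps between generic pseudo registers and their
// subtarget-specific MC counterparts (FLAT_SCR, TTMPn, M0, SGPR_NULL); it is
// instantiated with different CASE_* definitions by getMCReg and
// mc2PseudoReg below.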
2379 | #define MAP_REG2REG \ |
2380 | using namespace AMDGPU; \ |
2381 | switch (Reg.id()) { \ |
2382 | default: \ |
2383 | return Reg; \ |
2384 | CASE_CI_VI(FLAT_SCR) \ |
2385 | CASE_CI_VI(FLAT_SCR_LO) \ |
2386 | CASE_CI_VI(FLAT_SCR_HI) \ |
2387 | CASE_VI_GFX9PLUS(TTMP0) \ |
2388 | CASE_VI_GFX9PLUS(TTMP1) \ |
2389 | CASE_VI_GFX9PLUS(TTMP2) \ |
2390 | CASE_VI_GFX9PLUS(TTMP3) \ |
2391 | CASE_VI_GFX9PLUS(TTMP4) \ |
2392 | CASE_VI_GFX9PLUS(TTMP5) \ |
2393 | CASE_VI_GFX9PLUS(TTMP6) \ |
2394 | CASE_VI_GFX9PLUS(TTMP7) \ |
2395 | CASE_VI_GFX9PLUS(TTMP8) \ |
2396 | CASE_VI_GFX9PLUS(TTMP9) \ |
2397 | CASE_VI_GFX9PLUS(TTMP10) \ |
2398 | CASE_VI_GFX9PLUS(TTMP11) \ |
2399 | CASE_VI_GFX9PLUS(TTMP12) \ |
2400 | CASE_VI_GFX9PLUS(TTMP13) \ |
2401 | CASE_VI_GFX9PLUS(TTMP14) \ |
2402 | CASE_VI_GFX9PLUS(TTMP15) \ |
2403 | CASE_VI_GFX9PLUS(TTMP0_TTMP1) \ |
2404 | CASE_VI_GFX9PLUS(TTMP2_TTMP3) \ |
2405 | CASE_VI_GFX9PLUS(TTMP4_TTMP5) \ |
2406 | CASE_VI_GFX9PLUS(TTMP6_TTMP7) \ |
2407 | CASE_VI_GFX9PLUS(TTMP8_TTMP9) \ |
2408 | CASE_VI_GFX9PLUS(TTMP10_TTMP11) \ |
2409 | CASE_VI_GFX9PLUS(TTMP12_TTMP13) \ |
2410 | CASE_VI_GFX9PLUS(TTMP14_TTMP15) \ |
2411 | CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \ |
2412 | CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \ |
2413 | CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \ |
2414 | CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \ |
2415 | CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \ |
2416 | CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \ |
2417 | CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \ |
2418 | CASE_VI_GFX9PLUS( \ |
2419 | TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \ |
2420 | CASE_GFXPRE11_GFX11PLUS(M0) \ |
2421 | CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \ |
2422 | CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \ |
2423 | } |
2424 | |
2425 | #define CASE_CI_VI(node) \ |
2426 | assert(!isSI(STI)); \ |
2427 | case node: \ |
2428 | return isCI(STI) ? node##_ci : node##_vi; |
2429 | |
2430 | #define CASE_VI_GFX9PLUS(node) \ |
2431 | case node: \ |
2432 | return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi; |
2433 | |
2434 | #define CASE_GFXPRE11_GFX11PLUS(node) \ |
2435 | case node: \ |
2436 | return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11; |
2437 | |
2438 | #define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \ |
2439 | case node: \ |
2440 | return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11; |
2441 | |
2442 | MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI) { |
2443 | if (STI.getTargetTriple().getArch() == Triple::r600) |
2444 | return Reg; |
2445 | MAP_REG2REG |
2446 | } |
2447 | |
2448 | #undef CASE_CI_VI |
2449 | #undef CASE_VI_GFX9PLUS |
2450 | #undef CASE_GFXPRE11_GFX11PLUS |
2451 | #undef CASE_GFXPRE11_GFX11PLUS_TO |
2452 | |
2453 | #define CASE_CI_VI(node) \ |
2454 | case node##_ci: \ |
2455 | case node##_vi: \ |
2456 | return node; |
2457 | #define CASE_VI_GFX9PLUS(node) \ |
2458 | case node##_vi: \ |
2459 | case node##_gfx9plus: \ |
2460 | return node; |
2461 | #define CASE_GFXPRE11_GFX11PLUS(node) \ |
2462 | case node##_gfx11plus: \ |
2463 | case node##_gfxpre11: \ |
2464 | return node; |
2465 | #define CASE_GFXPRE11_GFX11PLUS_TO(node, result) |
2466 | |
2467 | MCRegister mc2PseudoReg(MCRegister Reg) { MAP_REG2REG } |
2468 | |
2469 | bool isInlineValue(unsigned Reg) { |
2470 | switch (Reg) { |
2471 | case AMDGPU::SRC_SHARED_BASE_LO: |
2472 | case AMDGPU::SRC_SHARED_BASE: |
2473 | case AMDGPU::SRC_SHARED_LIMIT_LO: |
2474 | case AMDGPU::SRC_SHARED_LIMIT: |
2475 | case AMDGPU::SRC_PRIVATE_BASE_LO: |
2476 | case AMDGPU::SRC_PRIVATE_BASE: |
2477 | case AMDGPU::SRC_PRIVATE_LIMIT_LO: |
2478 | case AMDGPU::SRC_PRIVATE_LIMIT: |
2479 | case AMDGPU::SRC_POPS_EXITING_WAVE_ID: |
2480 | return true; |
2481 | case AMDGPU::SRC_VCCZ: |
2482 | case AMDGPU::SRC_EXECZ: |
2483 | case AMDGPU::SRC_SCC: |
2484 | return true; |
2485 | case AMDGPU::SGPR_NULL: |
2486 | return true; |
2487 | default: |
2488 | return false; |
2489 | } |
2490 | } |
2491 | |
2492 | #undef CASE_CI_VI |
2493 | #undef CASE_VI_GFX9PLUS |
2494 | #undef CASE_GFXPRE11_GFX11PLUS |
2495 | #undef CASE_GFXPRE11_GFX11PLUS_TO |
2496 | #undef MAP_REG2REG |
2497 | |
2498 | bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) { |
2499 | assert(OpNo < Desc.NumOperands); |
2500 | unsigned OpType = Desc.operands()[OpNo].OperandType; |
2501 | return OpType >= AMDGPU::OPERAND_SRC_FIRST && |
2502 | OpType <= AMDGPU::OPERAND_SRC_LAST; |
2503 | } |
2504 | |
2505 | bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) { |
2506 | assert(OpNo < Desc.NumOperands); |
2507 | unsigned OpType = Desc.operands()[OpNo].OperandType; |
2508 | return OpType >= AMDGPU::OPERAND_KIMM_FIRST && |
2509 | OpType <= AMDGPU::OPERAND_KIMM_LAST; |
2510 | } |
2511 | |
2512 | bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) { |
2513 | assert(OpNo < Desc.NumOperands); |
2514 | unsigned OpType = Desc.operands()[OpNo].OperandType; |
2515 | switch (OpType) { |
2516 | case AMDGPU::OPERAND_REG_IMM_FP32: |
2517 | case AMDGPU::OPERAND_REG_IMM_FP64: |
2518 | case AMDGPU::OPERAND_REG_IMM_FP16: |
2519 | case AMDGPU::OPERAND_REG_IMM_V2FP16: |
2520 | case AMDGPU::OPERAND_REG_INLINE_C_FP32: |
2521 | case AMDGPU::OPERAND_REG_INLINE_C_FP64: |
2522 | case AMDGPU::OPERAND_REG_INLINE_C_FP16: |
2523 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: |
2524 | case AMDGPU::OPERAND_REG_INLINE_AC_FP32: |
2525 | case AMDGPU::OPERAND_REG_IMM_V2FP32: |
2526 | case AMDGPU::OPERAND_REG_INLINE_AC_FP64: |
2527 | return true; |
2528 | default: |
2529 | return false; |
2530 | } |
2531 | } |
2532 | |
2533 | bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) { |
2534 | assert(OpNo < Desc.NumOperands); |
2535 | unsigned OpType = Desc.operands()[OpNo].OperandType; |
2536 | return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST && |
2537 | OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST) || |
2538 | (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST && |
2539 | OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST); |
2540 | } |
2541 | |
2542 | // Avoid using MCRegisterClass::getSize, since that function will go away |
2543 | // (move from MC* level to Target* level). Return size in bits. |
2544 | unsigned getRegBitWidth(unsigned RCID) { |
2545 | switch (RCID) { |
2546 | case AMDGPU::VGPR_16RegClassID: |
2547 | case AMDGPU::VGPR_16_Lo128RegClassID: |
2548 | case AMDGPU::SGPR_LO16RegClassID: |
2549 | case AMDGPU::AGPR_LO16RegClassID: |
2550 | return 16; |
2551 | case AMDGPU::SGPR_32RegClassID: |
2552 | case AMDGPU::VGPR_32RegClassID: |
2553 | case AMDGPU::VRegOrLds_32RegClassID: |
2554 | case AMDGPU::AGPR_32RegClassID: |
2555 | case AMDGPU::VS_32RegClassID: |
2556 | case AMDGPU::AV_32RegClassID: |
2557 | case AMDGPU::SReg_32RegClassID: |
2558 | case AMDGPU::SReg_32_XM0RegClassID: |
2559 | case AMDGPU::SRegOrLds_32RegClassID: |
2560 | return 32; |
2561 | case AMDGPU::SGPR_64RegClassID: |
2562 | case AMDGPU::VS_64RegClassID: |
2563 | case AMDGPU::SReg_64RegClassID: |
2564 | case AMDGPU::VReg_64RegClassID: |
2565 | case AMDGPU::AReg_64RegClassID: |
2566 | case AMDGPU::SReg_64_XEXECRegClassID: |
2567 | case AMDGPU::VReg_64_Align2RegClassID: |
2568 | case AMDGPU::AReg_64_Align2RegClassID: |
2569 | case AMDGPU::AV_64RegClassID: |
2570 | case AMDGPU::AV_64_Align2RegClassID: |
2571 | return 64; |
2572 | case AMDGPU::SGPR_96RegClassID: |
2573 | case AMDGPU::SReg_96RegClassID: |
2574 | case AMDGPU::VReg_96RegClassID: |
2575 | case AMDGPU::AReg_96RegClassID: |
2576 | case AMDGPU::VReg_96_Align2RegClassID: |
2577 | case AMDGPU::AReg_96_Align2RegClassID: |
2578 | case AMDGPU::AV_96RegClassID: |
2579 | case AMDGPU::AV_96_Align2RegClassID: |
2580 | return 96; |
2581 | case AMDGPU::SGPR_128RegClassID: |
2582 | case AMDGPU::SReg_128RegClassID: |
2583 | case AMDGPU::VReg_128RegClassID: |
2584 | case AMDGPU::AReg_128RegClassID: |
2585 | case AMDGPU::VReg_128_Align2RegClassID: |
2586 | case AMDGPU::AReg_128_Align2RegClassID: |
2587 | case AMDGPU::AV_128RegClassID: |
2588 | case AMDGPU::AV_128_Align2RegClassID: |
2589 | case AMDGPU::SReg_128_XNULLRegClassID: |
2590 | return 128; |
2591 | case AMDGPU::SGPR_160RegClassID: |
2592 | case AMDGPU::SReg_160RegClassID: |
2593 | case AMDGPU::VReg_160RegClassID: |
2594 | case AMDGPU::AReg_160RegClassID: |
2595 | case AMDGPU::VReg_160_Align2RegClassID: |
2596 | case AMDGPU::AReg_160_Align2RegClassID: |
2597 | case AMDGPU::AV_160RegClassID: |
2598 | case AMDGPU::AV_160_Align2RegClassID: |
2599 | return 160; |
2600 | case AMDGPU::SGPR_192RegClassID: |
2601 | case AMDGPU::SReg_192RegClassID: |
2602 | case AMDGPU::VReg_192RegClassID: |
2603 | case AMDGPU::AReg_192RegClassID: |
2604 | case AMDGPU::VReg_192_Align2RegClassID: |
2605 | case AMDGPU::AReg_192_Align2RegClassID: |
2606 | case AMDGPU::AV_192RegClassID: |
2607 | case AMDGPU::AV_192_Align2RegClassID: |
2608 | return 192; |
2609 | case AMDGPU::SGPR_224RegClassID: |
2610 | case AMDGPU::SReg_224RegClassID: |
2611 | case AMDGPU::VReg_224RegClassID: |
2612 | case AMDGPU::AReg_224RegClassID: |
2613 | case AMDGPU::VReg_224_Align2RegClassID: |
2614 | case AMDGPU::AReg_224_Align2RegClassID: |
2615 | case AMDGPU::AV_224RegClassID: |
2616 | case AMDGPU::AV_224_Align2RegClassID: |
2617 | return 224; |
2618 | case AMDGPU::SGPR_256RegClassID: |
2619 | case AMDGPU::SReg_256RegClassID: |
2620 | case AMDGPU::VReg_256RegClassID: |
2621 | case AMDGPU::AReg_256RegClassID: |
2622 | case AMDGPU::VReg_256_Align2RegClassID: |
2623 | case AMDGPU::AReg_256_Align2RegClassID: |
2624 | case AMDGPU::AV_256RegClassID: |
2625 | case AMDGPU::AV_256_Align2RegClassID: |
2626 | case AMDGPU::SReg_256_XNULLRegClassID: |
2627 | return 256; |
2628 | case AMDGPU::SGPR_288RegClassID: |
2629 | case AMDGPU::SReg_288RegClassID: |
2630 | case AMDGPU::VReg_288RegClassID: |
2631 | case AMDGPU::AReg_288RegClassID: |
2632 | case AMDGPU::VReg_288_Align2RegClassID: |
2633 | case AMDGPU::AReg_288_Align2RegClassID: |
2634 | case AMDGPU::AV_288RegClassID: |
2635 | case AMDGPU::AV_288_Align2RegClassID: |
2636 | return 288; |
2637 | case AMDGPU::SGPR_320RegClassID: |
2638 | case AMDGPU::SReg_320RegClassID: |
2639 | case AMDGPU::VReg_320RegClassID: |
2640 | case AMDGPU::AReg_320RegClassID: |
2641 | case AMDGPU::VReg_320_Align2RegClassID: |
2642 | case AMDGPU::AReg_320_Align2RegClassID: |
2643 | case AMDGPU::AV_320RegClassID: |
2644 | case AMDGPU::AV_320_Align2RegClassID: |
2645 | return 320; |
2646 | case AMDGPU::SGPR_352RegClassID: |
2647 | case AMDGPU::SReg_352RegClassID: |
2648 | case AMDGPU::VReg_352RegClassID: |
2649 | case AMDGPU::AReg_352RegClassID: |
2650 | case AMDGPU::VReg_352_Align2RegClassID: |
2651 | case AMDGPU::AReg_352_Align2RegClassID: |
2652 | case AMDGPU::AV_352RegClassID: |
2653 | case AMDGPU::AV_352_Align2RegClassID: |
2654 | return 352; |
2655 | case AMDGPU::SGPR_384RegClassID: |
2656 | case AMDGPU::SReg_384RegClassID: |
2657 | case AMDGPU::VReg_384RegClassID: |
2658 | case AMDGPU::AReg_384RegClassID: |
2659 | case AMDGPU::VReg_384_Align2RegClassID: |
2660 | case AMDGPU::AReg_384_Align2RegClassID: |
2661 | case AMDGPU::AV_384RegClassID: |
2662 | case AMDGPU::AV_384_Align2RegClassID: |
2663 | return 384; |
2664 | case AMDGPU::SGPR_512RegClassID: |
2665 | case AMDGPU::SReg_512RegClassID: |
2666 | case AMDGPU::VReg_512RegClassID: |
2667 | case AMDGPU::AReg_512RegClassID: |
2668 | case AMDGPU::VReg_512_Align2RegClassID: |
2669 | case AMDGPU::AReg_512_Align2RegClassID: |
2670 | case AMDGPU::AV_512RegClassID: |
2671 | case AMDGPU::AV_512_Align2RegClassID: |
2672 | return 512; |
2673 | case AMDGPU::SGPR_1024RegClassID: |
2674 | case AMDGPU::SReg_1024RegClassID: |
2675 | case AMDGPU::VReg_1024RegClassID: |
2676 | case AMDGPU::AReg_1024RegClassID: |
2677 | case AMDGPU::VReg_1024_Align2RegClassID: |
2678 | case AMDGPU::AReg_1024_Align2RegClassID: |
2679 | case AMDGPU::AV_1024RegClassID: |
2680 | case AMDGPU::AV_1024_Align2RegClassID: |
2681 | return 1024; |
2682 | default: |
2683 | llvm_unreachable("Unexpected register class" ); |
2684 | } |
2685 | } |
2686 | |
2687 | unsigned getRegBitWidth(const MCRegisterClass &RC) { |
2688 | return getRegBitWidth(RCID: RC.getID()); |
2689 | } |
2690 | |
2691 | unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, |
2692 | unsigned OpNo) { |
2693 | assert(OpNo < Desc.NumOperands); |
2694 | unsigned RCID = Desc.operands()[OpNo].RegClass; |
2695 | return getRegBitWidth(RCID) / 8; |
2696 | } |
2697 | |
2698 | bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) { |
2699 | if (isInlinableIntLiteral(Literal)) |
2700 | return true; |
2701 | |
2702 | uint64_t Val = static_cast<uint64_t>(Literal); |
2703 | return (Val == llvm::bit_cast<uint64_t>(from: 0.0)) || |
2704 | (Val == llvm::bit_cast<uint64_t>(from: 1.0)) || |
2705 | (Val == llvm::bit_cast<uint64_t>(from: -1.0)) || |
2706 | (Val == llvm::bit_cast<uint64_t>(from: 0.5)) || |
2707 | (Val == llvm::bit_cast<uint64_t>(from: -0.5)) || |
2708 | (Val == llvm::bit_cast<uint64_t>(from: 2.0)) || |
2709 | (Val == llvm::bit_cast<uint64_t>(from: -2.0)) || |
2710 | (Val == llvm::bit_cast<uint64_t>(from: 4.0)) || |
2711 | (Val == llvm::bit_cast<uint64_t>(from: -4.0)) || |
2712 | (Val == 0x3fc45f306dc9c882 && HasInv2Pi); |
2713 | } |
2714 | |
2715 | bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) { |
2716 | if (isInlinableIntLiteral(Literal)) |
2717 | return true; |
2718 | |
2719 | // The actual type of the operand does not seem to matter as long |
2720 | // as the bits match one of the inline immediate values. For example: |
2721 | // |
2722 | // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal, |
2723 | // so it is a legal inline immediate. |
2724 | // |
2725 | // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in |
2726 | // floating-point, so it is a legal inline immediate. |
2727 | |
2728 | uint32_t Val = static_cast<uint32_t>(Literal); |
2729 | return (Val == llvm::bit_cast<uint32_t>(from: 0.0f)) || |
2730 | (Val == llvm::bit_cast<uint32_t>(from: 1.0f)) || |
2731 | (Val == llvm::bit_cast<uint32_t>(from: -1.0f)) || |
2732 | (Val == llvm::bit_cast<uint32_t>(from: 0.5f)) || |
2733 | (Val == llvm::bit_cast<uint32_t>(from: -0.5f)) || |
2734 | (Val == llvm::bit_cast<uint32_t>(from: 2.0f)) || |
2735 | (Val == llvm::bit_cast<uint32_t>(from: -2.0f)) || |
2736 | (Val == llvm::bit_cast<uint32_t>(from: 4.0f)) || |
2737 | (Val == llvm::bit_cast<uint32_t>(from: -4.0f)) || |
2738 | (Val == 0x3e22f983 && HasInv2Pi); |
2739 | } |
2740 | |
2741 | bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) { |
2742 | if (!HasInv2Pi) |
2743 | return false; |
2744 | if (isInlinableIntLiteral(Literal)) |
2745 | return true; |
2746 | uint16_t Val = static_cast<uint16_t>(Literal); |
2747 | return Val == 0x3F00 || // 0.5 |
2748 | Val == 0xBF00 || // -0.5 |
2749 | Val == 0x3F80 || // 1.0 |
2750 | Val == 0xBF80 || // -1.0 |
2751 | Val == 0x4000 || // 2.0 |
2752 | Val == 0xC000 || // -2.0 |
2753 | Val == 0x4080 || // 4.0 |
2754 | Val == 0xC080 || // -4.0 |
2755 | Val == 0x3E22; // 1.0 / (2.0 * pi) |
2756 | } |
2757 | |
2758 | bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi) { |
2759 | return isInlinableLiteral32(Literal, HasInv2Pi); |
2760 | } |
2761 | |
2762 | bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) { |
2763 | if (!HasInv2Pi) |
2764 | return false; |
2765 | if (isInlinableIntLiteral(Literal)) |
2766 | return true; |
2767 | uint16_t Val = static_cast<uint16_t>(Literal); |
2768 | return Val == 0x3C00 || // 1.0 |
2769 | Val == 0xBC00 || // -1.0 |
2770 | Val == 0x3800 || // 0.5 |
2771 | Val == 0xB800 || // -0.5 |
2772 | Val == 0x4000 || // 2.0 |
2773 | Val == 0xC000 || // -2.0 |
2774 | Val == 0x4400 || // 4.0 |
2775 | Val == 0xC400 || // -4.0 |
2776 | Val == 0x3118; // 1/2pi |
2777 | } |
2778 | |
2779 | std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) { |
2780 | // Unfortunately, the Instruction Set Architecture Reference Guide is |
2781 | // misleading about how the inline operands work for (packed) 16-bit |
2782 | // instructions. In a nutshell, the actual HW behavior is: |
2783 | // |
2784 | // - integer encodings (-16 .. 64) are always produced as sign-extended |
2785 | // 32-bit values |
2786 | // - float encodings are produced as: |
2787 | // - for F16 instructions: corresponding half-precision float values in |
2788 | // the LSBs, 0 in the MSBs |
2789 | // - for UI16 instructions: corresponding single-precision float value |
2790 | int32_t Signed = static_cast<int32_t>(Literal); |
2791 | if (Signed >= 0 && Signed <= 64) |
2792 | return 128 + Signed; |
2793 | |
2794 | if (Signed >= -16 && Signed <= -1) |
2795 | return 192 + std::abs(x: Signed); |
2796 | |
2797 | if (IsFloat) { |
2798 | // clang-format off |
2799 | switch (Literal) { |
2800 | case 0x3800: return 240; // 0.5 |
2801 | case 0xB800: return 241; // -0.5 |
2802 | case 0x3C00: return 242; // 1.0 |
2803 | case 0xBC00: return 243; // -1.0 |
2804 | case 0x4000: return 244; // 2.0 |
2805 | case 0xC000: return 245; // -2.0 |
2806 | case 0x4400: return 246; // 4.0 |
2807 | case 0xC400: return 247; // -4.0 |
2808 | case 0x3118: return 248; // 1.0 / (2.0 * pi) |
2809 | default: break; |
2810 | } |
2811 | // clang-format on |
2812 | } else { |
2813 | // clang-format off |
2814 | switch (Literal) { |
2815 | case 0x3F000000: return 240; // 0.5 |
2816 | case 0xBF000000: return 241; // -0.5 |
2817 | case 0x3F800000: return 242; // 1.0 |
2818 | case 0xBF800000: return 243; // -1.0 |
2819 | case 0x40000000: return 244; // 2.0 |
2820 | case 0xC0000000: return 245; // -2.0 |
2821 | case 0x40800000: return 246; // 4.0 |
2822 | case 0xC0800000: return 247; // -4.0 |
2823 | case 0x3E22F983: return 248; // 1.0 / (2.0 * pi) |
2824 | default: break; |
2825 | } |
2826 | // clang-format on |
2827 | } |
2828 | |
2829 | return {}; |
2830 | } |
2831 | |
2832 | // Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction |
2833 | // or nullopt. |
2834 | std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) { |
2835 | return getInlineEncodingV216(IsFloat: false, Literal); |
2836 | } |
2837 | |
2838 | // Encoding of the literal as an inline constant for a V_PK_*_BF16 instruction |
2839 | // or nullopt. |
2840 | std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal) { |
2841 | int32_t Signed = static_cast<int32_t>(Literal); |
2842 | if (Signed >= 0 && Signed <= 64) |
2843 | return 128 + Signed; |
2844 | |
2845 | if (Signed >= -16 && Signed <= -1) |
2846 | return 192 + std::abs(x: Signed); |
2847 | |
2848 | // clang-format off |
2849 | switch (Literal) { |
2850 | case 0x3F00: return 240; // 0.5 |
2851 | case 0xBF00: return 241; // -0.5 |
2852 | case 0x3F80: return 242; // 1.0 |
2853 | case 0xBF80: return 243; // -1.0 |
2854 | case 0x4000: return 244; // 2.0 |
2855 | case 0xC000: return 245; // -2.0 |
2856 | case 0x4080: return 246; // 4.0 |
2857 | case 0xC080: return 247; // -4.0 |
2858 | case 0x3E22: return 248; // 1.0 / (2.0 * pi) |
2859 | default: break; |
2860 | } |
2861 | // clang-format on |
2862 | |
2863 | return std::nullopt; |
2864 | } |
2865 | |
2866 | // Encoding of the literal as an inline constant for a V_PK_*_F16 instruction |
2867 | // or nullopt. |
2868 | std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) { |
2869 | return getInlineEncodingV216(IsFloat: true, Literal); |
2870 | } |
2871 | |
2872 | // Whether the given literal can be inlined for a V_PK_* instruction. |
2873 | bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) { |
2874 | switch (OpType) { |
2875 | case AMDGPU::OPERAND_REG_IMM_V2INT16: |
2876 | case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: |
2877 | return getInlineEncodingV216(IsFloat: false, Literal).has_value(); |
2878 | case AMDGPU::OPERAND_REG_IMM_V2FP16: |
2879 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: |
2880 | return getInlineEncodingV216(IsFloat: true, Literal).has_value(); |
2881 | case AMDGPU::OPERAND_REG_IMM_V2BF16: |
2882 | case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: |
2883 | return isInlinableLiteralV2BF16(Literal); |
2884 | default: |
2885 | llvm_unreachable("bad packed operand type" ); |
2886 | } |
2887 | } |
2888 | |
2889 | // Whether the given literal can be inlined for a V_PK_*_IU16 instruction. |
2890 | bool isInlinableLiteralV2I16(uint32_t Literal) { |
2891 | return getInlineEncodingV2I16(Literal).has_value(); |
2892 | } |
2893 | |
2894 | // Whether the given literal can be inlined for a V_PK_*_BF16 instruction. |
2895 | bool isInlinableLiteralV2BF16(uint32_t Literal) { |
2896 | return getInlineEncodingV2BF16(Literal).has_value(); |
2897 | } |
2898 | |
2899 | // Whether the given literal can be inlined for a V_PK_*_F16 instruction. |
2900 | bool isInlinableLiteralV2F16(uint32_t Literal) { |
2901 | return getInlineEncodingV2F16(Literal).has_value(); |
2902 | } |
2903 | |
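/// \returns true if \p Val can be used as a 32-bit literal; for FP64 operands
/// (\p IsFP64) only the high 32 bits of the literal may be non-zero.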
2904 | bool isValid32BitLiteral(uint64_t Val, bool IsFP64) { |
2905 | if (IsFP64) |
2906 | return !(Val & 0xffffffffu); |
2907 | |
2908 | return isUInt<32>(x: Val) || isInt<32>(x: Val); |
2909 | } |

bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_Gfx:
  case CallingConv::AMDGPU_CS_Chain:
  case CallingConv::AMDGPU_CS_ChainPreserve:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return A->hasAttribute(Attribute::InReg) ||
           A->hasAttribute(Attribute::ByVal);
  default:
    // TODO: treat i1 as divergent?
    return A->hasAttribute(Attribute::InReg);
  }
}

bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = CB->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_Gfx:
  case CallingConv::AMDGPU_CS_Chain:
  case CallingConv::AMDGPU_CS_ChainPreserve:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
           CB->paramHasAttr(ArgNo, Attribute::ByVal);
  default:
    return CB->paramHasAttr(ArgNo, Attribute::InReg);
  }
}

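/// \returns true if the subtarget encodes SMEM immediate offsets in bytes
/// rather than dwords.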
static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
  return isGCN3Encoding(ST) || isGFX10Plus(ST);
}

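/// \returns true if \p EncodedOffset fits in the unsigned immediate offset
/// field of an SMRD/SMEM instruction on \p ST (23 bits on GFX12+, 20 bits on
/// byte-offset targets, 8 bits otherwise).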
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset) {
  if (isGFX12Plus(ST))
    return isUInt<23>(EncodedOffset);

  return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
                               : isUInt<8>(EncodedOffset);
}

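/// \returns true if \p EncodedOffset is a legal signed immediate offset for an
/// SMRD/SMEM instruction on \p ST. Before GFX12 the signed form is only
/// available for non-buffer loads on subtargets with signed SMRD offsets.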
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset, bool IsBuffer) {
  if (isGFX12Plus(ST))
    return isInt<24>(EncodedOffset);

  return !IsBuffer && hasSMRDSignedImmOffset(ST) && isInt<21>(EncodedOffset);
}

static bool isDwordAligned(uint64_t ByteOffset) {
  return (ByteOffset & 3) == 0;
}

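/// Converts \p ByteOffset into the units used by the SMRD/SMEM immediate
/// offset field: bytes on subtargets with byte offsets, dwords otherwise.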
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
                                uint64_t ByteOffset) {
  if (hasSMEMByteOffset(ST))
    return ByteOffset;

  assert(isDwordAligned(ByteOffset));
  return ByteOffset >> 2;
}

std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                            int64_t ByteOffset, bool IsBuffer,
                                            bool HasSOffset) {
  // For unbuffered smem loads, it is illegal for the Immediate Offset to be
  // negative if the resulting (Offset + (M0 or SOffset or zero)) is negative.
  // Handle the case where SOffset is not present.
  if (!IsBuffer && !HasSOffset && ByteOffset < 0 && hasSMRDSignedImmOffset(ST))
    return std::nullopt;

  if (isGFX12Plus(ST)) // 24-bit signed offsets.
    return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
                                 : std::nullopt;

  // The signed version is always a byte offset.
  if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
    assert(hasSMEMByteOffset(ST));
    return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
                                 : std::nullopt;
  }

  if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
    return std::nullopt;

  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
             ? std::optional<int64_t>(EncodedOffset)
             : std::nullopt;
}

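/// \returns the offset, in offset units, to use with the 32-bit literal SMRD
/// offset encoding, or std::nullopt if it cannot be used. This encoding is
/// only available on CI and requires a dword-aligned byte offset.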
std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                     int64_t ByteOffset) {
  if (!isCI(ST) || !isDwordAligned(ByteOffset))
    return std::nullopt;

  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
                                   : std::nullopt;
}

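/// \returns the width in bits of the immediate offset field of FLAT
/// instructions on \p ST: 12 on GFX10, 24 on GFX12, and 13 elsewhere.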
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) {
  if (AMDGPU::isGFX10(ST))
    return 12;

  if (AMDGPU::isGFX12(ST))
    return 24;
  return 13;
}

namespace {

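// Key types and lookup declarations for the TableGen-generated searchable
// tables (sources of divergence, always-uniform intrinsics, and buffer
// formats) instantiated from AMDGPUGenSearchableTables.inc below.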
struct SourceOfDivergence {
  unsigned Intr;
};
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);

struct AlwaysUniform {
  unsigned Intr;
};
const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);

#define GET_SourcesOfDivergence_IMPL
#define GET_UniformIntrinsics_IMPL
#define GET_Gfx9BufferFormat_IMPL
#define GET_Gfx10BufferFormat_IMPL
#define GET_Gfx11PlusBufferFormat_IMPL

#include "AMDGPUGenSearchableTables.inc"

} // end anonymous namespace

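// Thin wrappers over the generated tables: an intrinsic is a source of
// divergence, or always uniform, iff it appears in the corresponding table.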
bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
  return lookupSourceOfDivergence(IntrID);
}

bool isIntrinsicAlwaysUniform(unsigned IntrID) {
  return lookupAlwaysUniform(IntrID);
}

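/// \returns the buffer format entry matching the given component width,
/// component count and numeric format on \p STI, consulting the GFX9, GFX10
/// or GFX11+ table as appropriate, or nullptr if there is no match.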
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI) {
  return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(BitsPerComp,
                                                         NumComponents,
                                                         NumFormat)
         : isGFX10(STI)
             ? getGfx10BufferFormatInfo(BitsPerComp, NumComponents, NumFormat)
             : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat);
}

const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI) {
  return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
         : isGFX10(STI)   ? getGfx10BufferFormatInfo(Format)
                          : getGfx9BufferFormatInfo(Format);
}

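/// \returns true if any of the vdst/src0/src1/src2 operands of \p OpDesc uses
/// a 64-bit VGPR register class.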
bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) {
  for (auto OpName : {OpName::vdst, OpName::src0, OpName::src1, OpName::src2}) {
    int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
    if (Idx == -1)
      continue;

    if (OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64RegClassID ||
        OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64_Align2RegClassID)
      return true;
  }

  return false;
}

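// Treat any instruction with 64-bit VGPR operands as DP-ALU for DPP purposes;
// such instructions are subject to the stricter DPP rules on subtargets that
// restrict DPP on 64-bit ALU operations.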
bool isDPALU_DPP(const MCInstrDesc &OpDesc) {
  return hasAny64BitVGPROperands(OpDesc);
}

unsigned getLdsDwGranularity(const MCSubtargetInfo &ST) {
  // Currently this is 128 for all subtargets
  return 128;
}

} // namespace AMDGPU

raw_ostream &operator<<(raw_ostream &OS,
                        const AMDGPU::IsaInfo::TargetIDSetting S) {
  switch (S) {
  case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported):
    OS << "Unsupported";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::Any):
    OS << "Any";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::Off):
    OS << "Off";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::On):
    OS << "On";
    break;
  }
  return OS;
}

} // namespace llvm