//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "AMDGPUAsmUtils.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/TargetParser/TargetParser.h"
#include <optional>

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion(
    "amdhsa-code-object-version", llvm::cl::Hidden,
    llvm::cl::init(llvm::AMDGPU::AMDHSA_COV6),
    llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
                   "or asm directive still take priority if present)"));

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  unsigned Mask = getBitMask(Shift, Width);
  return ((Src << Shift) & Mask) | (Dst & ~Mask);
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}
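
// For example, getBitMask(4, 3) yields 0x70 (three bits starting at bit 4),
// packBits(5, 0, 4, 3) yields 0x50, and unpackBits(0x50, 4, 3) recovers 5.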

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 10 : 0;
}

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 6 : 4;
}

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 0 : 4;
}

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 4 : 8;
}

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 4;
}

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
}
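
// On GFX9 and GFX10, Vmcnt is split: bits [3:0] hold the low part and bits
// [15:14] the high part, so a 6-bit value V is encoded as
// (V & 0xF) | ((V >> 4) << 14).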

/// \returns Loadcnt bit width.
unsigned getLoadcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Samplecnt bit width.
unsigned getSamplecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Bvhcnt bit width.
unsigned getBvhcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 3 : 0;
}

/// \returns Dscnt bit width.
unsigned getDscntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Dscnt bit shift in combined S_WAIT instructions.
unsigned getDscntBitShift(unsigned VersionMajor) { return 0; }

/// \returns Storecnt or Vscnt bit width, depending on VersionMajor.
unsigned getStorecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 0;
}

/// \returns Kmcnt bit width.
unsigned getKmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 5 : 0;
}

/// \returns Xcnt bit width.
unsigned getXcntBitWidth(unsigned VersionMajor, unsigned VersionMinor) {
  return VersionMajor == 12 && VersionMinor == 5 ? 6 : 0;
}

/// \returns Asynccnt bit width.
unsigned getAsynccntBitWidth(unsigned VersionMajor, unsigned VersionMinor) {
  return VersionMajor == 12 && VersionMinor == 5 ? 6 : 0;
}

/// \returns shift for Loadcnt/Storecnt in combined S_WAIT instructions.
unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 8 : 0;
}
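
// In the combined GFX12 wait encodings (e.g. S_WAIT_LOADCNT_DSCNT), dscnt
// therefore occupies bits [5:0] and loadcnt/storecnt bits [13:8].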

/// \returns VaSdst bit width.
inline unsigned getVaSdstBitWidth() { return 3; }

/// \returns VaSdst bit shift.
inline unsigned getVaSdstBitShift() { return 9; }

/// \returns VmVsrc bit width.
inline unsigned getVmVsrcBitWidth() { return 3; }

/// \returns VmVsrc bit shift.
inline unsigned getVmVsrcBitShift() { return 2; }

/// \returns VaVdst bit width.
inline unsigned getVaVdstBitWidth() { return 4; }

/// \returns VaVdst bit shift.
inline unsigned getVaVdstBitShift() { return 12; }

/// \returns VaVcc bit width.
inline unsigned getVaVccBitWidth() { return 1; }

/// \returns VaVcc bit shift.
inline unsigned getVaVccBitShift() { return 1; }

/// \returns SaSdst bit width.
inline unsigned getSaSdstBitWidth() { return 1; }

/// \returns SaSdst bit shift.
inline unsigned getSaSdstBitShift() { return 0; }

/// \returns VaSsrc bit width.
inline unsigned getVaSsrcBitWidth() { return 1; }

/// \returns VaSsrc bit shift.
inline unsigned getVaSsrcBitShift() { return 8; }

/// \returns HoldCnt bit width.
inline unsigned getHoldCntWidth(unsigned VersionMajor, unsigned VersionMinor) {
  static constexpr const unsigned MinMajor = 10;
  static constexpr const unsigned MinMinor = 3;
  return std::tie(VersionMajor, VersionMinor) >= std::tie(MinMajor, MinMinor)
             ? 1
             : 0;
}

/// \returns HoldCnt bit shift.
inline unsigned getHoldCntBitShift() { return 7; }

} // end anonymous namespace

namespace llvm {

namespace AMDGPU {

iota_range<InstCounterType> inst_counter_types(InstCounterType MaxCounter) {
  return enum_seq(LOAD_CNT, MaxCounter);
}

StringLiteral getInstCounterName(InstCounterType T) {
  switch (T) {
  case LOAD_CNT:
    return "LOAD_CNT";
  case DS_CNT:
    return "DS_CNT";
  case EXP_CNT:
    return "EXP_CNT";
  case STORE_CNT:
    return "STORE_CNT";
  case SAMPLE_CNT:
    return "SAMPLE_CNT";
  case BVH_CNT:
    return "BVH_CNT";
  case KM_CNT:
    return "KM_CNT";
  case X_CNT:
    return "X_CNT";
  case VA_VDST:
    return "VA_VDST";
  case VM_VSRC:
    return "VM_VSRC";
  default:
    return "Unknown T";
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Waitcnt::dump() const { dbgs() << *this << "\n"; }
#endif

/// \returns true if the target supports signed immediate offset for SMRD
/// instructions.
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
  return isGFX9Plus(ST);
}

/// \returns True if \p STI is AMDHSA.
bool isHsaAbi(const MCSubtargetInfo &STI) {
  return STI.getTargetTriple().getOS() == Triple::AMDHSA;
}

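// The "amdhsa_code_object_version" module flag stores the version scaled by
// 100, e.g. a flag value of 500 maps to code object version 5.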
unsigned getAMDHSACodeObjectVersion(const Module &M) {
  if (auto *Ver = mdconst::extract_or_null<ConstantInt>(
          M.getModuleFlag("amdhsa_code_object_version"))) {
    return (unsigned)Ver->getZExtValue() / 100;
  }

  return getDefaultAMDHSACodeObjectVersion();
}

unsigned getDefaultAMDHSACodeObjectVersion() {
  return DefaultAMDHSACodeObjectVersion;
}

unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion) {
  switch (ABIVersion) {
  case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
    return 4;
  case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
    return 5;
  case ELF::ELFABIVERSION_AMDGPU_HSA_V6:
    return 6;
  default:
    return getDefaultAMDHSACodeObjectVersion();
  }
}

uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) {
  if (T.getOS() != Triple::AMDHSA)
    return 0;

  switch (CodeObjectVersion) {
  case 4:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
  case 5:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
  case 6:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V6;
  default:
    report_fatal_error("Unsupported AMDHSA Code Object Version " +
                       Twine(CodeObjectVersion));
  }
}

unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 48;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
  }
}

// FIXME: All such magic numbers about the ABI should be in a
// central TD file.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 24;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
  }
}

unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 32;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET;
  }
}

unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 40;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET;
  }
}
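
// Under COV4 the implicit kernarg offsets above are fixed: hostcall pointer
// at byte 24, default queue at 32, completion action at 40, and multigrid
// sync argument at 48.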

#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#define GET_MIMGMIPMappingTable_IMPL
#define GET_MIMGBiasMappingTable_IMPL
#define GET_MIMGOffsetMappingTable_IMPL
#define GET_MIMGG16MappingTable_IMPL
#define GET_MAIInstInfoTable_IMPL
#define GET_WMMAInstInfoTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords) {
  const MIMGInfo *Info =
      getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding, VDataDwords, VAddrDwords);
  return Info ? Info->Opcode : -1;
}

const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
  const MIMGInfo *Info = getMIMGInfo(Opc);
  return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
}

int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
  const MIMGInfo *NewInfo =
      getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
                          NewChannels, OrigInfo->VAddrDwords);
  return NewInfo ? NewInfo->Opcode : -1;
}

unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported) {
  unsigned AddrWords = BaseOpcode->NumExtraArgs;
  unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
                            (BaseOpcode->LodOrClampOrMip ? 1 : 0);
  if (IsA16)
    AddrWords += divideCeil(AddrComponents, 2);
  else
    AddrWords += AddrComponents;

  // Note: For subtargets that support A16 but not G16, enabling A16 also
  // enables 16 bit gradients.
  // For subtargets that support A16 (operand) and G16 (done with a different
  // instruction encoding), they are independent.

  if (BaseOpcode->Gradients) {
    if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
      // There are two gradients per coordinate, we pack them separately.
      // For the 3d case,
      // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
      AddrWords += alignTo<2>(Dim->NumGradients / 2);
    else
      AddrWords += Dim->NumGradients;
  }
  return AddrWords;
}
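
// For example, a 3D sample with 16-bit gradients has NumGradients = 6, which
// packs into alignTo<2>(6 / 2) = 4 address words, versus 6 words unpacked.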

struct MUBUFInfo {
  uint32_t Opcode;
  uint32_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
  bool IsBufferInv;
  bool tfe;
};

struct MTBUFInfo {
  uint32_t Opcode;
  uint32_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
};

struct SMInfo {
  uint32_t Opcode;
  bool IsBuffer;
};

struct VOPInfo {
  uint32_t Opcode;
  bool IsSingle;
};

struct VOPC64DPPInfo {
  uint32_t Opcode;
};

struct VOPCDPPAsmOnlyInfo {
  uint32_t Opcode;
};

struct VOP3CDPPAsmOnlyInfo {
  uint32_t Opcode;
};

struct VOPDComponentInfo {
  uint16_t BaseVOP;
  uint16_t VOPDOp;
  bool CanBeVOPDX;
  bool CanBeVOPD3X;
};

struct VOPDInfo {
  uint32_t Opcode;
  uint16_t OpX;
  uint16_t OpY;
  uint16_t Subtarget;
  bool VOPD3;
};

struct VOPTrue16Info {
  uint32_t Opcode;
  bool IsTrue16;
};

#define GET_FP4FP8DstByteSelTable_DECL
#define GET_FP4FP8DstByteSelTable_IMPL

struct DPMACCInstructionInfo {
  uint32_t Opcode;
  bool IsDPMACCInstruction;
};

struct FP4FP8DstByteSelInfo {
  uint32_t Opcode;
  bool HasFP8DstByteSel;
  bool HasFP4DstByteSel;
};

#define GET_DPMACCInstructionTable_DECL
#define GET_DPMACCInstructionTable_IMPL
#define GET_MTBUFInfoTable_DECL
#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
#define GET_MUBUFInfoTable_IMPL
#define GET_SMInfoTable_DECL
#define GET_SMInfoTable_IMPL
#define GET_VOP1InfoTable_DECL
#define GET_VOP1InfoTable_IMPL
#define GET_VOP2InfoTable_DECL
#define GET_VOP2InfoTable_IMPL
#define GET_VOP3InfoTable_DECL
#define GET_VOP3InfoTable_IMPL
#define GET_VOPC64DPPTable_DECL
#define GET_VOPC64DPPTable_IMPL
#define GET_VOPC64DPP8Table_DECL
#define GET_VOPC64DPP8Table_IMPL
#define GET_VOPCAsmOnlyInfoTable_DECL
#define GET_VOPCAsmOnlyInfoTable_IMPL
#define GET_VOP3CAsmOnlyInfoTable_DECL
#define GET_VOP3CAsmOnlyInfoTable_IMPL
#define GET_VOPDComponentTable_DECL
#define GET_VOPDComponentTable_IMPL
#define GET_VOPDPairs_DECL
#define GET_VOPDPairs_IMPL
#define GET_VOPTrue16Table_DECL
#define GET_VOPTrue16Table_IMPL
#define GET_True16D16Table_IMPL
#define GET_WMMAOpcode2AddrMappingTable_DECL
#define GET_WMMAOpcode2AddrMappingTable_IMPL
#define GET_WMMAOpcode3AddrMappingTable_DECL
#define GET_WMMAOpcode3AddrMappingTable_IMPL
#define GET_getMFMA_F8F6F4_WithSize_DECL
#define GET_getMFMA_F8F6F4_WithSize_IMPL
#define GET_isMFMA_F8F6F4Table_IMPL
#define GET_isCvtScaleF32_F32F16ToF8F4Table_IMPL

#include "AMDGPUGenSearchableTables.inc"

int getMTBUFBaseOpcode(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MTBUFInfo *Info =
      getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMTBUFElements(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMTBUFHasVAddr(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info && Info->has_vaddr;
}

bool getMTBUFHasSrsrc(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info && Info->has_srsrc;
}

bool getMTBUFHasSoffset(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info && Info->has_soffset;
}

int getMUBUFBaseOpcode(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MUBUFInfo *Info =
      getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMUBUFElements(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMUBUFHasVAddr(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info && Info->has_vaddr;
}

bool getMUBUFHasSrsrc(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info && Info->has_srsrc;
}

bool getMUBUFHasSoffset(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info && Info->has_soffset;
}

bool getMUBUFIsBufferInv(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info && Info->IsBufferInv;
}

bool getMUBUFTfe(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info && Info->tfe;
}

bool getSMEMIsBuffer(unsigned Opc) {
  const SMInfo *Info = getSMEMOpcodeHelper(Opc);
  return Info && Info->IsBuffer;
}

bool getVOP1IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
  return !Info || Info->IsSingle;
}

bool getVOP2IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
  return !Info || Info->IsSingle;
}

bool getVOP3IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
  return !Info || Info->IsSingle;
}

bool isVOPC64DPP(unsigned Opc) {
  return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
}

bool isVOPCAsmOnly(unsigned Opc) { return isVOPCAsmOnlyOpcodeHelper(Opc); }

bool getMAIIsDGEMM(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info && Info->is_dgemm;
}

bool getMAIIsGFX940XDL(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info && Info->is_gfx940_xdl;
}

bool getWMMAIsXDL(unsigned Opc) {
  const WMMAInstInfo *Info = getWMMAInstInfoHelper(Opc);
  return Info ? Info->is_wmma_xdl : false;
}

uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal) {
  switch (EncodingVal) {
  case MFMAScaleFormats::FP6_E2M3:
  case MFMAScaleFormats::FP6_E3M2:
    return 6;
  case MFMAScaleFormats::FP4_E2M1:
    return 4;
  case MFMAScaleFormats::FP8_E4M3:
  case MFMAScaleFormats::FP8_E5M2:
  default:
    return 8;
  }

  llvm_unreachable("covered switch over mfma scale formats");
}

const MFMA_F8F6F4_Info *getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ,
                                                      unsigned BLGP,
                                                      unsigned F8F8Opcode) {
  uint8_t SrcANumRegs = mfmaScaleF8F6F4FormatToNumRegs(CBSZ);
  uint8_t SrcBNumRegs = mfmaScaleF8F6F4FormatToNumRegs(BLGP);
  return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode);
}

uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt) {
  switch (Fmt) {
  case WMMA::MATRIX_FMT_FP8:
  case WMMA::MATRIX_FMT_BF8:
    return 16;
  case WMMA::MATRIX_FMT_FP6:
  case WMMA::MATRIX_FMT_BF6:
    return 12;
  case WMMA::MATRIX_FMT_FP4:
    return 8;
  }

  llvm_unreachable("covered switch over wmma scale formats");
}

const MFMA_F8F6F4_Info *getWMMA_F8F6F4_WithFormatArgs(unsigned FmtA,
                                                      unsigned FmtB,
                                                      unsigned F8F8Opcode) {
  uint8_t SrcANumRegs = wmmaScaleF8F6F4FormatToNumRegs(FmtA);
  uint8_t SrcBNumRegs = wmmaScaleF8F6F4FormatToNumRegs(FmtB);
  return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode);
}

unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) {
  if (ST.hasFeature(AMDGPU::FeatureGFX13Insts))
    return SIEncodingFamily::GFX13;
  if (ST.hasFeature(AMDGPU::FeatureGFX1250Insts))
    return SIEncodingFamily::GFX1250;
  if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
    return SIEncodingFamily::GFX12;
  if (ST.hasFeature(AMDGPU::FeatureGFX11_7Insts))
    return SIEncodingFamily::GFX1170;
  if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
    return SIEncodingFamily::GFX11;
  llvm_unreachable("Subtarget generation does not support VOPD!");
}

CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3) {
  bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
  Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  if (Info) {
    // Check that Opc can be used as VOPDY for this encoding. V_MOV_B32 as a
    // VOPDX is just a placeholder here; it is supported on all encodings.
    // TODO: This can be optimized by creating tables of supported VOPDY
    // opcodes per encoding.
    unsigned VOPDMov = AMDGPU::getVOPDOpcode(AMDGPU::V_MOV_B32_e32, VOPD3);
    bool CanBeVOPDX;
    if (VOPD3) {
      CanBeVOPDX = getVOPDFull(AMDGPU::getVOPDOpcode(Opc, VOPD3), VOPDMov,
                               EncodingFamily, VOPD3) != -1;
    } else {
      // The list of VOPDX opcodes is currently the same in all encoding
      // families, so we do not need a family-specific check.
      CanBeVOPDX = Info->CanBeVOPDX;
    }
    bool CanBeVOPDY = getVOPDFull(VOPDMov, AMDGPU::getVOPDOpcode(Opc, VOPD3),
                                  EncodingFamily, VOPD3) != -1;
    return {CanBeVOPDX, CanBeVOPDY};
  }

  return {false, false};
}

unsigned getVOPDOpcode(unsigned Opc, bool VOPD3) {
  bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
  Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  return Info ? Info->VOPDOp : ~0u;
}

bool isVOPD(unsigned Opc) {
  return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
}

bool isMAC(unsigned Opc) {
  return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F32_e64_vi ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F16_e64_vi ||
         Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
         Opc == AMDGPU::V_FMAC_F64_e64_gfx12 ||
         Opc == AMDGPU::V_FMAC_F64_e64_gfx13 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx13 ||
         Opc == AMDGPU::V_FMAC_F32_e64_vi ||
         Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
         Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx12 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx13 ||
         Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx13 ||
         Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
         Opc == AMDGPU::V_DOT2C_F32_BF16_e64_vi ||
         Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
         Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
         Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
}

bool isPermlane16(unsigned Opc) {
  return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
}

bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) {
  return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_BF8_fake16_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_FP8_fake16_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_BF8_t16_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_FP8_t16_e64_gfx12;
}

bool isGenericAtomic(unsigned Opc) {
  return Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB_CLAMP_U32 ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_COND_SUB_U32 ||
         Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
}

bool isAsyncStore(unsigned Opc) {
  return Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_gfx1250 ||
         Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_gfx1250 ||
         Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_gfx1250 ||
         Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_gfx1250 ||
         Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_SADDR_gfx1250 ||
         Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_SADDR_gfx1250 ||
         Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_SADDR_gfx1250 ||
         Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_SADDR_gfx1250;
}

bool isTensorStore(unsigned Opc) {
  return Opc == TENSOR_STORE_FROM_LDS_d2_gfx1250 ||
         Opc == TENSOR_STORE_FROM_LDS_d4_gfx1250;
}

unsigned getTemporalHintType(const MCInstrDesc TID) {
  if (TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))
    return CPol::TH_TYPE_ATOMIC;
  unsigned Opc = TID.getOpcode();
  // Async and tensor stores should have the temporal hint type TH_TYPE_STORE.
  if (TID.mayStore() &&
      (isAsyncStore(Opc) || isTensorStore(Opc) || !TID.mayLoad()))
    return CPol::TH_TYPE_STORE;

  // This will default to returning TH_TYPE_LOAD when neither MayStore nor
  // MayLoad flag is present, which is the case with instructions like
  // image_get_resinfo.
  return CPol::TH_TYPE_LOAD;
}

bool isTrue16Inst(unsigned Opc) {
  const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
  return Info && Info->IsTrue16;
}

FPType getFPDstSelType(unsigned Opc) {
  const FP4FP8DstByteSelInfo *Info = getFP4FP8DstByteSelHelper(Opc);
  if (!Info)
    return FPType::None;
  if (Info->HasFP8DstByteSel)
    return FPType::FP8;
  if (Info->HasFP4DstByteSel)
    return FPType::FP4;

  return FPType::None;
}

bool isDPMACCInstruction(unsigned Opc) {
  const DPMACCInstructionInfo *Info = getDPMACCInstructionHelper(Opc);
  return Info && Info->IsDPMACCInstruction;
}

unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
  return Info ? Info->Opcode3Addr : ~0u;
}

unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
  return Info ? Info->Opcode2Addr : ~0u;
}

// Wrapper for the Tablegen'd function. enum Subtarget is not defined in any
// header file, so we need to wrap it in a function that takes unsigned
// instead.
int32_t getMCOpcode(uint32_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}

unsigned getBitOp2(unsigned Opc) {
  switch (Opc) {
  default:
    return 0;
  case AMDGPU::V_AND_B32_e32:
    return 0x40;
  case AMDGPU::V_OR_B32_e32:
    return 0x54;
  case AMDGPU::V_XOR_B32_e32:
    return 0x14;
  case AMDGPU::V_XNOR_B32_e32:
    return 0x41;
  }
}

int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily,
                bool VOPD3) {
  bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(OpY) : 0;
  OpY = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : OpY;
  const VOPDInfo *Info =
      getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily, VOPD3);
  return Info ? Info->Opcode : -1;
}

std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
  const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
  assert(Info);
  const auto *OpX = getVOPDBaseFromComponent(Info->OpX);
  const auto *OpY = getVOPDBaseFromComponent(Info->OpY);
  assert(OpX && OpY);
  return {OpX->BaseVOP, OpY->BaseVOP};
}

namespace VOPD {

ComponentProps::ComponentProps(const MCInstrDesc &OpDesc, bool VOP3Layout) {
  assert(OpDesc.getNumDefs() == Component::DST_NUM);

  assert(OpDesc.getOperandConstraint(Component::SRC0, MCOI::TIED_TO) == -1);
  assert(OpDesc.getOperandConstraint(Component::SRC1, MCOI::TIED_TO) == -1);
  auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO);
  assert(TiedIdx == -1 || TiedIdx == Component::DST);
  HasSrc2Acc = TiedIdx != -1;
  Opcode = OpDesc.getOpcode();

  IsVOP3 = VOP3Layout || (OpDesc.TSFlags & SIInstrFlags::VOP3);
  SrcOperandsNum = AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2)   ? 3
                   : AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)  ? 3
                   : AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src1) ? 2
                                                                           : 1;
  assert(SrcOperandsNum <= Component::MAX_SRC_NUM);

  if (Opcode == AMDGPU::V_CNDMASK_B32_e32 ||
      Opcode == AMDGPU::V_CNDMASK_B32_e64) {
    // CNDMASK is an awkward exception, it has FP modifiers, but not FP
    // operands.
    NumVOPD3Mods = 2;
    if (IsVOP3)
      SrcOperandsNum = 3;
  } else if (isSISrcFPOperand(OpDesc,
                              getNamedOperandIdx(Opcode, OpName::src0))) {
    // All FP VOPD instructions have Neg modifiers for all operands except
    // for tied src2.
    NumVOPD3Mods = SrcOperandsNum;
    if (HasSrc2Acc)
      --NumVOPD3Mods;
  }

  if (OpDesc.TSFlags & SIInstrFlags::VOP3)
    return;

  auto OperandsNum = OpDesc.getNumOperands();
  unsigned CompOprIdx;
  for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
    if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
      MandatoryLiteralIdx = CompOprIdx;
      break;
    }
  }
}

int ComponentProps::getBitOp3OperandIdx() const {
  return getNamedOperandIdx(Opcode, OpName::bitop3);
}

unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
  assert(CompOprIdx < Component::MAX_OPR_NUM);

  if (CompOprIdx == Component::DST)
    return getIndexOfDstInParsedOperands();

  auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
  if (CompSrcIdx < getCompParsedSrcOperandsNum())
    return getIndexOfSrcInParsedOperands(CompSrcIdx);

  // The specified operand does not exist.
  return 0;
}

std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
    std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
    const MCRegisterInfo &MRI, bool SkipSrc, bool AllowSameVGPR,
    bool VOPD3) const {

  auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx,
                               CompInfo[ComponentIndex::X].isVOP3());
  auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx,
                               CompInfo[ComponentIndex::Y].isVOP3());

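  // VOPD requires the X and Y components to use VGPRs from different banks;
  // the bank is derived from the low bits of the register index, selected by
  // BanksMasks below.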
  const auto banksOverlap = [&MRI](MCRegister X, MCRegister Y,
                                   unsigned BanksMask) -> bool {
    MCRegister BaseX = MRI.getSubReg(X, AMDGPU::sub0);
    MCRegister BaseY = MRI.getSubReg(Y, AMDGPU::sub0);
    if (!BaseX)
      BaseX = X;
    if (!BaseY)
      BaseY = Y;
    if ((BaseX.id() & BanksMask) == (BaseY.id() & BanksMask))
      return true;
    if (BaseX != X /* This is a 64-bit register */ &&
        ((BaseX.id() + 1) & BanksMask) == (BaseY.id() & BanksMask))
      return true;
    if (BaseY != Y &&
        (BaseX.id() & BanksMask) == ((BaseY.id() + 1) & BanksMask))
      return true;

    // If both are 64-bit, a bank conflict will already have been detected
    // while checking the first subreg.
    return false;
  };

  unsigned CompOprIdx;
  for (CompOprIdx = 0; CompOprIdx < Component::MAX_OPR_NUM; ++CompOprIdx) {
    unsigned BanksMasks = VOPD3 ? VOPD3_VGPR_BANK_MASKS[CompOprIdx]
                                : VOPD_VGPR_BANK_MASKS[CompOprIdx];
    if (!OpXRegs[CompOprIdx] || !OpYRegs[CompOprIdx])
      continue;

    if (getVGPREncodingMSBs(OpXRegs[CompOprIdx], MRI) !=
        getVGPREncodingMSBs(OpYRegs[CompOprIdx], MRI))
      return CompOprIdx;

    if (SkipSrc && CompOprIdx >= Component::DST_NUM)
      continue;

    if (CompOprIdx < Component::DST_NUM) {
      // Even if we do not check vdst parity, vdst operands still shall not
      // overlap.
      if (MRI.regsOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx]))
        return CompOprIdx;
      if (VOPD3) // No need to check dst parity.
        continue;
    }

    if (banksOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx], BanksMasks) &&
        (!AllowSameVGPR || CompOprIdx < Component::DST_NUM ||
         OpXRegs[CompOprIdx] != OpYRegs[CompOprIdx]))
      return CompOprIdx;
  }

  return {};
}

// Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
// by the specified component. If an operand is unused
// or is not a VGPR, the corresponding value is 0.
//
// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
// for the specified component and MC operand. The callback must return 0
// if the operand is not a register or not a VGPR.
InstInfo::RegIndices
InstInfo::getRegIndices(unsigned CompIdx,
                        std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
                        bool VOPD3) const {
  assert(CompIdx < COMPONENTS_NUM);

  const auto &Comp = CompInfo[CompIdx];
  InstInfo::RegIndices RegIndices;

  RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());

  for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
    unsigned CompSrcIdx = CompOprIdx - DST_NUM;
    RegIndices[CompOprIdx] =
        Comp.hasRegSrcOperand(CompSrcIdx)
            ? GetRegIdx(CompIdx,
                        Comp.getIndexOfSrcInMCOperands(CompSrcIdx, VOPD3))
            : MCRegister();
  }
  return RegIndices;
}

} // namespace VOPD

VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) {
  return VOPD::InstInfo(OpX, OpY);
}

VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
                               const MCInstrInfo *InstrInfo) {
  auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
  const auto &OpXDesc = InstrInfo->get(OpX);
  const auto &OpYDesc = InstrInfo->get(OpY);
  bool VOPD3 = InstrInfo->get(VOPDOpcode).TSFlags & SIInstrFlags::VOPD3;
  VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X, VOPD3);
  VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo, VOPD3);
  return VOPD::InstInfo(OpXInfo, OpYInfo);
}

namespace IsaInfo {

AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
    : STI(STI), XnackSetting(TargetIDSetting::Any),
      SramEccSetting(TargetIDSetting::Any) {
  if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
    XnackSetting = TargetIDSetting::Unsupported;
  if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
    SramEccSetting = TargetIDSetting::Unsupported;
}

void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
  // Check if xnack or sramecc is explicitly enabled or disabled. In the
  // absence of these target features we assume we must generate code that
  // can run in any environment.
  SubtargetFeatures Features(FS);
  std::optional<bool> XnackRequested;
  std::optional<bool> SramEccRequested;

  for (const std::string &Feature : Features.getFeatures()) {
    if (Feature == "+xnack")
      XnackRequested = true;
    else if (Feature == "-xnack")
      XnackRequested = false;
    else if (Feature == "+sramecc")
      SramEccRequested = true;
    else if (Feature == "-sramecc")
      SramEccRequested = false;
  }

  bool XnackSupported = isXnackSupported();
  bool SramEccSupported = isSramEccSupported();

  if (XnackRequested) {
    if (XnackSupported) {
      XnackSetting =
          *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific xnack setting was requested and this GPU does not
      // support xnack, emit a warning. The setting remains "Unsupported".
      if (*XnackRequested) {
        errs() << "warning: xnack 'On' was requested for a processor that does "
                  "not support it!\n";
      } else {
        errs() << "warning: xnack 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }

  if (SramEccRequested) {
    if (SramEccSupported) {
      SramEccSetting =
          *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific sramecc setting was requested and this GPU does not
      // support sramecc, emit a warning. The setting remains "Unsupported".
      if (*SramEccRequested) {
        errs() << "warning: sramecc 'On' was requested for a processor that "
                  "does not support it!\n";
      } else {
        errs() << "warning: sramecc 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }
}

static TargetIDSetting
getTargetIDSettingFromFeatureString(StringRef FeatureString) {
  if (FeatureString.ends_with("-"))
    return TargetIDSetting::Off;
  if (FeatureString.ends_with("+"))
    return TargetIDSetting::On;

  llvm_unreachable("Malformed feature string");
}

void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
  SmallVector<StringRef, 3> TargetIDSplit;
  TargetID.split(TargetIDSplit, ':');

  for (const auto &FeatureString : TargetIDSplit) {
    if (FeatureString.starts_with("xnack"))
      XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
    if (FeatureString.starts_with("sramecc"))
      SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
  }
}

void AMDGPUTargetID::print(raw_ostream &StreamRep) const {
  const Triple &TargetTriple = STI.getTargetTriple();
  auto Version = getIsaVersion(STI.getCPU());

  StreamRep << TargetTriple.getArchName() << '-'
            << TargetTriple.getVendorName() << '-' << TargetTriple.getOSName()
            << '-' << TargetTriple.getEnvironmentName() << '-';

  std::string Processor;
  // TODO: The following else statement is present here because we used
  // various alias names for GPUs up until GFX9 (e.g. 'fiji' is the same as
  // 'gfx803'). Remove once all aliases are removed from GCNProcessors.td.
  if (Version.Major >= 9)
    Processor = STI.getCPU().str();
  else
    Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
                 Twine(Version.Stepping))
                    .str();

  std::string Features;
  if (TargetTriple.getOS() == Triple::AMDHSA) {
    // sramecc.
    if (getSramEccSetting() == TargetIDSetting::Off)
      Features += ":sramecc-";
    else if (getSramEccSetting() == TargetIDSetting::On)
      Features += ":sramecc+";
    // xnack.
    if (getXnackSetting() == TargetIDSetting::Off)
      Features += ":xnack-";
    else if (getXnackSetting() == TargetIDSetting::On)
      Features += ":xnack+";
  }

  StreamRep << Processor << Features;
}

std::string AMDGPUTargetID::toString() const {
  std::string Str;
  raw_string_ostream OS(Str);
  OS << *this;
  return Str;
}

unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureWavefrontSize16))
    return 16;
  if (STI->getFeatureBits().test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
  unsigned BytesPerCU = getAddressableLocalMemorySize(STI);

  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share". So the effective local memory size is doubled in
  // WGP mode on gfx10.
  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
    BytesPerCU *= 2;

  return BytesPerCU;
}

unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize32768))
    return 32768;
  if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize65536))
    return 65536;
  if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize163840))
    return 163840;
  if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize327680))
    return 327680;
  return 32768;
}

unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share".

  // GFX12.5 only supports CU mode, in which a CU contains four SIMDs.
  if (isGFX1250(*STI)) {
    assert(STI->getFeatureBits().test(FeatureCuMode));
    return 4;
  }

  // For gfx10 in CU mode the functional block is the CU, which contains
  // two SIMDs.
  if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
    return 2;

  // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP
  // contains two CUs, so a total of four SIMDs.
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize) {
  assert(FlatWorkGroupSize != 0);
  if (!STI->getTargetTriple().isAMDGCN())
    return 8;
  unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI);
  unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
  if (N == 1) {
    // Single-wave workgroups don't consume barrier resources.
    return MaxWaves;
  }

  unsigned MaxBarriers = 16;
  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
    MaxBarriers = 32;

  return std::min(MaxWaves / N, MaxBarriers);
}

unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) { return 1; }

unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
  // FIXME: Need to take scratch memory into account.
  if (isGFX90A(*STI))
    return 8;
  if (!isGFX10Plus(*STI))
    return 10;
  return hasGFX10_3Insts(*STI) ? 16 : 20;
}

unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize) {
  return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
                    getEUsPerCU(STI));
}

unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) { return 1; }

unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize) {
  return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
}
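
// For example, a flat workgroup size of 256 on a wave64 target occupies
// divideCeil(256, 64) = 4 waves.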

unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return getAddressableNumSGPRs(STI);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) { return 8; }

unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 106;
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 0;

  if (WavesPerEU >= getMaxWavesPerEU(STI))
    return 0;

  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}

unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return Addressable ? AddressableNumSGPRs : 108;
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed) {
  unsigned ExtraSGPRs = 0;
  if (VCCUsed)
    ExtraSGPRs = 2;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return ExtraSGPRs;

  if (Version.Major < 8) {
    if (FlatScrUsed)
      ExtraSGPRs = 4;
  } else {
    if (XNACKUsed)
      ExtraSGPRs = 4;

    if (FlatScrUsed ||
        STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
      ExtraSGPRs = 6;
  }

  return ExtraSGPRs;
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed) {
  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}

static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs,
                                               unsigned Granule) {
  return divideCeil(std::max(1u, NumRegs), Granule);
}

unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
  // SGPRBlocks is the actual number of SGPR blocks minus 1.
  return getGranulatedNumRegisterBlocks(NumSGPRs,
                                        getSGPREncodingGranule(STI)) -
         1;
}
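
// For example, with an encoding granule of 8, 20 SGPRs occupy
// divideCeil(20, 8) = 3 blocks, encoded as 2.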

unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             unsigned DynamicVGPRBlockSize,
                             std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  if (DynamicVGPRBlockSize != 0)
    return DynamicVGPRBlockSize;

  bool IsWave32 = EnableWavefrontSize32
                      ? *EnableWavefrontSize32
                      : STI->getFeatureBits().test(FeatureWavefrontSize32);

  if (STI->getFeatureBits().test(Feature1536VGPRs))
    return IsWave32 ? 24 : 12;

  if (hasGFX10_3Insts(*STI))
    return IsWave32 ? 16 : 8;

  return IsWave32 ? 8 : 4;
}

unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  bool IsWave32 = EnableWavefrontSize32
                      ? *EnableWavefrontSize32
                      : STI->getFeatureBits().test(FeatureWavefrontSize32);

  if (STI->getFeatureBits().test(Feature1024AddressableVGPRs))
    return IsWave32 ? 16 : 8;

  return IsWave32 ? 8 : 4;
}

unsigned getArchVGPRAllocGranule() { return 4; }

unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  if (!isGFX10Plus(*STI))
    return 256;
  bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32);
  if (STI->getFeatureBits().test(Feature1536VGPRs))
    return IsWave32 ? 1536 : 768;
  return IsWave32 ? 1024 : 512;
}

unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI) {
  const auto &Features = STI->getFeatureBits();
  if (Features.test(Feature1024AddressableVGPRs))
    return Features.test(FeatureWavefrontSize32) ? 1024 : 512;
  return 256;
}

unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI,
                                unsigned DynamicVGPRBlockSize) {
  const auto &Features = STI->getFeatureBits();
  if (Features.test(FeatureGFX90AInsts))
    return 512;

  if (DynamicVGPRBlockSize != 0)
    // On GFX12 we can allocate at most 8 blocks of VGPRs.
    return 8 * getVGPRAllocGranule(STI, DynamicVGPRBlockSize);
  return getAddressableNumArchVGPRs(STI);
}

unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
                                      unsigned NumVGPRs,
                                      unsigned DynamicVGPRBlockSize) {
  return getNumWavesPerEUWithNumVGPRs(
      NumVGPRs, getVGPRAllocGranule(STI, DynamicVGPRBlockSize),
      getMaxWavesPerEU(STI), getTotalNumVGPRs(STI));
}

unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
                                      unsigned MaxWaves,
                                      unsigned TotalNumVGPRs) {
  if (NumVGPRs < Granule)
    return MaxWaves;
  unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
  return std::min(std::max(TotalNumVGPRs / RoundedRegs, 1u), MaxWaves);
}

unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
                                  AMDGPUSubtarget::Generation Gen) {
  if (Gen >= AMDGPUSubtarget::GFX10)
    return MaxWaves;

  if (Gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
    if (SGPRs <= 80)
      return 10;
    if (SGPRs <= 88)
      return 9;
    if (SGPRs <= 100)
      return 8;
    return 7;
  }
  if (SGPRs <= 48)
    return 10;
  if (SGPRs <= 56)
    return 9;
  if (SGPRs <= 64)
    return 8;
  if (SGPRs <= 72)
    return 7;
  if (SGPRs <= 80)
    return 6;
  return 5;
}

unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        unsigned DynamicVGPRBlockSize) {
  assert(WavesPerEU != 0);

  // In dynamic VGPR mode, (static) occupancy does not depend on VGPR usage,
  // so getMaxNumVGPRs does not depend on WavesPerEU, and thus we need to
  // return zero because there is no nonzero VGPR usage N where going below N
  // achieves higher (static) occupancy.
  bool DynamicVGPREnabled = (DynamicVGPRBlockSize != 0);
  if (DynamicVGPREnabled)
    return 0;

  unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
  if (WavesPerEU >= MaxWavesPerEU)
    return 0;

  unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
  unsigned AddrsableNumVGPRs =
      getAddressableNumVGPRs(STI, DynamicVGPRBlockSize);
  unsigned Granule = getVGPRAllocGranule(STI, DynamicVGPRBlockSize);
  unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);

  if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
    return 0;

  unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs,
                                                        DynamicVGPRBlockSize);
  if (WavesPerEU < MinWavesPerEU)
    return getMinNumVGPRs(STI, MinWavesPerEU, DynamicVGPRBlockSize);

  unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
  unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
  return std::min(MinNumVGPRs, AddrsableNumVGPRs);
}

unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        unsigned DynamicVGPRBlockSize) {
  assert(WavesPerEU != 0);

  // In dynamic VGPR mode, WavesPerEU does not imply a VGPR limit.
  bool DynamicVGPREnabled = (DynamicVGPRBlockSize != 0);
  unsigned MaxNumVGPRs =
      DynamicVGPREnabled
          ? getTotalNumVGPRs(STI)
          : alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
                      getVGPRAllocGranule(STI, DynamicVGPRBlockSize));
  unsigned AddressableNumVGPRs =
      getAddressableNumVGPRs(STI, DynamicVGPRBlockSize);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                                 std::optional<bool> EnableWavefrontSize32) {
  return getGranulatedNumRegisterBlocks(
             NumVGPRs, getVGPREncodingGranule(STI, EnableWavefrontSize32)) -
         1;
}

unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI,
                                   unsigned NumVGPRs,
                                   unsigned DynamicVGPRBlockSize,
                                   std::optional<bool> EnableWavefrontSize32) {
  return getGranulatedNumRegisterBlocks(
      NumVGPRs,
      getVGPRAllocGranule(STI, DynamicVGPRBlockSize, EnableWavefrontSize32));
}
} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode,
                               const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  KernelCode.amd_kernel_code_version_major = 1;
  KernelCode.amd_kernel_code_version_minor = 2;
  KernelCode.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  KernelCode.amd_machine_version_major = Version.Major;
  KernelCode.amd_machine_version_minor = Version.Minor;
  KernelCode.amd_machine_version_stepping = Version.Stepping;
  KernelCode.kernel_code_entry_byte_offset = sizeof(amd_kernel_code_t);
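  // wavefront_size is encoded as the log2 of the lane count: 5 means wave32,
  // 6 means wave64.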
1608 if (STI->getFeatureBits().test(I: FeatureWavefrontSize32)) {
1609 KernelCode.wavefront_size = 5;
1610 KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
1611 } else {
1612 KernelCode.wavefront_size = 6;
1613 }
1614
1615 // If the code object does not support indirect functions, then the value must
1616 // be 0xffffffff.
1617 KernelCode.call_convention = -1;
1618
1619 // These alignment values are specified in powers of two, so alignment =
1620 // 2^n. The minimum alignment is 2^4 = 16.
1621 KernelCode.kernarg_segment_alignment = 4;
1622 KernelCode.group_segment_alignment = 4;
1623 KernelCode.private_segment_alignment = 4;
1624
1625 if (Version.Major >= 10) {
1626 KernelCode.compute_pgm_resource_registers |=
1627 S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
1628 S_00B848_MEM_ORDERED(1) | S_00B848_FWD_PROGRESS(1);
1629 }
1630}
1631
1632bool isGroupSegment(const GlobalValue *GV) {
1633 return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
1634}
1635
1636bool isGlobalSegment(const GlobalValue *GV) {
1637 return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
1638}
1639
1640bool isReadOnlySegment(const GlobalValue *GV) {
1641 unsigned AS = GV->getAddressSpace();
1642 return AS == AMDGPUAS::CONSTANT_ADDRESS ||
1643 AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
1644}
1645
1646bool shouldEmitConstantsToTextSection(const Triple &TT) {
1647 return TT.getArch() == Triple::r600;
1648}
1649
1650static bool isValidRegPrefix(char C) {
1651 return C == 'v' || C == 's' || C == 'a';
1652}
1653
1654std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef RegName) {
1655 char Kind = RegName.front();
1656 if (!isValidRegPrefix(C: Kind))
1657 return {};
1658
1659 RegName = RegName.drop_front();
1660 if (RegName.consume_front(Prefix: "[")) {
1661 unsigned Idx, End;
1662 bool Failed = RegName.consumeInteger(Radix: 10, Result&: Idx);
1663 Failed |= !RegName.consume_front(Prefix: ":");
1664 Failed |= RegName.consumeInteger(Radix: 10, Result&: End);
1665 Failed |= !RegName.consume_back(Suffix: "]");
1666 if (!Failed) {
1667 unsigned NumRegs = End - Idx + 1;
1668 if (NumRegs > 1)
1669 return {Kind, Idx, NumRegs};
1670 }
1671 } else {
1672 unsigned Idx;
1673 bool Failed = RegName.getAsInteger(Radix: 10, Result&: Idx);
1674 if (!Failed)
1675 return {Kind, Idx, 1};
1676 }
1677
1678 return {};
1679}
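
// Illustrative examples: parseAsmPhysRegName("v[8:11]") yields {'v', 8, 4},
// parseAsmPhysRegName("s17") yields {'s', 17, 1}, and malformed names such
// as "x3" or "v[4:4]" yield the empty tuple.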
1680
1681std::tuple<char, unsigned, unsigned>
1682parseAsmConstraintPhysReg(StringRef Constraint) {
1683 StringRef RegName = Constraint;
1684 if (!RegName.consume_front(Prefix: "{") || !RegName.consume_back(Suffix: "}"))
1685 return {};
1686 return parseAsmPhysRegName(RegName);
1687}
1688
1689std::pair<unsigned, unsigned>
1690getIntegerPairAttribute(const Function &F, StringRef Name,
1691 std::pair<unsigned, unsigned> Default,
1692 bool OnlyFirstRequired) {
1693 if (auto Attr = getIntegerPairAttribute(F, Name, OnlyFirstRequired))
1694 return {Attr->first, Attr->second.value_or(u&: Default.second)};
1695 return Default;
1696}
1697
1698std::optional<std::pair<unsigned, std::optional<unsigned>>>
1699getIntegerPairAttribute(const Function &F, StringRef Name,
1700 bool OnlyFirstRequired) {
1701 Attribute A = F.getFnAttribute(Kind: Name);
1702 if (!A.isStringAttribute())
1703 return std::nullopt;
1704
1705 LLVMContext &Ctx = F.getContext();
1706 std::pair<unsigned, std::optional<unsigned>> Ints;
1707 std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(Separator: ',');
1708 if (Strs.first.trim().getAsInteger(Radix: 0, Result&: Ints.first)) {
1709 Ctx.emitError(ErrorStr: "can't parse first integer attribute " + Name);
1710 return std::nullopt;
1711 }
1712 unsigned Second = 0;
1713 if (Strs.second.trim().getAsInteger(Radix: 0, Result&: Second)) {
1714 if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
1715 Ctx.emitError(ErrorStr: "can't parse second integer attribute " + Name);
1716 return std::nullopt;
1717 }
1718 } else {
1719 Ints.second = Second;
1720 }
1721
1722 return Ints;
1723}
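
// Illustrative examples for the overload above: an attribute value of "4,8"
// parses to {4, 8}; with OnlyFirstRequired set, "4" parses to
// {4, std::nullopt}, while "4,x" still emits a diagnostic.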
1724
1725SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
1726 unsigned Size,
1727 unsigned DefaultVal) {
1728 std::optional<SmallVector<unsigned>> R =
1729 getIntegerVecAttribute(F, Name, Size);
1730 return R.has_value() ? *R : SmallVector<unsigned>(Size, DefaultVal);
1731}
1732
1733std::optional<SmallVector<unsigned>>
1734getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size) {
1735 assert(Size > 2);
1736 LLVMContext &Ctx = F.getContext();
1737
1738 Attribute A = F.getFnAttribute(Kind: Name);
1739 if (!A.isValid())
1740 return std::nullopt;
1741 if (!A.isStringAttribute()) {
1742 Ctx.emitError(ErrorStr: Name + " is not a string attribute");
1743 return std::nullopt;
1744 }
1745
1746 SmallVector<unsigned> Vals(Size);
1747
1748 StringRef S = A.getValueAsString();
1749 unsigned i = 0;
1750 for (; !S.empty() && i < Size; i++) {
1751 std::pair<StringRef, StringRef> Strs = S.split(Separator: ',');
1752 unsigned IntVal;
1753 if (Strs.first.trim().getAsInteger(Radix: 0, Result&: IntVal)) {
1754 Ctx.emitError(ErrorStr: "can't parse integer attribute " + Strs.first + " in " +
1755 Name);
1756 return std::nullopt;
1757 }
1758 Vals[i] = IntVal;
1759 S = Strs.second;
1760 }
1761
1762 if (!S.empty() || i < Size) {
1763 Ctx.emitError(ErrorStr: "attribute " + Name +
1764 " has incorrect number of integers; expected " +
1765 llvm::utostr(X: Size));
1766 return std::nullopt;
1767 }
1768 return Vals;
1769}
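
// Illustrative examples (Size == 3): "1,2,3" yields {1, 2, 3}; "1,2" and
// "1,2,3,4" both trigger the incorrect-number-of-integers error above.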
1770
1771bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val) {
1772 assert((MD.getNumOperands() % 2 == 0) && "invalid number of operands!");
1773 for (unsigned I = 0, E = MD.getNumOperands() / 2; I != E; ++I) {
1774 auto Low =
1775 mdconst::extract<ConstantInt>(MD: MD.getOperand(I: 2 * I + 0))->getValue();
1776 auto High =
1777 mdconst::extract<ConstantInt>(MD: MD.getOperand(I: 2 * I + 1))->getValue();
1778 // There are two types of [A; B) ranges:
1779 // A < B, e.g. [4; 5) which is a range that only includes 4.
1780 // A > B, e.g. [5; 4) which is a range that wraps around and includes
1781 // everything except 4.
1782 if (Low.ult(RHS: High)) {
1783 if (Low.ule(RHS: Val) && High.ugt(RHS: Val))
1784 return true;
1785 } else {
1786 if (Low.ule(RHS: Val) || High.ugt(RHS: Val))
1787 return true;
1788 }
1789 }
1790
1791 return false;
1792}
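
// Illustrative example: a single (5, 4) pair encodes the wrapped range
// [5; 4), so the function returns true for 5 and for 3 but false for 4.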
1793
1794unsigned getVmcntBitMask(const IsaVersion &Version) {
1795 return (1 << (getVmcntBitWidthLo(VersionMajor: Version.Major) +
1796 getVmcntBitWidthHi(VersionMajor: Version.Major))) -
1797 1;
1798}
1799
1800unsigned getLoadcntBitMask(const IsaVersion &Version) {
1801 return (1 << getLoadcntBitWidth(VersionMajor: Version.Major)) - 1;
1802}
1803
1804unsigned getSamplecntBitMask(const IsaVersion &Version) {
1805 return (1 << getSamplecntBitWidth(VersionMajor: Version.Major)) - 1;
1806}
1807
1808unsigned getBvhcntBitMask(const IsaVersion &Version) {
1809 return (1 << getBvhcntBitWidth(VersionMajor: Version.Major)) - 1;
1810}
1811
1812unsigned getExpcntBitMask(const IsaVersion &Version) {
1813 return (1 << getExpcntBitWidth(VersionMajor: Version.Major)) - 1;
1814}
1815
1816unsigned getLgkmcntBitMask(const IsaVersion &Version) {
1817 return (1 << getLgkmcntBitWidth(VersionMajor: Version.Major)) - 1;
1818}
1819
1820unsigned getDscntBitMask(const IsaVersion &Version) {
1821 return (1 << getDscntBitWidth(VersionMajor: Version.Major)) - 1;
1822}
1823
1824unsigned getKmcntBitMask(const IsaVersion &Version) {
1825 return (1 << getKmcntBitWidth(VersionMajor: Version.Major)) - 1;
1826}
1827
1828unsigned getXcntBitMask(const IsaVersion &Version) {
1829 return (1 << getXcntBitWidth(VersionMajor: Version.Major, VersionMinor: Version.Minor)) - 1;
1830}
1831
1832unsigned getAsynccntBitMask(const IsaVersion &Version) {
1833 return (1 << getAsynccntBitWidth(VersionMajor: Version.Major, VersionMinor: Version.Minor)) - 1;
1834}
1835
1836unsigned getStorecntBitMask(const IsaVersion &Version) {
1837 return (1 << getStorecntBitWidth(VersionMajor: Version.Major)) - 1;
1838}
1839
1840HardwareLimits::HardwareLimits(const IsaVersion &IV) {
1841 bool HasExtendedWaitCounts = IV.Major >= 12;
1842 if (HasExtendedWaitCounts) {
1843 LoadcntMax = getLoadcntBitMask(Version: IV);
1844 DscntMax = getDscntBitMask(Version: IV);
1845 } else {
1846 LoadcntMax = getVmcntBitMask(Version: IV);
1847 DscntMax = getLgkmcntBitMask(Version: IV);
1848 }
1849 ExpcntMax = getExpcntBitMask(Version: IV);
1850 StorecntMax = getStorecntBitMask(Version: IV);
1851 SamplecntMax = getSamplecntBitMask(Version: IV);
1852 BvhcntMax = getBvhcntBitMask(Version: IV);
1853 KmcntMax = getKmcntBitMask(Version: IV);
1854 XcntMax = getXcntBitMask(Version: IV);
1855 AsyncMax = getAsynccntBitMask(Version: IV);
1856 VaVdstMax = DepCtr::getVaVdstBitMask();
1857 VmVsrcMax = DepCtr::getVmVsrcBitMask();
1858}
1859
1860unsigned getWaitcntBitMask(const IsaVersion &Version) {
1861 unsigned VmcntLo = getBitMask(Shift: getVmcntBitShiftLo(VersionMajor: Version.Major),
1862 Width: getVmcntBitWidthLo(VersionMajor: Version.Major));
1863 unsigned Expcnt = getBitMask(Shift: getExpcntBitShift(VersionMajor: Version.Major),
1864 Width: getExpcntBitWidth(VersionMajor: Version.Major));
1865 unsigned Lgkmcnt = getBitMask(Shift: getLgkmcntBitShift(VersionMajor: Version.Major),
1866 Width: getLgkmcntBitWidth(VersionMajor: Version.Major));
1867 unsigned VmcntHi = getBitMask(Shift: getVmcntBitShiftHi(VersionMajor: Version.Major),
1868 Width: getVmcntBitWidthHi(VersionMajor: Version.Major));
1869 return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
1870}
1871
1872unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1873 unsigned VmcntLo = unpackBits(Src: Waitcnt, Shift: getVmcntBitShiftLo(VersionMajor: Version.Major),
1874 Width: getVmcntBitWidthLo(VersionMajor: Version.Major));
1875 unsigned VmcntHi = unpackBits(Src: Waitcnt, Shift: getVmcntBitShiftHi(VersionMajor: Version.Major),
1876 Width: getVmcntBitWidthHi(VersionMajor: Version.Major));
1877 return VmcntLo | VmcntHi << getVmcntBitWidthLo(VersionMajor: Version.Major);
1878}
1879
1880unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
1881 return unpackBits(Src: Waitcnt, Shift: getExpcntBitShift(VersionMajor: Version.Major),
1882 Width: getExpcntBitWidth(VersionMajor: Version.Major));
1883}
1884
1885unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1886 return unpackBits(Src: Waitcnt, Shift: getLgkmcntBitShift(VersionMajor: Version.Major),
1887 Width: getLgkmcntBitWidth(VersionMajor: Version.Major));
1888}
1889
1890void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt,
1891 unsigned &Expcnt, unsigned &Lgkmcnt) {
1892 Vmcnt = decodeVmcnt(Version, Waitcnt);
1893 Expcnt = decodeExpcnt(Version, Waitcnt);
1894 Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
1895}
1896
1897Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
1898 Waitcnt Decoded;
1899 Decoded.set(T: LOAD_CNT, Val: decodeVmcnt(Version, Waitcnt: Encoded));
1900 Decoded.set(T: EXP_CNT, Val: decodeExpcnt(Version, Waitcnt: Encoded));
1901 Decoded.set(T: DS_CNT, Val: decodeLgkmcnt(Version, Waitcnt: Encoded));
1902 return Decoded;
1903}
1904
1905unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1906 unsigned Vmcnt) {
1907 Waitcnt = packBits(Src: Vmcnt, Dst: Waitcnt, Shift: getVmcntBitShiftLo(VersionMajor: Version.Major),
1908 Width: getVmcntBitWidthLo(VersionMajor: Version.Major));
1909 return packBits(Src: Vmcnt >> getVmcntBitWidthLo(VersionMajor: Version.Major), Dst: Waitcnt,
1910 Shift: getVmcntBitShiftHi(VersionMajor: Version.Major),
1911 Width: getVmcntBitWidthHi(VersionMajor: Version.Major));
1912}
1913
1914unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1915 unsigned Expcnt) {
1916 return packBits(Src: Expcnt, Dst: Waitcnt, Shift: getExpcntBitShift(VersionMajor: Version.Major),
1917 Width: getExpcntBitWidth(VersionMajor: Version.Major));
1918}
1919
1920unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1921 unsigned Lgkmcnt) {
1922 return packBits(Src: Lgkmcnt, Dst: Waitcnt, Shift: getLgkmcntBitShift(VersionMajor: Version.Major),
1923 Width: getLgkmcntBitWidth(VersionMajor: Version.Major));
1924}
1925
1926unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
1927 unsigned Expcnt, unsigned Lgkmcnt) {
1928 unsigned Waitcnt = getWaitcntBitMask(Version);
1929 Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
1930 Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
1931 Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
1932 return Waitcnt;
1933}
1934
1935unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
1936 return encodeWaitcnt(Version, Vmcnt: Decoded.get(T: LOAD_CNT), Expcnt: Decoded.get(T: EXP_CNT),
1937 Lgkmcnt: Decoded.get(T: DS_CNT));
1938}
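
// Worked example (using the GFX9 shift/width helpers above): vmcnt occupies
// bits [3:0] plus [15:14], expcnt bits [6:4], and lgkmcnt bits [11:8], so
// for an IsaVersion with Major == 9,
// encodeWaitcnt(V, /*Vmcnt=*/1, /*Expcnt=*/2, /*Lgkmcnt=*/3) packs to 0x321.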
1939
1940static unsigned getCombinedCountBitMask(const IsaVersion &Version,
1941 bool IsStore) {
1942 unsigned Dscnt = getBitMask(Shift: getDscntBitShift(VersionMajor: Version.Major),
1943 Width: getDscntBitWidth(VersionMajor: Version.Major));
1944 if (IsStore) {
1945 unsigned Storecnt = getBitMask(Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1946 Width: getStorecntBitWidth(VersionMajor: Version.Major));
1947 return Dscnt | Storecnt;
1948 }
1949 unsigned Loadcnt = getBitMask(Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1950 Width: getLoadcntBitWidth(VersionMajor: Version.Major));
1951 return Dscnt | Loadcnt;
1952}
1953
1954Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt) {
1955 Waitcnt Decoded;
1956 Decoded.set(T: LOAD_CNT, Val: unpackBits(Src: LoadcntDscnt,
1957 Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1958 Width: getLoadcntBitWidth(VersionMajor: Version.Major)));
1959 Decoded.set(T: DS_CNT, Val: unpackBits(Src: LoadcntDscnt, Shift: getDscntBitShift(VersionMajor: Version.Major),
1960 Width: getDscntBitWidth(VersionMajor: Version.Major)));
1961 return Decoded;
1962}
1963
1964Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt) {
1965 Waitcnt Decoded;
1966 Decoded.set(T: STORE_CNT, Val: unpackBits(Src: StorecntDscnt,
1967 Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1968 Width: getStorecntBitWidth(VersionMajor: Version.Major)));
1969 Decoded.set(T: DS_CNT, Val: unpackBits(Src: StorecntDscnt, Shift: getDscntBitShift(VersionMajor: Version.Major),
1970 Width: getDscntBitWidth(VersionMajor: Version.Major)));
1971 return Decoded;
1972}
1973
1974static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt,
1975 unsigned Loadcnt) {
1976 return packBits(Src: Loadcnt, Dst: Waitcnt, Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1977 Width: getLoadcntBitWidth(VersionMajor: Version.Major));
1978}
1979
1980static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt,
1981 unsigned Storecnt) {
1982 return packBits(Src: Storecnt, Dst: Waitcnt, Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1983 Width: getStorecntBitWidth(VersionMajor: Version.Major));
1984}
1985
1986static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt,
1987 unsigned Dscnt) {
1988 return packBits(Src: Dscnt, Dst: Waitcnt, Shift: getDscntBitShift(VersionMajor: Version.Major),
1989 Width: getDscntBitWidth(VersionMajor: Version.Major));
1990}
1991
1992static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt,
1993 unsigned Dscnt) {
1994 unsigned Waitcnt = getCombinedCountBitMask(Version, IsStore: false);
1995 Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt);
1996 Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
1997 return Waitcnt;
1998}
1999
2000unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded) {
2001 return encodeLoadcntDscnt(Version, Loadcnt: Decoded.get(T: LOAD_CNT),
2002 Dscnt: Decoded.get(T: DS_CNT));
2003}
2004
2005static unsigned encodeStorecntDscnt(const IsaVersion &Version,
2006 unsigned Storecnt, unsigned Dscnt) {
2007 unsigned Waitcnt = getCombinedCountBitMask(Version, IsStore: true);
2008 Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt);
2009 Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
2010 return Waitcnt;
2011}
2012
2013unsigned encodeStorecntDscnt(const IsaVersion &Version,
2014 const Waitcnt &Decoded) {
2015 return encodeStorecntDscnt(Version, Storecnt: Decoded.get(T: STORE_CNT),
2016 Dscnt: Decoded.get(T: DS_CNT));
2017}
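
// Worked example (illustrative; assumes the GFX12 layout where dscnt sits at
// bit 0 and loadcnt/storecnt at bit 8, per the shift/width helpers defined
// earlier in this file): encodeLoadcntDscnt(V12, /*Loadcnt=*/2, /*Dscnt=*/1)
// yields (2 << 8) | 1 = 0x201.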
2018
2019//===----------------------------------------------------------------------===//
2020// Custom Operand Values
2021//===----------------------------------------------------------------------===//
2022
2023static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr,
2024 int Size,
2025 const MCSubtargetInfo &STI) {
2026 unsigned Enc = 0;
2027 for (int Idx = 0; Idx < Size; ++Idx) {
2028 const auto &Op = Opr[Idx];
2029 if (Op.isSupported(STI))
2030 Enc |= Op.encode(Val: Op.Default);
2031 }
2032 return Enc;
2033}
2034
2035static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr,
2036 int Size, unsigned Code,
2037 bool &HasNonDefaultVal,
2038 const MCSubtargetInfo &STI) {
2039 unsigned UsedOprMask = 0;
2040 HasNonDefaultVal = false;
2041 for (int Idx = 0; Idx < Size; ++Idx) {
2042 const auto &Op = Opr[Idx];
2043 if (!Op.isSupported(STI))
2044 continue;
2045 UsedOprMask |= Op.getMask();
2046 unsigned Val = Op.decode(Code);
2047 if (!Op.isValid(Val))
2048 return false;
2049 HasNonDefaultVal |= (Val != Op.Default);
2050 }
2051 return (Code & ~UsedOprMask) == 0;
2052}
2053
2054static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
2055 unsigned Code, int &Idx, StringRef &Name,
2056 unsigned &Val, bool &IsDefault,
2057 const MCSubtargetInfo &STI) {
2058 while (Idx < Size) {
2059 const auto &Op = Opr[Idx++];
2060 if (Op.isSupported(STI)) {
2061 Name = Op.Name;
2062 Val = Op.decode(Code);
2063 IsDefault = (Val == Op.Default);
2064 return true;
2065 }
2066 }
2067
2068 return false;
2069}
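
// Illustrative usage: Idx acts as a cursor that callers advance by invoking
// this in a loop until it returns false, e.g.:
//   int Idx = 0;
//   StringRef Name; unsigned Val; bool IsDefault;
//   while (decodeCustomOperand(Opr, Size, Code, Idx, Name, Val, IsDefault,
//                              STI))
//     emit(Name, Val, IsDefault); // hypothetical consumer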
2070
2071static int encodeCustomOperandVal(const CustomOperandVal &Op,
2072 int64_t InputVal) {
2073 if (InputVal < 0 || InputVal > Op.Max)
2074 return OPR_VAL_INVALID;
2075 return Op.encode(Val: InputVal);
2076}
2077
2078static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
2079 const StringRef Name, int64_t InputVal,
2080 unsigned &UsedOprMask,
2081 const MCSubtargetInfo &STI) {
2082 int InvalidId = OPR_ID_UNKNOWN;
2083 for (int Idx = 0; Idx < Size; ++Idx) {
2084 const auto &Op = Opr[Idx];
2085 if (Op.Name == Name) {
2086 if (!Op.isSupported(STI)) {
2087 InvalidId = OPR_ID_UNSUPPORTED;
2088 continue;
2089 }
2090 auto OprMask = Op.getMask();
2091 if (OprMask & UsedOprMask)
2092 return OPR_ID_DUPLICATE;
2093 UsedOprMask |= OprMask;
2094 return encodeCustomOperandVal(Op, InputVal);
2095 }
2096 }
2097 return InvalidId;
2098}
2099
2100//===----------------------------------------------------------------------===//
2101// DepCtr
2102//===----------------------------------------------------------------------===//
2103
2104namespace DepCtr {
2105
2106int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) {
2107 static int Default = -1;
2108 if (Default == -1)
2109 Default = getDefaultCustomOperandEncoding(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, STI);
2110 return Default;
2111}
2112
2113bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
2114 const MCSubtargetInfo &STI) {
2115 return isSymbolicCustomOperandEncoding(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, Code,
2116 HasNonDefaultVal, STI);
2117}
2118
2119bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
2120 bool &IsDefault, const MCSubtargetInfo &STI) {
2121 return decodeCustomOperand(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, Code, Idx&: Id, Name, Val,
2122 IsDefault, STI);
2123}
2124
2125int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
2126 const MCSubtargetInfo &STI) {
2127 return encodeCustomOperand(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, Name, InputVal: Val, UsedOprMask,
2128 STI);
2129}
2130
2131unsigned getVaVdstBitMask() { return (1 << getVaVdstBitWidth()) - 1; }
2132
2133unsigned getVaSdstBitMask() { return (1 << getVaSdstBitWidth()) - 1; }
2134
2135unsigned getVaSsrcBitMask() { return (1 << getVaSsrcBitWidth()) - 1; }
2136
2137unsigned getHoldCntBitMask(const IsaVersion &Version) {
2138 return (1 << getHoldCntWidth(VersionMajor: Version.Major, VersionMinor: Version.Minor)) - 1;
2139}
2140
2141unsigned getVmVsrcBitMask() { return (1 << getVmVsrcBitWidth()) - 1; }
2142
2143unsigned getVaVccBitMask() { return (1 << getVaVccBitWidth()) - 1; }
2144
2145unsigned getSaSdstBitMask() { return (1 << getSaSdstBitWidth()) - 1; }
2146
2147unsigned decodeFieldVmVsrc(unsigned Encoded) {
2148 return unpackBits(Src: Encoded, Shift: getVmVsrcBitShift(), Width: getVmVsrcBitWidth());
2149}
2150
2151unsigned decodeFieldVaVdst(unsigned Encoded) {
2152 return unpackBits(Src: Encoded, Shift: getVaVdstBitShift(), Width: getVaVdstBitWidth());
2153}
2154
2155unsigned decodeFieldSaSdst(unsigned Encoded) {
2156 return unpackBits(Src: Encoded, Shift: getSaSdstBitShift(), Width: getSaSdstBitWidth());
2157}
2158
2159unsigned decodeFieldVaSdst(unsigned Encoded) {
2160 return unpackBits(Src: Encoded, Shift: getVaSdstBitShift(), Width: getVaSdstBitWidth());
2161}
2162
2163unsigned decodeFieldVaVcc(unsigned Encoded) {
2164 return unpackBits(Src: Encoded, Shift: getVaVccBitShift(), Width: getVaVccBitWidth());
2165}
2166
2167unsigned decodeFieldVaSsrc(unsigned Encoded) {
2168 return unpackBits(Src: Encoded, Shift: getVaSsrcBitShift(), Width: getVaSsrcBitWidth());
2169}
2170
2171unsigned decodeFieldHoldCnt(unsigned Encoded, const IsaVersion &Version) {
2172 return unpackBits(Src: Encoded, Shift: getHoldCntBitShift(),
2173 Width: getHoldCntWidth(VersionMajor: Version.Major, VersionMinor: Version.Minor));
2174}
2175
2176unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
2177 return packBits(Src: VmVsrc, Dst: Encoded, Shift: getVmVsrcBitShift(), Width: getVmVsrcBitWidth());
2178}
2179
2180unsigned encodeFieldVmVsrc(unsigned VmVsrc, const MCSubtargetInfo &STI) {
2181 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2182 return encodeFieldVmVsrc(Encoded, VmVsrc);
2183}
2184
2185unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) {
2186 return packBits(Src: VaVdst, Dst: Encoded, Shift: getVaVdstBitShift(), Width: getVaVdstBitWidth());
2187}
2188
2189unsigned encodeFieldVaVdst(unsigned VaVdst, const MCSubtargetInfo &STI) {
2190 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2191 return encodeFieldVaVdst(Encoded, VaVdst);
2192}
2193
2194unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) {
2195 return packBits(Src: SaSdst, Dst: Encoded, Shift: getSaSdstBitShift(), Width: getSaSdstBitWidth());
2196}
2197
2198unsigned encodeFieldSaSdst(unsigned SaSdst, const MCSubtargetInfo &STI) {
2199 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2200 return encodeFieldSaSdst(Encoded, SaSdst);
2201}
2202
2203unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst) {
2204 return packBits(Src: VaSdst, Dst: Encoded, Shift: getVaSdstBitShift(), Width: getVaSdstBitWidth());
2205}
2206
2207unsigned encodeFieldVaSdst(unsigned VaSdst, const MCSubtargetInfo &STI) {
2208 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2209 return encodeFieldVaSdst(Encoded, VaSdst);
2210}
2211
2212unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc) {
2213 return packBits(Src: VaVcc, Dst: Encoded, Shift: getVaVccBitShift(), Width: getVaVccBitWidth());
2214}
2215
2216unsigned encodeFieldVaVcc(unsigned VaVcc, const MCSubtargetInfo &STI) {
2217 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2218 return encodeFieldVaVcc(Encoded, VaVcc);
2219}
2220
2221unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc) {
2222 return packBits(Src: VaSsrc, Dst: Encoded, Shift: getVaSsrcBitShift(), Width: getVaSsrcBitWidth());
2223}
2224
2225unsigned encodeFieldVaSsrc(unsigned VaSsrc, const MCSubtargetInfo &STI) {
2226 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2227 return encodeFieldVaSsrc(Encoded, VaSsrc);
2228}
2229
2230unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt,
2231 const IsaVersion &Version) {
2232 return packBits(Src: HoldCnt, Dst: Encoded, Shift: getHoldCntBitShift(),
2233 Width: getHoldCntWidth(VersionMajor: Version.Major, VersionMinor: Version.Minor));
2234}
2235
2236unsigned encodeFieldHoldCnt(unsigned HoldCnt, const MCSubtargetInfo &STI) {
2237 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2238 return encodeFieldHoldCnt(Encoded, HoldCnt, Version: getIsaVersion(GPU: STI.getCPU()));
2239}
2240
2241} // namespace DepCtr
2242
2243//===----------------------------------------------------------------------===//
2244// exp tgt
2245//===----------------------------------------------------------------------===//
2246
2247namespace Exp {
2248
2249struct ExpTgt {
2250 StringLiteral Name;
2251 unsigned Tgt;
2252 unsigned MaxIndex;
2253};
2254
2255// clang-format off
2256static constexpr ExpTgt ExpTgtInfo[] = {
2257 {.Name: {"null"}, .Tgt: ET_NULL, .MaxIndex: ET_NULL_MAX_IDX},
2258 {.Name: {"mrtz"}, .Tgt: ET_MRTZ, .MaxIndex: ET_MRTZ_MAX_IDX},
2259 {.Name: {"prim"}, .Tgt: ET_PRIM, .MaxIndex: ET_PRIM_MAX_IDX},
2260 {.Name: {"mrt"}, .Tgt: ET_MRT0, .MaxIndex: ET_MRT_MAX_IDX},
2261 {.Name: {"pos"}, .Tgt: ET_POS0, .MaxIndex: ET_POS_MAX_IDX},
2262 {.Name: {"dual_src_blend"},.Tgt: ET_DUAL_SRC_BLEND0, .MaxIndex: ET_DUAL_SRC_BLEND_MAX_IDX},
2263 {.Name: {"param"}, .Tgt: ET_PARAM0, .MaxIndex: ET_PARAM_MAX_IDX},
2264};
2265// clang-format on
2266
2267bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
2268 for (const ExpTgt &Val : ExpTgtInfo) {
2269 if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
2270 Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
2271 Name = Val.Name;
2272 return true;
2273 }
2274 }
2275 return false;
2276}
2277
2278unsigned getTgtId(const StringRef Name) {
2280 for (const ExpTgt &Val : ExpTgtInfo) {
2281 if (Val.MaxIndex == 0 && Name == Val.Name)
2282 return Val.Tgt;
2283
2284 if (Val.MaxIndex > 0 && Name.starts_with(Prefix: Val.Name)) {
2285 StringRef Suffix = Name.drop_front(N: Val.Name.size());
2286
2287 unsigned Id;
2288 if (Suffix.getAsInteger(Radix: 10, Result&: Id) || Id > Val.MaxIndex)
2289 return ET_INVALID;
2290
2291 // Reject indices with leading zeroes, e.g. "mrt05".
2292 if (Suffix.size() > 1 && Suffix[0] == '0')
2293 return ET_INVALID;
2294
2295 return Val.Tgt + Id;
2296 }
2297 }
2298 return ET_INVALID;
2299}
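
// Illustrative examples: getTgtId("mrtz") returns ET_MRTZ, getTgtId("pos2")
// returns ET_POS0 + 2, and getTgtId("pos02") or getTgtId("mrt9") return
// ET_INVALID (leading zero, respectively index beyond MaxIndex).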
2300
2301bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
2302 switch (Id) {
2303 case ET_NULL:
2304 return !isGFX11Plus(STI);
2305 case ET_POS4:
2306 case ET_PRIM:
2307 return isGFX10Plus(STI);
2308 case ET_DUAL_SRC_BLEND0:
2309 case ET_DUAL_SRC_BLEND1:
2310 return isGFX11Plus(STI);
2311 default:
2312 if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
2313 return !isGFX11Plus(STI) || isGFX13Plus(STI);
2314 return true;
2315 }
2316}
2317
2318} // namespace Exp
2319
2320//===----------------------------------------------------------------------===//
2321// MTBUF Format
2322//===----------------------------------------------------------------------===//
2323
2324namespace MTBUFFormat {
2325
2326int64_t getDfmt(const StringRef Name) {
2327 for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
2328 if (Name == DfmtSymbolic[Id])
2329 return Id;
2330 }
2331 return DFMT_UNDEF;
2332}
2333
2334StringRef getDfmtName(unsigned Id) {
2335 assert(Id <= DFMT_MAX);
2336 return DfmtSymbolic[Id];
2337}
2338
2339static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) {
2340 if (isSI(STI) || isCI(STI))
2341 return NfmtSymbolicSICI;
2342 if (isVI(STI) || isGFX9(STI))
2343 return NfmtSymbolicVI;
2344 return NfmtSymbolicGFX10;
2345}
2346
2347int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
2348 const auto *lookupTable = getNfmtLookupTable(STI);
2349 for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
2350 if (Name == lookupTable[Id])
2351 return Id;
2352 }
2353 return NFMT_UNDEF;
2354}
2355
2356StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
2357 assert(Id <= NFMT_MAX);
2358 return getNfmtLookupTable(STI)[Id];
2359}
2360
2361bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
2362 unsigned Dfmt;
2363 unsigned Nfmt;
2364 decodeDfmtNfmt(Format: Id, Dfmt, Nfmt);
2365 return isValidNfmt(Val: Nfmt, STI);
2366}
2367
2368bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
2369 return !getNfmtName(Id, STI).empty();
2370}
2371
2372int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
2373 return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
2374}
2375
2376void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
2377 Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
2378 Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
2379}
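
// Worked example (illustrative; assumes the usual DFMT_SHIFT == 0 and
// NFMT_SHIFT == 4): encodeDfmtNfmt(/*Dfmt=*/4, /*Nfmt=*/4) produces 0x44,
// and decodeDfmtNfmt(0x44, Dfmt, Nfmt) recovers both fields.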
2380
2381int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) {
2382 if (isGFX11Plus(STI)) {
2383 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
2384 if (Name == UfmtSymbolicGFX11[Id])
2385 return Id;
2386 }
2387 } else {
2388 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
2389 if (Name == UfmtSymbolicGFX10[Id])
2390 return Id;
2391 }
2392 }
2393 return UFMT_UNDEF;
2394}
2395
2396StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) {
2397 if (isValidUnifiedFormat(Val: Id, STI))
2398 return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
2399 return "";
2400}
2401
2402bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
2403 return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
2404}
2405
2406int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
2407 const MCSubtargetInfo &STI) {
2408 int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
2409 if (isGFX11Plus(STI)) {
2410 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
2411 if (Fmt == DfmtNfmt2UFmtGFX11[Id])
2412 return Id;
2413 }
2414 } else {
2415 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
2416 if (Fmt == DfmtNfmt2UFmtGFX10[Id])
2417 return Id;
2418 }
2419 }
2420 return UFMT_UNDEF;
2421}
2422
2423bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
2424 return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
2425}
2426
2427unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
2428 if (isGFX10Plus(STI))
2429 return UFMT_DEFAULT;
2430 return DFMT_NFMT_DEFAULT;
2431}
2432
2433} // namespace MTBUFFormat
2434
2435//===----------------------------------------------------------------------===//
2436// SendMsg
2437//===----------------------------------------------------------------------===//
2438
2439namespace SendMsg {
2440
2441static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) {
2442 return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_;
2443}
2444
2445bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
2446 return (MsgId & ~(getMsgIdMask(STI))) == 0;
2447}
2448
2449bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
2450 bool Strict) {
2451 assert(isValidMsgId(MsgId, STI));
2452
2453 if (!Strict)
2454 return 0 <= OpId && isUInt<OP_WIDTH_>(x: OpId);
2455
2456 if (msgRequiresOp(MsgId, STI)) {
2457 if (MsgId == ID_GS_PreGFX11 && OpId == OP_GS_NOP)
2458 return false;
2459
2460 return !getMsgOpName(MsgId, Encoding: OpId, STI).empty();
2461 }
2462
2463 return OpId == OP_NONE_;
2464}
2465
2466bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
2467 const MCSubtargetInfo &STI, bool Strict) {
2468 assert(isValidMsgOp(MsgId, OpId, STI, Strict));
2469
2470 if (!Strict)
2471 return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(x: StreamId);
2472
2473 if (!isGFX11Plus(STI)) {
2474 switch (MsgId) {
2475 case ID_GS_PreGFX11:
2476 return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
2477 case ID_GS_DONE_PreGFX11:
2478 return (OpId == OP_GS_NOP)
2479 ? (StreamId == STREAM_ID_NONE_)
2480 : (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
2481 }
2482 }
2483 return StreamId == STREAM_ID_NONE_;
2484}
2485
2486bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
2487 return MsgId == ID_SYSMSG ||
2488 (!isGFX11Plus(STI) &&
2489 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
2490}
2491
2492bool msgSupportsStream(int64_t MsgId, int64_t OpId,
2493 const MCSubtargetInfo &STI) {
2494 return !isGFX11Plus(STI) &&
2495 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
2496 OpId != OP_GS_NOP;
2497}
2498
2499void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
2500 uint16_t &StreamId, const MCSubtargetInfo &STI) {
2501 MsgId = Val & getMsgIdMask(STI);
2502 if (isGFX11Plus(STI)) {
2503 OpId = 0;
2504 StreamId = 0;
2505 } else {
2506 OpId = (Val & OP_MASK_) >> OP_SHIFT_;
2507 StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
2508 }
2509}
2510
2511uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId) {
2512 return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
2513}
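
// Worked example (illustrative; assumes the pre-GFX11 layout with
// OP_SHIFT_ == 4 and STREAM_ID_SHIFT_ == 8): encodeMsg(2, 2, 1) yields
// 2 | (2 << 4) | (1 << 8) = 0x122, which decodeMsg splits back into
// MsgId == 2, OpId == 2, StreamId == 1 on pre-GFX11 targets.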
2514
2515bool msgDoesNotUseM0(int64_t MsgId, const MCSubtargetInfo &STI) {
2516 // Explicitly list message types that are known to not use m0.
2517 // This is safer than excluding only GS_ALLOC_REQ, in case new message
2518 // types are added in the future that do use m0.
2519 if (isGFX11Plus(STI)) {
2520 switch (MsgId) {
2521 case ID_DEALLOC_VGPRS_GFX11Plus:
2522 return true;
2523 default:
2524 break;
2525 }
2526 }
2527 switch (MsgId) {
2528 case ID_SAVEWAVE:
2529 case ID_STALL_WAVE_GEN:
2530 case ID_HALT_WAVES:
2531 case ID_ORDERED_PS_DONE:
2532 case ID_EARLY_PRIM_DEALLOC:
2533 case ID_GET_DOORBELL:
2534 case ID_GET_DDID:
2535 case ID_SYSMSG:
2536 return true;
2537 default:
2538 return false;
2539 }
2540}
2541
2542} // namespace SendMsg
2543
2544//===----------------------------------------------------------------------===//
2545// Miscellaneous utilities
2546//===----------------------------------------------------------------------===//
2547
2548unsigned getInitialPSInputAddr(const Function &F) {
2549 return F.getFnAttributeAsParsedInteger(Kind: "InitialPSInputAddr", Default: 0);
2550}
2551
2552bool getHasColorExport(const Function &F) {
2553 // As a safe default, always respond as if PS has color exports.
2554 return F.getFnAttributeAsParsedInteger(
2555 Kind: "amdgpu-color-export",
2556 Default: F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
2557}
2558
2559bool getHasDepthExport(const Function &F) {
2560 return F.getFnAttributeAsParsedInteger(Kind: "amdgpu-depth-export", Default: 0) != 0;
2561}
2562
2563unsigned getDynamicVGPRBlockSize(const Function &F) {
2564 unsigned BlockSize =
2565 F.getFnAttributeAsParsedInteger(Kind: "amdgpu-dynamic-vgpr-block-size", Default: 0);
2566
2567 if (BlockSize == 16 || BlockSize == 32)
2568 return BlockSize;
2569
2570 return 0;
2571}
2572
2573bool hasXNACK(const MCSubtargetInfo &STI) {
2574 return STI.hasFeature(Feature: AMDGPU::FeatureXNACK);
2575}
2576
2577bool hasSRAMECC(const MCSubtargetInfo &STI) {
2578 return STI.hasFeature(Feature: AMDGPU::FeatureSRAMECC);
2579}
2580
2581bool hasMIMG_R128(const MCSubtargetInfo &STI) {
2582 return STI.hasFeature(Feature: AMDGPU::FeatureMIMG_R128) &&
2583 !STI.hasFeature(Feature: AMDGPU::FeatureR128A16);
2584}
2585
2586bool hasA16(const MCSubtargetInfo &STI) {
2587 return STI.hasFeature(Feature: AMDGPU::FeatureA16);
2588}
2589
2590bool hasG16(const MCSubtargetInfo &STI) {
2591 return STI.hasFeature(Feature: AMDGPU::FeatureG16);
2592}
2593
2594bool hasPackedD16(const MCSubtargetInfo &STI) {
2595 return !STI.hasFeature(Feature: AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) &&
2596 !isSI(STI);
2597}
2598
2599bool hasGDS(const MCSubtargetInfo &STI) {
2600 return STI.hasFeature(Feature: AMDGPU::FeatureGDS);
2601}
2602
2603unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) {
2604 auto Version = getIsaVersion(GPU: STI.getCPU());
2605 if (Version.Major == 10)
2606 return Version.Minor >= 3 ? 13 : 5;
2607 if (Version.Major == 11)
2608 return 5;
2609 if (Version.Major >= 12)
2610 return HasSampler ? 4 : 5;
2611 return 0;
2612}
2613
2614unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) {
2615 if (isGFX1250Plus(STI))
2616 return 32;
2617 return 16;
2618}
2619
2620bool isSI(const MCSubtargetInfo &STI) {
2621 return STI.hasFeature(Feature: AMDGPU::FeatureSouthernIslands);
2622}
2623
2624bool isCI(const MCSubtargetInfo &STI) {
2625 return STI.hasFeature(Feature: AMDGPU::FeatureSeaIslands);
2626}
2627
2628bool isVI(const MCSubtargetInfo &STI) {
2629 return STI.hasFeature(Feature: AMDGPU::FeatureVolcanicIslands);
2630}
2631
2632bool isGFX9(const MCSubtargetInfo &STI) {
2633 return STI.hasFeature(Feature: AMDGPU::FeatureGFX9);
2634}
2635
2636bool isGFX9_GFX10(const MCSubtargetInfo &STI) {
2637 return isGFX9(STI) || isGFX10(STI);
2638}
2639
2640bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI) {
2641 return isGFX9(STI) || isGFX10(STI) || isGFX11(STI);
2642}
2643
2644bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) {
2645 return isVI(STI) || isGFX9(STI) || isGFX10(STI);
2646}
2647
2648bool isGFX8Plus(const MCSubtargetInfo &STI) {
2649 return isVI(STI) || isGFX9Plus(STI);
2650}
2651
2652bool isGFX9Plus(const MCSubtargetInfo &STI) {
2653 return isGFX9(STI) || isGFX10Plus(STI);
2654}
2655
2656bool isNotGFX9Plus(const MCSubtargetInfo &STI) { return !isGFX9Plus(STI); }
2657
2658bool isGFX10(const MCSubtargetInfo &STI) {
2659 return STI.hasFeature(Feature: AMDGPU::FeatureGFX10);
2660}
2661
2662bool isGFX10_GFX11(const MCSubtargetInfo &STI) {
2663 return isGFX10(STI) || isGFX11(STI);
2664}
2665
2666bool isGFX10Plus(const MCSubtargetInfo &STI) {
2667 return isGFX10(STI) || isGFX11Plus(STI);
2668}
2669
2670bool isGFX11(const MCSubtargetInfo &STI) {
2671 return STI.hasFeature(Feature: AMDGPU::FeatureGFX11);
2672}
2673
2674bool isGFX11Plus(const MCSubtargetInfo &STI) {
2675 return isGFX11(STI) || isGFX12Plus(STI);
2676}
2677
2678bool isGFX12(const MCSubtargetInfo &STI) {
2679 return STI.getFeatureBits()[AMDGPU::FeatureGFX12];
2680}
2681
2682bool isGFX12Plus(const MCSubtargetInfo &STI) {
2683 return isGFX12(STI) || isGFX13Plus(STI);
2684}
2685
2686bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); }
2687
2688bool isGFX1250(const MCSubtargetInfo &STI) {
2689 return STI.getFeatureBits()[AMDGPU::FeatureGFX1250Insts] && !isGFX13(STI);
2690}
2691
2692bool isGFX1250Plus(const MCSubtargetInfo &STI) {
2693 return STI.getFeatureBits()[AMDGPU::FeatureGFX1250Insts];
2694}
2695
2696bool isGFX13(const MCSubtargetInfo &STI) {
2697 return STI.getFeatureBits()[AMDGPU::FeatureGFX13];
2698}
2699
2700bool isGFX13Plus(const MCSubtargetInfo &STI) { return isGFX13(STI); }
2701
2702bool supportsWGP(const MCSubtargetInfo &STI) {
2703 if (isGFX1250(STI))
2704 return false;
2705 return isGFX10Plus(STI);
2706}
2707
2708bool isNotGFX11Plus(const MCSubtargetInfo &STI) { return !isGFX11Plus(STI); }
2709
2710bool isNotGFX10Plus(const MCSubtargetInfo &STI) {
2711 return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
2712}
2713
2714bool isGFX10Before1030(const MCSubtargetInfo &STI) {
2715 return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
2716}
2717
2718bool isGCN3Encoding(const MCSubtargetInfo &STI) {
2719 return STI.hasFeature(Feature: AMDGPU::FeatureGCN3Encoding);
2720}
2721
2722bool isGFX10_AEncoding(const MCSubtargetInfo &STI) {
2723 return STI.hasFeature(Feature: AMDGPU::FeatureGFX10_AEncoding);
2724}
2725
2726bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
2727 return STI.hasFeature(Feature: AMDGPU::FeatureGFX10_BEncoding);
2728}
2729
2730bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
2731 return STI.hasFeature(Feature: AMDGPU::FeatureGFX10_3Insts);
2732}
2733
2734bool isGFX10_3_GFX11(const MCSubtargetInfo &STI) {
2735 return isGFX10_BEncoding(STI) && !isGFX12Plus(STI);
2736}
2737
2738bool isGFX90A(const MCSubtargetInfo &STI) {
2739 return STI.hasFeature(Feature: AMDGPU::FeatureGFX90AInsts);
2740}
2741
2742bool isGFX940(const MCSubtargetInfo &STI) {
2743 return STI.hasFeature(Feature: AMDGPU::FeatureGFX940Insts);
2744}
2745
2746bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
2747 return STI.hasFeature(Feature: AMDGPU::FeatureArchitectedFlatScratch);
2748}
2749
2750bool hasMAIInsts(const MCSubtargetInfo &STI) {
2751 return STI.hasFeature(Feature: AMDGPU::FeatureMAIInsts);
2752}
2753
2754bool hasVOPD(const MCSubtargetInfo &STI) {
2755 return STI.hasFeature(Feature: AMDGPU::FeatureVOPDInsts);
2756}
2757
2758bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI) {
2759 return STI.hasFeature(Feature: AMDGPU::FeatureDPPSrc1SGPR);
2760}
2761
2762unsigned hasKernargPreload(const MCSubtargetInfo &STI) {
2763 return STI.hasFeature(Feature: AMDGPU::FeatureKernargPreload);
2764}
2765
2766int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
2767 int32_t ArgNumVGPR) {
2768 if (has90AInsts && ArgNumAGPR)
2769 return alignTo(Value: ArgNumVGPR, Align: 4) + ArgNumAGPR;
2770 return std::max(a: ArgNumVGPR, b: ArgNumAGPR);
2771}
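
// Worked example: with 90A insts, ArgNumVGPR == 6 and ArgNumAGPR == 4 give
// alignTo(6, 4) + 4 == 12; without them the result is max(6, 4) == 6.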
2772
2773bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI) {
2774 const MCRegisterClass SGPRClass = TRI->getRegClass(i: AMDGPU::SReg_32RegClassID);
2775 const MCRegister FirstSubReg = TRI->getSubReg(Reg, Idx: AMDGPU::sub0);
2776 return SGPRClass.contains(Reg: FirstSubReg != 0 ? FirstSubReg : Reg) ||
2777 Reg == AMDGPU::SCC;
2778}
2779
2780bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI) {
2781 return MRI.getEncodingValue(Reg) & AMDGPU::HWEncoding::IS_HI16;
2782}
2783
2784#define MAP_REG2REG \
2785 using namespace AMDGPU; \
2786 switch (Reg.id()) { \
2787 default: \
2788 return Reg; \
2789 CASE_CI_VI(FLAT_SCR) \
2790 CASE_CI_VI(FLAT_SCR_LO) \
2791 CASE_CI_VI(FLAT_SCR_HI) \
2792 CASE_VI_GFX9PLUS(TTMP0) \
2793 CASE_VI_GFX9PLUS(TTMP1) \
2794 CASE_VI_GFX9PLUS(TTMP2) \
2795 CASE_VI_GFX9PLUS(TTMP3) \
2796 CASE_VI_GFX9PLUS(TTMP4) \
2797 CASE_VI_GFX9PLUS(TTMP5) \
2798 CASE_VI_GFX9PLUS(TTMP6) \
2799 CASE_VI_GFX9PLUS(TTMP7) \
2800 CASE_VI_GFX9PLUS(TTMP8) \
2801 CASE_VI_GFX9PLUS(TTMP9) \
2802 CASE_VI_GFX9PLUS(TTMP10) \
2803 CASE_VI_GFX9PLUS(TTMP11) \
2804 CASE_VI_GFX9PLUS(TTMP12) \
2805 CASE_VI_GFX9PLUS(TTMP13) \
2806 CASE_VI_GFX9PLUS(TTMP14) \
2807 CASE_VI_GFX9PLUS(TTMP15) \
2808 CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
2809 CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
2810 CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
2811 CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
2812 CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
2813 CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
2814 CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
2815 CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
2816 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
2817 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
2818 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
2819 CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
2820 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
2821 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
2822 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2823 CASE_VI_GFX9PLUS( \
2824 TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2825 CASE_GFXPRE11_GFX11PLUS(M0) \
2826 CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
2827 CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
2828 }
2829
2830#define CASE_CI_VI(node) \
2831 assert(!isSI(STI)); \
2832 case node: \
2833 return isCI(STI) ? node##_ci : node##_vi;
2834
2835#define CASE_VI_GFX9PLUS(node) \
2836 case node: \
2837 return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;
2838
2839#define CASE_GFXPRE11_GFX11PLUS(node) \
2840 case node: \
2841 return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;
2842
2843#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
2844 case node: \
2845 return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
2846
2847MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI) {
2848 if (STI.getTargetTriple().getArch() == Triple::r600)
2849 return Reg;
2850 MAP_REG2REG
2851}
2852
2853#undef CASE_CI_VI
2854#undef CASE_VI_GFX9PLUS
2855#undef CASE_GFXPRE11_GFX11PLUS
2856#undef CASE_GFXPRE11_GFX11PLUS_TO
2857
2858#define CASE_CI_VI(node) \
2859 case node##_ci: \
2860 case node##_vi: \
2861 return node;
2862#define CASE_VI_GFX9PLUS(node) \
2863 case node##_vi: \
2864 case node##_gfx9plus: \
2865 return node;
2866#define CASE_GFXPRE11_GFX11PLUS(node) \
2867 case node##_gfx11plus: \
2868 case node##_gfxpre11: \
2869 return node;
2870#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
2871
2872MCRegister mc2PseudoReg(MCRegister Reg) { MAP_REG2REG }
2873
2874bool isInlineValue(MCRegister Reg) {
2875 switch (Reg.id()) {
2876 case AMDGPU::SRC_SHARED_BASE_LO:
2877 case AMDGPU::SRC_SHARED_BASE:
2878 case AMDGPU::SRC_SHARED_LIMIT_LO:
2879 case AMDGPU::SRC_SHARED_LIMIT:
2880 case AMDGPU::SRC_PRIVATE_BASE_LO:
2881 case AMDGPU::SRC_PRIVATE_BASE:
2882 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2883 case AMDGPU::SRC_PRIVATE_LIMIT:
2884 case AMDGPU::SRC_FLAT_SCRATCH_BASE_LO:
2885 case AMDGPU::SRC_FLAT_SCRATCH_BASE_HI:
2886 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2887 return true;
2888 case AMDGPU::SRC_VCCZ:
2889 case AMDGPU::SRC_EXECZ:
2890 case AMDGPU::SRC_SCC:
2891 return true;
2892 case AMDGPU::SGPR_NULL:
2893 return true;
2894 default:
2895 return false;
2896 }
2897}
2898
2899#undef CASE_CI_VI
2900#undef CASE_VI_GFX9PLUS
2901#undef CASE_GFXPRE11_GFX11PLUS
2902#undef CASE_GFXPRE11_GFX11PLUS_TO
2903#undef MAP_REG2REG
2904
2905bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2906 assert(OpNo < Desc.NumOperands);
2907 unsigned OpType = Desc.operands()[OpNo].OperandType;
2908 return OpType >= AMDGPU::OPERAND_KIMM_FIRST &&
2909 OpType <= AMDGPU::OPERAND_KIMM_LAST;
2910}
2911
2912bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2913 assert(OpNo < Desc.NumOperands);
2914 unsigned OpType = Desc.operands()[OpNo].OperandType;
2915 switch (OpType) {
2916 case AMDGPU::OPERAND_REG_IMM_FP32:
2917 case AMDGPU::OPERAND_REG_IMM_FP64:
2918 case AMDGPU::OPERAND_REG_IMM_FP16:
2919 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2920 case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
2921 case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
2922 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2923 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2924 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2925 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2926 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2927 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2928 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2929 return true;
2930 default:
2931 return false;
2932 }
2933}
2934
2935bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2936 assert(OpNo < Desc.NumOperands);
2937 unsigned OpType = Desc.operands()[OpNo].OperandType;
2938 return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
2939 OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST) ||
2940 (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
2941 OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST);
2942}
2943
2944// Avoid using MCRegisterClass::getSize, since that function will go away
2945// (move from MC* level to Target* level). Return size in bits.
2946unsigned getRegBitWidth(unsigned RCID) {
2947 switch (RCID) {
2948 case AMDGPU::VGPR_16RegClassID:
2949 case AMDGPU::VGPR_16_Lo128RegClassID:
2950 case AMDGPU::SGPR_LO16RegClassID:
2951 case AMDGPU::AGPR_LO16RegClassID:
2952 return 16;
2953 case AMDGPU::SGPR_32RegClassID:
2954 case AMDGPU::VGPR_32RegClassID:
2955 case AMDGPU::VGPR_32_Lo256RegClassID:
2956 case AMDGPU::VRegOrLds_32RegClassID:
2957 case AMDGPU::AGPR_32RegClassID:
2958 case AMDGPU::VS_32RegClassID:
2959 case AMDGPU::AV_32RegClassID:
2960 case AMDGPU::SReg_32RegClassID:
2961 case AMDGPU::SReg_32_XM0RegClassID:
2962 case AMDGPU::SRegOrLds_32RegClassID:
2963 return 32;
2964 case AMDGPU::SGPR_64RegClassID:
2965 case AMDGPU::VS_64RegClassID:
2966 case AMDGPU::SReg_64RegClassID:
2967 case AMDGPU::VReg_64RegClassID:
2968 case AMDGPU::AReg_64RegClassID:
2969 case AMDGPU::SReg_64_XEXECRegClassID:
2970 case AMDGPU::VReg_64_Align2RegClassID:
2971 case AMDGPU::AReg_64_Align2RegClassID:
2972 case AMDGPU::AV_64RegClassID:
2973 case AMDGPU::AV_64_Align2RegClassID:
2974 case AMDGPU::VReg_64_Lo256_Align2RegClassID:
2975 case AMDGPU::VS_64_Lo256RegClassID:
2976 return 64;
2977 case AMDGPU::SGPR_96RegClassID:
2978 case AMDGPU::SReg_96RegClassID:
2979 case AMDGPU::VReg_96RegClassID:
2980 case AMDGPU::AReg_96RegClassID:
2981 case AMDGPU::VReg_96_Align2RegClassID:
2982 case AMDGPU::AReg_96_Align2RegClassID:
2983 case AMDGPU::AV_96RegClassID:
2984 case AMDGPU::AV_96_Align2RegClassID:
2985 case AMDGPU::VReg_96_Lo256_Align2RegClassID:
2986 return 96;
2987 case AMDGPU::SGPR_128RegClassID:
2988 case AMDGPU::SReg_128RegClassID:
2989 case AMDGPU::VReg_128RegClassID:
2990 case AMDGPU::AReg_128RegClassID:
2991 case AMDGPU::VReg_128_Align2RegClassID:
2992 case AMDGPU::AReg_128_Align2RegClassID:
2993 case AMDGPU::AV_128RegClassID:
2994 case AMDGPU::AV_128_Align2RegClassID:
2995 case AMDGPU::SReg_128_XNULLRegClassID:
2996 case AMDGPU::VReg_128_Lo256_Align2RegClassID:
2997 return 128;
2998 case AMDGPU::SGPR_160RegClassID:
2999 case AMDGPU::SReg_160RegClassID:
3000 case AMDGPU::VReg_160RegClassID:
3001 case AMDGPU::AReg_160RegClassID:
3002 case AMDGPU::VReg_160_Align2RegClassID:
3003 case AMDGPU::AReg_160_Align2RegClassID:
3004 case AMDGPU::AV_160RegClassID:
3005 case AMDGPU::AV_160_Align2RegClassID:
3006 case AMDGPU::VReg_160_Lo256_Align2RegClassID:
3007 return 160;
3008 case AMDGPU::SGPR_192RegClassID:
3009 case AMDGPU::SReg_192RegClassID:
3010 case AMDGPU::VReg_192RegClassID:
3011 case AMDGPU::AReg_192RegClassID:
3012 case AMDGPU::VReg_192_Align2RegClassID:
3013 case AMDGPU::AReg_192_Align2RegClassID:
3014 case AMDGPU::AV_192RegClassID:
3015 case AMDGPU::AV_192_Align2RegClassID:
3016 case AMDGPU::VReg_192_Lo256_Align2RegClassID:
3017 return 192;
3018 case AMDGPU::SGPR_224RegClassID:
3019 case AMDGPU::SReg_224RegClassID:
3020 case AMDGPU::VReg_224RegClassID:
3021 case AMDGPU::AReg_224RegClassID:
3022 case AMDGPU::VReg_224_Align2RegClassID:
3023 case AMDGPU::AReg_224_Align2RegClassID:
3024 case AMDGPU::AV_224RegClassID:
3025 case AMDGPU::AV_224_Align2RegClassID:
3026 case AMDGPU::VReg_224_Lo256_Align2RegClassID:
3027 return 224;
3028 case AMDGPU::SGPR_256RegClassID:
3029 case AMDGPU::SReg_256RegClassID:
3030 case AMDGPU::VReg_256RegClassID:
3031 case AMDGPU::AReg_256RegClassID:
3032 case AMDGPU::VReg_256_Align2RegClassID:
3033 case AMDGPU::AReg_256_Align2RegClassID:
3034 case AMDGPU::AV_256RegClassID:
3035 case AMDGPU::AV_256_Align2RegClassID:
3036 case AMDGPU::SReg_256_XNULLRegClassID:
3037 case AMDGPU::VReg_256_Lo256_Align2RegClassID:
3038 return 256;
3039 case AMDGPU::SGPR_288RegClassID:
3040 case AMDGPU::SReg_288RegClassID:
3041 case AMDGPU::VReg_288RegClassID:
3042 case AMDGPU::AReg_288RegClassID:
3043 case AMDGPU::VReg_288_Align2RegClassID:
3044 case AMDGPU::AReg_288_Align2RegClassID:
3045 case AMDGPU::AV_288RegClassID:
3046 case AMDGPU::AV_288_Align2RegClassID:
3047 case AMDGPU::VReg_288_Lo256_Align2RegClassID:
3048 return 288;
3049 case AMDGPU::SGPR_320RegClassID:
3050 case AMDGPU::SReg_320RegClassID:
3051 case AMDGPU::VReg_320RegClassID:
3052 case AMDGPU::AReg_320RegClassID:
3053 case AMDGPU::VReg_320_Align2RegClassID:
3054 case AMDGPU::AReg_320_Align2RegClassID:
3055 case AMDGPU::AV_320RegClassID:
3056 case AMDGPU::AV_320_Align2RegClassID:
3057 case AMDGPU::VReg_320_Lo256_Align2RegClassID:
3058 return 320;
3059 case AMDGPU::SGPR_352RegClassID:
3060 case AMDGPU::SReg_352RegClassID:
3061 case AMDGPU::VReg_352RegClassID:
3062 case AMDGPU::AReg_352RegClassID:
3063 case AMDGPU::VReg_352_Align2RegClassID:
3064 case AMDGPU::AReg_352_Align2RegClassID:
3065 case AMDGPU::AV_352RegClassID:
3066 case AMDGPU::AV_352_Align2RegClassID:
3067 case AMDGPU::VReg_352_Lo256_Align2RegClassID:
3068 return 352;
3069 case AMDGPU::SGPR_384RegClassID:
3070 case AMDGPU::SReg_384RegClassID:
3071 case AMDGPU::VReg_384RegClassID:
3072 case AMDGPU::AReg_384RegClassID:
3073 case AMDGPU::VReg_384_Align2RegClassID:
3074 case AMDGPU::AReg_384_Align2RegClassID:
3075 case AMDGPU::AV_384RegClassID:
3076 case AMDGPU::AV_384_Align2RegClassID:
3077 case AMDGPU::VReg_384_Lo256_Align2RegClassID:
3078 return 384;
3079 case AMDGPU::SGPR_512RegClassID:
3080 case AMDGPU::SReg_512RegClassID:
3081 case AMDGPU::VReg_512RegClassID:
3082 case AMDGPU::AReg_512RegClassID:
3083 case AMDGPU::VReg_512_Align2RegClassID:
3084 case AMDGPU::AReg_512_Align2RegClassID:
3085 case AMDGPU::AV_512RegClassID:
3086 case AMDGPU::AV_512_Align2RegClassID:
3087 case AMDGPU::VReg_512_Lo256_Align2RegClassID:
3088 return 512;
3089 case AMDGPU::SGPR_1024RegClassID:
3090 case AMDGPU::SReg_1024RegClassID:
3091 case AMDGPU::VReg_1024RegClassID:
3092 case AMDGPU::AReg_1024RegClassID:
3093 case AMDGPU::VReg_1024_Align2RegClassID:
3094 case AMDGPU::AReg_1024_Align2RegClassID:
3095 case AMDGPU::AV_1024RegClassID:
3096 case AMDGPU::AV_1024_Align2RegClassID:
3097 case AMDGPU::VReg_1024_Lo256_Align2RegClassID:
3098 return 1024;
3099 default:
3100 llvm_unreachable("Unexpected register class");
3101 }
3102}
3103
3104unsigned getRegBitWidth(const MCRegisterClass &RC) {
3105 return getRegBitWidth(RCID: RC.getID());
3106}
3107
3108bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
3109 if (isInlinableIntLiteral(Literal))
3110 return true;
3111
3112 uint64_t Val = static_cast<uint64_t>(Literal);
3113 return (Val == llvm::bit_cast<uint64_t>(from: 0.0)) ||
3114 (Val == llvm::bit_cast<uint64_t>(from: 1.0)) ||
3115 (Val == llvm::bit_cast<uint64_t>(from: -1.0)) ||
3116 (Val == llvm::bit_cast<uint64_t>(from: 0.5)) ||
3117 (Val == llvm::bit_cast<uint64_t>(from: -0.5)) ||
3118 (Val == llvm::bit_cast<uint64_t>(from: 2.0)) ||
3119 (Val == llvm::bit_cast<uint64_t>(from: -2.0)) ||
3120 (Val == llvm::bit_cast<uint64_t>(from: 4.0)) ||
3121 (Val == llvm::bit_cast<uint64_t>(from: -4.0)) ||
3122 (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
3123}
3124
3125bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
3126 if (isInlinableIntLiteral(Literal))
3127 return true;
3128
3129 // The actual type of the operand does not seem to matter as long
3130 // as the bits match one of the inline immediate values. For example:
3131 //
3132 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
3133 // so it is a legal inline immediate.
3134 //
3135 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
3136 // floating-point, so it is a legal inline immediate.
3137
3138 uint32_t Val = static_cast<uint32_t>(Literal);
3139 return (Val == llvm::bit_cast<uint32_t>(from: 0.0f)) ||
3140 (Val == llvm::bit_cast<uint32_t>(from: 1.0f)) ||
3141 (Val == llvm::bit_cast<uint32_t>(from: -1.0f)) ||
3142 (Val == llvm::bit_cast<uint32_t>(from: 0.5f)) ||
3143 (Val == llvm::bit_cast<uint32_t>(from: -0.5f)) ||
3144 (Val == llvm::bit_cast<uint32_t>(from: 2.0f)) ||
3145 (Val == llvm::bit_cast<uint32_t>(from: -2.0f)) ||
3146 (Val == llvm::bit_cast<uint32_t>(from: 4.0f)) ||
3147 (Val == llvm::bit_cast<uint32_t>(from: -4.0f)) ||
3148 (Val == 0x3e22f983 && HasInv2Pi);
3149}
3150
3151bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
3152 if (!HasInv2Pi)
3153 return false;
3154 if (isInlinableIntLiteral(Literal))
3155 return true;
3156 uint16_t Val = static_cast<uint16_t>(Literal);
3157 return Val == 0x3F00 || // 0.5
3158 Val == 0xBF00 || // -0.5
3159 Val == 0x3F80 || // 1.0
3160 Val == 0xBF80 || // -1.0
3161 Val == 0x4000 || // 2.0
3162 Val == 0xC000 || // -2.0
3163 Val == 0x4080 || // 4.0
3164 Val == 0xC080 || // -4.0
3165 Val == 0x3E22; // 1.0 / (2.0 * pi)
3166}
3167
3168bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi) {
3169 return isInlinableLiteral32(Literal, HasInv2Pi);
3170}
3171
3172bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
3173 if (!HasInv2Pi)
3174 return false;
3175 if (isInlinableIntLiteral(Literal))
3176 return true;
3177 uint16_t Val = static_cast<uint16_t>(Literal);
3178 return Val == 0x3C00 || // 1.0
3179 Val == 0xBC00 || // -1.0
3180 Val == 0x3800 || // 0.5
3181 Val == 0xB800 || // -0.5
3182 Val == 0x4000 || // 2.0
3183 Val == 0xC000 || // -2.0
3184 Val == 0x4400 || // 4.0
3185 Val == 0xC400 || // -4.0
3186 Val == 0x3118; // 1/2pi
3187}
3188
3189std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) {
3190 // Unfortunately, the Instruction Set Architecture Reference Guide is
3191 // misleading about how the inline operands work for (packed) 16-bit
3192 // instructions. In a nutshell, the actual HW behavior is:
3193 //
3194 // - integer encodings (-16 .. 64) are always produced as sign-extended
3195 // 32-bit values
3196 // - float encodings are produced as:
3197 // - for F16 instructions: corresponding half-precision float values in
3198 // the LSBs, 0 in the MSBs
3199 // - for UI16 instructions: corresponding single-precision float value
3200 int32_t Signed = static_cast<int32_t>(Literal);
3201 if (Signed >= 0 && Signed <= 64)
3202 return 128 + Signed;
3203
3204 if (Signed >= -16 && Signed <= -1)
3205 return 192 + std::abs(x: Signed);
3206
3207 if (IsFloat) {
3208 // clang-format off
3209 switch (Literal) {
3210 case 0x3800: return 240; // 0.5
3211 case 0xB800: return 241; // -0.5
3212 case 0x3C00: return 242; // 1.0
3213 case 0xBC00: return 243; // -1.0
3214 case 0x4000: return 244; // 2.0
3215 case 0xC000: return 245; // -2.0
3216 case 0x4400: return 246; // 4.0
3217 case 0xC400: return 247; // -4.0
3218 case 0x3118: return 248; // 1.0 / (2.0 * pi)
3219 default: break;
3220 }
3221 // clang-format on
3222 } else {
3223 // clang-format off
3224 switch (Literal) {
3225 case 0x3F000000: return 240; // 0.5
3226 case 0xBF000000: return 241; // -0.5
3227 case 0x3F800000: return 242; // 1.0
3228 case 0xBF800000: return 243; // -1.0
3229 case 0x40000000: return 244; // 2.0
3230 case 0xC0000000: return 245; // -2.0
3231 case 0x40800000: return 246; // 4.0
3232 case 0xC0800000: return 247; // -4.0
3233 case 0x3E22F983: return 248; // 1.0 / (2.0 * pi)
3234 default: break;
3235 }
3236 // clang-format on
3237 }
3238
3239 return {};
3240}

// Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction
// or nullopt.
std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) {
  return getInlineEncodingV216(/*IsFloat=*/false, Literal);
}

// Encoding of the literal as an inline constant for a V_PK_*_BF16 instruction
// or nullopt.
std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal) {
  int32_t Signed = static_cast<int32_t>(Literal);
  if (Signed >= 0 && Signed <= 64)
    return 128 + Signed;

  if (Signed >= -16 && Signed <= -1)
    return 192 + std::abs(Signed);

  // clang-format off
  switch (Literal) {
  case 0x3F00: return 240; // 0.5
  case 0xBF00: return 241; // -0.5
  case 0x3F80: return 242; // 1.0
  case 0xBF80: return 243; // -1.0
  case 0x4000: return 244; // 2.0
  case 0xC000: return 245; // -2.0
  case 0x4080: return 246; // 4.0
  case 0xC080: return 247; // -4.0
  case 0x3E22: return 248; // 1.0 / (2.0 * pi)
  default: break;
  }
  // clang-format on

  return std::nullopt;
}
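
// Illustrative examples, following directly from the code above:
//   getInlineEncodingV2BF16(0x3F80)     -> 242 (bf16 1.0 in the low half,
//                                               0 in the high half)
//   getInlineEncodingV2BF16(0xFFFFFFF0) -> 208 (sign-extended integer -16)
//   getInlineEncodingV2BF16(0x3F803F80) -> std::nullopt (not inlinable)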

// Encoding of the literal as an inline constant for a V_PK_*_F16 instruction
// or nullopt.
std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) {
  return getInlineEncodingV216(/*IsFloat=*/true, Literal);
}

// Encoding of the literal as an inline constant for a V_PK_FMAC_F16
// instruction or nullopt. This accounts for different inline constant
// behavior across generations:
// - Pre-GFX11: fp16 inline constants have the value in the low 16 bits and
//   0 in the high 16 bits
// - GFX11+: fp16 inline constants are duplicated into both halves
std::optional<unsigned> getPKFMACF16InlineEncoding(uint32_t Literal,
                                                   bool IsGFX11Plus) {
  // Pre-GFX11 behavior: f16 in the low bits, 0 in the high bits.
  if (!IsGFX11Plus)
    return getInlineEncodingV216(/*IsFloat=*/true, Literal);

  // GFX11+ behavior: f16 duplicated in both halves.
  // First, check for sign-extended integer inline constants (-16 to 64).
  // These work the same across all generations.
  int32_t Signed = static_cast<int32_t>(Literal);
  if (Signed >= 0 && Signed <= 64)
    return 128 + Signed;

  if (Signed >= -16 && Signed <= -1)
    return 192 + std::abs(Signed);

  // For float inline constants on GFX11+, both halves must be equal.
  uint16_t Lo = static_cast<uint16_t>(Literal);
  uint16_t Hi = static_cast<uint16_t>(Literal >> 16);
  if (Lo != Hi)
    return std::nullopt;
  return getInlineEncodingV216(/*IsFloat=*/true, Lo);
}
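
// Illustrative examples of the generation split handled above:
//   getPKFMACF16InlineEncoding(0x00003C00, /*IsGFX11Plus=*/false) -> 242
//     (pre-GFX11 expects f16 1.0 in the low half, 0 in the high half)
//   getPKFMACF16InlineEncoding(0x3C003C00, /*IsGFX11Plus=*/true)  -> 242
//     (GFX11+ requires the value splatted into both halves)
//   getPKFMACF16InlineEncoding(0x00003C00, /*IsGFX11Plus=*/true)
//     -> std::nullopt (the two halves differ)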

// Whether the given literal can be inlined for a V_PK_* instruction.
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) {
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    return getInlineEncodingV216(/*IsFloat=*/false, Literal).has_value();
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return getInlineEncodingV216(/*IsFloat=*/true, Literal).has_value();
  case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
    llvm_unreachable("OPERAND_REG_IMM_V2FP16_SPLAT is not supported");
  case AMDGPU::OPERAND_REG_IMM_V2BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
    return isInlinableLiteralV2BF16(Literal);
  case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
    return false;
  default:
    llvm_unreachable("bad packed operand type");
  }
}

// Whether the given literal can be inlined for a V_PK_*_IU16 instruction.
bool isInlinableLiteralV2I16(uint32_t Literal) {
  return getInlineEncodingV2I16(Literal).has_value();
}

// Whether the given literal can be inlined for a V_PK_*_BF16 instruction.
bool isInlinableLiteralV2BF16(uint32_t Literal) {
  return getInlineEncodingV2BF16(Literal).has_value();
}

// Whether the given literal can be inlined for a V_PK_*_F16 instruction.
bool isInlinableLiteralV2F16(uint32_t Literal) {
  return getInlineEncodingV2F16(Literal).has_value();
}

// Whether the given literal can be inlined for a V_PK_FMAC_F16 instruction.
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus) {
  return getPKFMACF16InlineEncoding(Literal, IsGFX11Plus).has_value();
}

bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
  if (IsFP64)
    return !Lo_32(Val);

  return isUInt<32>(Val) || isInt<32>(Val);
}
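
// For example, the f64 literal 1.0 (0x3FF0000000000000) has an all-zero low
// word, so isValid32BitLiteral(0x3FF0000000000000, /*IsFP64=*/true) holds:
// the value is recoverable from its high 32 bits alone. A value such as
// 0x3FF0000000000001 is not representable this way and is rejected.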

int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit) {
  switch (Type) {
  default:
    break;
  case OPERAND_REG_IMM_BF16:
  case OPERAND_REG_IMM_FP16:
  case OPERAND_REG_INLINE_C_BF16:
  case OPERAND_REG_INLINE_C_FP16:
    return Imm & 0xffff;
  case OPERAND_INLINE_SPLIT_BARRIER_INT32:
  case OPERAND_REG_IMM_FP32:
  case OPERAND_REG_IMM_INT32:
  case OPERAND_REG_IMM_V2BF16:
  case OPERAND_REG_IMM_V2FP16:
  case OPERAND_REG_IMM_V2FP16_SPLAT:
  case OPERAND_REG_IMM_V2FP32:
  case OPERAND_REG_IMM_V2INT16:
  case OPERAND_REG_IMM_V2INT32:
  case OPERAND_REG_INLINE_AC_FP32:
  case OPERAND_REG_INLINE_AC_INT32:
  case OPERAND_REG_INLINE_C_FP32:
  case OPERAND_REG_INLINE_C_INT32:
    return Lo_32(Imm);
  case OPERAND_REG_IMM_FP64:
    return IsLit ? Imm : Hi_32(Imm);
  }
  return Imm;
}
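
// Illustrative examples of the encodings above:
//   encode32BitLiteral(0x12340001, OPERAND_REG_IMM_FP16, false) -> 0x0001
//     (16-bit operands keep only the low half)
//   encode32BitLiteral(0x3FF0000000000000, OPERAND_REG_IMM_FP64, false)
//     -> 0x3FF00000 (the high word; the low word must be zero, see
//        isValid32BitLiteral above)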

bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_Gfx:
  case CallingConv::AMDGPU_CS_Chain:
  case CallingConv::AMDGPU_CS_ChainPreserve:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return A->hasAttribute(Attribute::InReg) ||
           A->hasAttribute(Attribute::ByVal);
  default:
    // TODO: treat i1 as divergent?
    return A->hasAttribute(Attribute::InReg);
  }
}

bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = CB->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_Gfx:
  case CallingConv::AMDGPU_CS_Chain:
  case CallingConv::AMDGPU_CS_ChainPreserve:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
           CB->paramHasAttr(ArgNo, Attribute::ByVal);
  default:
    return CB->paramHasAttr(ArgNo, Attribute::InReg);
  }
}

static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
  return isGCN3Encoding(ST) || isGFX10Plus(ST);
}

bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset) {
  if (isGFX12Plus(ST))
    return isUInt<23>(EncodedOffset);

  return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
                               : isUInt<8>(EncodedOffset);
}

bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset, bool IsBuffer) {
  if (isGFX12Plus(ST)) {
    if (IsBuffer && EncodedOffset < 0)
      return false;
    return isInt<24>(EncodedOffset);
  }

  return !IsBuffer && hasSMRDSignedImmOffset(ST) && isInt<21>(EncodedOffset);
}

static bool isDwordAligned(uint64_t ByteOffset) {
  return (ByteOffset & 3) == 0;
}

uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
                                uint64_t ByteOffset) {
  if (hasSMEMByteOffset(ST))
    return ByteOffset;

  assert(isDwordAligned(ByteOffset));
  return ByteOffset >> 2;
}
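
// For example, on a target without SMEM byte offsets a byte offset of 16 is
// converted to the dword offset 4, while on GCN3 and GFX10+ encodings the
// byte offset is returned unchanged.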

std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                            int64_t ByteOffset, bool IsBuffer,
                                            bool HasSOffset) {
  // For unbuffered smem loads, it is illegal for the Immediate Offset to be
  // negative if the resulting (Offset + (M0 or SOffset or zero)) is negative.
  // Handle the case where SOffset is not present.
  if (!IsBuffer && !HasSOffset && ByteOffset < 0 && hasSMRDSignedImmOffset(ST))
    return std::nullopt;

  if (isGFX12Plus(ST)) // 24-bit signed offsets
    return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
                                 : std::nullopt;

  // The signed version is always a byte offset.
  if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
    assert(hasSMEMByteOffset(ST));
    return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
                                 : std::nullopt;
  }

  if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
    return std::nullopt;

  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
             ? std::optional<int64_t>(EncodedOffset)
             : std::nullopt;
}
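
// Illustrative example: on a pre-GCN3 target (dword units, unsigned 8-bit
// encoding) a byte offset of 256 encodes as dword offset 64, while a
// non-dword-aligned byte offset such as 6 yields std::nullopt.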

std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                     int64_t ByteOffset) {
  if (!isCI(ST) || !isDwordAligned(ByteOffset))
    return std::nullopt;

  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
                                   : std::nullopt;
}

unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) {
  if (ST.getFeatureBits().test(FeatureFlatOffsetBits12))
    return 12;
  if (ST.getFeatureBits().test(FeatureFlatOffsetBits24))
    return 24;
  return 13;
}

namespace {

struct SourceOfDivergence {
  unsigned Intr;
};
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);

struct AlwaysUniform {
  unsigned Intr;
};
const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);

#define GET_SourcesOfDivergence_IMPL
#define GET_UniformIntrinsics_IMPL
#define GET_Gfx9BufferFormat_IMPL
#define GET_Gfx10BufferFormat_IMPL
#define GET_Gfx11PlusBufferFormat_IMPL

#include "AMDGPUGenSearchableTables.inc"

} // end anonymous namespace

bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
  return lookupSourceOfDivergence(IntrID);
}

bool isIntrinsicAlwaysUniform(unsigned IntrID) {
  return lookupAlwaysUniform(IntrID);
}

const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI) {
  return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(
                                BitsPerComp, NumComponents, NumFormat)
         : isGFX10(STI)
             ? getGfx10BufferFormatInfo(BitsPerComp, NumComponents, NumFormat)
             : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat);
}

const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI) {
  return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
         : isGFX10(STI)   ? getGfx10BufferFormatInfo(Format)
                          : getGfx9BufferFormatInfo(Format);
}

const MCRegisterClass *getVGPRPhysRegClass(MCRegister Reg,
                                           const MCRegisterInfo &MRI) {
  const unsigned VGPRClasses[] = {
      AMDGPU::VGPR_16RegClassID,  AMDGPU::VGPR_32RegClassID,
      AMDGPU::VReg_64RegClassID,  AMDGPU::VReg_96RegClassID,
      AMDGPU::VReg_128RegClassID, AMDGPU::VReg_160RegClassID,
      AMDGPU::VReg_192RegClassID, AMDGPU::VReg_224RegClassID,
      AMDGPU::VReg_256RegClassID, AMDGPU::VReg_288RegClassID,
      AMDGPU::VReg_320RegClassID, AMDGPU::VReg_352RegClassID,
      AMDGPU::VReg_384RegClassID, AMDGPU::VReg_512RegClassID,
      AMDGPU::VReg_1024RegClassID};

  for (unsigned RCID : VGPRClasses) {
    const MCRegisterClass &RC = MRI.getRegClass(RCID);
    if (RC.contains(Reg))
      return &RC;
  }

  return nullptr;
}

unsigned getVGPREncodingMSBs(MCRegister Reg, const MCRegisterInfo &MRI) {
  unsigned Enc = MRI.getEncodingValue(Reg);
  unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK;
  return Idx >> 8;
}
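
// Illustrative example (assuming the hardware encoding index of vN is N): on
// a target with more than 256 VGPRs, v260 has index 260 = 0x104, so its MSBs
// value is 1 and the low 8 bits (4) are what fits in the instruction's VGPR
// field.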

MCRegister getVGPRWithMSBs(MCRegister Reg, unsigned MSBs,
                           const MCRegisterInfo &MRI) {
  unsigned Enc = MRI.getEncodingValue(Reg);
  unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK;
  if (Idx >= 0x100)
    return MCRegister();

  const MCRegisterClass *RC = getVGPRPhysRegClass(Reg, MRI);
  if (!RC)
    return MCRegister();

  Idx |= MSBs << 8;
  if (RC->getID() == AMDGPU::VGPR_16RegClassID) {
    // This class has 2048 registers with interleaved lo16 and hi16.
    Idx *= 2;
    if (Enc & AMDGPU::HWEncoding::IS_HI16)
      ++Idx;
  }

  return RC->getRegister(Idx);
}
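
// Illustrative example (again assuming vN encodes as index N): v4 with
// MSBs == 1 becomes index 0x104, i.e. v260. For the 16-bit class, the
// doubled, interleaved indexing maps the lo16 half of that register to
// VGPR_16 index 0x208 and the hi16 half to 0x209.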

static std::optional<unsigned>
convertSetRegImmToVgprMSBs(unsigned Imm, unsigned Simm16,
                           bool HasSetregVGPRMSBFixup) {
  constexpr unsigned VGPRMSBShift =
      llvm::countr_zero_constexpr<unsigned>(AMDGPU::Hwreg::DST_VGPR_MSB);

  auto [HwRegId, Offset, Size] = Hwreg::HwregEncoding::decode(Simm16);
  if (HwRegId != Hwreg::ID_MODE ||
      (!HasSetregVGPRMSBFixup && (Offset + Size) < VGPRMSBShift))
    return {};
  // If there is SetregVGPRMSBFixup then Offset is ignored.
  if (!HasSetregVGPRMSBFixup)
    Imm <<= Offset;
  Imm = (Imm & Hwreg::VGPR_MSB_MASK) >> VGPRMSBShift;
  if (!HasSetregVGPRMSBFixup)
    Imm &= llvm::maskTrailingOnes<unsigned>(Size);
  return llvm::rotr<uint8_t>(static_cast<uint8_t>(Imm), /*R=*/2);
}

std::optional<unsigned> convertSetRegImmToVgprMSBs(const MachineInstr &MI,
                                                   bool HasSetregVGPRMSBFixup) {
  assert(MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32);
  return convertSetRegImmToVgprMSBs(MI.getOperand(0).getImm(),
                                    MI.getOperand(1).getImm(),
                                    HasSetregVGPRMSBFixup);
}

std::optional<unsigned> convertSetRegImmToVgprMSBs(const MCInst &MI,
                                                   bool HasSetregVGPRMSBFixup) {
  assert(MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_gfx12);
  return convertSetRegImmToVgprMSBs(MI.getOperand(0).getImm(),
                                    MI.getOperand(1).getImm(),
                                    HasSetregVGPRMSBFixup);
}

std::pair<const AMDGPU::OpName *, const AMDGPU::OpName *>
getVGPRLoweringOperandTables(const MCInstrDesc &Desc) {
  static const AMDGPU::OpName VOPOps[4] = {
      AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2,
      AMDGPU::OpName::vdst};
  static const AMDGPU::OpName VDSOps[4] = {
      AMDGPU::OpName::addr, AMDGPU::OpName::data0, AMDGPU::OpName::data1,
      AMDGPU::OpName::vdst};
  static const AMDGPU::OpName FLATOps[4] = {
      AMDGPU::OpName::vaddr, AMDGPU::OpName::vdata,
      AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdst};
  static const AMDGPU::OpName BUFOps[4] = {
      AMDGPU::OpName::vaddr, AMDGPU::OpName::NUM_OPERAND_NAMES,
      AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdata};
  static const AMDGPU::OpName VIMGOps[4] = {
      AMDGPU::OpName::vaddr0, AMDGPU::OpName::vaddr1, AMDGPU::OpName::vaddr2,
      AMDGPU::OpName::vdata};

  // For VOPD instructions, the MSB of a Y component operand's VGPR address
  // must match that of the corresponding X component operand; otherwise the
  // two instructions must not be combined into a VOPD.
  static const AMDGPU::OpName VOPDOpsX[4] = {
      AMDGPU::OpName::src0X, AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vsrc2X,
      AMDGPU::OpName::vdstX};
  static const AMDGPU::OpName VOPDOpsY[4] = {
      AMDGPU::OpName::src0Y, AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vsrc2Y,
      AMDGPU::OpName::vdstY};

  // VOP2 MADMK instructions use a src0, imm, src1 operand scheme.
  static const AMDGPU::OpName VOP2MADMKOps[4] = {
      AMDGPU::OpName::src0, AMDGPU::OpName::NUM_OPERAND_NAMES,
      AMDGPU::OpName::src1, AMDGPU::OpName::vdst};
  static const AMDGPU::OpName VOPDFMAMKOpsX[4] = {
      AMDGPU::OpName::src0X, AMDGPU::OpName::NUM_OPERAND_NAMES,
      AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vdstX};
  static const AMDGPU::OpName VOPDFMAMKOpsY[4] = {
      AMDGPU::OpName::src0Y, AMDGPU::OpName::NUM_OPERAND_NAMES,
      AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vdstY};

  // SIInstrFlags occupies more than 32 bits; use the full width so that high
  // flags such as FlatScratch are not truncated away.
  uint64_t TSFlags = Desc.TSFlags;

  if (TSFlags &
      (SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | SIInstrFlags::VOP3 |
       SIInstrFlags::VOP3P | SIInstrFlags::VOPC | SIInstrFlags::DPP)) {
    switch (Desc.getOpcode()) {
    // LD_SCALE operands ignore the MSB.
    case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32:
    case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32_gfx1250:
    case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64:
    case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64_gfx1250:
      return {};
    case AMDGPU::V_FMAMK_F16:
    case AMDGPU::V_FMAMK_F16_t16:
    case AMDGPU::V_FMAMK_F16_t16_gfx12:
    case AMDGPU::V_FMAMK_F16_fake16:
    case AMDGPU::V_FMAMK_F16_fake16_gfx12:
    case AMDGPU::V_FMAMK_F32:
    case AMDGPU::V_FMAMK_F32_gfx12:
    case AMDGPU::V_FMAMK_F64:
    case AMDGPU::V_FMAMK_F64_gfx1250:
      return {VOP2MADMKOps, nullptr};
    default:
      break;
    }
    return {VOPOps, nullptr};
  }

  if (TSFlags & SIInstrFlags::DS)
    return {VDSOps, nullptr};

  if (TSFlags & SIInstrFlags::FLAT)
    return {FLATOps, nullptr};

  if (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))
    return {BUFOps, nullptr};

  if (TSFlags & SIInstrFlags::VIMAGE)
    return {VIMGOps, nullptr};

  if (AMDGPU::isVOPD(Desc.getOpcode())) {
    auto [OpX, OpY] = getVOPDComponents(Desc.getOpcode());
    return {(OpX == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsX : VOPDOpsX,
            (OpY == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsY : VOPDOpsY};
  }

  assert(!(TSFlags & SIInstrFlags::MIMG));

  if (TSFlags & (SIInstrFlags::VSAMPLE | SIInstrFlags::EXP))
    llvm_unreachable("Sample and export VGPR lowering is not implemented and"
                     " these instructions are not expected on gfx1250");

  return {};
}

bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode) {
  uint64_t TSFlags = MII.get(Opcode).TSFlags;

  if (TSFlags & SIInstrFlags::SMRD)
    return !getSMEMIsBuffer(Opcode);
  if (!(TSFlags & SIInstrFlags::FLAT))
    return false;

  // Only SV and SVS modes are supported.
  if (TSFlags & SIInstrFlags::FlatScratch)
    return hasNamedOperand(Opcode, OpName::vaddr);

  // Only GVS mode is supported.
  return hasNamedOperand(Opcode, OpName::vaddr) &&
         hasNamedOperand(Opcode, OpName::saddr);
}

bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc, const MCInstrInfo &MII,
                             const MCSubtargetInfo &ST) {
  for (auto OpName : {OpName::vdst, OpName::src0, OpName::src1, OpName::src2}) {
    int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
    if (Idx == -1)
      continue;

    const MCOperandInfo &OpInfo = OpDesc.operands()[Idx];
    int16_t RegClass = MII.getOpRegClassID(
        OpInfo, ST.getHwMode(MCSubtargetInfo::HwMode_RegInfo));
    if (RegClass == AMDGPU::VReg_64RegClassID ||
        RegClass == AMDGPU::VReg_64_Align2RegClassID)
      return true;
  }

  return false;
}

bool isDPALU_DPP32BitOpc(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::V_MUL_LO_U32_e64:
  case AMDGPU::V_MUL_LO_U32_e64_dpp:
  case AMDGPU::V_MUL_LO_U32_e64_dpp_gfx1250:
  case AMDGPU::V_MUL_HI_U32_e64:
  case AMDGPU::V_MUL_HI_U32_e64_dpp:
  case AMDGPU::V_MUL_HI_U32_e64_dpp_gfx1250:
  case AMDGPU::V_MUL_HI_I32_e64:
  case AMDGPU::V_MUL_HI_I32_e64_dpp:
  case AMDGPU::V_MUL_HI_I32_e64_dpp_gfx1250:
  case AMDGPU::V_MAD_U32_e64:
  case AMDGPU::V_MAD_U32_e64_dpp:
  case AMDGPU::V_MAD_U32_e64_dpp_gfx1250:
    return true;
  default:
    return false;
  }
}

bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII,
                 const MCSubtargetInfo &ST) {
  if (!ST.hasFeature(AMDGPU::FeatureDPALU_DPP))
    return false;

  if (isDPALU_DPP32BitOpc(OpDesc.getOpcode()))
    return ST.hasFeature(AMDGPU::FeatureGFX1250Insts);

  return hasAny64BitVGPROperands(OpDesc, MII, ST);
}

unsigned getLdsDwGranularity(const MCSubtargetInfo &ST) {
  if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize32768))
    return 64;
  if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize65536))
    return 128;
  if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize163840))
    return 320;
  if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize327680))
    return 512;
  return 64; // In sync with getAddressableLocalMemorySize
}
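
// The returned granularity is in dwords: e.g. the 64-dword granularity of
// the 32 KiB configuration corresponds to 256-byte LDS allocation units,
// and the 320-dword granularity of the 160 KiB configuration to 1280-byte
// units.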

bool isPackedFP32Inst(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::V_PK_ADD_F32:
  case AMDGPU::V_PK_ADD_F32_gfx12:
  case AMDGPU::V_PK_MUL_F32:
  case AMDGPU::V_PK_MUL_F32_gfx12:
  case AMDGPU::V_PK_FMA_F32:
  case AMDGPU::V_PK_FMA_F32_gfx12:
    return true;
  default:
    return false;
  }
}

const std::array<unsigned, 3> &ClusterDimsAttr::getDims() const {
  assert(isFixedDims() && "expect kind to be FixedDims");
  return Dims;
}

std::string ClusterDimsAttr::to_string() const {
  SmallString<10> Buffer;
  raw_svector_ostream OS(Buffer);

  switch (getKind()) {
  case Kind::Unknown:
    return "";
  case Kind::NoCluster: {
    OS << EncoNoCluster << ',' << EncoNoCluster << ',' << EncoNoCluster;
    return Buffer.c_str();
  }
  case Kind::VariableDims: {
    OS << EncoVariableDims << ',' << EncoVariableDims << ','
       << EncoVariableDims;
    return Buffer.c_str();
  }
  case Kind::FixedDims: {
    OS << Dims[0] << ',' << Dims[1] << ',' << Dims[2];
    return Buffer.c_str();
  }
  }
  llvm_unreachable("Unknown ClusterDimsAttr kind");
}
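
// Illustrative example: a FixedDims attribute of {2, 1, 1} prints as
// "2,1,1"; NoCluster and VariableDims print their respective sentinel
// encodings in the same three-field form, and Unknown prints as the empty
// string.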

ClusterDimsAttr ClusterDimsAttr::get(const Function &F) {
  std::optional<SmallVector<unsigned>> Attr =
      getIntegerVecAttribute(F, "amdgpu-cluster-dims", /*Size=*/3);
  ClusterDimsAttr::Kind AttrKind = Kind::FixedDims;

  if (!Attr.has_value())
    AttrKind = Kind::Unknown;
  else if (all_of(*Attr, equal_to(EncoNoCluster)))
    AttrKind = Kind::NoCluster;
  else if (all_of(*Attr, equal_to(EncoVariableDims)))
    AttrKind = Kind::VariableDims;

  ClusterDimsAttr A(AttrKind);
  if (AttrKind == Kind::FixedDims)
    A.Dims = {(*Attr)[0], (*Attr)[1], (*Attr)[2]};

  return A;
}
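
// Illustrative example (assuming getIntegerVecAttribute parses the
// "amdgpu-cluster-dims" attribute as a comma-separated triple): a function
// carrying "amdgpu-cluster-dims"="2,2,1" yields FixedDims {2, 2, 1}, a
// triple of all-EncoNoCluster sentinels yields NoCluster, and a missing
// attribute yields Unknown.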

} // namespace AMDGPU

raw_ostream &operator<<(raw_ostream &OS,
                        const AMDGPU::IsaInfo::TargetIDSetting S) {
  switch (S) {
  case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported):
    OS << "Unsupported";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::Any):
    OS << "Any";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::Off):
    OS << "Off";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::On):
    OS << "On";
    break;
  }
  return OS;
}

} // namespace llvm