1//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDGPUBaseInfo.h"
10#include "AMDGPU.h"
11#include "AMDGPUAsmUtils.h"
12#include "AMDKernelCodeT.h"
13#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
14#include "Utils/AMDKernelCodeTUtils.h"
15#include "llvm/ADT/StringExtras.h"
16#include "llvm/BinaryFormat/ELF.h"
17#include "llvm/IR/Attributes.h"
18#include "llvm/IR/Constants.h"
19#include "llvm/IR/Function.h"
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/IntrinsicsAMDGPU.h"
22#include "llvm/IR/IntrinsicsR600.h"
23#include "llvm/IR/LLVMContext.h"
24#include "llvm/IR/Metadata.h"
25#include "llvm/MC/MCInstrInfo.h"
26#include "llvm/MC/MCRegisterInfo.h"
27#include "llvm/MC/MCSubtargetInfo.h"
28#include "llvm/Support/CommandLine.h"
29#include "llvm/TargetParser/AMDGPUTargetParser.h"
30#include <optional>
31
32#define GET_INSTRINFO_NAMED_OPS
33#define GET_INSTRMAP_INFO
34#include "AMDGPUGenInstrInfo.inc"
35
36static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion(
37 "amdhsa-code-object-version", llvm::cl::Hidden,
38 llvm::cl::init(Val: llvm::AMDGPU::AMDHSA_COV6),
39 llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
40 "or asm directive still take priority if present)"));
41
42namespace {
43
/// Builds a mask of \p Width consecutive set bits whose least significant bit
/// sits at position \p Shift.
///
/// All arithmetic is done on unsigned operands: with a signed literal a
/// \p Width of 31 overflows int and a \p Width of 32 or more is an
/// out-of-range shift. A \p Width covering the whole word selects every bit
/// before shifting, and a \p Shift past the word yields an empty mask.
///
/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  constexpr unsigned NumBits = sizeof(unsigned) * 8;
  if (Shift >= NumBits)
    return 0;
  const unsigned LowMask = Width >= NumBits ? ~0u : (1u << Width) - 1u;
  return LowMask << Shift;
}
48
49/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
50///
51/// \returns Packed \p Dst.
52unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
53 unsigned Mask = getBitMask(Shift, Width);
54 return ((Src << Shift) & Mask) | (Dst & ~Mask);
55}
56
57/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
58///
59/// \returns Unpacked bits.
60unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
61 return (Src & getBitMask(Shift, Width)) >> Shift;
62}
63
/// Vmcnt bit shift (lower bits).
///
/// \returns 10 when \p VersionMajor is 11 or newer, otherwise 0.
unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
  if (VersionMajor >= 11)
    return 10;
  return 0;
}
68
/// Vmcnt bit width (lower bits).
///
/// \returns 6 when \p VersionMajor is 11 or newer, otherwise 4.
unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
  if (VersionMajor >= 11)
    return 6;
  return 4;
}
73
/// Expcnt bit shift.
///
/// \returns 0 when \p VersionMajor is 11 or newer, otherwise 4.
unsigned getExpcntBitShift(unsigned VersionMajor) {
  if (VersionMajor >= 11)
    return 0;
  return 4;
}
78
/// Expcnt bit width.
///
/// \returns 3 for every \p VersionMajor; the argument is not consulted.
unsigned getExpcntBitWidth(unsigned VersionMajor) {
  constexpr unsigned ExpcntWidth = 3;
  return ExpcntWidth;
}
81
/// Lgkmcnt bit shift.
///
/// \returns 4 when \p VersionMajor is 11 or newer, otherwise 8.
unsigned getLgkmcntBitShift(unsigned VersionMajor) {
  if (VersionMajor >= 11)
    return 4;
  return 8;
}
86
/// Lgkmcnt bit width.
///
/// \returns 6 when \p VersionMajor is 10 or newer, otherwise 4.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 10)
    return 6;
  return 4;
}
91
/// Vmcnt bit shift (higher bits).
///
/// \returns 14 for every \p VersionMajor; the argument is not consulted.
unsigned getVmcntBitShiftHi(unsigned VersionMajor) {
  constexpr unsigned VmcntHiShift = 14;
  return VmcntHiShift;
}
94
/// Vmcnt bit width (higher bits).
///
/// \returns 2 when \p VersionMajor is 9 or 10, otherwise 0.
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  switch (VersionMajor) {
  case 9:
  case 10:
    return 2;
  default:
    return 0;
  }
}
99
/// Loadcnt bit width.
///
/// \returns 6 when \p VersionMajor is 12 or newer, otherwise 0.
unsigned getLoadcntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 12)
    return 6;
  return 0;
}
104
/// Samplecnt bit width.
///
/// \returns 6 when \p VersionMajor is 12 or newer, otherwise 0.
unsigned getSamplecntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 12)
    return 6;
  return 0;
}
109
/// Bvhcnt bit width.
///
/// \returns 3 when \p VersionMajor is 12 or newer, otherwise 0.
unsigned getBvhcntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 12)
    return 3;
  return 0;
}
114
/// Dscnt bit width.
///
/// \returns 6 when \p VersionMajor is 12 or newer, otherwise 0.
unsigned getDscntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 12)
    return 6;
  return 0;
}
119
/// Dscnt bit shift in combined S_WAIT instructions.
///
/// \returns 0 for every \p VersionMajor; the argument is not consulted.
unsigned getDscntBitShift(unsigned VersionMajor) {
  constexpr unsigned DscntShift = 0;
  return DscntShift;
}
122
/// Storecnt or Vscnt bit width, depending on VersionMajor.
///
/// \returns 6 when \p VersionMajor is 10 or newer, otherwise 0.
unsigned getStorecntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 10)
    return 6;
  return 0;
}
127
/// Kmcnt bit width.
///
/// \returns 5 when \p VersionMajor is 12 or newer, otherwise 0.
unsigned getKmcntBitWidth(unsigned VersionMajor) {
  if (VersionMajor >= 12)
    return 5;
  return 0;
}
132
/// Xcnt bit width.
///
/// \returns 6 only for \p VersionMajor 12 with \p VersionMinor 5, otherwise 0.
unsigned getXcntBitWidth(unsigned VersionMajor, unsigned VersionMinor) {
  const bool HasXcnt = VersionMajor == 12 && VersionMinor == 5;
  if (HasXcnt)
    return 6;
  return 0;
}
137
/// Asynccnt bit width.
///
/// \returns 6 only for \p VersionMajor 12 with \p VersionMinor 5, otherwise 0.
unsigned getAsynccntBitWidth(unsigned VersionMajor, unsigned VersionMinor) {
  const bool HasAsynccnt = VersionMajor == 12 && VersionMinor == 5;
  if (HasAsynccnt)
    return 6;
  return 0;
}
142
/// Shift for Loadcnt/Storecnt in combined S_WAIT instructions.
///
/// \returns 8 when \p VersionMajor is 12 or newer, otherwise 0.
unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
  if (VersionMajor >= 12)
    return 8;
  return 0;
}
147
/// VaSdst bit width.
///
/// \returns Always 3.
inline unsigned getVaSdstBitWidth() {
  constexpr unsigned VaSdstWidth = 3;
  return VaSdstWidth;
}
150
/// VaSdst bit shift.
///
/// \returns Always 9.
inline unsigned getVaSdstBitShift() {
  constexpr unsigned VaSdstShift = 9;
  return VaSdstShift;
}
153
/// VmVsrc bit width.
///
/// \returns Always 3.
inline unsigned getVmVsrcBitWidth() {
  constexpr unsigned VmVsrcWidth = 3;
  return VmVsrcWidth;
}
156
/// VmVsrc bit shift.
///
/// \returns Always 2.
inline unsigned getVmVsrcBitShift() {
  constexpr unsigned VmVsrcShift = 2;
  return VmVsrcShift;
}
159
/// VaVdst bit width.
///
/// \returns Always 4.
inline unsigned getVaVdstBitWidth() {
  constexpr unsigned VaVdstWidth = 4;
  return VaVdstWidth;
}
162
/// VaVdst bit shift.
///
/// \returns Always 12.
inline unsigned getVaVdstBitShift() {
  constexpr unsigned VaVdstShift = 12;
  return VaVdstShift;
}
165
/// VaVcc bit width.
///
/// \returns Always 1.
inline unsigned getVaVccBitWidth() {
  constexpr unsigned VaVccWidth = 1;
  return VaVccWidth;
}
168
/// VaVcc bit shift.
///
/// \returns Always 1.
inline unsigned getVaVccBitShift() {
  constexpr unsigned VaVccShift = 1;
  return VaVccShift;
}
171
/// SaSdst bit width.
///
/// \returns Always 1.
inline unsigned getSaSdstBitWidth() {
  constexpr unsigned SaSdstWidth = 1;
  return SaSdstWidth;
}
174
/// SaSdst bit shift.
///
/// \returns Always 0.
inline unsigned getSaSdstBitShift() {
  constexpr unsigned SaSdstShift = 0;
  return SaSdstShift;
}
177
/// VaSsrc bit width.
///
/// \returns Always 1.
inline unsigned getVaSsrcBitWidth() {
  constexpr unsigned VaSsrcWidth = 1;
  return VaSsrcWidth;
}
180
/// VaSsrc bit shift.
///
/// \returns Always 8.
inline unsigned getVaSsrcBitShift() {
  constexpr unsigned VaSsrcShift = 8;
  return VaSsrcShift;
}
183
/// HoldCnt bit width (not the shift; the shift comes from
/// getHoldCntBitShift()).
///
/// The version pair is compared lexicographically, so any major version above
/// 10 qualifies regardless of its minor version.
///
/// \returns 1 when (\p VersionMajor, \p VersionMinor) is at least (10, 3),
/// otherwise 0.
inline unsigned getHoldCntWidth(unsigned VersionMajor, unsigned VersionMinor) {
  static constexpr unsigned MinMajor = 10;
  static constexpr unsigned MinMinor = 3;
  const bool HasHoldCnt =
      std::tie(VersionMajor, VersionMinor) >= std::tie(MinMajor, MinMinor);
  return HasHoldCnt ? 1 : 0;
}
192
/// HoldCnt bit shift.
///
/// \returns Always 7.
inline unsigned getHoldCntBitShift() {
  constexpr unsigned HoldCntShift = 7;
  return HoldCntShift;
}
195
196} // end anonymous namespace
197
198namespace llvm {
199
200namespace AMDGPU {
201
202/// \returns true if the target supports signed immediate offset for SMRD
203/// instructions.
204bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
205 return isGFX9Plus(STI: ST);
206}
207
208/// \returns True if \p STI is AMDHSA.
209bool isHsaAbi(const MCSubtargetInfo &STI) {
210 return STI.getTargetTriple().getOS() == Triple::AMDHSA;
211}
212
213unsigned getAMDHSACodeObjectVersion(const Module &M) {
214 if (auto *Ver = mdconst::extract_or_null<ConstantInt>(
215 MD: M.getModuleFlag(Key: "amdhsa_code_object_version"))) {
216 return (unsigned)Ver->getZExtValue() / 100;
217 }
218
219 return getDefaultAMDHSACodeObjectVersion();
220}
221
222unsigned getDefaultAMDHSACodeObjectVersion() {
223 return DefaultAMDHSACodeObjectVersion;
224}
225
226unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion) {
227 switch (ABIVersion) {
228 case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
229 return 4;
230 case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
231 return 5;
232 case ELF::ELFABIVERSION_AMDGPU_HSA_V6:
233 return 6;
234 default:
235 return getDefaultAMDHSACodeObjectVersion();
236 }
237}
238
239uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) {
240 if (T.getOS() != Triple::AMDHSA)
241 return 0;
242
243 switch (CodeObjectVersion) {
244 case 4:
245 return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
246 case 5:
247 return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
248 case 6:
249 return ELF::ELFABIVERSION_AMDGPU_HSA_V6;
250 default:
251 report_fatal_error(reason: "Unsupported AMDHSA Code Object Version " +
252 Twine(CodeObjectVersion));
253 }
254}
255
256unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
257 switch (CodeObjectVersion) {
258 case AMDHSA_COV4:
259 return 48;
260 case AMDHSA_COV5:
261 case AMDHSA_COV6:
262 default:
263 return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
264 }
265}
266
267// FIXME: All such magic numbers about the ABI should be in a
268// central TD file.
269unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
270 switch (CodeObjectVersion) {
271 case AMDHSA_COV4:
272 return 24;
273 case AMDHSA_COV5:
274 case AMDHSA_COV6:
275 default:
276 return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
277 }
278}
279
280unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
281 switch (CodeObjectVersion) {
282 case AMDHSA_COV4:
283 return 32;
284 case AMDHSA_COV5:
285 case AMDHSA_COV6:
286 default:
287 return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET;
288 }
289}
290
291unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
292 switch (CodeObjectVersion) {
293 case AMDHSA_COV4:
294 return 40;
295 case AMDHSA_COV5:
296 case AMDHSA_COV6:
297 default:
298 return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET;
299 }
300}
301
302#define GET_MIMGBaseOpcodesTable_IMPL
303#define GET_MIMGDimInfoTable_IMPL
304#define GET_MIMGInfoTable_IMPL
305#define GET_MIMGLZMappingTable_IMPL
306#define GET_MIMGMIPMappingTable_IMPL
307#define GET_MIMGBiasMappingTable_IMPL
308#define GET_MIMGOffsetMappingTable_IMPL
309#define GET_MIMGG16MappingTable_IMPL
310#define GET_MAIInstInfoTable_IMPL
311#define GET_WMMAInstInfoTable_IMPL
312#include "AMDGPUGenSearchableTables.inc"
313
314int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
315 unsigned VDataDwords, unsigned VAddrDwords) {
316 const MIMGInfo *Info =
317 getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding, VDataDwords, VAddrDwords);
318 return Info ? Info->Opcode : -1;
319}
320
321const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
322 const MIMGInfo *Info = getMIMGInfo(Opcode: Opc);
323 return Info ? getMIMGBaseOpcodeInfo(BaseOpcode: Info->BaseOpcode) : nullptr;
324}
325
326int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
327 const MIMGInfo *OrigInfo = getMIMGInfo(Opcode: Opc);
328 const MIMGInfo *NewInfo =
329 getMIMGOpcodeHelper(BaseOpcode: OrigInfo->BaseOpcode, MIMGEncoding: OrigInfo->MIMGEncoding,
330 VDataDwords: NewChannels, VAddrDwords: OrigInfo->VAddrDwords);
331 return NewInfo ? NewInfo->Opcode : -1;
332}
333
334unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
335 const MIMGDimInfo *Dim, bool IsA16,
336 bool IsG16Supported) {
337 unsigned AddrWords = BaseOpcode->NumExtraArgs;
338 unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
339 (BaseOpcode->LodOrClampOrMip ? 1 : 0);
340 if (IsA16)
341 AddrWords += divideCeil(Numerator: AddrComponents, Denominator: 2);
342 else
343 AddrWords += AddrComponents;
344
345 // Note: For subtargets that support A16 but not G16, enabling A16 also
346 // enables 16 bit gradients.
347 // For subtargets that support A16 (operand) and G16 (done with a different
348 // instruction encoding), they are independent.
349
350 if (BaseOpcode->Gradients) {
351 if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
352 // There are two gradients per coordinate, we pack them separately.
353 // For the 3d case,
354 // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
355 AddrWords += alignTo<2>(Value: Dim->NumGradients / 2);
356 else
357 AddrWords += Dim->NumGradients;
358 }
359 return AddrWords;
360}
361
/// Row type consulted by the getMUBUF*() accessors in this file.
/// NOTE(review): rows presumably come from the generated MUBUFInfoTable, so
/// field order and names must stay in sync with it - confirm before editing.
struct MUBUFInfo {
  uint32_t Opcode;     ///< Returned by getMUBUFOpcode().
  uint32_t BaseOpcode; ///< Returned by getMUBUFBaseOpcode().
  uint8_t elements;    ///< Returned by getMUBUFElements().
  bool has_vaddr;      ///< Returned by getMUBUFHasVAddr().
  bool has_srsrc;      ///< Returned by getMUBUFHasSrsrc().
  bool has_soffset;    ///< Returned by getMUBUFHasSoffset().
  bool IsBufferInv;    ///< Returned by getMUBUFIsBufferInv().
  bool tfe;            ///< Returned by getMUBUFTfe().
};
372
/// Row type consulted by the getMTBUF*() accessors in this file.
/// NOTE(review): rows presumably come from the generated MTBUFInfoTable, so
/// field order and names must stay in sync with it - confirm before editing.
struct MTBUFInfo {
  uint32_t Opcode;     ///< Returned by getMTBUFOpcode().
  uint32_t BaseOpcode; ///< Returned by getMTBUFBaseOpcode().
  uint8_t elements;    ///< Returned by getMTBUFElements().
  bool has_vaddr;      ///< Returned by getMTBUFHasVAddr().
  bool has_srsrc;      ///< Returned by getMTBUFHasSrsrc().
  bool has_soffset;    ///< Returned by getMTBUFHasSoffset().
};
381
/// Row type consulted by getSMEMIsBuffer().
/// NOTE(review): presumably the row type of the generated SMInfoTable -
/// confirm before changing the layout.
struct SMInfo {
  uint32_t Opcode;
  bool IsBuffer; ///< Returned by getSMEMIsBuffer().
};
386
/// Row type consulted by getVOP1IsSingle(), getVOP2IsSingle() and
/// getVOP3IsSingle().
/// NOTE(review): presumably shared by the generated VOP1/VOP2/VOP3 info
/// tables - confirm before changing the layout.
struct VOPInfo {
  uint32_t Opcode;
  bool IsSingle; ///< Returned by the getVOP*IsSingle() accessors.
};
391
/// Single-field row type holding an opcode.
/// NOTE(review): presumably the row type of the generated VOPC64DPP and
/// VOPC64DPP8 tables queried by isVOPC64DPP() - confirm.
struct VOPC64DPPInfo {
  uint32_t Opcode;
};
395
/// Single-field row type holding an opcode.
/// NOTE(review): presumably the row type of the generated
/// VOPCAsmOnlyInfoTable queried by isVOPCAsmOnly() - confirm.
struct VOPCDPPAsmOnlyInfo {
  uint32_t Opcode;
};
399
/// Single-field row type holding an opcode.
/// NOTE(review): presumably the row type of the generated
/// VOP3CAsmOnlyInfoTable - confirm.
struct VOP3CDPPAsmOnlyInfo {
  uint32_t Opcode;
};
403
/// Pairs a base VOP opcode with its VOPD component opcode; looked up in both
/// directions by getVOPDOpcode() and getVOPDComponents().
/// NOTE(review): presumably the row type of the generated VOPDComponentTable
/// - confirm before changing the layout.
struct VOPDComponentInfo {
  uint16_t BaseVOP; ///< Returned (per component) by getVOPDComponents().
  uint16_t VOPDOp;  ///< Returned by getVOPDOpcode().
};
408
/// Row describing one VOPD opcode together with its X and Y components.
/// NOTE(review): presumably the row type of the generated VOPDPairs table -
/// confirm before changing the layout.
struct VOPDInfo {
  uint32_t Opcode; ///< Returned by getVOPDFull().
  uint16_t OpX;    ///< Resolved to a base opcode in getVOPDComponents().
  uint16_t OpY;    ///< Resolved to a base opcode in getVOPDComponents().
  uint16_t Subtarget;
  bool VOPD3;
};
416
/// Row type consulted by isTrue16Inst().
/// NOTE(review): presumably the row type of the generated VOPTrue16Table -
/// confirm before changing the layout.
struct VOPTrue16Info {
  uint32_t Opcode;
  bool IsTrue16; ///< Returned by isTrue16Inst().
};
421
/// Row read from VOPDXTable / VOPDYTable when buildVOPDXYLookup() fills its
/// lookup table; the three fields are combined into a key by getVOPDXYKey().
/// NOTE(review): rows are presumably generated by TableGen - confirm before
/// changing the layout.
struct VOPDXYInfo {
  uint16_t VOPDOp;
  uint16_t Subtarget;
  bool VOPD3;
};
427
428#define GET_FP4FP8DstByteSelTable_DECL
429#define GET_FP4FP8DstByteSelTable_IMPL
430
/// Row type consulted by isDPMACCInstruction().
/// NOTE(review): presumably the row type of the generated
/// DPMACCInstructionTable - confirm before changing the layout.
struct DPMACCInstructionInfo {
  uint32_t Opcode;
  bool IsDPMACCInstruction; ///< Returned by isDPMACCInstruction().
};
435
/// Row type consulted by getFPDstSelType().
/// NOTE(review): presumably the row type of the generated
/// FP4FP8DstByteSelTable - confirm before changing the layout.
struct FP4FP8DstByteSelInfo {
  uint32_t Opcode;
  bool HasFP8DstByteSel; ///< Checked first; maps to FPType::FP8.
  bool HasFP4DstByteSel; ///< Checked second; maps to FPType::FP4.
};
441
442#define GET_DPMACCInstructionTable_DECL
443#define GET_DPMACCInstructionTable_IMPL
444#define GET_MTBUFInfoTable_DECL
445#define GET_MTBUFInfoTable_IMPL
446#define GET_MUBUFInfoTable_DECL
447#define GET_MUBUFInfoTable_IMPL
448#define GET_SMInfoTable_DECL
449#define GET_SMInfoTable_IMPL
450#define GET_VOP1InfoTable_DECL
451#define GET_VOP1InfoTable_IMPL
452#define GET_VOP2InfoTable_DECL
453#define GET_VOP2InfoTable_IMPL
454#define GET_VOP3InfoTable_DECL
455#define GET_VOP3InfoTable_IMPL
456#define GET_VOPC64DPPTable_DECL
457#define GET_VOPC64DPPTable_IMPL
458#define GET_VOPC64DPP8Table_DECL
459#define GET_VOPC64DPP8Table_IMPL
460#define GET_VOPCAsmOnlyInfoTable_DECL
461#define GET_VOPCAsmOnlyInfoTable_IMPL
462#define GET_VOP3CAsmOnlyInfoTable_DECL
463#define GET_VOP3CAsmOnlyInfoTable_IMPL
464#define GET_VOPDComponentTable_DECL
465#define GET_VOPDComponentTable_IMPL
466#define GET_VOPDPairs_DECL
467#define GET_VOPDPairs_IMPL
468#define GET_VOPDXTable_DECL
469#define GET_VOPDXTable_IMPL
470#define GET_VOPDYTable_DECL
471#define GET_VOPDYTable_IMPL
472#define GET_VOPTrue16Table_DECL
473#define GET_VOPTrue16Table_IMPL
474#define GET_True16D16Table_IMPL
475#define GET_WMMAOpcode2AddrMappingTable_DECL
476#define GET_WMMAOpcode2AddrMappingTable_IMPL
477#define GET_WMMAOpcode3AddrMappingTable_DECL
478#define GET_WMMAOpcode3AddrMappingTable_IMPL
479#define GET_getMFMA_F8F6F4_WithSize_DECL
480#define GET_getMFMA_F8F6F4_WithSize_IMPL
481#define GET_isMFMA_F8F6F4Table_IMPL
482#define GET_isCvtScaleF32_F32F16ToF8F4Table_IMPL
483
484#include "AMDGPUGenSearchableTables.inc"
485
486int getMTBUFBaseOpcode(unsigned Opc) {
487 const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opcode: Opc);
488 return Info ? Info->BaseOpcode : -1;
489}
490
491int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
492 const MTBUFInfo *Info =
493 getMTBUFInfoFromBaseOpcodeAndElements(BaseOpcode: BaseOpc, elements: Elements);
494 return Info ? Info->Opcode : -1;
495}
496
497int getMTBUFElements(unsigned Opc) {
498 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opcode: Opc);
499 return Info ? Info->elements : 0;
500}
501
502bool getMTBUFHasVAddr(unsigned Opc) {
503 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opcode: Opc);
504 return Info && Info->has_vaddr;
505}
506
507bool getMTBUFHasSrsrc(unsigned Opc) {
508 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opcode: Opc);
509 return Info && Info->has_srsrc;
510}
511
512bool getMTBUFHasSoffset(unsigned Opc) {
513 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opcode: Opc);
514 return Info && Info->has_soffset;
515}
516
517int getMUBUFBaseOpcode(unsigned Opc) {
518 const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opcode: Opc);
519 return Info ? Info->BaseOpcode : -1;
520}
521
522int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
523 const MUBUFInfo *Info =
524 getMUBUFInfoFromBaseOpcodeAndElements(BaseOpcode: BaseOpc, elements: Elements);
525 return Info ? Info->Opcode : -1;
526}
527
528int getMUBUFElements(unsigned Opc) {
529 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opcode: Opc);
530 return Info ? Info->elements : 0;
531}
532
533bool getMUBUFHasVAddr(unsigned Opc) {
534 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opcode: Opc);
535 return Info && Info->has_vaddr;
536}
537
538bool getMUBUFHasSrsrc(unsigned Opc) {
539 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opcode: Opc);
540 return Info && Info->has_srsrc;
541}
542
543bool getMUBUFHasSoffset(unsigned Opc) {
544 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opcode: Opc);
545 return Info && Info->has_soffset;
546}
547
548bool getMUBUFIsBufferInv(unsigned Opc) {
549 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opcode: Opc);
550 return Info && Info->IsBufferInv;
551}
552
553bool getMUBUFTfe(unsigned Opc) {
554 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opcode: Opc);
555 return Info && Info->tfe;
556}
557
558bool getSMEMIsBuffer(unsigned Opc) {
559 const SMInfo *Info = getSMEMOpcodeHelper(Opcode: Opc);
560 return Info && Info->IsBuffer;
561}
562
563bool getVOP1IsSingle(unsigned Opc) {
564 const VOPInfo *Info = getVOP1OpcodeHelper(Opcode: Opc);
565 return !Info || Info->IsSingle;
566}
567
568bool getVOP2IsSingle(unsigned Opc) {
569 const VOPInfo *Info = getVOP2OpcodeHelper(Opcode: Opc);
570 return !Info || Info->IsSingle;
571}
572
573bool getVOP3IsSingle(unsigned Opc) {
574 const VOPInfo *Info = getVOP3OpcodeHelper(Opcode: Opc);
575 return !Info || Info->IsSingle;
576}
577
578bool isVOPC64DPP(unsigned Opc) {
579 return isVOPC64DPPOpcodeHelper(Opcode: Opc) || isVOPC64DPP8OpcodeHelper(Opcode: Opc);
580}
581
582bool isVOPCAsmOnly(unsigned Opc) { return isVOPCAsmOnlyOpcodeHelper(Opcode: Opc); }
583
584bool getMAIIsDGEMM(unsigned Opc) {
585 const MAIInstInfo *Info = getMAIInstInfoHelper(Opcode: Opc);
586 return Info && Info->is_dgemm;
587}
588
589bool getMAIIsGFX940XDL(unsigned Opc) {
590 const MAIInstInfo *Info = getMAIInstInfoHelper(Opcode: Opc);
591 return Info && Info->is_gfx940_xdl;
592}
593
594bool getWMMAIsXDL(unsigned Opc) {
595 const WMMAInstInfo *Info = getWMMAInstInfoHelper(Opcode: Opc);
596 return Info ? Info->is_wmma_xdl : false;
597}
598
599bool getHasMatrixScale(unsigned Opc) {
600 const WMMAInstInfo *Info = getWMMAInstInfoHelper(Opcode: Opc);
601 return Info && Info->HasMatrixScale;
602}
603
604uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal) {
605 switch (EncodingVal) {
606 case MFMAScaleFormats::FP6_E2M3:
607 case MFMAScaleFormats::FP6_E3M2:
608 return 6;
609 case MFMAScaleFormats::FP4_E2M1:
610 return 4;
611 case MFMAScaleFormats::FP8_E4M3:
612 case MFMAScaleFormats::FP8_E5M2:
613 default:
614 return 8;
615 }
616
617 llvm_unreachable("covered switch over mfma scale formats");
618}
619
620const MFMA_F8F6F4_Info *getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ,
621 unsigned BLGP,
622 unsigned F8F8Opcode) {
623 uint8_t SrcANumRegs = mfmaScaleF8F6F4FormatToNumRegs(EncodingVal: CBSZ);
624 uint8_t SrcBNumRegs = mfmaScaleF8F6F4FormatToNumRegs(EncodingVal: BLGP);
625 return getMFMA_F8F6F4_InstWithNumRegs(NumRegsSrcA: SrcANumRegs, NumRegsSrcB: SrcBNumRegs, F8F8Opcode);
626}
627
628uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt) {
629 switch (Fmt) {
630 case WMMA::MATRIX_FMT_FP8:
631 case WMMA::MATRIX_FMT_BF8:
632 return 16;
633 case WMMA::MATRIX_FMT_FP6:
634 case WMMA::MATRIX_FMT_BF6:
635 return 12;
636 case WMMA::MATRIX_FMT_FP4:
637 return 8;
638 }
639
640 llvm_unreachable("covered switch over wmma scale formats");
641}
642
643const MFMA_F8F6F4_Info *getWMMA_F8F6F4_WithFormatArgs(unsigned FmtA,
644 unsigned FmtB,
645 unsigned F8F8Opcode) {
646 uint8_t SrcANumRegs = wmmaScaleF8F6F4FormatToNumRegs(Fmt: FmtA);
647 uint8_t SrcBNumRegs = wmmaScaleF8F6F4FormatToNumRegs(Fmt: FmtB);
648 return getMFMA_F8F6F4_InstWithNumRegs(NumRegsSrcA: SrcANumRegs, NumRegsSrcB: SrcBNumRegs, F8F8Opcode);
649}
650
651bool isValidWMMAScaleFmtCombination(unsigned AFmt, unsigned AScale,
652 unsigned BFmt, unsigned BScale) {
653 auto isValid = [](unsigned Fmt, unsigned Scale) -> bool {
654 switch (Fmt) {
655 case WMMA::MATRIX_FMT_FP8:
656 case WMMA::MATRIX_FMT_BF8:
657 case WMMA::MATRIX_FMT_FP6:
658 case WMMA::MATRIX_FMT_BF6:
659 if (Scale != WMMA::MATRIX_SCALE_FMT_E8)
660 return false;
661 break;
662 case WMMA::MATRIX_FMT_FP4:
663 if (Scale != WMMA::MATRIX_SCALE_FMT_E8 &&
664 Scale != WMMA::MATRIX_SCALE_FMT_E5M3 &&
665 Scale != WMMA::MATRIX_SCALE_FMT_E4M3)
666 return false;
667 break;
668 }
669 return true;
670 };
671
672 if (!isValid(AFmt, AScale) || !isValid(BFmt, BScale))
673 return false;
674
675 if (AFmt == WMMA::MATRIX_FMT_FP4 && BFmt == WMMA::MATRIX_FMT_FP4 &&
676 AScale != BScale)
677 return false;
678
679 return true;
680}
681
682unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) {
683 if (ST.hasFeature(Feature: AMDGPU::FeatureGFX13Insts))
684 return SIEncodingFamily::GFX13;
685 if (ST.hasFeature(Feature: AMDGPU::FeatureGFX1250Insts))
686 return SIEncodingFamily::GFX1250;
687 if (ST.hasFeature(Feature: AMDGPU::FeatureGFX12Insts))
688 return SIEncodingFamily::GFX12;
689 if (ST.hasFeature(Feature: AMDGPU::FeatureGFX11_7Insts))
690 return SIEncodingFamily::GFX1170;
691 if (ST.hasFeature(Feature: AMDGPU::FeatureGFX11Insts))
692 return SIEncodingFamily::GFX11;
693 llvm_unreachable("Subtarget generation does not support VOPD!");
694}
695
/// Packs a VOPD opcode, a subtarget id and the VOPD3 flag into one integer
/// key: \p VOPD3 occupies bit 0, \p Subtarget starts at bit 1 and \p VOPDOp
/// starts at bit 5. The inputs are not masked, so an oversized \p Subtarget
/// overlaps the \p VOPDOp bits.
static constexpr unsigned getVOPDXYKey(unsigned VOPDOp, unsigned Subtarget,
                                       bool VOPD3) {
  const unsigned OpBits = VOPDOp << 5;
  const unsigned SubtargetBits = Subtarget << 1;
  const unsigned VOPD3Bit = VOPD3 ? 1u : 0u;
  return OpBits | SubtargetBits | VOPD3Bit;
}
700
701// TODO: Ideally, the table should be emitted by the TableGen backend, however
702// this is currently not supported, so the direct lookup table is generated
703// manually here.
704constexpr unsigned VOPDXYKeyBits = 11;
705static constexpr std::array<CanBeVOPD, 1 << VOPDXYKeyBits> buildVOPDXYLookup() {
706 std::array<CanBeVOPD, 1 << VOPDXYKeyBits> Table{};
707 for (auto &E : Table)
708 E = {.X: false, .Y: false};
709 for (const auto &E : VOPDXTable)
710 Table[getVOPDXYKey(VOPDOp: E.VOPDOp, Subtarget: E.Subtarget, VOPD3: E.VOPD3)].X = true;
711 for (const auto &E : VOPDYTable)
712 Table[getVOPDXYKey(VOPDOp: E.VOPDOp, Subtarget: E.Subtarget, VOPD3: E.VOPD3)].Y = true;
713 return Table;
714}
715
716constexpr auto VOPDXYLookup = buildVOPDXYLookup();
717
718CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3) {
719 bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
720 Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
721 // Normalize through VOPDComponentTable so that e32 and e64 variants
722 // of the same logical opcode all share a single entry.
723 const VOPDComponentInfo *Info = getVOPDComponentHelper(BaseVOP: Opc);
724 if (!Info)
725 return {.X: false, .Y: false};
726 return VOPDXYLookup[getVOPDXYKey(VOPDOp: Info->VOPDOp, Subtarget: EncodingFamily, VOPD3)];
727}
728
729unsigned getVOPDOpcode(unsigned Opc, bool VOPD3) {
730 bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
731 Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
732 const VOPDComponentInfo *Info = getVOPDComponentHelper(BaseVOP: Opc);
733 return Info ? Info->VOPDOp : ~0u;
734}
735
736bool isVOPD(unsigned Opc) {
737 return AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::src0X);
738}
739
740bool isMAC(unsigned Opc) {
741 return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
742 Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
743 Opc == AMDGPU::V_MAC_F32_e64_vi ||
744 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
745 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
746 Opc == AMDGPU::V_MAC_F16_e64_vi ||
747 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
748 Opc == AMDGPU::V_FMAC_F64_e64_gfx12 ||
749 Opc == AMDGPU::V_FMAC_F64_e64_gfx13 ||
750 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
751 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
752 Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
753 Opc == AMDGPU::V_FMAC_F32_e64_gfx13 ||
754 Opc == AMDGPU::V_FMAC_F32_e64_vi ||
755 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
756 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
757 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
758 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
759 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx11 ||
760 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
761 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx12 ||
762 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx13 ||
763 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx13 ||
764 Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
765 Opc == AMDGPU::V_DOT2C_F32_BF16_e64_vi ||
766 Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
767 Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
768 Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
769}
770
771bool isPermlane16(unsigned Opc) {
772 return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
773 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
774 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
775 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
776 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
777 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx13 ||
778 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
779 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx13 ||
780 Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
781 Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx13 ||
782 Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12 ||
783 Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx13;
784}
785
786bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) {
787 return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
788 Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
789 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
790 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
791 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
792 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
793 Opc == AMDGPU::V_CVT_PK_F32_BF8_fake16_e64_gfx12 ||
794 Opc == AMDGPU::V_CVT_PK_F32_FP8_fake16_e64_gfx12 ||
795 Opc == AMDGPU::V_CVT_PK_F32_BF8_t16_e64_gfx12 ||
796 Opc == AMDGPU::V_CVT_PK_F32_FP8_t16_e64_gfx12;
797}
798
799bool isGenericAtomic(unsigned Opc) {
800 return Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
801 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
802 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
803 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
804 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
805 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
806 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
807 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
808 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
809 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
810 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
811 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
812 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
813 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
814 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
815 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
816 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB_CLAMP_U32 ||
817 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_COND_SUB_U32 ||
818 Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
819}
820
821bool isAsyncStore(unsigned Opc) {
822 return Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_gfx1250 ||
823 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_gfx1250 ||
824 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_gfx1250 ||
825 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_gfx1250 ||
826 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_SADDR_gfx1250 ||
827 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_SADDR_gfx1250 ||
828 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_SADDR_gfx1250 ||
829 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_SADDR_gfx1250;
830}
831
832bool isTensorStore(unsigned Opc) {
833 return Opc == TENSOR_STORE_FROM_LDS_d2_gfx1250 ||
834 Opc == TENSOR_STORE_FROM_LDS_d4_gfx1250;
835}
836
837unsigned getTemporalHintType(const MCInstrDesc TID) {
838 if (TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))
839 return CPol::TH_TYPE_ATOMIC;
840 unsigned Opc = TID.getOpcode();
841 // Async and Tensor store should have the temporal hint type of TH_TYPE_STORE
842 if (TID.mayStore() &&
843 (isAsyncStore(Opc) || isTensorStore(Opc) || !TID.mayLoad()))
844 return CPol::TH_TYPE_STORE;
845
846 // This will default to returning TH_TYPE_LOAD when neither MayStore nor
847 // MayLoad flag is present which is the case with instructions like
848 // image_get_resinfo.
849 return CPol::TH_TYPE_LOAD;
850}
851
852bool isTrue16Inst(unsigned Opc) {
853 const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opcode: Opc);
854 return Info && Info->IsTrue16;
855}
856
857FPType getFPDstSelType(unsigned Opc) {
858 const FP4FP8DstByteSelInfo *Info = getFP4FP8DstByteSelHelper(Opcode: Opc);
859 if (!Info)
860 return FPType::None;
861 if (Info->HasFP8DstByteSel)
862 return FPType::FP8;
863 if (Info->HasFP4DstByteSel)
864 return FPType::FP4;
865
866 return FPType::None;
867}
868
869bool isDPMACCInstruction(unsigned Opc) {
870 const DPMACCInstructionInfo *Info = getDPMACCInstructionHelper(Opcode: Opc);
871 return Info && Info->IsDPMACCInstruction;
872}
873
874unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
875 const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opcode2Addr: Opc);
876 return Info ? Info->Opcode3Addr : ~0u;
877}
878
879unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
880 const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opcode3Addr: Opc);
881 return Info ? Info->Opcode2Addr : ~0u;
882}
883
884// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
885// header files, so we need to wrap it in a function that takes unsigned
886// instead.
887int32_t getMCOpcode(uint32_t Opcode, unsigned Gen) {
888 return getMCOpcodeGen(Opcode, inSubtarget: static_cast<Subtarget>(Gen));
889}
890
891unsigned getBitOp2(unsigned Opc) {
892 switch (Opc) {
893 default:
894 return 0;
895 case AMDGPU::V_AND_B32_e32:
896 return 0x40;
897 case AMDGPU::V_OR_B32_e32:
898 return 0x54;
899 case AMDGPU::V_XOR_B32_e32:
900 return 0x14;
901 case AMDGPU::V_XNOR_B32_e32:
902 return 0x41;
903 }
904}
905
906int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily,
907 bool VOPD3) {
908 bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc: OpY) : 0;
909 OpY = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : OpY;
910 const VOPDInfo *Info =
911 getVOPDInfoFromComponentOpcodes(OpX, OpY, SubTgt: EncodingFamily, VOPD3);
912 return Info ? Info->Opcode : -1;
913}
914
915std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
916 const VOPDInfo *Info = getVOPDOpcodeHelper(Opcode: VOPDOpcode);
917 assert(Info);
918 const auto *OpX = getVOPDBaseFromComponent(VOPDOp: Info->OpX);
919 const auto *OpY = getVOPDBaseFromComponent(VOPDOp: Info->OpY);
920 assert(OpX && OpY);
921 return {OpX->BaseVOP, OpY->BaseVOP};
922}
923
924namespace VOPD {
925
926ComponentProps::ComponentProps(const MCInstrDesc &OpDesc, bool VOP3Layout) {
927 assert(OpDesc.getNumDefs() == Component::DST_NUM);
928
929 assert(OpDesc.getOperandConstraint(Component::SRC0, MCOI::TIED_TO) == -1);
930 assert(OpDesc.getOperandConstraint(Component::SRC1, MCOI::TIED_TO) == -1);
931 auto TiedIdx = OpDesc.getOperandConstraint(OpNum: Component::SRC2, Constraint: MCOI::TIED_TO);
932 assert(TiedIdx == -1 || TiedIdx == Component::DST);
933 HasSrc2Acc = TiedIdx != -1;
934 Opcode = OpDesc.getOpcode();
935
936 IsVOP3 = VOP3Layout || (OpDesc.TSFlags & SIInstrFlags::VOP3);
937 SrcOperandsNum = AMDGPU::hasNamedOperand(Opcode, NamedIdx: AMDGPU::OpName::src2) ? 3
938 : AMDGPU::hasNamedOperand(Opcode, NamedIdx: AMDGPU::OpName::imm) ? 3
939 : AMDGPU::hasNamedOperand(Opcode, NamedIdx: AMDGPU::OpName::src1) ? 2
940 : 1;
941 assert(SrcOperandsNum <= Component::MAX_SRC_NUM);
942
943 if (Opcode == AMDGPU::V_CNDMASK_B32_e32 ||
944 Opcode == AMDGPU::V_CNDMASK_B32_e64) {
945 // CNDMASK is an awkward exception, it has FP modifiers, but not FP
946 // operands.
947 NumVOPD3Mods = 2;
948 if (IsVOP3)
949 SrcOperandsNum = 3;
950 } else if (isSISrcFPOperand(Desc: OpDesc,
951 OpNo: getNamedOperandIdx(Opcode, Name: OpName::src0))) {
952 // All FP VOPD instructions have Neg modifiers for all operands except
953 // for tied src2.
954 NumVOPD3Mods = SrcOperandsNum;
955 if (HasSrc2Acc)
956 --NumVOPD3Mods;
957 }
958
959 if (OpDesc.TSFlags & SIInstrFlags::VOP3)
960 return;
961
962 auto OperandsNum = OpDesc.getNumOperands();
963 unsigned CompOprIdx;
964 for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
965 if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
966 MandatoryLiteralIdx = CompOprIdx;
967 break;
968 }
969 }
970}
971
972int ComponentProps::getBitOp3OperandIdx() const {
973 return getNamedOperandIdx(Opcode, Name: OpName::bitop3);
974}
975
976unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
977 assert(CompOprIdx < Component::MAX_OPR_NUM);
978
979 if (CompOprIdx == Component::DST)
980 return getIndexOfDstInParsedOperands();
981
982 auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
983 if (CompSrcIdx < getCompParsedSrcOperandsNum())
984 return getIndexOfSrcInParsedOperands(CompSrcIdx);
985
986 // The specified operand does not exist.
987 return 0;
988}
989
990std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
991 std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
992 const MCRegisterInfo &MRI, bool SkipSrc, bool AllowSameVGPR,
993 bool VOPD3) const {
994
995 auto OpXRegs = getRegIndices(ComponentIdx: ComponentIndex::X, GetRegIdx,
996 VOPD3: CompInfo[ComponentIndex::X].isVOP3());
997 auto OpYRegs = getRegIndices(ComponentIdx: ComponentIndex::Y, GetRegIdx,
998 VOPD3: CompInfo[ComponentIndex::Y].isVOP3());
999
1000 const auto banksOverlap = [&MRI](MCRegister X, MCRegister Y,
1001 unsigned BanksMask) -> bool {
1002 MCRegister BaseX = MRI.getSubReg(Reg: X, Idx: AMDGPU::sub0);
1003 MCRegister BaseY = MRI.getSubReg(Reg: Y, Idx: AMDGPU::sub0);
1004 if (!BaseX)
1005 BaseX = X;
1006 if (!BaseY)
1007 BaseY = Y;
1008 if ((BaseX.id() & BanksMask) == (BaseY.id() & BanksMask))
1009 return true;
1010 if (BaseX != X /* This is 64-bit register */ &&
1011 ((BaseX.id() + 1) & BanksMask) == (BaseY.id() & BanksMask))
1012 return true;
1013 if (BaseY != Y &&
1014 (BaseX.id() & BanksMask) == ((BaseY.id() + 1) & BanksMask))
1015 return true;
1016
1017 // If both are 64-bit bank conflict will be detected yet while checking
1018 // the first subreg.
1019 return false;
1020 };
1021
1022 unsigned CompOprIdx;
1023 for (CompOprIdx = 0; CompOprIdx < Component::MAX_OPR_NUM; ++CompOprIdx) {
1024 unsigned BanksMasks = VOPD3 ? VOPD3_VGPR_BANK_MASKS[CompOprIdx]
1025 : VOPD_VGPR_BANK_MASKS[CompOprIdx];
1026 if (!OpXRegs[CompOprIdx] || !OpYRegs[CompOprIdx])
1027 continue;
1028
1029 if (getVGPREncodingMSBs(Reg: OpXRegs[CompOprIdx], MRI) !=
1030 getVGPREncodingMSBs(Reg: OpYRegs[CompOprIdx], MRI))
1031 return CompOprIdx;
1032
1033 if (SkipSrc && CompOprIdx >= Component::DST_NUM)
1034 continue;
1035
1036 if (CompOprIdx < Component::DST_NUM) {
1037 // Even if we do not check vdst parity, vdst operands still shall not
1038 // overlap.
1039 if (MRI.regsOverlap(RegA: OpXRegs[CompOprIdx], RegB: OpYRegs[CompOprIdx]))
1040 return CompOprIdx;
1041 if (VOPD3) // No need to check dst parity.
1042 continue;
1043 }
1044
1045 if (banksOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx], BanksMasks) &&
1046 (!AllowSameVGPR || CompOprIdx < Component::DST_NUM ||
1047 OpXRegs[CompOprIdx] != OpYRegs[CompOprIdx]))
1048 return CompOprIdx;
1049 }
1050
1051 return {};
1052}
1053
1054// Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
1055// by the specified component. If an operand is unused
1056// or is not a VGPR, the corresponding value is 0.
1057//
1058// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
1059// for the specified component and MC operand. The callback must return 0
1060// if the operand is not a register or not a VGPR.
1061InstInfo::RegIndices
1062InstInfo::getRegIndices(unsigned CompIdx,
1063 std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
1064 bool VOPD3) const {
1065 assert(CompIdx < COMPONENTS_NUM);
1066
1067 const auto &Comp = CompInfo[CompIdx];
1068 InstInfo::RegIndices RegIndices;
1069
1070 RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());
1071
1072 for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
1073 unsigned CompSrcIdx = CompOprIdx - DST_NUM;
1074 RegIndices[CompOprIdx] =
1075 Comp.hasRegSrcOperand(CompSrcIdx)
1076 ? GetRegIdx(CompIdx,
1077 Comp.getIndexOfSrcInMCOperands(CompSrcIdx, VOPD3))
1078 : MCRegister();
1079 }
1080 return RegIndices;
1081}
1082
1083} // namespace VOPD
1084
1085VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) {
1086 return VOPD::InstInfo(OpX, OpY);
1087}
1088
1089VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
1090 const MCInstrInfo *InstrInfo) {
1091 auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
1092 const auto &OpXDesc = InstrInfo->get(Opcode: OpX);
1093 const auto &OpYDesc = InstrInfo->get(Opcode: OpY);
1094 bool VOPD3 = InstrInfo->get(Opcode: VOPDOpcode).TSFlags & SIInstrFlags::VOPD3;
1095 VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X, VOPD3);
1096 VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo, VOPD3);
1097 return VOPD::InstInfo(OpXInfo, OpYInfo);
1098}
1099
1100TargetID createAMDGPUTargetID(const MCSubtargetInfo &STI,
1101 StringRef FeatureString) {
1102 TargetID TargetID(parseArchAMDGCN(CPU: STI.getCPU()), STI.getTargetTriple(),
1103 STI.getFeatureBits().test(I: FeatureSupportsXNACK)
1104 ? TargetIDSetting::Any
1105 : TargetIDSetting::Unsupported,
1106 STI.getFeatureBits().test(I: FeatureSupportsSRAMECC)
1107 ? TargetIDSetting::Any
1108 : TargetIDSetting::Unsupported);
1109
1110 // Check if xnack or sramecc is explicitly enabled or disabled. In the
1111 // absence of the target features we assume we must generate code that can run
1112 // in any environment.
1113 SubtargetFeatures Features(FeatureString);
1114 std::optional<bool> XnackRequested;
1115 std::optional<bool> SramEccRequested;
1116
1117 for (const std::string &Feature : Features.getFeatures()) {
1118 if (Feature == "+xnack")
1119 XnackRequested = true;
1120 else if (Feature == "-xnack")
1121 XnackRequested = false;
1122 else if (Feature == "+sramecc")
1123 SramEccRequested = true;
1124 else if (Feature == "-sramecc")
1125 SramEccRequested = false;
1126 }
1127
1128 // Only allow changing xnack setting if the target supports on/off modes.
1129 // Targets without on/off mode support keep their initial setting (Any).
1130
1131 bool XnackSupported = STI.getFeatureBits().test(I: FeatureXNACKOnOffModes);
1132 bool SramEccSupported = TargetID.isSramEccSupported();
1133
1134 if (XnackRequested) {
1135 if (XnackSupported) {
1136 TargetID.setXnackSetting(*XnackRequested ? TargetIDSetting::On
1137 : TargetIDSetting::Off);
1138 } else {
1139 // If a specific xnack setting was requested and this GPU does not support
1140 // xnack emit a warning. Setting will remain set to "Unsupported".
1141 if (*XnackRequested) {
1142 errs() << "warning: xnack 'On' was requested for a processor that does "
1143 "not support it!\n";
1144 } else {
1145 errs() << "warning: xnack 'Off' was requested for a processor that "
1146 "does not support it!\n";
1147 }
1148 }
1149 }
1150
1151 if (SramEccRequested) {
1152 if (SramEccSupported) {
1153 TargetID.setSramEccSetting(*SramEccRequested ? TargetIDSetting::On
1154 : TargetIDSetting::Off);
1155 } else {
1156 // If a specific sramecc setting was requested and this GPU does not
1157 // support sramecc emit a warning. Setting will remain set to
1158 // "Unsupported".
1159 if (*SramEccRequested) {
1160 errs() << "warning: sramecc 'On' was requested for a processor that "
1161 "does not support it!\n";
1162 } else {
1163 errs() << "warning: sramecc 'Off' was requested for a processor that "
1164 "does not support it!\n";
1165 }
1166 }
1167 }
1168
1169 return TargetID;
1170}
1171
1172namespace IsaInfo {
1173
1174unsigned getInstCacheLineSize(const MCSubtargetInfo &STI) {
1175 if (STI.getFeatureBits().test(I: FeatureInstCacheLineSize128))
1176 return 128;
1177 if (STI.getFeatureBits().test(I: FeatureInstCacheLineSize64))
1178 return 64;
1179 return 64;
1180}
1181
1182unsigned getWavefrontSize(const MCSubtargetInfo &STI) {
1183 if (STI.getFeatureBits().test(I: FeatureWavefrontSize16))
1184 return 16;
1185 if (STI.getFeatureBits().test(I: FeatureWavefrontSize32))
1186 return 32;
1187
1188 return 64;
1189}
1190
1191unsigned getLocalMemorySize(const MCSubtargetInfo &STI) {
1192 unsigned BytesPerCU = getAddressableLocalMemorySize(STI);
1193
1194 // "Per CU" really means "per whatever functional block the waves of a
1195 // workgroup must share". So the effective local memory size is doubled in
1196 // WGP mode on gfx10.
1197 if (isGFX10Plus(STI) && !STI.getFeatureBits().test(I: FeatureCuMode))
1198 BytesPerCU *= 2;
1199
1200 return BytesPerCU;
1201}
1202
1203unsigned getAddressableLocalMemorySize(const MCSubtargetInfo &STI) {
1204 if (STI.getFeatureBits().test(I: FeatureAddressableLocalMemorySize32768))
1205 return 32768;
1206 if (STI.getFeatureBits().test(I: FeatureAddressableLocalMemorySize65536))
1207 return 65536;
1208 if (STI.getFeatureBits().test(I: FeatureAddressableLocalMemorySize163840))
1209 return 163840;
1210 if (STI.getFeatureBits().test(I: FeatureAddressableLocalMemorySize327680))
1211 return 327680;
1212 return 32768;
1213}
1214
1215unsigned getEUsPerCU(const MCSubtargetInfo &STI) {
1216 // "Per CU" really means "per whatever functional block the waves of a
1217 // workgroup must share".
1218
1219 // GFX12.5 only supports CU mode, which contains four SIMDs.
1220 if (isGFX1250(STI)) {
1221 assert(STI.getFeatureBits().test(FeatureCuMode));
1222 return 4;
1223 }
1224
1225 // For gfx10 in CU mode the functional block is the CU, which contains
1226 // two SIMDs.
1227 if (isGFX10Plus(STI) && STI.getFeatureBits().test(I: FeatureCuMode))
1228 return 2;
1229
1230 // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP
1231 // contains two CUs, so a total of four SIMDs.
1232 return 4;
1233}
1234
1235unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo &STI,
1236 unsigned FlatWorkGroupSize) {
1237 assert(FlatWorkGroupSize != 0);
1238 if (!STI.getTargetTriple().isAMDGCN())
1239 return 8;
1240 unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI);
1241 unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
1242 if (N == 1) {
1243 // Single-wave workgroups don't consume barrier resources.
1244 return MaxWaves;
1245 }
1246
1247 unsigned MaxBarriers = 16;
1248 if (isGFX10Plus(STI) && !STI.getFeatureBits().test(I: FeatureCuMode))
1249 MaxBarriers = 32;
1250
1251 return std::min(a: MaxWaves / N, b: MaxBarriers);
1252}
1253
1254unsigned getMinWavesPerEU(const MCSubtargetInfo &STI) { return 1; }
1255
1256unsigned getMaxWavesPerEU(const MCSubtargetInfo &STI) {
1257 // FIXME: Need to take scratch memory into account.
1258 if (isGFX90A(STI))
1259 return 8;
1260 if (!isGFX10Plus(STI))
1261 return 10;
1262 return hasGFX10_3Insts(STI) ? 16 : 20;
1263}
1264
1265unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo &STI,
1266 unsigned FlatWorkGroupSize) {
1267 return divideCeil(Numerator: getWavesPerWorkGroup(STI, FlatWorkGroupSize),
1268 Denominator: getEUsPerCU(STI));
1269}
1270
1271unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo &STI) { return 1; }
1272
1273unsigned getWavesPerWorkGroup(const MCSubtargetInfo &STI,
1274 unsigned FlatWorkGroupSize) {
1275 return divideCeil(Numerator: FlatWorkGroupSize, Denominator: getWavefrontSize(STI));
1276}
1277
1278unsigned getSGPRAllocGranule(const MCSubtargetInfo &STI) {
1279 IsaVersion Version = getIsaVersion(GPU: STI.getCPU());
1280 if (Version.Major >= 10)
1281 return getAddressableNumSGPRs(STI);
1282 if (Version.Major >= 8)
1283 return 16;
1284 return 8;
1285}
1286
1287unsigned getSGPREncodingGranule(const MCSubtargetInfo &STI) { return 8; }
1288
1289unsigned getTotalNumSGPRs(const MCSubtargetInfo &STI) {
1290 IsaVersion Version = getIsaVersion(GPU: STI.getCPU());
1291 if (Version.Major >= 8)
1292 return 800;
1293 return 512;
1294}
1295
1296unsigned getAddressableNumSGPRs(const MCSubtargetInfo &STI) {
1297 if (STI.getFeatureBits().test(I: FeatureSGPRInitBug))
1298 return FIXED_NUM_SGPRS_FOR_INIT_BUG;
1299
1300 IsaVersion Version = getIsaVersion(GPU: STI.getCPU());
1301 if (Version.Major >= 10)
1302 return 106;
1303 if (Version.Major >= 8)
1304 return 102;
1305 return 104;
1306}
1307
1308// Per-wave SGPRs reserved for the trap handler when enabled.
1309static unsigned getSGPRTrapHandlerReserve(const MCSubtargetInfo &STI) {
1310 return STI.getFeatureBits().test(I: FeatureTrapHandler) ? TRAP_NUM_SGPRS : 0;
1311}
1312
1313// Per-wave SGPR budget (before the addressable clamp): take off the trap
1314// reserve, round down to \p Granule. Shared by getMinNumSGPRs() and
1315// getMaxNumSGPRs(); getOccupancyWithNumSGPRs() is the closed-form algebraic
1316// inverse of this same budget (it does not call this helper), so the two encode
1317// one model.
1318static unsigned getSGPRBudgetPerWave(unsigned TotalNumSGPRs,
1319 unsigned WavesPerEU, unsigned TrapReserve,
1320 unsigned Granule) {
1321 assert(WavesPerEU != 0 && Granule != 0);
1322 unsigned Budget = TotalNumSGPRs / WavesPerEU;
1323 Budget -= std::min(a: Budget, b: TrapReserve);
1324 return alignDown(Value: Budget, Align: Granule);
1325}
1326
1327unsigned getMinNumSGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU) {
1328 assert(WavesPerEU != 0);
1329
1330 IsaVersion Version = getIsaVersion(GPU: STI.getCPU());
1331 if (Version.Major >= 10)
1332 return 0;
1333
1334 if (WavesPerEU >= getMaxWavesPerEU(STI))
1335 return 0;
1336
1337 unsigned MinNumSGPRs =
1338 getSGPRBudgetPerWave(TotalNumSGPRs: getTotalNumSGPRs(STI), WavesPerEU: WavesPerEU + 1,
1339 TrapReserve: getSGPRTrapHandlerReserve(STI),
1340 Granule: getSGPRAllocGranule(STI)) +
1341 1;
1342 return std::min(a: MinNumSGPRs, b: getAddressableNumSGPRs(STI));
1343}
1344
1345unsigned getMaxNumSGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU,
1346 bool Addressable) {
1347 assert(WavesPerEU != 0);
1348
1349 unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
1350 IsaVersion Version = getIsaVersion(GPU: STI.getCPU());
1351 if (Version.Major >= 10)
1352 return Addressable ? AddressableNumSGPRs : 108;
1353 if (Version.Major >= 8 && !Addressable)
1354 AddressableNumSGPRs = 112;
1355 unsigned MaxNumSGPRs = getSGPRBudgetPerWave(TotalNumSGPRs: getTotalNumSGPRs(STI), WavesPerEU,
1356 TrapReserve: getSGPRTrapHandlerReserve(STI),
1357 Granule: getSGPRAllocGranule(STI));
1358 return std::min(a: MaxNumSGPRs, b: AddressableNumSGPRs);
1359}
1360
1361bool isSGPROccupancyLimited(const MCSubtargetInfo &STI) {
1362 // From GFX10 on the SGPR file is large enough that SGPRs never limit
1363 // occupancy. Kept as one capability so callers don't each test the version.
1364 return getIsaVersion(GPU: STI.getCPU()).Major < 10;
1365}
1366
1367unsigned getNumExtraSGPRs(const MCSubtargetInfo &STI, bool VCCUsed,
1368 bool FlatScrUsed, bool XNACKUsed) {
1369 unsigned ExtraSGPRs = 0;
1370 if (VCCUsed)
1371 ExtraSGPRs = 2;
1372
1373 IsaVersion Version = getIsaVersion(GPU: STI.getCPU());
1374 if (Version.Major >= 10)
1375 return ExtraSGPRs;
1376
1377 if (Version.Major < 8) {
1378 if (FlatScrUsed)
1379 ExtraSGPRs = 4;
1380 } else {
1381 if (XNACKUsed)
1382 ExtraSGPRs = 4;
1383
1384 if (FlatScrUsed ||
1385 STI.getFeatureBits().test(I: AMDGPU::FeatureArchitectedFlatScratch))
1386 ExtraSGPRs = 6;
1387 }
1388
1389 return ExtraSGPRs;
1390}
1391
1392unsigned getNumExtraSGPRs(const MCSubtargetInfo &STI, bool VCCUsed,
1393 bool FlatScrUsed) {
1394 return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
1395 XNACKUsed: STI.getFeatureBits().test(I: AMDGPU::FeatureXNACK));
1396}
1397
1398static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs,
1399 unsigned Granule) {
1400 return divideCeil(Numerator: std::max(a: 1u, b: NumRegs), Denominator: Granule);
1401}
1402
1403unsigned getNumSGPRBlocks(const MCSubtargetInfo &STI, unsigned NumSGPRs) {
1404 // SGPRBlocks is actual number of SGPR blocks minus 1.
1405 return getGranulatedNumRegisterBlocks(NumRegs: NumSGPRs, Granule: getSGPREncodingGranule(STI)) -
1406 1;
1407}
1408
1409unsigned getVGPRAllocGranule(const MCSubtargetInfo &STI,
1410 unsigned DynamicVGPRBlockSize,
1411 std::optional<bool> EnableWavefrontSize32) {
1412 if (STI.getFeatureBits().test(I: FeatureGFX90AInsts))
1413 return 8;
1414
1415 if (DynamicVGPRBlockSize != 0)
1416 return DynamicVGPRBlockSize;
1417
1418 bool IsWave32 = EnableWavefrontSize32
1419 ? *EnableWavefrontSize32
1420 : STI.getFeatureBits().test(I: FeatureWavefrontSize32);
1421
1422 if (STI.getFeatureBits().test(I: Feature1536VGPRs))
1423 return IsWave32 ? 24 : 12;
1424
1425 if (hasGFX10_3Insts(STI))
1426 return IsWave32 ? 16 : 8;
1427
1428 return IsWave32 ? 8 : 4;
1429}
1430
1431unsigned getVGPREncodingGranule(const MCSubtargetInfo &STI,
1432 std::optional<bool> EnableWavefrontSize32) {
1433 if (STI.getFeatureBits().test(I: FeatureGFX90AInsts))
1434 return 8;
1435
1436 bool IsWave32 = EnableWavefrontSize32
1437 ? *EnableWavefrontSize32
1438 : STI.getFeatureBits().test(I: FeatureWavefrontSize32);
1439
1440 if (STI.getFeatureBits().test(I: Feature1024AddressableVGPRs))
1441 return IsWave32 ? 16 : 8;
1442
1443 return IsWave32 ? 8 : 4;
1444}
1445
/// \returns The allocation granule for architectural VGPRs, a fixed 4.
unsigned getArchVGPRAllocGranule() {
  constexpr unsigned ArchVGPRGranule = 4;
  return ArchVGPRGranule;
}
1447
1448unsigned getTotalNumVGPRs(const MCSubtargetInfo &STI) {
1449 if (STI.getFeatureBits().test(I: FeatureGFX90AInsts))
1450 return 512;
1451 if (!isGFX10Plus(STI))
1452 return 256;
1453 bool IsWave32 = STI.getFeatureBits().test(I: FeatureWavefrontSize32);
1454 if (STI.getFeatureBits().test(I: Feature1536VGPRs))
1455 return IsWave32 ? 1536 : 768;
1456 return IsWave32 ? 1024 : 512;
1457}
1458
1459unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo &STI) {
1460 const auto &Features = STI.getFeatureBits();
1461 if (Features.test(I: Feature1024AddressableVGPRs))
1462 return Features.test(I: FeatureWavefrontSize32) ? 1024 : 512;
1463 return 256;
1464}
1465
1466unsigned getAddressableNumVGPRs(const MCSubtargetInfo &STI,
1467 unsigned DynamicVGPRBlockSize) {
1468 const auto &Features = STI.getFeatureBits();
1469 if (Features.test(I: FeatureGFX90AInsts))
1470 return 512;
1471
1472 if (DynamicVGPRBlockSize != 0) {
1473 // On GFX12 we can allocate at most MaxDynamicVGPRBlocks blocks of VGPRs.
1474 return MaxDynamicVGPRBlocks *
1475 getVGPRAllocGranule(STI, DynamicVGPRBlockSize);
1476 }
1477 return getAddressableNumArchVGPRs(STI);
1478}
1479
1480unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo &STI,
1481 unsigned NumVGPRs,
1482 unsigned DynamicVGPRBlockSize) {
1483 return getNumWavesPerEUWithNumVGPRs(
1484 NumVGPRs, Granule: getVGPRAllocGranule(STI, DynamicVGPRBlockSize),
1485 MaxWaves: getMaxWavesPerEU(STI), TotalNumVGPRs: getTotalNumVGPRs(STI));
1486}
1487
1488unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
1489 unsigned MaxWaves,
1490 unsigned TotalNumVGPRs) {
1491 if (NumVGPRs < Granule)
1492 return MaxWaves;
1493 unsigned RoundedRegs = alignTo(Value: NumVGPRs, Align: Granule);
1494 return std::min(a: std::max(a: TotalNumVGPRs / RoundedRegs, b: 1u), b: MaxWaves);
1495}
1496
1497unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
1498 unsigned TotalNumSGPRs, unsigned Granule,
1499 unsigned TrapReserve) {
1500 // Closed-form inverse of getMaxNumSGPRs(): the budget condition
1501 // SGPRs <= alignDown(TotalNumSGPRs / W - TrapReserve, Granule)
1502 // solves to W <= TotalNumSGPRs / (alignTo(SGPRs, Granule) + TrapReserve).
1503 unsigned PerWave = alignTo(Value: SGPRs, Align: Granule) + TrapReserve;
1504 return PerWave ? std::clamp(val: TotalNumSGPRs / PerWave, lo: 1u, hi: MaxWaves) : MaxWaves;
1505}
1506
1507unsigned getOccupancyWithNumSGPRs(const MCSubtargetInfo &STI, unsigned SGPRs) {
1508 unsigned MaxWaves = getMaxWavesPerEU(STI);
1509
1510 if (!isSGPROccupancyLimited(STI))
1511 return MaxWaves;
1512
1513 return getOccupancyWithNumSGPRs(SGPRs, MaxWaves, TotalNumSGPRs: getTotalNumSGPRs(STI),
1514 Granule: getSGPRAllocGranule(STI),
1515 TrapReserve: getSGPRTrapHandlerReserve(STI));
1516}
1517
1518unsigned getMinNumVGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU,
1519 unsigned DynamicVGPRBlockSize) {
1520 assert(WavesPerEU != 0);
1521
1522 // In dynamic VGPR mode, (static) occupancy does not depend on VGPR usage,
1523 // so getMaxNumVGPRs does not depend on WavesPerEU, and thus we need to return
1524 // zero because there is no nonzero VGPR usage N where going below N
1525 // achieves higher (static) occupancy.
1526 bool DynamicVGPREnabled = (DynamicVGPRBlockSize != 0);
1527 if (DynamicVGPREnabled)
1528 return 0;
1529
1530 unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
1531 if (WavesPerEU >= MaxWavesPerEU)
1532 return 0;
1533
1534 unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
1535 unsigned AddrsableNumVGPRs =
1536 getAddressableNumVGPRs(STI, DynamicVGPRBlockSize);
1537 unsigned Granule = getVGPRAllocGranule(STI, DynamicVGPRBlockSize);
1538 unsigned MaxNumVGPRs = alignDown(Value: TotNumVGPRs / WavesPerEU, Align: Granule);
1539
1540 if (MaxNumVGPRs == alignDown(Value: TotNumVGPRs / MaxWavesPerEU, Align: Granule))
1541 return 0;
1542
1543 unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, NumVGPRs: AddrsableNumVGPRs,
1544 DynamicVGPRBlockSize);
1545 if (WavesPerEU < MinWavesPerEU)
1546 return getMinNumVGPRs(STI, WavesPerEU: MinWavesPerEU, DynamicVGPRBlockSize);
1547
1548 unsigned MaxNumVGPRsNext = alignDown(Value: TotNumVGPRs / (WavesPerEU + 1), Align: Granule);
1549 unsigned MinNumVGPRs = 1 + std::min(a: MaxNumVGPRs - Granule, b: MaxNumVGPRsNext);
1550 return std::min(a: MinNumVGPRs, b: AddrsableNumVGPRs);
1551}
1552
1553unsigned getMaxNumVGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU,
1554 unsigned DynamicVGPRBlockSize) {
1555 assert(WavesPerEU != 0);
1556
1557 // In dynamic VGPR mode, WavesPerEU does not imply a VGPR limit.
1558 bool DynamicVGPREnabled = (DynamicVGPRBlockSize != 0);
1559 unsigned MaxNumVGPRs =
1560 DynamicVGPREnabled
1561 ? getTotalNumVGPRs(STI)
1562 : alignDown(Value: getTotalNumVGPRs(STI) / WavesPerEU,
1563 Align: getVGPRAllocGranule(STI, DynamicVGPRBlockSize));
1564 unsigned AddressableNumVGPRs =
1565 getAddressableNumVGPRs(STI, DynamicVGPRBlockSize);
1566 return std::min(a: MaxNumVGPRs, b: AddressableNumVGPRs);
1567}
1568
1569unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo &STI, unsigned NumVGPRs,
1570 std::optional<bool> EnableWavefrontSize32) {
1571 return getGranulatedNumRegisterBlocks(
1572 NumRegs: NumVGPRs, Granule: getVGPREncodingGranule(STI, EnableWavefrontSize32)) -
1573 1;
1574}
1575
1576unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo &STI,
1577 unsigned NumVGPRs,
1578 unsigned DynamicVGPRBlockSize,
1579 std::optional<bool> EnableWavefrontSize32) {
1580 return getGranulatedNumRegisterBlocks(
1581 NumRegs: NumVGPRs,
1582 Granule: getVGPRAllocGranule(STI, DynamicVGPRBlockSize, EnableWavefrontSize32));
1583}
1584} // end namespace IsaInfo
1585
1586void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode,
1587 const MCSubtargetInfo &STI) {
1588 IsaVersion Version = getIsaVersion(GPU: STI.getCPU());
1589 KernelCode.amd_kernel_code_version_major = 1;
1590 KernelCode.amd_kernel_code_version_minor = 2;
1591 KernelCode.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
1592 KernelCode.amd_machine_version_major = Version.Major;
1593 KernelCode.amd_machine_version_minor = Version.Minor;
1594 KernelCode.amd_machine_version_stepping = Version.Stepping;
1595 KernelCode.kernel_code_entry_byte_offset = sizeof(amd_kernel_code_t);
1596 if (STI.getFeatureBits().test(I: FeatureWavefrontSize32)) {
1597 KernelCode.wavefront_size = 5;
1598 KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
1599 } else {
1600 KernelCode.wavefront_size = 6;
1601 }
1602
1603 // If the code object does not support indirect functions, then the value must
1604 // be 0xffffffff.
1605 KernelCode.call_convention = -1;
1606
1607 // These alignment values are specified in powers of two, so alignment =
1608 // 2^n. The minimum alignment is 2^4 = 16.
1609 KernelCode.kernarg_segment_alignment = 4;
1610 KernelCode.group_segment_alignment = 4;
1611 KernelCode.private_segment_alignment = 4;
1612
1613 if (Version.Major >= 10) {
1614 KernelCode.compute_pgm_resource_registers |=
1615 S_00B848_WGP_MODE(STI.getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
1616 S_00B848_MEM_ORDERED(1) | S_00B848_FWD_PROGRESS(1);
1617 }
1618}
1619
1620bool isGroupSegment(const GlobalValue *GV) {
1621 return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
1622}
1623
1624bool isGlobalSegment(const GlobalValue *GV) {
1625 return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
1626}
1627
1628bool isReadOnlySegment(const GlobalValue *GV) {
1629 unsigned AS = GV->getAddressSpace();
1630 return AS == AMDGPUAS::CONSTANT_ADDRESS ||
1631 AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
1632}
1633
1634bool shouldEmitConstantsToTextSection(const Triple &TT) {
1635 return TT.getArch() == Triple::r600;
1636}
1637
/// \returns True if \p C is one of the register prefix letters 'v', 's'
/// or 'a'.
static bool isValidRegPrefix(char C) {
  switch (C) {
  case 'v':
  case 's':
  case 'a':
    return true;
  default:
    return false;
  }
}
1641
1642std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef RegName) {
1643 char Kind = RegName.front();
1644 if (!isValidRegPrefix(C: Kind))
1645 return {};
1646
1647 RegName = RegName.drop_front();
1648 if (RegName.consume_front(Prefix: "[")) {
1649 unsigned Idx, End;
1650 bool Failed = RegName.consumeInteger(Radix: 10, Result&: Idx);
1651 Failed |= !RegName.consume_front(Prefix: ":");
1652 Failed |= RegName.consumeInteger(Radix: 10, Result&: End);
1653 Failed |= !RegName.consume_back(Suffix: "]");
1654 if (!Failed) {
1655 unsigned NumRegs = End - Idx + 1;
1656 if (NumRegs > 1)
1657 return {Kind, Idx, NumRegs};
1658 }
1659 } else {
1660 unsigned Idx;
1661 bool Failed = RegName.getAsInteger(Radix: 10, Result&: Idx);
1662 if (!Failed)
1663 return {Kind, Idx, 1};
1664 }
1665
1666 return {};
1667}
1668
1669std::tuple<char, unsigned, unsigned>
1670parseAsmConstraintPhysReg(StringRef Constraint) {
1671 StringRef RegName = Constraint;
1672 if (!RegName.consume_front(Prefix: "{") || !RegName.consume_back(Suffix: "}"))
1673 return {};
1674 return parseAsmPhysRegName(RegName);
1675}
1676
1677std::pair<unsigned, unsigned>
1678getIntegerPairAttribute(const Function &F, StringRef Name,
1679 std::pair<unsigned, unsigned> Default,
1680 bool OnlyFirstRequired) {
1681 if (auto Attr = getIntegerPairAttribute(F, Name, OnlyFirstRequired))
1682 return {Attr->first, Attr->second.value_or(u&: Default.second)};
1683 return Default;
1684}
1685
1686std::optional<std::pair<unsigned, std::optional<unsigned>>>
1687getIntegerPairAttribute(const Function &F, StringRef Name,
1688 bool OnlyFirstRequired) {
1689 Attribute A = F.getFnAttribute(Kind: Name);
1690 if (!A.isStringAttribute())
1691 return std::nullopt;
1692
1693 LLVMContext &Ctx = F.getContext();
1694 std::pair<unsigned, std::optional<unsigned>> Ints;
1695 std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(Separator: ',');
1696 if (Strs.first.trim().getAsInteger(Radix: 0, Result&: Ints.first)) {
1697 Ctx.emitError(ErrorStr: "can't parse first integer attribute " + Name);
1698 return std::nullopt;
1699 }
1700 unsigned Second = 0;
1701 if (Strs.second.trim().getAsInteger(Radix: 0, Result&: Second)) {
1702 if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
1703 Ctx.emitError(ErrorStr: "can't parse second integer attribute " + Name);
1704 return std::nullopt;
1705 }
1706 } else {
1707 Ints.second = Second;
1708 }
1709
1710 return Ints;
1711}
1712
1713SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
1714 unsigned Size,
1715 unsigned DefaultVal) {
1716 std::optional<SmallVector<unsigned>> R =
1717 getIntegerVecAttribute(F, Name, Size);
1718 return R.has_value() ? *R : SmallVector<unsigned>(Size, DefaultVal);
1719}
1720
1721std::optional<SmallVector<unsigned>>
1722getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size) {
1723 assert(Size > 2);
1724 LLVMContext &Ctx = F.getContext();
1725
1726 Attribute A = F.getFnAttribute(Kind: Name);
1727 if (!A.isValid())
1728 return std::nullopt;
1729 if (!A.isStringAttribute()) {
1730 Ctx.emitError(ErrorStr: Name + " is not a string attribute");
1731 return std::nullopt;
1732 }
1733
1734 SmallVector<unsigned> Vals(Size);
1735
1736 StringRef S = A.getValueAsString();
1737 unsigned i = 0;
1738 for (; !S.empty() && i < Size; i++) {
1739 std::pair<StringRef, StringRef> Strs = S.split(Separator: ',');
1740 unsigned IntVal;
1741 if (Strs.first.trim().getAsInteger(Radix: 0, Result&: IntVal)) {
1742 Ctx.emitError(ErrorStr: "can't parse integer attribute " + Strs.first + " in " +
1743 Name);
1744 return std::nullopt;
1745 }
1746 Vals[i] = IntVal;
1747 S = Strs.second;
1748 }
1749
1750 if (!S.empty() || i < Size) {
1751 Ctx.emitError(ErrorStr: "attribute " + Name +
1752 " has incorrect number of integers; expected " +
1753 llvm::utostr(X: Size));
1754 return std::nullopt;
1755 }
1756 return Vals;
1757}
1758
1759bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val) {
1760 assert((MD.getNumOperands() % 2 == 0) && "invalid number of operands!");
1761 for (unsigned I = 0, E = MD.getNumOperands() / 2; I != E; ++I) {
1762 auto Low =
1763 mdconst::extract<ConstantInt>(MD: MD.getOperand(I: 2 * I + 0))->getValue();
1764 auto High =
1765 mdconst::extract<ConstantInt>(MD: MD.getOperand(I: 2 * I + 1))->getValue();
1766 // There are two types of [A; B) ranges:
1767 // A < B, e.g. [4; 5) which is a range that only includes 4.
1768 // A > B, e.g. [5; 4) which is a range that wraps around and includes
1769 // everything except 4.
1770 if (Low.ult(RHS: High)) {
1771 if (Low.ule(RHS: Val) && High.ugt(RHS: Val))
1772 return true;
1773 } else {
1774 if (Low.uge(RHS: Val) && High.ult(RHS: Val))
1775 return true;
1776 }
1777 }
1778
1779 return false;
1780}
1781
1782unsigned getVmcntBitMask(const IsaVersion &Version) {
1783 return (1 << (getVmcntBitWidthLo(VersionMajor: Version.Major) +
1784 getVmcntBitWidthHi(VersionMajor: Version.Major))) -
1785 1;
1786}
1787
1788unsigned getLoadcntBitMask(const IsaVersion &Version) {
1789 return (1 << getLoadcntBitWidth(VersionMajor: Version.Major)) - 1;
1790}
1791
1792unsigned getSamplecntBitMask(const IsaVersion &Version) {
1793 return (1 << getSamplecntBitWidth(VersionMajor: Version.Major)) - 1;
1794}
1795
1796unsigned getBvhcntBitMask(const IsaVersion &Version) {
1797 return (1 << getBvhcntBitWidth(VersionMajor: Version.Major)) - 1;
1798}
1799
1800unsigned getExpcntBitMask(const IsaVersion &Version) {
1801 return (1 << getExpcntBitWidth(VersionMajor: Version.Major)) - 1;
1802}
1803
1804unsigned getLgkmcntBitMask(const IsaVersion &Version) {
1805 return (1 << getLgkmcntBitWidth(VersionMajor: Version.Major)) - 1;
1806}
1807
1808unsigned getDscntBitMask(const IsaVersion &Version) {
1809 return (1 << getDscntBitWidth(VersionMajor: Version.Major)) - 1;
1810}
1811
1812unsigned getKmcntBitMask(const IsaVersion &Version) {
1813 return (1 << getKmcntBitWidth(VersionMajor: Version.Major)) - 1;
1814}
1815
1816unsigned getXcntBitMask(const IsaVersion &Version) {
1817 return (1 << getXcntBitWidth(VersionMajor: Version.Major, VersionMinor: Version.Minor)) - 1;
1818}
1819
1820unsigned getAsynccntBitMask(const IsaVersion &Version) {
1821 return (1 << getAsynccntBitWidth(VersionMajor: Version.Major, VersionMinor: Version.Minor)) - 1;
1822}
1823
1824unsigned getStorecntBitMask(const IsaVersion &Version) {
1825 return (1 << getStorecntBitWidth(VersionMajor: Version.Major)) - 1;
1826}
1827
1828unsigned getWaitcntBitMask(const IsaVersion &Version) {
1829 unsigned VmcntLo = getBitMask(Shift: getVmcntBitShiftLo(VersionMajor: Version.Major),
1830 Width: getVmcntBitWidthLo(VersionMajor: Version.Major));
1831 unsigned Expcnt = getBitMask(Shift: getExpcntBitShift(VersionMajor: Version.Major),
1832 Width: getExpcntBitWidth(VersionMajor: Version.Major));
1833 unsigned Lgkmcnt = getBitMask(Shift: getLgkmcntBitShift(VersionMajor: Version.Major),
1834 Width: getLgkmcntBitWidth(VersionMajor: Version.Major));
1835 unsigned VmcntHi = getBitMask(Shift: getVmcntBitShiftHi(VersionMajor: Version.Major),
1836 Width: getVmcntBitWidthHi(VersionMajor: Version.Major));
1837 return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
1838}
1839
1840unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1841 unsigned VmcntLo = unpackBits(Src: Waitcnt, Shift: getVmcntBitShiftLo(VersionMajor: Version.Major),
1842 Width: getVmcntBitWidthLo(VersionMajor: Version.Major));
1843 unsigned VmcntHi = unpackBits(Src: Waitcnt, Shift: getVmcntBitShiftHi(VersionMajor: Version.Major),
1844 Width: getVmcntBitWidthHi(VersionMajor: Version.Major));
1845 return VmcntLo | VmcntHi << getVmcntBitWidthLo(VersionMajor: Version.Major);
1846}
1847
1848unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
1849 return unpackBits(Src: Waitcnt, Shift: getExpcntBitShift(VersionMajor: Version.Major),
1850 Width: getExpcntBitWidth(VersionMajor: Version.Major));
1851}
1852
1853unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1854 return unpackBits(Src: Waitcnt, Shift: getLgkmcntBitShift(VersionMajor: Version.Major),
1855 Width: getLgkmcntBitWidth(VersionMajor: Version.Major));
1856}
1857
1858unsigned decodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt) {
1859 return unpackBits(Src: Waitcnt, Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1860 Width: getLoadcntBitWidth(VersionMajor: Version.Major));
1861}
1862
1863unsigned decodeStorecnt(const IsaVersion &Version, unsigned Waitcnt) {
1864 return unpackBits(Src: Waitcnt, Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1865 Width: getStorecntBitWidth(VersionMajor: Version.Major));
1866}
1867
1868unsigned decodeDscnt(const IsaVersion &Version, unsigned Waitcnt) {
1869 return unpackBits(Src: Waitcnt, Shift: getDscntBitShift(VersionMajor: Version.Major),
1870 Width: getDscntBitWidth(VersionMajor: Version.Major));
1871}
1872
1873void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt,
1874 unsigned &Expcnt, unsigned &Lgkmcnt) {
1875 Vmcnt = decodeVmcnt(Version, Waitcnt);
1876 Expcnt = decodeExpcnt(Version, Waitcnt);
1877 Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
1878}
1879
1880unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1881 unsigned Vmcnt) {
1882 Waitcnt = packBits(Src: Vmcnt, Dst: Waitcnt, Shift: getVmcntBitShiftLo(VersionMajor: Version.Major),
1883 Width: getVmcntBitWidthLo(VersionMajor: Version.Major));
1884 return packBits(Src: Vmcnt >> getVmcntBitWidthLo(VersionMajor: Version.Major), Dst: Waitcnt,
1885 Shift: getVmcntBitShiftHi(VersionMajor: Version.Major),
1886 Width: getVmcntBitWidthHi(VersionMajor: Version.Major));
1887}
1888
1889unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1890 unsigned Expcnt) {
1891 return packBits(Src: Expcnt, Dst: Waitcnt, Shift: getExpcntBitShift(VersionMajor: Version.Major),
1892 Width: getExpcntBitWidth(VersionMajor: Version.Major));
1893}
1894
1895unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1896 unsigned Lgkmcnt) {
1897 return packBits(Src: Lgkmcnt, Dst: Waitcnt, Shift: getLgkmcntBitShift(VersionMajor: Version.Major),
1898 Width: getLgkmcntBitWidth(VersionMajor: Version.Major));
1899}
1900
1901unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
1902 unsigned Expcnt, unsigned Lgkmcnt) {
1903 unsigned Waitcnt = getWaitcntBitMask(Version);
1904 Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
1905 Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
1906 Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
1907 return Waitcnt;
1908}
1909
1910static unsigned getCombinedCountBitMask(const IsaVersion &Version,
1911 bool IsStore) {
1912 unsigned Dscnt = getBitMask(Shift: getDscntBitShift(VersionMajor: Version.Major),
1913 Width: getDscntBitWidth(VersionMajor: Version.Major));
1914 if (IsStore) {
1915 unsigned Storecnt = getBitMask(Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1916 Width: getStorecntBitWidth(VersionMajor: Version.Major));
1917 return Dscnt | Storecnt;
1918 }
1919 unsigned Loadcnt = getBitMask(Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1920 Width: getLoadcntBitWidth(VersionMajor: Version.Major));
1921 return Dscnt | Loadcnt;
1922}
1923
1924static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt,
1925 unsigned Loadcnt) {
1926 return packBits(Src: Loadcnt, Dst: Waitcnt, Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1927 Width: getLoadcntBitWidth(VersionMajor: Version.Major));
1928}
1929
1930static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt,
1931 unsigned Storecnt) {
1932 return packBits(Src: Storecnt, Dst: Waitcnt, Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1933 Width: getStorecntBitWidth(VersionMajor: Version.Major));
1934}
1935
1936static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt,
1937 unsigned Dscnt) {
1938 return packBits(Src: Dscnt, Dst: Waitcnt, Shift: getDscntBitShift(VersionMajor: Version.Major),
1939 Width: getDscntBitWidth(VersionMajor: Version.Major));
1940}
1941
1942unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt,
1943 unsigned Dscnt) {
1944 unsigned Waitcnt = getCombinedCountBitMask(Version, IsStore: false);
1945 Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt);
1946 Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
1947 return Waitcnt;
1948}
1949
1950unsigned encodeStorecntDscnt(const IsaVersion &Version, unsigned Storecnt,
1951 unsigned Dscnt) {
1952 unsigned Waitcnt = getCombinedCountBitMask(Version, IsStore: true);
1953 Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt);
1954 Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
1955 return Waitcnt;
1956}
1957
1958//===----------------------------------------------------------------------===//
1959// Custom Operand Values
1960//===----------------------------------------------------------------------===//
1961
1962static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr,
1963 int Size,
1964 const MCSubtargetInfo &STI) {
1965 unsigned Enc = 0;
1966 for (int Idx = 0; Idx < Size; ++Idx) {
1967 const auto &Op = Opr[Idx];
1968 if (Op.isSupported(STI))
1969 Enc |= Op.encode(Val: Op.Default);
1970 }
1971 return Enc;
1972}
1973
1974static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr,
1975 int Size, unsigned Code,
1976 bool &HasNonDefaultVal,
1977 const MCSubtargetInfo &STI) {
1978 unsigned UsedOprMask = 0;
1979 HasNonDefaultVal = false;
1980 for (int Idx = 0; Idx < Size; ++Idx) {
1981 const auto &Op = Opr[Idx];
1982 if (!Op.isSupported(STI))
1983 continue;
1984 UsedOprMask |= Op.getMask();
1985 unsigned Val = Op.decode(Code);
1986 if (!Op.isValid(Val))
1987 return false;
1988 HasNonDefaultVal |= (Val != Op.Default);
1989 }
1990 return (Code & ~UsedOprMask) == 0;
1991}
1992
1993static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
1994 unsigned Code, int &Idx, StringRef &Name,
1995 unsigned &Val, bool &IsDefault,
1996 const MCSubtargetInfo &STI) {
1997 while (Idx < Size) {
1998 const auto &Op = Opr[Idx++];
1999 if (Op.isSupported(STI)) {
2000 Name = Op.Name;
2001 Val = Op.decode(Code);
2002 IsDefault = (Val == Op.Default);
2003 return true;
2004 }
2005 }
2006
2007 return false;
2008}
2009
2010static int encodeCustomOperandVal(const CustomOperandVal &Op,
2011 int64_t InputVal) {
2012 if (InputVal < 0 || InputVal > Op.Max)
2013 return OPR_VAL_INVALID;
2014 return Op.encode(Val: InputVal);
2015}
2016
2017static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
2018 const StringRef Name, int64_t InputVal,
2019 unsigned &UsedOprMask,
2020 const MCSubtargetInfo &STI) {
2021 int InvalidId = OPR_ID_UNKNOWN;
2022 for (int Idx = 0; Idx < Size; ++Idx) {
2023 const auto &Op = Opr[Idx];
2024 if (Op.Name == Name) {
2025 if (!Op.isSupported(STI)) {
2026 InvalidId = OPR_ID_UNSUPPORTED;
2027 continue;
2028 }
2029 auto OprMask = Op.getMask();
2030 if (OprMask & UsedOprMask)
2031 return OPR_ID_DUPLICATE;
2032 UsedOprMask |= OprMask;
2033 return encodeCustomOperandVal(Op, InputVal);
2034 }
2035 }
2036 return InvalidId;
2037}
2038
2039//===----------------------------------------------------------------------===//
2040// DepCtr
2041//===----------------------------------------------------------------------===//
2042
2043namespace DepCtr {
2044
2045int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) {
2046 static int Default = -1;
2047 if (Default == -1)
2048 Default = getDefaultCustomOperandEncoding(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, STI);
2049 return Default;
2050}
2051
2052bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
2053 const MCSubtargetInfo &STI) {
2054 return isSymbolicCustomOperandEncoding(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, Code,
2055 HasNonDefaultVal, STI);
2056}
2057
2058bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
2059 bool &IsDefault, const MCSubtargetInfo &STI) {
2060 return decodeCustomOperand(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, Code, Idx&: Id, Name, Val,
2061 IsDefault, STI);
2062}
2063
2064int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
2065 const MCSubtargetInfo &STI) {
2066 return encodeCustomOperand(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, Name, InputVal: Val, UsedOprMask,
2067 STI);
2068}
2069
2070unsigned getVaVdstBitMask() { return (1 << getVaVdstBitWidth()) - 1; }
2071
2072unsigned getVaSdstBitMask() { return (1 << getVaSdstBitWidth()) - 1; }
2073
2074unsigned getVaSsrcBitMask() { return (1 << getVaSsrcBitWidth()) - 1; }
2075
2076unsigned getHoldCntBitMask(const IsaVersion &Version) {
2077 return (1 << getHoldCntWidth(VersionMajor: Version.Major, VersionMinor: Version.Minor)) - 1;
2078}
2079
2080unsigned getVmVsrcBitMask() { return (1 << getVmVsrcBitWidth()) - 1; }
2081
2082unsigned getVaVccBitMask() { return (1 << getVaVccBitWidth()) - 1; }
2083
2084unsigned getSaSdstBitMask() { return (1 << getSaSdstBitWidth()) - 1; }
2085
2086unsigned decodeFieldVmVsrc(unsigned Encoded) {
2087 return unpackBits(Src: Encoded, Shift: getVmVsrcBitShift(), Width: getVmVsrcBitWidth());
2088}
2089
2090unsigned decodeFieldVaVdst(unsigned Encoded) {
2091 return unpackBits(Src: Encoded, Shift: getVaVdstBitShift(), Width: getVaVdstBitWidth());
2092}
2093
2094unsigned decodeFieldSaSdst(unsigned Encoded) {
2095 return unpackBits(Src: Encoded, Shift: getSaSdstBitShift(), Width: getSaSdstBitWidth());
2096}
2097
2098unsigned decodeFieldVaSdst(unsigned Encoded) {
2099 return unpackBits(Src: Encoded, Shift: getVaSdstBitShift(), Width: getVaSdstBitWidth());
2100}
2101
2102unsigned decodeFieldVaVcc(unsigned Encoded) {
2103 return unpackBits(Src: Encoded, Shift: getVaVccBitShift(), Width: getVaVccBitWidth());
2104}
2105
2106unsigned decodeFieldVaSsrc(unsigned Encoded) {
2107 return unpackBits(Src: Encoded, Shift: getVaSsrcBitShift(), Width: getVaSsrcBitWidth());
2108}
2109
2110unsigned decodeFieldHoldCnt(unsigned Encoded, const IsaVersion &Version) {
2111 return unpackBits(Src: Encoded, Shift: getHoldCntBitShift(),
2112 Width: getHoldCntWidth(VersionMajor: Version.Major, VersionMinor: Version.Minor));
2113}
2114
2115unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
2116 return packBits(Src: VmVsrc, Dst: Encoded, Shift: getVmVsrcBitShift(), Width: getVmVsrcBitWidth());
2117}
2118
2119unsigned encodeFieldVmVsrc(unsigned VmVsrc, const MCSubtargetInfo &STI) {
2120 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2121 return encodeFieldVmVsrc(Encoded, VmVsrc);
2122}
2123
2124unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) {
2125 return packBits(Src: VaVdst, Dst: Encoded, Shift: getVaVdstBitShift(), Width: getVaVdstBitWidth());
2126}
2127
2128unsigned encodeFieldVaVdst(unsigned VaVdst, const MCSubtargetInfo &STI) {
2129 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2130 return encodeFieldVaVdst(Encoded, VaVdst);
2131}
2132
2133unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) {
2134 return packBits(Src: SaSdst, Dst: Encoded, Shift: getSaSdstBitShift(), Width: getSaSdstBitWidth());
2135}
2136
2137unsigned encodeFieldSaSdst(unsigned SaSdst, const MCSubtargetInfo &STI) {
2138 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2139 return encodeFieldSaSdst(Encoded, SaSdst);
2140}
2141
2142unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst) {
2143 return packBits(Src: VaSdst, Dst: Encoded, Shift: getVaSdstBitShift(), Width: getVaSdstBitWidth());
2144}
2145
2146unsigned encodeFieldVaSdst(unsigned VaSdst, const MCSubtargetInfo &STI) {
2147 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2148 return encodeFieldVaSdst(Encoded, VaSdst);
2149}
2150
2151unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc) {
2152 return packBits(Src: VaVcc, Dst: Encoded, Shift: getVaVccBitShift(), Width: getVaVccBitWidth());
2153}
2154
2155unsigned encodeFieldVaVcc(unsigned VaVcc, const MCSubtargetInfo &STI) {
2156 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2157 return encodeFieldVaVcc(Encoded, VaVcc);
2158}
2159
2160unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc) {
2161 return packBits(Src: VaSsrc, Dst: Encoded, Shift: getVaSsrcBitShift(), Width: getVaSsrcBitWidth());
2162}
2163
2164unsigned encodeFieldVaSsrc(unsigned VaSsrc, const MCSubtargetInfo &STI) {
2165 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2166 return encodeFieldVaSsrc(Encoded, VaSsrc);
2167}
2168
2169unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt,
2170 const IsaVersion &Version) {
2171 return packBits(Src: HoldCnt, Dst: Encoded, Shift: getHoldCntBitShift(),
2172 Width: getHoldCntWidth(VersionMajor: Version.Major, VersionMinor: Version.Minor));
2173}
2174
2175unsigned encodeFieldHoldCnt(unsigned HoldCnt, const MCSubtargetInfo &STI) {
2176 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2177 return encodeFieldHoldCnt(Encoded, HoldCnt, Version: getIsaVersion(GPU: STI.getCPU()));
2178}
2179
2180} // namespace DepCtr
2181
2182//===----------------------------------------------------------------------===//
2183// exp tgt
2184//===----------------------------------------------------------------------===//
2185
2186namespace Exp {
2187
2188struct ExpTgt {
2189 StringLiteral Name;
2190 unsigned Tgt;
2191 unsigned MaxIndex;
2192};
2193
2194// clang-format off
2195static constexpr ExpTgt ExpTgtInfo[] = {
2196 {.Name: {"null"}, .Tgt: ET_NULL, .MaxIndex: ET_NULL_MAX_IDX},
2197 {.Name: {"mrtz"}, .Tgt: ET_MRTZ, .MaxIndex: ET_MRTZ_MAX_IDX},
2198 {.Name: {"prim"}, .Tgt: ET_PRIM, .MaxIndex: ET_PRIM_MAX_IDX},
2199 {.Name: {"mrt"}, .Tgt: ET_MRT0, .MaxIndex: ET_MRT_MAX_IDX},
2200 {.Name: {"pos"}, .Tgt: ET_POS0, .MaxIndex: ET_POS_MAX_IDX},
2201 {.Name: {"dual_src_blend"},.Tgt: ET_DUAL_SRC_BLEND0, .MaxIndex: ET_DUAL_SRC_BLEND_MAX_IDX},
2202 {.Name: {"param"}, .Tgt: ET_PARAM0, .MaxIndex: ET_PARAM_MAX_IDX},
2203};
2204// clang-format on
2205
2206bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
2207 for (const ExpTgt &Val : ExpTgtInfo) {
2208 if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
2209 Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
2210 Name = Val.Name;
2211 return true;
2212 }
2213 }
2214 return false;
2215}
2216
2217unsigned getTgtId(const StringRef Name) {
2218
2219 for (const ExpTgt &Val : ExpTgtInfo) {
2220 if (Val.MaxIndex == 0 && Name == Val.Name)
2221 return Val.Tgt;
2222
2223 if (Val.MaxIndex > 0 && Name.starts_with(Prefix: Val.Name)) {
2224 StringRef Suffix = Name.drop_front(N: Val.Name.size());
2225
2226 unsigned Id;
2227 if (Suffix.getAsInteger(Radix: 10, Result&: Id) || Id > Val.MaxIndex)
2228 return ET_INVALID;
2229
2230 // Disable leading zeroes
2231 if (Suffix.size() > 1 && Suffix[0] == '0')
2232 return ET_INVALID;
2233
2234 return Val.Tgt + Id;
2235 }
2236 }
2237 return ET_INVALID;
2238}
2239
2240bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
2241 switch (Id) {
2242 case ET_NULL:
2243 return !isGFX11Plus(STI);
2244 case ET_POS4:
2245 case ET_PRIM:
2246 return isGFX10Plus(STI);
2247 case ET_DUAL_SRC_BLEND0:
2248 case ET_DUAL_SRC_BLEND1:
2249 return isGFX11Plus(STI);
2250 default:
2251 if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
2252 return !isGFX11Plus(STI) || isGFX13Plus(STI);
2253 return true;
2254 }
2255}
2256
2257} // namespace Exp
2258
2259//===----------------------------------------------------------------------===//
2260// MTBUF Format
2261//===----------------------------------------------------------------------===//
2262
2263namespace MTBUFFormat {
2264
2265int64_t getDfmt(const StringRef Name) {
2266 for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
2267 if (Name == DfmtSymbolic[Id])
2268 return Id;
2269 }
2270 return DFMT_UNDEF;
2271}
2272
2273StringRef getDfmtName(unsigned Id) {
2274 assert(Id <= DFMT_MAX);
2275 return DfmtSymbolic[Id];
2276}
2277
2278static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) {
2279 if (isSI(STI) || isCI(STI))
2280 return NfmtSymbolicSICI;
2281 if (isVI(STI) || isGFX9(STI))
2282 return NfmtSymbolicVI;
2283 return NfmtSymbolicGFX10;
2284}
2285
2286int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
2287 const auto *lookupTable = getNfmtLookupTable(STI);
2288 for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
2289 if (Name == lookupTable[Id])
2290 return Id;
2291 }
2292 return NFMT_UNDEF;
2293}
2294
2295StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
2296 assert(Id <= NFMT_MAX);
2297 return getNfmtLookupTable(STI)[Id];
2298}
2299
2300bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
2301 unsigned Dfmt;
2302 unsigned Nfmt;
2303 decodeDfmtNfmt(Format: Id, Dfmt, Nfmt);
2304 return isValidNfmt(Val: Nfmt, STI);
2305}
2306
2307bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
2308 return !getNfmtName(Id, STI).empty();
2309}
2310
2311int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
2312 return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
2313}
2314
2315void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
2316 Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
2317 Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
2318}
2319
2320int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) {
2321 if (isGFX11Plus(STI)) {
2322 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
2323 if (Name == UfmtSymbolicGFX11[Id])
2324 return Id;
2325 }
2326 } else {
2327 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
2328 if (Name == UfmtSymbolicGFX10[Id])
2329 return Id;
2330 }
2331 }
2332 return UFMT_UNDEF;
2333}
2334
2335StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) {
2336 if (isValidUnifiedFormat(Val: Id, STI))
2337 return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
2338 return "";
2339}
2340
2341bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
2342 return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
2343}
2344
2345int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
2346 const MCSubtargetInfo &STI) {
2347 int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
2348 if (isGFX11Plus(STI)) {
2349 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
2350 if (Fmt == DfmtNfmt2UFmtGFX11[Id])
2351 return Id;
2352 }
2353 } else {
2354 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
2355 if (Fmt == DfmtNfmt2UFmtGFX10[Id])
2356 return Id;
2357 }
2358 }
2359 return UFMT_UNDEF;
2360}
2361
2362bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
2363 return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
2364}
2365
2366unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
2367 if (isGFX10Plus(STI))
2368 return UFMT_DEFAULT;
2369 return DFMT_NFMT_DEFAULT;
2370}
2371
2372} // namespace MTBUFFormat
2373
2374//===----------------------------------------------------------------------===//
2375// SendMsg
2376//===----------------------------------------------------------------------===//
2377
2378namespace SendMsg {
2379
2380static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) {
2381 return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_;
2382}
2383
2384bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
2385 return (MsgId & ~(getMsgIdMask(STI))) == 0;
2386}
2387
2388bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
2389 bool Strict) {
2390 assert(isValidMsgId(MsgId, STI));
2391
2392 if (!Strict)
2393 return 0 <= OpId && isUInt<OP_WIDTH_>(x: OpId);
2394
2395 if (msgRequiresOp(MsgId, STI)) {
2396 if (MsgId == ID_GS_PreGFX11 && OpId == OP_GS_NOP)
2397 return false;
2398
2399 return !getMsgOpName(MsgId, Encoding: OpId, STI).empty();
2400 }
2401
2402 return OpId == OP_NONE_;
2403}
2404
2405bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
2406 const MCSubtargetInfo &STI, bool Strict) {
2407 assert(isValidMsgOp(MsgId, OpId, STI, Strict));
2408
2409 if (!Strict)
2410 return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(x: StreamId);
2411
2412 if (!isGFX11Plus(STI)) {
2413 switch (MsgId) {
2414 case ID_GS_PreGFX11:
2415 return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
2416 case ID_GS_DONE_PreGFX11:
2417 return (OpId == OP_GS_NOP)
2418 ? (StreamId == STREAM_ID_NONE_)
2419 : (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
2420 }
2421 }
2422 return StreamId == STREAM_ID_NONE_;
2423}
2424
2425bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
2426 return MsgId == ID_SYSMSG ||
2427 (!isGFX11Plus(STI) &&
2428 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
2429}
2430
2431bool msgSupportsStream(int64_t MsgId, int64_t OpId,
2432 const MCSubtargetInfo &STI) {
2433 return !isGFX11Plus(STI) &&
2434 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
2435 OpId != OP_GS_NOP;
2436}
2437
2438void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
2439 uint16_t &StreamId, const MCSubtargetInfo &STI) {
2440 MsgId = Val & getMsgIdMask(STI);
2441 if (isGFX11Plus(STI)) {
2442 OpId = 0;
2443 StreamId = 0;
2444 } else {
2445 OpId = (Val & OP_MASK_) >> OP_SHIFT_;
2446 StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
2447 }
2448}
2449
2450uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId) {
2451 return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
2452}
2453
2454bool msgDoesNotUseM0(int64_t MsgId, const MCSubtargetInfo &STI) {
2455 // Explicitly list message types that are known to not use m0.
2456 // This is safer than excluding only GS_ALLOC_REQ, in case new message
2457 // types are added in the future that do use m0.
2458 if (isGFX11Plus(STI)) {
2459 switch (MsgId) {
2460 case ID_DEALLOC_VGPRS_GFX11Plus:
2461 return true;
2462 default:
2463 break;
2464 }
2465 }
2466 switch (MsgId) {
2467 case ID_SAVEWAVE:
2468 case ID_STALL_WAVE_GEN:
2469 case ID_HALT_WAVES:
2470 case ID_ORDERED_PS_DONE:
2471 case ID_EARLY_PRIM_DEALLOC:
2472 case ID_GET_DOORBELL:
2473 case ID_GET_DDID:
2474 case ID_SYSMSG:
2475 return true;
2476 default:
2477 return false;
2478 }
2479}
2480
2481} // namespace SendMsg
2482
2483//===----------------------------------------------------------------------===//
// Function attribute, subtarget and register queries
2485//===----------------------------------------------------------------------===//
2486
2487unsigned getInitialPSInputAddr(const Function &F) {
2488 return F.getFnAttributeAsParsedInteger(Kind: "InitialPSInputAddr", Default: 0);
2489}
2490
2491bool getHasColorExport(const Function &F) {
2492 // As a safe default always respond as if PS has color exports.
2493 return F.getFnAttributeAsParsedInteger(
2494 Kind: "amdgpu-color-export",
2495 Default: F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
2496}
2497
2498bool getHasDepthExport(const Function &F) {
2499 return F.getFnAttributeAsParsedInteger(Kind: "amdgpu-depth-export", Default: 0) != 0;
2500}
2501
2502unsigned getDynamicVGPRBlockSize(const Function &F) {
2503 unsigned BlockSize =
2504 F.getFnAttributeAsParsedInteger(Kind: "amdgpu-dynamic-vgpr-block-size", Default: 0);
2505
2506 if (BlockSize == 16 || BlockSize == 32)
2507 return BlockSize;
2508
2509 return 0;
2510}
2511
2512bool hasXNACK(const MCSubtargetInfo &STI) {
2513 return STI.hasFeature(Feature: AMDGPU::FeatureXNACK);
2514}
2515
2516bool hasMIMG_R128(const MCSubtargetInfo &STI) {
2517 return STI.hasFeature(Feature: AMDGPU::FeatureMIMG_R128) &&
2518 !STI.hasFeature(Feature: AMDGPU::FeatureR128A16);
2519}
2520
2521bool hasA16(const MCSubtargetInfo &STI) {
2522 return STI.hasFeature(Feature: AMDGPU::FeatureA16);
2523}
2524
2525bool hasG16(const MCSubtargetInfo &STI) {
2526 return STI.hasFeature(Feature: AMDGPU::FeatureG16);
2527}
2528
2529bool hasPackedD16(const MCSubtargetInfo &STI) {
2530 return !STI.hasFeature(Feature: AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) &&
2531 !isSI(STI);
2532}
2533
2534bool hasGDS(const MCSubtargetInfo &STI) {
2535 return STI.hasFeature(Feature: AMDGPU::FeatureGDS);
2536}
2537
2538unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) {
2539 auto Version = getIsaVersion(GPU: STI.getCPU());
2540 if (Version.Major == 10)
2541 return Version.Minor >= 3 ? 13 : 5;
2542 if (Version.Major == 11)
2543 return 5;
2544 if (Version.Major >= 12)
2545 return HasSampler ? 4 : 5;
2546 return 0;
2547}
2548
2549unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) {
2550 if (isGFX1250Plus(STI))
2551 return 32;
2552 return 16;
2553}
2554
2555bool isSI(const MCSubtargetInfo &STI) {
2556 return STI.hasFeature(Feature: AMDGPU::FeatureSouthernIslands);
2557}
2558
2559bool isCI(const MCSubtargetInfo &STI) {
2560 return STI.hasFeature(Feature: AMDGPU::FeatureSeaIslands);
2561}
2562
2563bool isVI(const MCSubtargetInfo &STI) {
2564 return STI.hasFeature(Feature: AMDGPU::FeatureVolcanicIslands);
2565}
2566
2567bool isGFX9(const MCSubtargetInfo &STI) {
2568 return STI.hasFeature(Feature: AMDGPU::FeatureGFX9);
2569}
2570
2571bool isGFX9_GFX10(const MCSubtargetInfo &STI) {
2572 return isGFX9(STI) || isGFX10(STI);
2573}
2574
2575bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI) {
2576 return isGFX9(STI) || isGFX10(STI) || isGFX11(STI);
2577}
2578
2579bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) {
2580 return isVI(STI) || isGFX9(STI) || isGFX10(STI);
2581}
2582
2583bool isGFX8Plus(const MCSubtargetInfo &STI) {
2584 return isVI(STI) || isGFX9Plus(STI);
2585}
2586
2587bool isGFX9Plus(const MCSubtargetInfo &STI) {
2588 return isGFX9(STI) || isGFX10Plus(STI);
2589}
2590
2591bool isNotGFX9Plus(const MCSubtargetInfo &STI) { return !isGFX9Plus(STI); }
2592
2593bool isGFX10(const MCSubtargetInfo &STI) {
2594 return STI.hasFeature(Feature: AMDGPU::FeatureGFX10);
2595}
2596
2597bool isGFX10_GFX11(const MCSubtargetInfo &STI) {
2598 return isGFX10(STI) || isGFX11(STI);
2599}
2600
2601bool isGFX10Plus(const MCSubtargetInfo &STI) {
2602 return isGFX10(STI) || isGFX11Plus(STI);
2603}
2604
2605bool isGFX11(const MCSubtargetInfo &STI) {
2606 return STI.hasFeature(Feature: AMDGPU::FeatureGFX11);
2607}
2608
2609bool isGFX11Plus(const MCSubtargetInfo &STI) {
2610 return isGFX11(STI) || isGFX12Plus(STI);
2611}
2612
2613bool isGFX12(const MCSubtargetInfo &STI) {
2614 return STI.getFeatureBits()[AMDGPU::FeatureGFX12];
2615}
2616
2617bool isGFX12Plus(const MCSubtargetInfo &STI) {
2618 return isGFX12(STI) || isGFX13Plus(STI);
2619}
2620
2621bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); }
2622
2623bool isGFX1250(const MCSubtargetInfo &STI) {
2624 return STI.getFeatureBits()[AMDGPU::FeatureGFX1250Insts] && !isGFX13(STI);
2625}
2626
2627bool isGFX1250Plus(const MCSubtargetInfo &STI) {
2628 return STI.getFeatureBits()[AMDGPU::FeatureGFX1250Insts];
2629}
2630
2631bool isGFX13(const MCSubtargetInfo &STI) {
2632 return STI.getFeatureBits()[AMDGPU::FeatureGFX13];
2633}
2634
2635bool isGFX13Plus(const MCSubtargetInfo &STI) { return isGFX13(STI); }
2636
2637bool supportsWGP(const MCSubtargetInfo &STI) {
2638 if (isGFX1250(STI))
2639 return false;
2640 return isGFX10Plus(STI);
2641}
2642
2643bool isNotGFX11Plus(const MCSubtargetInfo &STI) { return !isGFX11Plus(STI); }
2644
2645bool isNotGFX10Plus(const MCSubtargetInfo &STI) {
2646 return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
2647}
2648
2649bool isGFX10Before1030(const MCSubtargetInfo &STI) {
2650 return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
2651}
2652
2653bool isGCN3Encoding(const MCSubtargetInfo &STI) {
2654 return STI.hasFeature(Feature: AMDGPU::FeatureGCN3Encoding);
2655}
2656
2657bool isGFX10_AEncoding(const MCSubtargetInfo &STI) {
2658 return STI.hasFeature(Feature: AMDGPU::FeatureGFX10_AEncoding);
2659}
2660
2661bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
2662 return STI.hasFeature(Feature: AMDGPU::FeatureGFX10_BEncoding);
2663}
2664
2665bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
2666 return STI.hasFeature(Feature: AMDGPU::FeatureGFX10_3Insts);
2667}
2668
2669bool isGFX10_3_GFX11(const MCSubtargetInfo &STI) {
2670 return isGFX10_BEncoding(STI) && !isGFX12Plus(STI);
2671}
2672
2673bool isGFX90A(const MCSubtargetInfo &STI) {
2674 return STI.hasFeature(Feature: AMDGPU::FeatureGFX90AInsts);
2675}
2676
2677bool isGFX940(const MCSubtargetInfo &STI) {
2678 return STI.hasFeature(Feature: AMDGPU::FeatureGFX940Insts);
2679}
2680
2681bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
2682 return STI.hasFeature(Feature: AMDGPU::FeatureArchitectedFlatScratch);
2683}
2684
2685bool hasMAIInsts(const MCSubtargetInfo &STI) {
2686 return STI.hasFeature(Feature: AMDGPU::FeatureMAIInsts);
2687}
2688
2689bool hasVOPD(const MCSubtargetInfo &STI) {
2690 return STI.hasFeature(Feature: AMDGPU::FeatureVOPDInsts);
2691}
2692
2693bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI) {
2694 return STI.hasFeature(Feature: AMDGPU::FeatureDPPSrc1SGPR);
2695}
2696
2697unsigned hasKernargPreload(const MCSubtargetInfo &STI) {
2698 return STI.hasFeature(Feature: AMDGPU::FeatureKernargPreload);
2699}
2700
2701int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
2702 int32_t ArgNumVGPR) {
2703 if (has90AInsts && ArgNumAGPR)
2704 return alignTo(Value: ArgNumVGPR, Align: 4) + ArgNumAGPR;
2705 return std::max(a: ArgNumVGPR, b: ArgNumAGPR);
2706}
2707
2708bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI) {
2709 const MCRegisterClass SGPRClass = TRI->getRegClass(i: AMDGPU::SReg_32RegClassID);
2710 const MCRegister FirstSubReg = TRI->getSubReg(Reg, Idx: AMDGPU::sub0);
2711 return SGPRClass.contains(Reg: FirstSubReg != 0 ? FirstSubReg : Reg) ||
2712 Reg == AMDGPU::SCC;
2713}
2714
2715bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI) {
2716 return MRI.getEncodingValue(Reg) & AMDGPU::HWEncoding::IS_HI16;
2717}
2718
// Switch body shared by getMCReg() and mc2PseudoReg(). It expands to a
// `switch (Reg.id())` whose default returns Reg unchanged and whose cases
// come from the CASE_* macros, which are defined differently before each
// use. The enclosing function must provide `Reg` (and `STI` when the active
// CASE_* definitions refer to it).
#define MAP_REG2REG                                                            \
  using namespace AMDGPU;                                                      \
  switch (Reg.id()) {                                                          \
  default:                                                                     \
    return Reg;                                                                \
    CASE_CI_VI(FLAT_SCR)                                                       \
    CASE_CI_VI(FLAT_SCR_LO)                                                    \
    CASE_CI_VI(FLAT_SCR_HI)                                                    \
    CASE_VI_GFX9PLUS(TTMP0)                                                    \
    CASE_VI_GFX9PLUS(TTMP1)                                                    \
    CASE_VI_GFX9PLUS(TTMP2)                                                    \
    CASE_VI_GFX9PLUS(TTMP3)                                                    \
    CASE_VI_GFX9PLUS(TTMP4)                                                    \
    CASE_VI_GFX9PLUS(TTMP5)                                                    \
    CASE_VI_GFX9PLUS(TTMP6)                                                    \
    CASE_VI_GFX9PLUS(TTMP7)                                                    \
    CASE_VI_GFX9PLUS(TTMP8)                                                    \
    CASE_VI_GFX9PLUS(TTMP9)                                                    \
    CASE_VI_GFX9PLUS(TTMP10)                                                   \
    CASE_VI_GFX9PLUS(TTMP11)                                                   \
    CASE_VI_GFX9PLUS(TTMP12)                                                   \
    CASE_VI_GFX9PLUS(TTMP13)                                                   \
    CASE_VI_GFX9PLUS(TTMP14)                                                   \
    CASE_VI_GFX9PLUS(TTMP15)                                                   \
    CASE_VI_GFX9PLUS(TTMP0_TTMP1)                                              \
    CASE_VI_GFX9PLUS(TTMP2_TTMP3)                                              \
    CASE_VI_GFX9PLUS(TTMP4_TTMP5)                                              \
    CASE_VI_GFX9PLUS(TTMP6_TTMP7)                                              \
    CASE_VI_GFX9PLUS(TTMP8_TTMP9)                                              \
    CASE_VI_GFX9PLUS(TTMP10_TTMP11)                                            \
    CASE_VI_GFX9PLUS(TTMP12_TTMP13)                                            \
    CASE_VI_GFX9PLUS(TTMP14_TTMP15)                                            \
    CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3)                                  \
    CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7)                                  \
    CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11)                                \
    CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15)                              \
    CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7)          \
    CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11)        \
    CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15)    \
    CASE_VI_GFX9PLUS(                                                          \
        TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
    CASE_GFXPRE11_GFX11PLUS(M0)                                                \
    CASE_GFXPRE11_GFX11PLUS(SGPR_NULL)                                         \
    CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL)                         \
  }
2764
// Forward (pseudo -> subtarget-specific) definitions of the CASE_* helpers
// consumed by MAP_REG2REG. Each one expects `STI` to be in scope.

// Maps `node` to its _ci or _vi variant depending on isCI(STI).
// NOTE(review): in the MAP_REG2REG expansion the assert is always emitted
// directly after a `return` and directly before a `case` label, so no control
// flow path appears to reach it -- confirm whether it is meant to be live.
#define CASE_CI_VI(node)                                                       \
  assert(!isSI(STI));                                                          \
  case node:                                                                   \
    return isCI(STI) ? node##_ci : node##_vi;

// Maps `node` to its _gfx9plus or _vi variant depending on isGFX9Plus(STI).
#define CASE_VI_GFX9PLUS(node)                                                 \
  case node:                                                                   \
    return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;

// Maps `node` to its _gfx11plus or _gfxpre11 variant depending on
// isGFX11Plus(STI).
#define CASE_GFXPRE11_GFX11PLUS(node)                                          \
  case node:                                                                   \
    return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;

// Like CASE_GFXPRE11_GFX11PLUS, but matches `node` and returns the variants
// of a different register, `result`.
#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)                               \
  case node:                                                                   \
    return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
2781
/// Translates \p Reg through the MAP_REG2REG switch using the forward CASE_*
/// definitions, i.e. to the subtarget-specific variant selected by \p STI.
/// Registers not listed in MAP_REG2REG are returned unchanged, and for the
/// r600 architecture \p Reg is always returned unchanged.
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI) {
  // r600 bypasses the mapping table entirely.
  if (STI.getTargetTriple().getArch() == Triple::r600)
    return Reg;
  MAP_REG2REG
}
2787
2788#undef CASE_CI_VI
2789#undef CASE_VI_GFX9PLUS
2790#undef CASE_GFXPRE11_GFX11PLUS
2791#undef CASE_GFXPRE11_GFX11PLUS_TO
2792
// Reverse (subtarget-specific -> pseudo) definitions of the CASE_* helpers
// consumed by MAP_REG2REG: every variant of `node` maps back to `node`.
#define CASE_CI_VI(node)                                                       \
  case node##_ci:                                                              \
  case node##_vi:                                                              \
    return node;
#define CASE_VI_GFX9PLUS(node)                                                 \
  case node##_vi:                                                              \
  case node##_gfx9plus:                                                        \
    return node;
#define CASE_GFXPRE11_GFX11PLUS(node)                                          \
  case node##_gfx11plus:                                                       \
  case node##_gfxpre11:                                                        \
    return node;
// Expands to nothing in the reverse direction. Presumably this is because the
// `result` variants already have case labels from CASE_GFXPRE11_GFX11PLUS
// (e.g. SGPR_NULL), so emitting them again would duplicate labels -- confirm.
#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
2806
/// Translates \p Reg through the MAP_REG2REG switch using the reverse CASE_*
/// definitions: a listed subtarget-specific variant is mapped back to its
/// base register, anything else is returned unchanged.
MCRegister mc2PseudoReg(MCRegister Reg) { MAP_REG2REG }
2808
2809bool isInlineValue(MCRegister Reg) {
2810 switch (Reg.id()) {
2811 case AMDGPU::SRC_SHARED_BASE_LO:
2812 case AMDGPU::SRC_SHARED_BASE:
2813 case AMDGPU::SRC_SHARED_LIMIT_LO:
2814 case AMDGPU::SRC_SHARED_LIMIT:
2815 case AMDGPU::SRC_PRIVATE_BASE_LO:
2816 case AMDGPU::SRC_PRIVATE_BASE:
2817 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2818 case AMDGPU::SRC_PRIVATE_LIMIT:
2819 case AMDGPU::SRC_FLAT_SCRATCH_BASE_LO:
2820 case AMDGPU::SRC_FLAT_SCRATCH_BASE_HI:
2821 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2822 return true;
2823 case AMDGPU::SRC_VCCZ:
2824 case AMDGPU::SRC_EXECZ:
2825 case AMDGPU::SRC_SCC:
2826 return true;
2827 case AMDGPU::SGPR_NULL:
2828 return true;
2829 default:
2830 return false;
2831 }
2832}
2833
2834#undef CASE_CI_VI
2835#undef CASE_VI_GFX9PLUS
2836#undef CASE_GFXPRE11_GFX11PLUS
2837#undef CASE_GFXPRE11_GFX11PLUS_TO
2838#undef MAP_REG2REG
2839
2840bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2841 assert(OpNo < Desc.NumOperands);
2842 unsigned OpType = Desc.operands()[OpNo].OperandType;
2843 return OpType >= AMDGPU::OPERAND_KIMM_FIRST &&
2844 OpType <= AMDGPU::OPERAND_KIMM_LAST;
2845}
2846
2847bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2848 assert(OpNo < Desc.NumOperands);
2849 unsigned OpType = Desc.operands()[OpNo].OperandType;
2850 switch (OpType) {
2851 case AMDGPU::OPERAND_REG_IMM_FP32:
2852 case AMDGPU::OPERAND_REG_IMM_FP64:
2853 case AMDGPU::OPERAND_REG_IMM_FP16:
2854 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2855 case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
2856 case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
2857 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2858 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2859 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2860 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2861 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2862 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2863 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2864 case AMDGPU::OPERAND_REG_IMM_V2FP64:
2865 return true;
2866 default:
2867 return false;
2868 }
2869}
2870
2871bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2872 assert(OpNo < Desc.NumOperands);
2873 unsigned OpType = Desc.operands()[OpNo].OperandType;
2874 return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
2875 OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST) ||
2876 (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
2877 OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST);
2878}
2879
// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
//
// Maps the register class ID \p RCID to the bit width of that class. The
// cases are grouped by width (16, 32, 64, ... 1024). A class ID that is not
// listed here hits llvm_unreachable, so new register classes must be added to
// the matching group.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::VGPR_16RegClassID:
  case AMDGPU::VGPR_16_Lo128RegClassID:
  case AMDGPU::SGPR_LO16RegClassID:
  case AMDGPU::AGPR_LO16RegClassID:
    return 16;
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VGPR_32_Lo256RegClassID:
  case AMDGPU::VRegOrLds_32RegClassID:
  case AMDGPU::AGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::AV_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
  case AMDGPU::SRegOrLds_32RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
  case AMDGPU::AReg_64RegClassID:
  case AMDGPU::SReg_64_XEXECRegClassID:
  case AMDGPU::VReg_64_Align2RegClassID:
  case AMDGPU::AReg_64_Align2RegClassID:
  case AMDGPU::AV_64RegClassID:
  case AMDGPU::AV_64_Align2RegClassID:
  case AMDGPU::VReg_64_Lo256_Align2RegClassID:
  case AMDGPU::VS_64_Lo256RegClassID:
    return 64;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::SReg_96RegClassID:
  case AMDGPU::VReg_96RegClassID:
  case AMDGPU::AReg_96RegClassID:
  case AMDGPU::VReg_96_Align2RegClassID:
  case AMDGPU::AReg_96_Align2RegClassID:
  case AMDGPU::AV_96RegClassID:
  case AMDGPU::AV_96_Align2RegClassID:
  case AMDGPU::VReg_96_Lo256_Align2RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
  case AMDGPU::AReg_128RegClassID:
  case AMDGPU::VReg_128_Align2RegClassID:
  case AMDGPU::AReg_128_Align2RegClassID:
  case AMDGPU::AV_128RegClassID:
  case AMDGPU::AV_128_Align2RegClassID:
  case AMDGPU::SReg_128_XNULLRegClassID:
  case AMDGPU::VReg_128_Lo256_Align2RegClassID:
    return 128;
  case AMDGPU::SGPR_160RegClassID:
  case AMDGPU::SReg_160RegClassID:
  case AMDGPU::VReg_160RegClassID:
  case AMDGPU::AReg_160RegClassID:
  case AMDGPU::VReg_160_Align2RegClassID:
  case AMDGPU::AReg_160_Align2RegClassID:
  case AMDGPU::AV_160RegClassID:
  case AMDGPU::AV_160_Align2RegClassID:
  case AMDGPU::VReg_160_Lo256_Align2RegClassID:
    return 160;
  case AMDGPU::SGPR_192RegClassID:
  case AMDGPU::SReg_192RegClassID:
  case AMDGPU::VReg_192RegClassID:
  case AMDGPU::AReg_192RegClassID:
  case AMDGPU::VReg_192_Align2RegClassID:
  case AMDGPU::AReg_192_Align2RegClassID:
  case AMDGPU::AV_192RegClassID:
  case AMDGPU::AV_192_Align2RegClassID:
  case AMDGPU::VReg_192_Lo256_Align2RegClassID:
    return 192;
  case AMDGPU::SGPR_224RegClassID:
  case AMDGPU::SReg_224RegClassID:
  case AMDGPU::VReg_224RegClassID:
  case AMDGPU::AReg_224RegClassID:
  case AMDGPU::VReg_224_Align2RegClassID:
  case AMDGPU::AReg_224_Align2RegClassID:
  case AMDGPU::AV_224RegClassID:
  case AMDGPU::AV_224_Align2RegClassID:
  case AMDGPU::VReg_224_Lo256_Align2RegClassID:
    return 224;
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
  case AMDGPU::AReg_256RegClassID:
  case AMDGPU::VReg_256_Align2RegClassID:
  case AMDGPU::AReg_256_Align2RegClassID:
  case AMDGPU::AV_256RegClassID:
  case AMDGPU::AV_256_Align2RegClassID:
  case AMDGPU::SReg_256_XNULLRegClassID:
  case AMDGPU::VReg_256_Lo256_Align2RegClassID:
    return 256;
  case AMDGPU::SGPR_288RegClassID:
  case AMDGPU::SReg_288RegClassID:
  case AMDGPU::VReg_288RegClassID:
  case AMDGPU::AReg_288RegClassID:
  case AMDGPU::VReg_288_Align2RegClassID:
  case AMDGPU::AReg_288_Align2RegClassID:
  case AMDGPU::AV_288RegClassID:
  case AMDGPU::AV_288_Align2RegClassID:
  case AMDGPU::VReg_288_Lo256_Align2RegClassID:
    return 288;
  case AMDGPU::SGPR_320RegClassID:
  case AMDGPU::SReg_320RegClassID:
  case AMDGPU::VReg_320RegClassID:
  case AMDGPU::AReg_320RegClassID:
  case AMDGPU::VReg_320_Align2RegClassID:
  case AMDGPU::AReg_320_Align2RegClassID:
  case AMDGPU::AV_320RegClassID:
  case AMDGPU::AV_320_Align2RegClassID:
  case AMDGPU::VReg_320_Lo256_Align2RegClassID:
    return 320;
  case AMDGPU::SGPR_352RegClassID:
  case AMDGPU::SReg_352RegClassID:
  case AMDGPU::VReg_352RegClassID:
  case AMDGPU::AReg_352RegClassID:
  case AMDGPU::VReg_352_Align2RegClassID:
  case AMDGPU::AReg_352_Align2RegClassID:
  case AMDGPU::AV_352RegClassID:
  case AMDGPU::AV_352_Align2RegClassID:
  case AMDGPU::VReg_352_Lo256_Align2RegClassID:
    return 352;
  case AMDGPU::SGPR_384RegClassID:
  case AMDGPU::SReg_384RegClassID:
  case AMDGPU::VReg_384RegClassID:
  case AMDGPU::AReg_384RegClassID:
  case AMDGPU::VReg_384_Align2RegClassID:
  case AMDGPU::AReg_384_Align2RegClassID:
  case AMDGPU::AV_384RegClassID:
  case AMDGPU::AV_384_Align2RegClassID:
  case AMDGPU::VReg_384_Lo256_Align2RegClassID:
    return 384;
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
  case AMDGPU::AReg_512RegClassID:
  case AMDGPU::VReg_512_Align2RegClassID:
  case AMDGPU::AReg_512_Align2RegClassID:
  case AMDGPU::AV_512RegClassID:
  case AMDGPU::AV_512_Align2RegClassID:
  case AMDGPU::VReg_512_Lo256_Align2RegClassID:
    return 512;
  case AMDGPU::SGPR_1024RegClassID:
  case AMDGPU::SReg_1024RegClassID:
  case AMDGPU::VReg_1024RegClassID:
  case AMDGPU::AReg_1024RegClassID:
  case AMDGPU::VReg_1024_Align2RegClassID:
  case AMDGPU::AReg_1024_Align2RegClassID:
  case AMDGPU::AV_1024RegClassID:
  case AMDGPU::AV_1024_Align2RegClassID:
  case AMDGPU::VReg_1024_Lo256_Align2RegClassID:
    return 1024;
  default:
    // Reaching here means RCID has no entry in the table above.
    llvm_unreachable("Unexpected register class");
  }
}
3039
3040unsigned getRegBitWidth(const MCRegisterClass &RC) {
3041 return getRegBitWidth(RCID: RC.getID());
3042}
3043
3044bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
3045 if (isInlinableIntLiteral(Literal))
3046 return true;
3047
3048 uint64_t Val = static_cast<uint64_t>(Literal);
3049 return (Val == llvm::bit_cast<uint64_t>(from: 0.0)) ||
3050 (Val == llvm::bit_cast<uint64_t>(from: 1.0)) ||
3051 (Val == llvm::bit_cast<uint64_t>(from: -1.0)) ||
3052 (Val == llvm::bit_cast<uint64_t>(from: 0.5)) ||
3053 (Val == llvm::bit_cast<uint64_t>(from: -0.5)) ||
3054 (Val == llvm::bit_cast<uint64_t>(from: 2.0)) ||
3055 (Val == llvm::bit_cast<uint64_t>(from: -2.0)) ||
3056 (Val == llvm::bit_cast<uint64_t>(from: 4.0)) ||
3057 (Val == llvm::bit_cast<uint64_t>(from: -4.0)) ||
3058 (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
3059}
3060
3061bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
3062 if (isInlinableIntLiteral(Literal))
3063 return true;
3064
3065 // The actual type of the operand does not seem to matter as long
3066 // as the bits match one of the inline immediate values. For example:
3067 //
3068 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
3069 // so it is a legal inline immediate.
3070 //
3071 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
3072 // floating-point, so it is a legal inline immediate.
3073
3074 uint32_t Val = static_cast<uint32_t>(Literal);
3075 return (Val == llvm::bit_cast<uint32_t>(from: 0.0f)) ||
3076 (Val == llvm::bit_cast<uint32_t>(from: 1.0f)) ||
3077 (Val == llvm::bit_cast<uint32_t>(from: -1.0f)) ||
3078 (Val == llvm::bit_cast<uint32_t>(from: 0.5f)) ||
3079 (Val == llvm::bit_cast<uint32_t>(from: -0.5f)) ||
3080 (Val == llvm::bit_cast<uint32_t>(from: 2.0f)) ||
3081 (Val == llvm::bit_cast<uint32_t>(from: -2.0f)) ||
3082 (Val == llvm::bit_cast<uint32_t>(from: 4.0f)) ||
3083 (Val == llvm::bit_cast<uint32_t>(from: -4.0f)) ||
3084 (Val == 0x3e22f983 && HasInv2Pi);
3085}
3086
3087bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
3088 if (!HasInv2Pi)
3089 return false;
3090 if (isInlinableIntLiteral(Literal))
3091 return true;
3092 uint16_t Val = static_cast<uint16_t>(Literal);
3093 return Val == 0x3F00 || // 0.5
3094 Val == 0xBF00 || // -0.5
3095 Val == 0x3F80 || // 1.0
3096 Val == 0xBF80 || // -1.0
3097 Val == 0x4000 || // 2.0
3098 Val == 0xC000 || // -2.0
3099 Val == 0x4080 || // 4.0
3100 Val == 0xC080 || // -4.0
3101 Val == 0x3E22; // 1.0 / (2.0 * pi)
3102}
3103
3104bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi) {
3105 return isInlinableLiteral32(Literal, HasInv2Pi);
3106}
3107
3108bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
3109 if (!HasInv2Pi)
3110 return false;
3111 if (isInlinableIntLiteral(Literal))
3112 return true;
3113 uint16_t Val = static_cast<uint16_t>(Literal);
3114 return Val == 0x3C00 || // 1.0
3115 Val == 0xBC00 || // -1.0
3116 Val == 0x3800 || // 0.5
3117 Val == 0xB800 || // -0.5
3118 Val == 0x4000 || // 2.0
3119 Val == 0xC000 || // -2.0
3120 Val == 0x4400 || // 4.0
3121 Val == 0xC400 || // -4.0
3122 Val == 0x3118; // 1/2pi
3123}
3124
/// Computes the inline-constant encoding of \p Literal for a packed 16-bit
/// instruction, or nullopt if there is none.
///
/// The Instruction Set Architecture Reference Guide is misleading about
/// inline operands of (packed) 16-bit instructions. What the hardware does:
///
///  - integer encodings (-16 .. 64) always yield the sign-extended 32-bit
///    value;
///  - float encodings yield
///     - for F16 instructions (\p IsFloat set): the half-precision value in
///       the LSBs and 0 in the MSBs;
///     - for UI16 instructions (\p IsFloat clear): the single-precision value.
std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) {
  const int32_t AsInt = static_cast<int32_t>(Literal);

  // 0 .. 64 occupy encodings 128 .. 192.
  if (AsInt >= 0 && AsInt <= 64)
    return 128 + AsInt;

  // -1 .. -16 occupy encodings 193 .. 208.
  if (AsInt >= -16 && AsInt <= -1)
    return 192 - AsInt;

  // Bit patterns of 0.5, -0.5, 1.0, -1.0, 2.0, -2.0, 4.0, -4.0 and
  // 1.0 / (2.0 * pi). The entry at index I has encoding 240 + I.
  static constexpr unsigned NumFloatConstants = 9;
  static constexpr uint32_t HalfPatterns[NumFloatConstants] = {
      0x3800, 0xB800, 0x3C00, 0xBC00, 0x4000, 0xC000, 0x4400, 0xC400, 0x3118};
  static constexpr uint32_t SinglePatterns[NumFloatConstants] = {
      0x3F000000, 0xBF000000, 0x3F800000, 0xBF800000, 0x40000000,
      0xC0000000, 0x40800000, 0xC0800000, 0x3E22F983};

  const uint32_t *Patterns = IsFloat ? HalfPatterns : SinglePatterns;
  for (unsigned I = 0; I != NumFloatConstants; ++I)
    if (Patterns[I] == Literal)
      return 240 + I;

  return std::nullopt;
}
3177
3178// Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction
3179// or nullopt.
3180std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) {
3181 return getInlineEncodingV216(IsFloat: false, Literal);
3182}
3183
// Inline-constant encoding of \p Literal for a V_PK_*_BF16 instruction, or
// nullopt when the literal has no inline encoding.
std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal) {
  const int32_t AsInt = static_cast<int32_t>(Literal);

  // 0 .. 64 occupy encodings 128 .. 192.
  if (AsInt >= 0 && AsInt <= 64)
    return 128 + AsInt;

  // -1 .. -16 occupy encodings 193 .. 208.
  if (AsInt >= -16 && AsInt <= -1)
    return 192 - AsInt;

  // bfloat16 bit patterns of 0.5, -0.5, 1.0, -1.0, 2.0, -2.0, 4.0, -4.0 and
  // 1.0 / (2.0 * pi). The entry at index I has encoding 240 + I.
  static constexpr uint32_t BF16Patterns[] = {0x3F00, 0xBF00, 0x3F80,
                                              0xBF80, 0x4000, 0xC000,
                                              0x4080, 0xC080, 0x3E22};
  unsigned Encoding = 240;
  for (const uint32_t Pattern : BF16Patterns) {
    if (Pattern == Literal)
      return Encoding;
    ++Encoding;
  }

  return std::nullopt;
}
3211
3212// Encoding of the literal as an inline constant for a V_PK_*_F16 instruction
3213// or nullopt.
3214std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) {
3215 return getInlineEncodingV216(IsFloat: true, Literal);
3216}
3217
3218// Encoding of the literal as an inline constant for V_PK_FMAC_F16 instruction
3219// or nullopt. This accounts for different inline constant behavior:
3220// - Pre-GFX11: fp16 inline constants have the value in low 16 bits, 0 in high
3221// - GFX11+: fp16 inline constants are duplicated into both halves
3222std::optional<unsigned> getPKFMACF16InlineEncoding(uint32_t Literal,
3223 bool IsGFX11Plus) {
3224 // Pre-GFX11 behavior: f16 in low bits, 0 in high bits
3225 if (!IsGFX11Plus)
3226 return getInlineEncodingV216(/*IsFloat=*/true, Literal);
3227
3228 // GFX11+ behavior: f16 duplicated in both halves
3229 // First, check for sign-extended integer inline constants (-16 to 64)
3230 // These work the same across all generations
3231 int32_t Signed = static_cast<int32_t>(Literal);
3232 if (Signed >= 0 && Signed <= 64)
3233 return 128 + Signed;
3234
3235 if (Signed >= -16 && Signed <= -1)
3236 return 192 + std::abs(x: Signed);
3237
3238 // For float inline constants on GFX11+, both halves must be equal
3239 uint16_t Lo = static_cast<uint16_t>(Literal);
3240 uint16_t Hi = static_cast<uint16_t>(Literal >> 16);
3241 if (Lo != Hi)
3242 return std::nullopt;
3243 return getInlineEncodingV216(/*IsFloat=*/true, Literal: Lo);
3244}
3245
3246// Whether the given literal can be inlined for a V_PK_* instruction.
3247bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) {
3248 switch (OpType) {
3249 case AMDGPU::OPERAND_REG_IMM_V2INT16:
3250 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
3251 return getInlineEncodingV216(IsFloat: false, Literal).has_value();
3252 case AMDGPU::OPERAND_REG_IMM_V2FP16:
3253 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
3254 return getInlineEncodingV216(IsFloat: true, Literal).has_value();
3255 case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
3256 llvm_unreachable("OPERAND_REG_IMM_V2FP16_SPLAT is not supported");
3257 case AMDGPU::OPERAND_REG_IMM_V2BF16:
3258 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
3259 return isInlinableLiteralV2BF16(Literal);
3260 case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
3261 return false;
3262 default:
3263 llvm_unreachable("bad packed operand type");
3264 }
3265}
3266
3267// Whether the given literal can be inlined for a V_PK_*_IU16 instruction.
3268bool isInlinableLiteralV2I16(uint32_t Literal) {
3269 return getInlineEncodingV2I16(Literal).has_value();
3270}
3271
3272// Whether the given literal can be inlined for a V_PK_*_BF16 instruction.
3273bool isInlinableLiteralV2BF16(uint32_t Literal) {
3274 return getInlineEncodingV2BF16(Literal).has_value();
3275}
3276
3277// Whether the given literal can be inlined for a V_PK_*_F16 instruction.
3278bool isInlinableLiteralV2F16(uint32_t Literal) {
3279 return getInlineEncodingV2F16(Literal).has_value();
3280}
3281
3282// Whether the given literal can be inlined for V_PK_FMAC_F16 instruction.
3283bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus) {
3284 return getPKFMACF16InlineEncoding(Literal, IsGFX11Plus).has_value();
3285}
3286
3287bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
3288 if (IsFP64)
3289 return !Lo_32(Value: Val);
3290
3291 return isUInt<32>(x: Val) || isInt<32>(x: Val);
3292}
3293
3294int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit) {
3295 switch (Type) {
3296 default:
3297 break;
3298 case OPERAND_REG_IMM_BF16:
3299 case OPERAND_REG_IMM_FP16:
3300 case OPERAND_REG_INLINE_C_BF16:
3301 case OPERAND_REG_INLINE_C_FP16:
3302 return Imm & 0xffff;
3303 case OPERAND_INLINE_SPLIT_BARRIER_INT32:
3304 case OPERAND_REG_IMM_FP32:
3305 case OPERAND_REG_IMM_INT32:
3306 case OPERAND_REG_IMM_V2BF16:
3307 case OPERAND_REG_IMM_V2FP16:
3308 case OPERAND_REG_IMM_V2FP16_SPLAT:
3309 case OPERAND_REG_IMM_V2FP32:
3310 case OPERAND_REG_IMM_V2INT16:
3311 case OPERAND_REG_IMM_V2INT32:
3312 case OPERAND_REG_INLINE_AC_FP32:
3313 case OPERAND_REG_INLINE_AC_INT32:
3314 case OPERAND_REG_INLINE_C_FP32:
3315 case OPERAND_REG_INLINE_C_INT32:
3316 return Lo_32(Value: Imm);
3317 case OPERAND_REG_IMM_FP64:
3318 case AMDGPU::OPERAND_REG_IMM_V2FP64:
3319 return IsLit ? Imm : Hi_32(Value: Imm);
3320 }
3321 return Imm;
3322}
3323
3324bool isArgPassedInSGPR(const Argument *A) {
3325 const Function *F = A->getParent();
3326
3327 // Arguments to compute shaders are never a source of divergence.
3328 CallingConv::ID CC = F->getCallingConv();
3329 switch (CC) {
3330 case CallingConv::AMDGPU_KERNEL:
3331 case CallingConv::SPIR_KERNEL:
3332 return true;
3333 case CallingConv::AMDGPU_VS:
3334 case CallingConv::AMDGPU_LS:
3335 case CallingConv::AMDGPU_HS:
3336 case CallingConv::AMDGPU_ES:
3337 case CallingConv::AMDGPU_GS:
3338 case CallingConv::AMDGPU_PS:
3339 case CallingConv::AMDGPU_CS:
3340 case CallingConv::AMDGPU_Gfx:
3341 case CallingConv::AMDGPU_CS_Chain:
3342 case CallingConv::AMDGPU_CS_ChainPreserve:
3343 // For non-compute shaders, SGPR inputs are marked with either inreg or
3344 // byval. Everything else is in VGPRs.
3345 return A->hasAttribute(Kind: Attribute::InReg) ||
3346 A->hasAttribute(Kind: Attribute::ByVal);
3347 default:
3348 // TODO: treat i1 as divergent?
3349 return A->hasAttribute(Kind: Attribute::InReg);
3350 }
3351}
3352
3353bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
3354 // Arguments to compute shaders are never a source of divergence.
3355 CallingConv::ID CC = CB->getCallingConv();
3356 switch (CC) {
3357 case CallingConv::AMDGPU_KERNEL:
3358 case CallingConv::SPIR_KERNEL:
3359 return true;
3360 case CallingConv::AMDGPU_VS:
3361 case CallingConv::AMDGPU_LS:
3362 case CallingConv::AMDGPU_HS:
3363 case CallingConv::AMDGPU_ES:
3364 case CallingConv::AMDGPU_GS:
3365 case CallingConv::AMDGPU_PS:
3366 case CallingConv::AMDGPU_CS:
3367 case CallingConv::AMDGPU_Gfx:
3368 case CallingConv::AMDGPU_CS_Chain:
3369 case CallingConv::AMDGPU_CS_ChainPreserve:
3370 // For non-compute shaders, SGPR inputs are marked with either inreg or
3371 // byval. Everything else is in VGPRs.
3372 return CB->paramHasAttr(ArgNo, Kind: Attribute::InReg) ||
3373 CB->paramHasAttr(ArgNo, Kind: Attribute::ByVal);
3374 default:
3375 return CB->paramHasAttr(ArgNo, Kind: Attribute::InReg);
3376 }
3377}
3378
3379static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
3380 return isGCN3Encoding(STI: ST) || isGFX10Plus(STI: ST);
3381}
3382
3383bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
3384 int64_t EncodedOffset) {
3385 if (isGFX12Plus(STI: ST))
3386 return isUInt<23>(x: EncodedOffset);
3387
3388 return hasSMEMByteOffset(ST) ? isUInt<20>(x: EncodedOffset)
3389 : isUInt<8>(x: EncodedOffset);
3390}
3391
3392bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
3393 int64_t EncodedOffset, bool IsBuffer) {
3394 if (isGFX12Plus(STI: ST)) {
3395 if (IsBuffer && EncodedOffset < 0)
3396 return false;
3397 return isInt<24>(x: EncodedOffset);
3398 }
3399
3400 return !IsBuffer && hasSMRDSignedImmOffset(ST) && isInt<21>(x: EncodedOffset);
3401}
3402
/// \returns true if \p ByteOffset is a multiple of four bytes.
static bool isDwordAligned(uint64_t ByteOffset) {
  return ByteOffset % 4 == 0;
}
3406
3407uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
3408 uint64_t ByteOffset) {
3409 if (hasSMEMByteOffset(ST))
3410 return ByteOffset;
3411
3412 assert(isDwordAligned(ByteOffset));
3413 return ByteOffset >> 2;
3414}
3415
3416std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
3417 int64_t ByteOffset, bool IsBuffer,
3418 bool HasSOffset) {
3419 // For unbuffered smem loads, it is illegal for the Immediate Offset to be
3420 // negative if the resulting (Offset + (M0 or SOffset or zero) is negative.
3421 // Handle case where SOffset is not present.
3422 if (!IsBuffer && !HasSOffset && ByteOffset < 0 && hasSMRDSignedImmOffset(ST))
3423 return std::nullopt;
3424
3425 if (isGFX12Plus(STI: ST)) // 24 bit signed offsets
3426 return isInt<24>(x: ByteOffset) ? std::optional<int64_t>(ByteOffset)
3427 : std::nullopt;
3428
3429 // The signed version is always a byte offset.
3430 if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
3431 assert(hasSMEMByteOffset(ST));
3432 return isInt<20>(x: ByteOffset) ? std::optional<int64_t>(ByteOffset)
3433 : std::nullopt;
3434 }
3435
3436 if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
3437 return std::nullopt;
3438
3439 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
3440 return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
3441 ? std::optional<int64_t>(EncodedOffset)
3442 : std::nullopt;
3443}
3444
3445std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
3446 int64_t ByteOffset) {
3447 if (!isCI(STI: ST) || !isDwordAligned(ByteOffset))
3448 return std::nullopt;
3449
3450 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
3451 return isUInt<32>(x: EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
3452 : std::nullopt;
3453}
3454
3455unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) {
3456 if (ST.getFeatureBits().test(I: FeatureFlatOffsetBits12))
3457 return 12;
3458 if (ST.getFeatureBits().test(I: FeatureFlatOffsetBits24))
3459 return 24;
3460 return 13;
3461}
3462
namespace {

// Row type for the source-of-divergence lookup; holds an intrinsic ID.
struct SourceOfDivergence {
  unsigned Intr;
};
// Declared here, defined elsewhere -- presumably by the generated include
// below once GET_SourcesOfDivergence_IMPL is set (confirm). Used in this file
// as a presence test: a non-null result means \p Intr is listed.
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);

// Row type for the always-uniform lookup; holds an intrinsic ID.
struct AlwaysUniform {
  unsigned Intr;
};
// Declared here, defined elsewhere -- presumably by the generated include
// below once GET_UniformIntrinsics_IMPL is set (confirm). Used in this file
// as a presence test: a non-null result means \p Intr is listed.
const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);

// Select which implementations the generated include emits into this
// anonymous namespace.
#define GET_SourcesOfDivergence_IMPL
#define GET_UniformIntrinsics_IMPL
#define GET_Gfx9BufferFormat_IMPL
#define GET_Gfx10BufferFormat_IMPL
#define GET_Gfx11PlusBufferFormat_IMPL

#include "AMDGPUGenSearchableTables.inc"

} // end anonymous namespace
3484
3485bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
3486 return lookupSourceOfDivergence(Intr: IntrID);
3487}
3488
3489bool isIntrinsicAlwaysUniform(unsigned IntrID) {
3490 return lookupAlwaysUniform(Intr: IntrID);
3491}
3492
3493const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
3494 uint8_t NumComponents,
3495 uint8_t NumFormat,
3496 const MCSubtargetInfo &STI) {
3497 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(
3498 BitsPerComp, NumComponents, NumFormat)
3499 : isGFX10(STI)
3500 ? getGfx10BufferFormatInfo(BitsPerComp, NumComponents, NumFormat)
3501 : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat);
3502}
3503
3504const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
3505 const MCSubtargetInfo &STI) {
3506 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
3507 : isGFX10(STI) ? getGfx10BufferFormatInfo(Format)
3508 : getGfx9BufferFormatInfo(Format);
3509}
3510
3511const MCRegisterClass *getVGPRPhysRegClass(MCRegister Reg,
3512 const MCRegisterInfo &MRI) {
3513 const unsigned VGPRClasses[] = {
3514 AMDGPU::VGPR_16RegClassID, AMDGPU::VGPR_32RegClassID,
3515 AMDGPU::VReg_64RegClassID, AMDGPU::VReg_96RegClassID,
3516 AMDGPU::VReg_128RegClassID, AMDGPU::VReg_160RegClassID,
3517 AMDGPU::VReg_192RegClassID, AMDGPU::VReg_224RegClassID,
3518 AMDGPU::VReg_256RegClassID, AMDGPU::VReg_288RegClassID,
3519 AMDGPU::VReg_320RegClassID, AMDGPU::VReg_352RegClassID,
3520 AMDGPU::VReg_384RegClassID, AMDGPU::VReg_512RegClassID,
3521 AMDGPU::VReg_1024RegClassID};
3522
3523 for (unsigned RCID : VGPRClasses) {
3524 const MCRegisterClass &RC = MRI.getRegClass(i: RCID);
3525 if (RC.contains(Reg))
3526 return &RC;
3527 }
3528
3529 return nullptr;
3530}
3531
3532unsigned getVGPREncodingMSBs(MCRegister Reg, const MCRegisterInfo &MRI) {
3533 unsigned Enc = MRI.getEncodingValue(Reg);
3534 unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK;
3535 return Idx >> 8;
3536}
3537
3538MCRegister getVGPRWithMSBs(MCRegister Reg, unsigned MSBs,
3539 const MCRegisterInfo &MRI) {
3540 unsigned Enc = MRI.getEncodingValue(Reg);
3541 unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK;
3542 if (Idx >= 0x100)
3543 return MCRegister();
3544
3545 const MCRegisterClass *RC = getVGPRPhysRegClass(Reg, MRI);
3546 if (!RC)
3547 return MCRegister();
3548
3549 Idx |= MSBs << 8;
3550 if (RC->getID() == AMDGPU::VGPR_16RegClassID) {
3551 // This class has 2048 registers with interleaved lo16 and hi16.
3552 Idx *= 2;
3553 if (Enc & AMDGPU::HWEncoding::IS_HI16)
3554 ++Idx;
3555 }
3556
3557 return RC->getRegister(i: Idx);
3558}
3559
3560static std::optional<unsigned>
3561convertSetRegImmToVgprMSBs(unsigned Imm, unsigned Simm16,
3562 bool HasSetregVGPRMSBFixup) {
3563 constexpr unsigned VGPRMSBShift =
3564 llvm::countr_zero_constexpr<unsigned>(Val: AMDGPU::Hwreg::DST_VGPR_MSB);
3565
3566 auto [HwRegId, Offset, Size] = Hwreg::HwregEncoding::decode(Encoded: Simm16);
3567 if (HwRegId != Hwreg::ID_MODE ||
3568 (!HasSetregVGPRMSBFixup && (Offset + Size) < VGPRMSBShift))
3569 return {};
3570 // If there is SetregVGPRMSBFixup then Offset is ignored.
3571 if (!HasSetregVGPRMSBFixup)
3572 Imm <<= Offset;
3573 Imm = (Imm & Hwreg::VGPR_MSB_MASK) >> VGPRMSBShift;
3574 if (!HasSetregVGPRMSBFixup)
3575 Imm &= llvm::maskTrailingOnes<unsigned>(N: Size);
3576 return llvm::rotr<uint8_t>(V: static_cast<uint8_t>(Imm), /*R=*/2);
3577}
3578
3579std::optional<unsigned> convertSetRegImmToVgprMSBs(const MachineInstr &MI,
3580 bool HasSetregVGPRMSBFixup) {
3581 assert(MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32);
3582 return convertSetRegImmToVgprMSBs(Imm: MI.getOperand(i: 0).getImm(),
3583 Simm16: MI.getOperand(i: 1).getImm(),
3584 HasSetregVGPRMSBFixup);
3585}
3586
3587std::optional<unsigned> convertSetRegImmToVgprMSBs(const MCInst &MI,
3588 bool HasSetregVGPRMSBFixup) {
3589 assert(MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_gfx12);
3590 return convertSetRegImmToVgprMSBs(Imm: MI.getOperand(i: 0).getImm(),
3591 Simm16: MI.getOperand(i: 1).getImm(),
3592 HasSetregVGPRMSBFixup);
3593}
3594
3595std::pair<const AMDGPU::OpName *, const AMDGPU::OpName *>
3596getVGPRLoweringOperandTables(const MCInstrDesc &Desc) {
3597 static const AMDGPU::OpName VOPOps[4] = {
3598 AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2,
3599 AMDGPU::OpName::vdst};
3600 static const AMDGPU::OpName VDSOps[4] = {
3601 AMDGPU::OpName::addr, AMDGPU::OpName::data0, AMDGPU::OpName::data1,
3602 AMDGPU::OpName::vdst};
3603 static const AMDGPU::OpName FLATOps[4] = {
3604 AMDGPU::OpName::vaddr, AMDGPU::OpName::vdata,
3605 AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdst};
3606 static const AMDGPU::OpName BUFOps[4] = {
3607 AMDGPU::OpName::vaddr, AMDGPU::OpName::NUM_OPERAND_NAMES,
3608 AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdata};
3609 static const AMDGPU::OpName VIMGOps[4] = {
3610 AMDGPU::OpName::vaddr0, AMDGPU::OpName::vaddr1, AMDGPU::OpName::vaddr2,
3611 AMDGPU::OpName::vdata};
3612
3613 // For VOPD instructions MSB of a corresponding Y component operand VGPR
3614 // address is supposed to match X operand, otherwise VOPD shall not be
3615 // combined.
3616 static const AMDGPU::OpName VOPDOpsX[4] = {
3617 AMDGPU::OpName::src0X, AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vsrc2X,
3618 AMDGPU::OpName::vdstX};
3619 static const AMDGPU::OpName VOPDOpsY[4] = {
3620 AMDGPU::OpName::src0Y, AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vsrc2Y,
3621 AMDGPU::OpName::vdstY};
3622
3623 // VOP2 MADMK instructions use src0, imm, src1 scheme.
3624 static const AMDGPU::OpName VOP2MADMKOps[4] = {
3625 AMDGPU::OpName::src0, AMDGPU::OpName::NUM_OPERAND_NAMES,
3626 AMDGPU::OpName::src1, AMDGPU::OpName::vdst};
3627 static const AMDGPU::OpName VOPDFMAMKOpsX[4] = {
3628 AMDGPU::OpName::src0X, AMDGPU::OpName::NUM_OPERAND_NAMES,
3629 AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vdstX};
3630 static const AMDGPU::OpName VOPDFMAMKOpsY[4] = {
3631 AMDGPU::OpName::src0Y, AMDGPU::OpName::NUM_OPERAND_NAMES,
3632 AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vdstY};
3633
3634 unsigned TSFlags = Desc.TSFlags;
3635
3636 if (TSFlags &
3637 (SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | SIInstrFlags::VOP3 |
3638 SIInstrFlags::VOP3P | SIInstrFlags::VOPC | SIInstrFlags::DPP)) {
3639 switch (Desc.getOpcode()) {
3640 // LD_SCALE operands ignore MSB.
3641 case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32:
3642 case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32_gfx1250:
3643 case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64:
3644 case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64_gfx1250:
3645 return {};
3646 case AMDGPU::V_FMAMK_F16:
3647 case AMDGPU::V_FMAMK_F16_t16:
3648 case AMDGPU::V_FMAMK_F16_t16_gfx12:
3649 case AMDGPU::V_FMAMK_F16_fake16:
3650 case AMDGPU::V_FMAMK_F16_fake16_gfx12:
3651 case AMDGPU::V_FMAMK_F32:
3652 case AMDGPU::V_FMAMK_F32_gfx12:
3653 case AMDGPU::V_FMAMK_F64:
3654 case AMDGPU::V_FMAMK_F64_gfx1250:
3655 return {VOP2MADMKOps, nullptr};
3656 default:
3657 break;
3658 }
3659 return {VOPOps, nullptr};
3660 }
3661
3662 if (TSFlags & SIInstrFlags::DS)
3663 return {VDSOps, nullptr};
3664
3665 if (TSFlags & SIInstrFlags::FLAT)
3666 return {FLATOps, nullptr};
3667
3668 if (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))
3669 return {BUFOps, nullptr};
3670
3671 if (TSFlags & SIInstrFlags::VIMAGE)
3672 return {VIMGOps, nullptr};
3673
3674 if (AMDGPU::isVOPD(Opc: Desc.getOpcode())) {
3675 auto [OpX, OpY] = getVOPDComponents(VOPDOpcode: Desc.getOpcode());
3676 return {(OpX == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsX : VOPDOpsX,
3677 (OpY == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsY : VOPDOpsY};
3678 }
3679
3680 assert(!(TSFlags & SIInstrFlags::MIMG));
3681
3682 if (TSFlags & (SIInstrFlags::VSAMPLE | SIInstrFlags::EXP))
3683 llvm_unreachable("Sample and export VGPR lowering is not implemented and"
3684 " these instructions are not expected on gfx1250");
3685
3686 return {};
3687}
3688
3689bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode) {
3690 uint64_t TSFlags = MII.get(Opcode).TSFlags;
3691
3692 if (TSFlags & SIInstrFlags::SMRD)
3693 return !getSMEMIsBuffer(Opc: Opcode);
3694 if (!(TSFlags & SIInstrFlags::FLAT))
3695 return false;
3696
3697 // Only SV and SVS modes are supported.
3698 if (TSFlags & SIInstrFlags::FlatScratch)
3699 return hasNamedOperand(Opcode, NamedIdx: OpName::vaddr);
3700
3701 // Only GVS mode is supported.
3702 return hasNamedOperand(Opcode, NamedIdx: OpName::vaddr) &&
3703 hasNamedOperand(Opcode, NamedIdx: OpName::saddr);
3704
3705 return false;
3706}
3707
3708bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc, const MCInstrInfo &MII,
3709 const MCSubtargetInfo &ST) {
3710 for (auto OpName : {OpName::vdst, OpName::src0, OpName::src1, OpName::src2}) {
3711 int Idx = getNamedOperandIdx(Opcode: OpDesc.getOpcode(), Name: OpName);
3712 if (Idx == -1)
3713 continue;
3714
3715 const MCOperandInfo &OpInfo = OpDesc.operands()[Idx];
3716 int16_t RegClass = MII.getOpRegClassID(
3717 OpInfo, HwModeId: ST.getHwMode(type: MCSubtargetInfo::HwMode_RegInfo));
3718 if (RegClass == AMDGPU::VReg_64RegClassID ||
3719 RegClass == AMDGPU::VReg_64_Align2RegClassID)
3720 return true;
3721 }
3722
3723 return false;
3724}
3725
3726bool isDPALU_DPP32BitOpc(unsigned Opc) {
3727 switch (Opc) {
3728 case AMDGPU::V_MUL_LO_U32_e64:
3729 case AMDGPU::V_MUL_LO_U32_e64_dpp:
3730 case AMDGPU::V_MUL_LO_U32_e64_dpp_gfx1250:
3731 case AMDGPU::V_MUL_HI_U32_e64:
3732 case AMDGPU::V_MUL_HI_U32_e64_dpp:
3733 case AMDGPU::V_MUL_HI_U32_e64_dpp_gfx1250:
3734 case AMDGPU::V_MUL_HI_I32_e64:
3735 case AMDGPU::V_MUL_HI_I32_e64_dpp:
3736 case AMDGPU::V_MUL_HI_I32_e64_dpp_gfx1250:
3737 case AMDGPU::V_MAD_U32_e64:
3738 case AMDGPU::V_MAD_U32_e64_dpp:
3739 case AMDGPU::V_MAD_U32_e64_dpp_gfx1250:
3740 return true;
3741 default:
3742 return false;
3743 }
3744}
3745
3746bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII,
3747 const MCSubtargetInfo &ST) {
3748 if (!ST.hasFeature(Feature: AMDGPU::FeatureDPALU_DPP))
3749 return false;
3750
3751 if (isDPALU_DPP32BitOpc(Opc: OpDesc.getOpcode()))
3752 return ST.hasFeature(Feature: AMDGPU::FeatureGFX1250Insts);
3753
3754 return hasAny64BitVGPROperands(OpDesc, MII, ST);
3755}
3756
3757unsigned getLdsDwGranularity(const MCSubtargetInfo &ST) {
3758 if (ST.getFeatureBits().test(I: FeatureAddressableLocalMemorySize32768))
3759 return 64;
3760 if (ST.getFeatureBits().test(I: FeatureAddressableLocalMemorySize65536))
3761 return 128;
3762 if (ST.getFeatureBits().test(I: FeatureAddressableLocalMemorySize163840))
3763 return 320;
3764 if (ST.getFeatureBits().test(I: FeatureAddressableLocalMemorySize327680))
3765 return 512;
3766 return 64; // In sync with getAddressableLocalMemorySize
3767}
3768
3769bool isPackedFP32Inst(unsigned Opc) {
3770 switch (Opc) {
3771 case AMDGPU::V_PK_ADD_F32:
3772 case AMDGPU::V_PK_ADD_F32_gfx12:
3773 case AMDGPU::V_PK_MUL_F32:
3774 case AMDGPU::V_PK_MUL_F32_gfx12:
3775 case AMDGPU::V_PK_FMA_F32:
3776 case AMDGPU::V_PK_FMA_F32_gfx12:
3777 return true;
3778 default:
3779 return false;
3780 }
3781}
3782
3783bool isPacked64BitInst(unsigned Opc) {
3784 switch (Opc) {
3785 case AMDGPU::V_PK_ADD_F64:
3786 case AMDGPU::V_PK_ADD_F64_gfx1250:
3787 case AMDGPU::V_PK_MUL_F64:
3788 case AMDGPU::V_PK_MUL_F64_gfx1250:
3789 case AMDGPU::V_PK_FMA_F64:
3790 case AMDGPU::V_PK_FMA_F64_gfx1250:
3791 case AMDGPU::V_PK_MAX_NUM_F64:
3792 case AMDGPU::V_PK_MAX_NUM_F64_gfx1250:
3793 case AMDGPU::V_PK_MIN_NUM_F64:
3794 case AMDGPU::V_PK_MIN_NUM_F64_gfx1250:
3795 case AMDGPU::V_PK_ADD_NC_U64:
3796 case AMDGPU::V_PK_ADD_NC_U64_gfx1250:
3797 case AMDGPU::V_PK_SUB_NC_U64:
3798 case AMDGPU::V_PK_SUB_NC_U64_gfx1250:
3799 case AMDGPU::V_PK_LSHL_ADD_U64:
3800 case AMDGPU::V_PK_LSHL_ADD_U64_gfx1250:
3801 return true;
3802 default:
3803 return false;
3804 }
3805}
3806
3807bool isPackedFP32or64BitInst(unsigned Opc) {
3808 return isPackedFP32Inst(Opc) || isPacked64BitInst(Opc);
3809}
3810
3811const std::array<unsigned, 3> &ClusterDimsAttr::getDims() const {
3812 assert(isFixedDims() && "expect kind to be FixedDims");
3813 return Dims;
3814}
3815
3816std::string ClusterDimsAttr::to_string() const {
3817 SmallString<10> Buffer;
3818 raw_svector_ostream OS(Buffer);
3819
3820 switch (getKind()) {
3821 case Kind::Unknown:
3822 return "";
3823 case Kind::NoCluster: {
3824 OS << EncoNoCluster << ',' << EncoNoCluster << ',' << EncoNoCluster;
3825 return Buffer.c_str();
3826 }
3827 case Kind::VariableDims: {
3828 OS << EncoVariableDims << ',' << EncoVariableDims << ','
3829 << EncoVariableDims;
3830 return Buffer.c_str();
3831 }
3832 case Kind::FixedDims: {
3833 OS << Dims[0] << ',' << Dims[1] << ',' << Dims[2];
3834 return Buffer.c_str();
3835 }
3836 }
3837 llvm_unreachable("Unknown ClusterDimsAttr kind");
3838}
3839
3840ClusterDimsAttr ClusterDimsAttr::get(const Function &F) {
3841 std::optional<SmallVector<unsigned>> Attr =
3842 getIntegerVecAttribute(F, Name: "amdgpu-cluster-dims", /*Size=*/3);
3843 ClusterDimsAttr::Kind AttrKind = Kind::FixedDims;
3844
3845 if (!Attr.has_value())
3846 AttrKind = Kind::Unknown;
3847 else if (all_of(Range&: *Attr, P: equal_to(Arg: EncoNoCluster)))
3848 AttrKind = Kind::NoCluster;
3849 else if (all_of(Range&: *Attr, P: equal_to(Arg: EncoVariableDims)))
3850 AttrKind = Kind::VariableDims;
3851
3852 ClusterDimsAttr A(AttrKind);
3853 if (AttrKind == Kind::FixedDims)
3854 A.Dims = {(*Attr)[0], (*Attr)[1], (*Attr)[2]};
3855
3856 return A;
3857}
3858
3859} // namespace AMDGPU
3860
3861raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::TargetIDSetting S) {
3862 switch (S) {
3863 case (AMDGPU::TargetIDSetting::Unsupported):
3864 OS << "Unsupported";
3865 break;
3866 case (AMDGPU::TargetIDSetting::Any):
3867 OS << "Any";
3868 break;
3869 case (AMDGPU::TargetIDSetting::Off):
3870 OS << "Off";
3871 break;
3872 case (AMDGPU::TargetIDSetting::On):
3873 OS << "On";
3874 break;
3875 }
3876 return OS;
3877}
3878
3879} // namespace llvm
3880