//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "AMDGPUAsmUtils.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/TargetParser/TargetParser.h"
#include <optional>

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion(
    "amdhsa-code-object-version", llvm::cl::Hidden,
    llvm::cl::init(llvm::AMDGPU::AMDHSA_COV6),
    llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
                   "or asm directive still take priority if present)"));

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  unsigned Mask = getBitMask(Shift, Width);
  return ((Src << Shift) & Mask) | (Dst & ~Mask);
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}
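
// Worked example (illustrative only): with Shift = 4 and Width = 3,
// getBitMask(4, 3) == 0b0111'0000; packBits(0b101, 0, 4, 3) places Src into
// that field, yielding 0b0101'0000, and unpackBits(0b0101'0000, 4, 3)
// recovers 0b101.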

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 10 : 0;
}

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 6 : 4;
}

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 0 : 4;
}

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 4 : 8;
}

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 4;
}

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
}
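
// Taken together, the helpers above imply the following legacy s_waitcnt
// layouts (an illustrative summary derived from the shifts/widths, not an
// authoritative reference):
//   pre-gfx10: vmcnt[3:0]  expcnt[6:4]  lgkmcnt[11:8]  (vmcnt[15:14] on gfx9)
//   gfx10:     vmcnt[3:0]  expcnt[6:4]  lgkmcnt[13:8]  vmcnt[15:14]
//   gfx11:     expcnt[2:0] lgkmcnt[9:4] vmcnt[15:10]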

/// \returns Loadcnt bit width.
unsigned getLoadcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Samplecnt bit width.
unsigned getSamplecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Bvhcnt bit width.
unsigned getBvhcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 3 : 0;
}

/// \returns Dscnt bit width.
unsigned getDscntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Dscnt bit shift in combined S_WAIT instructions.
unsigned getDscntBitShift(unsigned VersionMajor) { return 0; }

/// \returns Storecnt or Vscnt bit width, depending on VersionMajor.
unsigned getStorecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 0;
}

/// \returns Kmcnt bit width.
unsigned getKmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 5 : 0;
}

/// \returns Xcnt bit width.
unsigned getXcntBitWidth(unsigned VersionMajor, unsigned VersionMinor) {
  return VersionMajor == 12 && VersionMinor == 5 ? 6 : 0;
}

/// \returns Shift for Loadcnt/Storecnt in combined S_WAIT instructions.
unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 8 : 0;
}

/// \returns VaSdst bit width.
inline unsigned getVaSdstBitWidth() { return 3; }

/// \returns VaSdst bit shift.
inline unsigned getVaSdstBitShift() { return 9; }

/// \returns VmVsrc bit width.
inline unsigned getVmVsrcBitWidth() { return 3; }

/// \returns VmVsrc bit shift.
inline unsigned getVmVsrcBitShift() { return 2; }

/// \returns VaVdst bit width.
inline unsigned getVaVdstBitWidth() { return 4; }

/// \returns VaVdst bit shift.
inline unsigned getVaVdstBitShift() { return 12; }

/// \returns VaVcc bit width.
inline unsigned getVaVccBitWidth() { return 1; }

/// \returns VaVcc bit shift.
inline unsigned getVaVccBitShift() { return 1; }

/// \returns SaSdst bit width.
inline unsigned getSaSdstBitWidth() { return 1; }

/// \returns SaSdst bit shift.
inline unsigned getSaSdstBitShift() { return 0; }

/// \returns VaSsrc bit width.
inline unsigned getVaSsrcBitWidth() { return 1; }

/// \returns VaSsrc bit shift.
inline unsigned getVaSsrcBitShift() { return 8; }

/// \returns HoldCnt bit width.
inline unsigned getHoldCntWidth(unsigned VersionMajor, unsigned VersionMinor) {
  static constexpr const unsigned MinMajor = 10;
  static constexpr const unsigned MinMinor = 3;
  return std::tie(VersionMajor, VersionMinor) >= std::tie(MinMajor, MinMinor)
             ? 1
             : 0;
}

/// \returns HoldCnt bit shift.
inline unsigned getHoldCntBitShift() { return 7; }

} // end anonymous namespace

namespace llvm {

namespace AMDGPU {

iota_range<InstCounterType> inst_counter_types(InstCounterType MaxCounter) {
  return enum_seq(LOAD_CNT, MaxCounter);
}

/// \returns true if the target supports signed immediate offset for SMRD
/// instructions.
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
  return isGFX9Plus(ST);
}

/// \returns True if \p STI is AMDHSA.
bool isHsaAbi(const MCSubtargetInfo &STI) {
  return STI.getTargetTriple().getOS() == Triple::AMDHSA;
}

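/// \returns The code object version from \p M's "amdhsa_code_object_version"
/// module flag if present, otherwise the default version. The flag stores the
/// version scaled by 100, so e.g. a flag value of 500 denotes version 5.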
unsigned getAMDHSACodeObjectVersion(const Module &M) {
  if (auto *Ver = mdconst::extract_or_null<ConstantInt>(
          M.getModuleFlag("amdhsa_code_object_version"))) {
    return (unsigned)Ver->getZExtValue() / 100;
  }

  return getDefaultAMDHSACodeObjectVersion();
}

unsigned getDefaultAMDHSACodeObjectVersion() {
  return DefaultAMDHSACodeObjectVersion;
}

unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion) {
  switch (ABIVersion) {
  case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
    return 4;
  case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
    return 5;
  case ELF::ELFABIVERSION_AMDGPU_HSA_V6:
    return 6;
  default:
    return getDefaultAMDHSACodeObjectVersion();
  }
}

uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) {
  if (T.getOS() != Triple::AMDHSA)
    return 0;

  switch (CodeObjectVersion) {
  case 4:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
  case 5:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
  case 6:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V6;
  default:
    report_fatal_error("Unsupported AMDHSA Code Object Version " +
                       Twine(CodeObjectVersion));
  }
}

unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 48;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
  }
}

// FIXME: All such magic numbers about the ABI should be in a
// central TD file.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 24;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
  }
}

unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 32;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET;
  }
}

unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 40;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET;
  }
}

#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#define GET_MIMGMIPMappingTable_IMPL
#define GET_MIMGBiasMappingTable_IMPL
#define GET_MIMGOffsetMappingTable_IMPL
#define GET_MIMGG16MappingTable_IMPL
#define GET_MAIInstInfoTable_IMPL
#define GET_WMMAInstInfoTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords) {
  const MIMGInfo *Info =
      getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding, VDataDwords, VAddrDwords);
  return Info ? Info->Opcode : -1;
}

const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
  const MIMGInfo *Info = getMIMGInfo(Opc);
  return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
}

int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
  const MIMGInfo *NewInfo =
      getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
                          NewChannels, OrigInfo->VAddrDwords);
  return NewInfo ? NewInfo->Opcode : -1;
}

unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported) {
  unsigned AddrWords = BaseOpcode->NumExtraArgs;
  unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
                            (BaseOpcode->LodOrClampOrMip ? 1 : 0);
  if (IsA16)
    AddrWords += divideCeil(AddrComponents, 2);
  else
    AddrWords += AddrComponents;

  // Note: For subtargets that support A16 but not G16, enabling A16 also
  // enables 16 bit gradients.
  // For subtargets that support A16 (operand) and G16 (done with a different
  // instruction encoding), they are independent.

  if (BaseOpcode->Gradients) {
    if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
      // There are two gradients per coordinate, so we pack them separately.
      // For the 3d case,
      // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
      AddrWords += alignTo<2>(Dim->NumGradients / 2);
    else
      AddrWords += Dim->NumGradients;
  }
  return AddrWords;
}
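
// Illustrative example: a 2D sample (NumCoords = 2, no LOD/clamp/mip, no
// extra arguments) needs 2 address words, or divideCeil(2, 2) = 1 word when
// A16 packs both 16-bit coordinates into a single dword.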

struct MUBUFInfo {
  uint32_t Opcode;
  uint32_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
  bool IsBufferInv;
  bool tfe;
};

struct MTBUFInfo {
  uint32_t Opcode;
  uint32_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
};

struct SMInfo {
  uint32_t Opcode;
  bool IsBuffer;
};

struct VOPInfo {
  uint32_t Opcode;
  bool IsSingle;
};

struct VOPC64DPPInfo {
  uint32_t Opcode;
};

struct VOPCDPPAsmOnlyInfo {
  uint32_t Opcode;
};

struct VOP3CDPPAsmOnlyInfo {
  uint32_t Opcode;
};

struct VOPDComponentInfo {
  uint16_t BaseVOP;
  uint16_t VOPDOp;
  bool CanBeVOPDX;
  bool CanBeVOPD3X;
};

struct VOPDInfo {
  uint32_t Opcode;
  uint16_t OpX;
  uint16_t OpY;
  uint16_t Subtarget;
  bool VOPD3;
};

struct VOPTrue16Info {
  uint32_t Opcode;
  bool IsTrue16;
};

#define GET_FP4FP8DstByteSelTable_DECL
#define GET_FP4FP8DstByteSelTable_IMPL

struct DPMACCInstructionInfo {
  uint32_t Opcode;
  bool IsDPMACCInstruction;
};

struct FP4FP8DstByteSelInfo {
  uint32_t Opcode;
  bool HasFP8DstByteSel;
  bool HasFP4DstByteSel;
};

#define GET_DPMACCInstructionTable_DECL
#define GET_DPMACCInstructionTable_IMPL
#define GET_MTBUFInfoTable_DECL
#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
#define GET_MUBUFInfoTable_IMPL
#define GET_SMInfoTable_DECL
#define GET_SMInfoTable_IMPL
#define GET_VOP1InfoTable_DECL
#define GET_VOP1InfoTable_IMPL
#define GET_VOP2InfoTable_DECL
#define GET_VOP2InfoTable_IMPL
#define GET_VOP3InfoTable_DECL
#define GET_VOP3InfoTable_IMPL
#define GET_VOPC64DPPTable_DECL
#define GET_VOPC64DPPTable_IMPL
#define GET_VOPC64DPP8Table_DECL
#define GET_VOPC64DPP8Table_IMPL
#define GET_VOPCAsmOnlyInfoTable_DECL
#define GET_VOPCAsmOnlyInfoTable_IMPL
#define GET_VOP3CAsmOnlyInfoTable_DECL
#define GET_VOP3CAsmOnlyInfoTable_IMPL
#define GET_VOPDComponentTable_DECL
#define GET_VOPDComponentTable_IMPL
#define GET_VOPDPairs_DECL
#define GET_VOPDPairs_IMPL
#define GET_VOPTrue16Table_DECL
#define GET_VOPTrue16Table_IMPL
#define GET_True16D16Table_IMPL
#define GET_WMMAOpcode2AddrMappingTable_DECL
#define GET_WMMAOpcode2AddrMappingTable_IMPL
#define GET_WMMAOpcode3AddrMappingTable_DECL
#define GET_WMMAOpcode3AddrMappingTable_IMPL
#define GET_getMFMA_F8F6F4_WithSize_DECL
#define GET_getMFMA_F8F6F4_WithSize_IMPL
#define GET_isMFMA_F8F6F4Table_IMPL
#define GET_isCvtScaleF32_F32F16ToF8F4Table_IMPL

#include "AMDGPUGenSearchableTables.inc"

int getMTBUFBaseOpcode(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MTBUFInfo *Info =
      getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMTBUFElements(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMTBUFHasVAddr(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info && Info->has_vaddr;
}

bool getMTBUFHasSrsrc(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info && Info->has_srsrc;
}

bool getMTBUFHasSoffset(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info && Info->has_soffset;
}

int getMUBUFBaseOpcode(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MUBUFInfo *Info =
      getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMUBUFElements(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMUBUFHasVAddr(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info && Info->has_vaddr;
}

bool getMUBUFHasSrsrc(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info && Info->has_srsrc;
}

bool getMUBUFHasSoffset(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info && Info->has_soffset;
}

bool getMUBUFIsBufferInv(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info && Info->IsBufferInv;
}

bool getMUBUFTfe(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info && Info->tfe;
}

bool getSMEMIsBuffer(unsigned Opc) {
  const SMInfo *Info = getSMEMOpcodeHelper(Opc);
  return Info && Info->IsBuffer;
}

bool getVOP1IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
  return !Info || Info->IsSingle;
}

bool getVOP2IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
  return !Info || Info->IsSingle;
}

bool getVOP3IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
  return !Info || Info->IsSingle;
}

bool isVOPC64DPP(unsigned Opc) {
  return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
}

bool isVOPCAsmOnly(unsigned Opc) { return isVOPCAsmOnlyOpcodeHelper(Opc); }

bool getMAIIsDGEMM(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info && Info->is_dgemm;
}

bool getMAIIsGFX940XDL(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info && Info->is_gfx940_xdl;
}

bool getWMMAIsXDL(unsigned Opc) {
  const WMMAInstInfo *Info = getWMMAInstInfoHelper(Opc);
  return Info ? Info->is_wmma_xdl : false;
}

uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal) {
  switch (EncodingVal) {
  case MFMAScaleFormats::FP6_E2M3:
  case MFMAScaleFormats::FP6_E3M2:
    return 6;
  case MFMAScaleFormats::FP4_E2M1:
    return 4;
  case MFMAScaleFormats::FP8_E4M3:
  case MFMAScaleFormats::FP8_E5M2:
  default:
    return 8;
  }

  llvm_unreachable("covered switch over mfma scale formats");
}

const MFMA_F8F6F4_Info *getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ,
                                                      unsigned BLGP,
                                                      unsigned F8F8Opcode) {
  uint8_t SrcANumRegs = mfmaScaleF8F6F4FormatToNumRegs(CBSZ);
  uint8_t SrcBNumRegs = mfmaScaleF8F6F4FormatToNumRegs(BLGP);
  return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode);
}

uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt) {
  switch (Fmt) {
  case WMMA::MATRIX_FMT_FP8:
  case WMMA::MATRIX_FMT_BF8:
    return 16;
  case WMMA::MATRIX_FMT_FP6:
  case WMMA::MATRIX_FMT_BF6:
    return 12;
  case WMMA::MATRIX_FMT_FP4:
    return 8;
  }

  llvm_unreachable("covered switch over wmma scale formats");
}

const MFMA_F8F6F4_Info *getWMMA_F8F6F4_WithFormatArgs(unsigned FmtA,
                                                      unsigned FmtB,
                                                      unsigned F8F8Opcode) {
  uint8_t SrcANumRegs = wmmaScaleF8F6F4FormatToNumRegs(FmtA);
  uint8_t SrcBNumRegs = wmmaScaleF8F6F4FormatToNumRegs(FmtB);
  return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode);
}

unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) {
  if (ST.hasFeature(AMDGPU::FeatureGFX13Insts))
    return SIEncodingFamily::GFX13;
  if (ST.hasFeature(AMDGPU::FeatureGFX1250Insts))
    return SIEncodingFamily::GFX1250;
  if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
    return SIEncodingFamily::GFX12;
  if (ST.hasFeature(AMDGPU::FeatureGFX11_7Insts))
    return SIEncodingFamily::GFX1170;
  if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
    return SIEncodingFamily::GFX11;
  llvm_unreachable("Subtarget generation does not support VOPD!");
}

CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3) {
  bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
  Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  if (Info) {
    // Check that Opc can be used as VOPDY for this encoding. V_MOV_B32 as a
    // VOPDX is just a placeholder here; it is supported on all encodings.
    // TODO: This can be optimized by creating tables of supported VOPDY
    // opcodes per encoding.
    unsigned VOPDMov = AMDGPU::getVOPDOpcode(AMDGPU::V_MOV_B32_e32, VOPD3);
    bool CanBeVOPDX;
    if (VOPD3) {
      CanBeVOPDX = getVOPDFull(AMDGPU::getVOPDOpcode(Opc, VOPD3), VOPDMov,
                               EncodingFamily, VOPD3) != -1;
    } else {
      // The list of VOPDX opcodes is currently the same in all encoding
      // families, so we do not need a family-specific check.
      CanBeVOPDX = Info->CanBeVOPDX;
    }
    bool CanBeVOPDY = getVOPDFull(VOPDMov, AMDGPU::getVOPDOpcode(Opc, VOPD3),
                                  EncodingFamily, VOPD3) != -1;
    return {CanBeVOPDX, CanBeVOPDY};
  }

  return {false, false};
}

unsigned getVOPDOpcode(unsigned Opc, bool VOPD3) {
  bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
  Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  return Info ? Info->VOPDOp : ~0u;
}

bool isVOPD(unsigned Opc) {
  return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
}

bool isMAC(unsigned Opc) {
  return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F32_e64_vi ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F16_e64_vi ||
         Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
         Opc == AMDGPU::V_FMAC_F64_e64_gfx12 ||
         Opc == AMDGPU::V_FMAC_F64_e64_gfx13 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx13 ||
         Opc == AMDGPU::V_FMAC_F32_e64_vi ||
         Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
         Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx12 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx13 ||
         Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx13 ||
         Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
         Opc == AMDGPU::V_DOT2C_F32_BF16_e64_vi ||
         Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
         Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
         Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
}

bool isPermlane16(unsigned Opc) {
  return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
}

bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) {
  return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_BF8_fake16_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_FP8_fake16_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_BF8_t16_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_FP8_t16_e64_gfx12;
}

bool isGenericAtomic(unsigned Opc) {
  return Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB_CLAMP_U32 ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_COND_SUB_U32 ||
         Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
}

bool isAsyncStore(unsigned Opc) {
  return Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_gfx1250 ||
         Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_gfx1250 ||
         Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_gfx1250 ||
         Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_gfx1250 ||
         Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_SADDR_gfx1250 ||
         Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_SADDR_gfx1250 ||
         Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_SADDR_gfx1250 ||
         Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_SADDR_gfx1250;
}

bool isTensorStore(unsigned Opc) {
  return Opc == TENSOR_STORE_FROM_LDS_d2_gfx1250 ||
         Opc == TENSOR_STORE_FROM_LDS_d4_gfx1250;
}

unsigned getTemporalHintType(const MCInstrDesc TID) {
  if (TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))
    return CPol::TH_TYPE_ATOMIC;
  unsigned Opc = TID.getOpcode();
  // Async and tensor stores should have the temporal hint type TH_TYPE_STORE.
  if (TID.mayStore() &&
      (isAsyncStore(Opc) || isTensorStore(Opc) || !TID.mayLoad()))
    return CPol::TH_TYPE_STORE;

  // This defaults to returning TH_TYPE_LOAD when neither the MayStore nor the
  // MayLoad flag is present, which is the case with instructions like
  // image_get_resinfo.
  return CPol::TH_TYPE_LOAD;
}

bool isTrue16Inst(unsigned Opc) {
  const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
  return Info && Info->IsTrue16;
}

FPType getFPDstSelType(unsigned Opc) {
  const FP4FP8DstByteSelInfo *Info = getFP4FP8DstByteSelHelper(Opc);
  if (!Info)
    return FPType::None;
  if (Info->HasFP8DstByteSel)
    return FPType::FP8;
  if (Info->HasFP4DstByteSel)
    return FPType::FP4;

  return FPType::None;
}

bool isDPMACCInstruction(unsigned Opc) {
  const DPMACCInstructionInfo *Info = getDPMACCInstructionHelper(Opc);
  return Info && Info->IsDPMACCInstruction;
}

unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
  return Info ? Info->Opcode3Addr : ~0u;
}

unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
  return Info ? Info->Opcode2Addr : ~0u;
}

// Wrapper for the Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
int32_t getMCOpcode(uint32_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}

unsigned getBitOp2(unsigned Opc) {
  switch (Opc) {
  default:
    return 0;
  case AMDGPU::V_AND_B32_e32:
    return 0x40;
  case AMDGPU::V_OR_B32_e32:
    return 0x54;
  case AMDGPU::V_XOR_B32_e32:
    return 0x14;
  case AMDGPU::V_XNOR_B32_e32:
    return 0x41;
  }
}
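
// A note on the constants above (an observation from the encodings, not a
// documented statement): they read as V_BITOP3_B32 truth-table immediates
// with one bit per (src0, src1, src2) input combination, chosen so the
// two-input op is reproduced when src2 is zero. For example, 0x54 =
// 0b0101'0100 sets the table bits for inputs 010, 100, and 110, i.e.
// src0 | src1.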

int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily,
                bool VOPD3) {
  bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(OpY) : 0;
  OpY = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : OpY;
  const VOPDInfo *Info =
      getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily, VOPD3);
  return Info ? Info->Opcode : -1;
}

std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
  const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
  assert(Info);
  const auto *OpX = getVOPDBaseFromComponent(Info->OpX);
  const auto *OpY = getVOPDBaseFromComponent(Info->OpY);
  assert(OpX && OpY);
  return {OpX->BaseVOP, OpY->BaseVOP};
}

namespace VOPD {

ComponentProps::ComponentProps(const MCInstrDesc &OpDesc, bool VOP3Layout) {
  assert(OpDesc.getNumDefs() == Component::DST_NUM);

  assert(OpDesc.getOperandConstraint(Component::SRC0, MCOI::TIED_TO) == -1);
  assert(OpDesc.getOperandConstraint(Component::SRC1, MCOI::TIED_TO) == -1);
  auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO);
  assert(TiedIdx == -1 || TiedIdx == Component::DST);
  HasSrc2Acc = TiedIdx != -1;
  Opcode = OpDesc.getOpcode();

  IsVOP3 = VOP3Layout || (OpDesc.TSFlags & SIInstrFlags::VOP3);
  SrcOperandsNum = AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2) ? 3
                   : AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm) ? 3
                   : AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src1)
                       ? 2
                       : 1;
  assert(SrcOperandsNum <= Component::MAX_SRC_NUM);

  if (Opcode == AMDGPU::V_CNDMASK_B32_e32 ||
      Opcode == AMDGPU::V_CNDMASK_B32_e64) {
    // CNDMASK is an awkward exception: it has FP modifiers, but not FP
    // operands.
    NumVOPD3Mods = 2;
    if (IsVOP3)
      SrcOperandsNum = 3;
  } else if (isSISrcFPOperand(OpDesc,
                              getNamedOperandIdx(Opcode, OpName::src0))) {
    // All FP VOPD instructions have Neg modifiers for all operands except
    // for tied src2.
    NumVOPD3Mods = SrcOperandsNum;
    if (HasSrc2Acc)
      --NumVOPD3Mods;
  }

  if (OpDesc.TSFlags & SIInstrFlags::VOP3)
    return;

  auto OperandsNum = OpDesc.getNumOperands();
  unsigned CompOprIdx;
  for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
    if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
      MandatoryLiteralIdx = CompOprIdx;
      break;
    }
  }
}

int ComponentProps::getBitOp3OperandIdx() const {
  return getNamedOperandIdx(Opcode, OpName::bitop3);
}

unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
  assert(CompOprIdx < Component::MAX_OPR_NUM);

  if (CompOprIdx == Component::DST)
    return getIndexOfDstInParsedOperands();

  auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
  if (CompSrcIdx < getCompParsedSrcOperandsNum())
    return getIndexOfSrcInParsedOperands(CompSrcIdx);

  // The specified operand does not exist.
  return 0;
}

std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
    std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
    const MCRegisterInfo &MRI, bool SkipSrc, bool AllowSameVGPR,
    bool VOPD3) const {

  auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx,
                               CompInfo[ComponentIndex::X].isVOP3());
  auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx,
                               CompInfo[ComponentIndex::Y].isVOP3());

  const auto banksOverlap = [&MRI](MCRegister X, MCRegister Y,
                                   unsigned BanksMask) -> bool {
    MCRegister BaseX = MRI.getSubReg(X, AMDGPU::sub0);
    MCRegister BaseY = MRI.getSubReg(Y, AMDGPU::sub0);
    if (!BaseX)
      BaseX = X;
    if (!BaseY)
      BaseY = Y;
    if ((BaseX.id() & BanksMask) == (BaseY.id() & BanksMask))
      return true;
    if (BaseX != X /* This is a 64-bit register */ &&
        ((BaseX.id() + 1) & BanksMask) == (BaseY.id() & BanksMask))
      return true;
    if (BaseY != Y &&
        (BaseX.id() & BanksMask) == ((BaseY.id() + 1) & BanksMask))
      return true;

    // If both are 64-bit, a bank conflict will already be detected while
    // checking the first subreg.
    return false;
  };

  unsigned CompOprIdx;
  for (CompOprIdx = 0; CompOprIdx < Component::MAX_OPR_NUM; ++CompOprIdx) {
    unsigned BanksMasks = VOPD3 ? VOPD3_VGPR_BANK_MASKS[CompOprIdx]
                                : VOPD_VGPR_BANK_MASKS[CompOprIdx];
    if (!OpXRegs[CompOprIdx] || !OpYRegs[CompOprIdx])
      continue;

    if (getVGPREncodingMSBs(OpXRegs[CompOprIdx], MRI) !=
        getVGPREncodingMSBs(OpYRegs[CompOprIdx], MRI))
      return CompOprIdx;

    if (SkipSrc && CompOprIdx >= Component::DST_NUM)
      continue;

    if (CompOprIdx < Component::DST_NUM) {
      // Even if we do not check vdst parity, vdst operands still shall not
      // overlap.
      if (MRI.regsOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx]))
        return CompOprIdx;
      if (VOPD3) // No need to check dst parity.
        continue;
    }

    if (banksOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx], BanksMasks) &&
        (!AllowSameVGPR || CompOprIdx < Component::DST_NUM ||
         OpXRegs[CompOprIdx] != OpYRegs[CompOprIdx]))
      return CompOprIdx;
  }

  return {};
}

// Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
// by the specified component. If an operand is unused
// or is not a VGPR, the corresponding value is 0.
//
// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
// for the specified component and MC operand. The callback must return 0
// if the operand is not a register or not a VGPR.
InstInfo::RegIndices
InstInfo::getRegIndices(unsigned CompIdx,
                        std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
                        bool VOPD3) const {
  assert(CompIdx < COMPONENTS_NUM);

  const auto &Comp = CompInfo[CompIdx];
  InstInfo::RegIndices RegIndices;

  RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());

  for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
    unsigned CompSrcIdx = CompOprIdx - DST_NUM;
    RegIndices[CompOprIdx] =
        Comp.hasRegSrcOperand(CompSrcIdx)
            ? GetRegIdx(CompIdx,
                        Comp.getIndexOfSrcInMCOperands(CompSrcIdx, VOPD3))
            : MCRegister();
  }
  return RegIndices;
}

} // namespace VOPD

VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) {
  return VOPD::InstInfo(OpX, OpY);
}

VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
                               const MCInstrInfo *InstrInfo) {
  auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
  const auto &OpXDesc = InstrInfo->get(OpX);
  const auto &OpYDesc = InstrInfo->get(OpY);
  bool VOPD3 = InstrInfo->get(VOPDOpcode).TSFlags & SIInstrFlags::VOPD3;
  VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X, VOPD3);
  VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo, VOPD3);
  return VOPD::InstInfo(OpXInfo, OpYInfo);
}

namespace IsaInfo {

AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
    : STI(STI), XnackSetting(TargetIDSetting::Any),
      SramEccSetting(TargetIDSetting::Any) {
  if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
    XnackSetting = TargetIDSetting::Unsupported;
  if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
    SramEccSetting = TargetIDSetting::Unsupported;
}

void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
  // Check if xnack or sramecc is explicitly enabled or disabled. In the
  // absence of these target features, we assume we must generate code that
  // can run in any environment.
  SubtargetFeatures Features(FS);
  std::optional<bool> XnackRequested;
  std::optional<bool> SramEccRequested;

  for (const std::string &Feature : Features.getFeatures()) {
    if (Feature == "+xnack")
      XnackRequested = true;
    else if (Feature == "-xnack")
      XnackRequested = false;
    else if (Feature == "+sramecc")
      SramEccRequested = true;
    else if (Feature == "-sramecc")
      SramEccRequested = false;
  }

  bool XnackSupported = isXnackSupported();
  bool SramEccSupported = isSramEccSupported();

  if (XnackRequested) {
    if (XnackSupported) {
      XnackSetting =
          *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific xnack setting was requested and this GPU does not
      // support xnack, emit a warning. The setting will remain "Unsupported".
      if (*XnackRequested) {
        errs() << "warning: xnack 'On' was requested for a processor that does "
                  "not support it!\n";
      } else {
        errs() << "warning: xnack 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }

  if (SramEccRequested) {
    if (SramEccSupported) {
      SramEccSetting =
          *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific sramecc setting was requested and this GPU does not
      // support sramecc, emit a warning. The setting will remain
      // "Unsupported".
      if (*SramEccRequested) {
        errs() << "warning: sramecc 'On' was requested for a processor that "
                  "does not support it!\n";
      } else {
        errs() << "warning: sramecc 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }
}

static TargetIDSetting
getTargetIDSettingFromFeatureString(StringRef FeatureString) {
  if (FeatureString.ends_with("-"))
    return TargetIDSetting::Off;
  if (FeatureString.ends_with("+"))
    return TargetIDSetting::On;

  llvm_unreachable("Malformed feature string");
}

void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
  SmallVector<StringRef, 3> TargetIDSplit;
  TargetID.split(TargetIDSplit, ':');

  for (const auto &FeatureString : TargetIDSplit) {
    if (FeatureString.starts_with("xnack"))
      XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
    if (FeatureString.starts_with("sramecc"))
      SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
  }
}

void AMDGPUTargetID::print(raw_ostream &StreamRep) const {
  const Triple &TargetTriple = STI.getTargetTriple();
  auto Version = getIsaVersion(STI.getCPU());

  StreamRep << TargetTriple.getArchName() << '-'
            << TargetTriple.getVendorName() << '-'
            << TargetTriple.getOSName() << '-'
            << TargetTriple.getEnvironmentName() << '-';

  std::string Processor;
  // TODO: The following else statement is present here because we used
  // various alias names for GPUs up until GFX9 (e.g. 'fiji' is the same as
  // 'gfx803'). Remove once all aliases are removed from GCNProcessors.td.
  if (Version.Major >= 9)
    Processor = STI.getCPU().str();
  else
    Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
                 Twine(Version.Stepping))
                    .str();

  std::string Features;
  if (TargetTriple.getOS() == Triple::AMDHSA) {
    // sramecc.
    if (getSramEccSetting() == TargetIDSetting::Off)
      Features += ":sramecc-";
    else if (getSramEccSetting() == TargetIDSetting::On)
      Features += ":sramecc+";
    // xnack.
    if (getXnackSetting() == TargetIDSetting::Off)
      Features += ":xnack-";
    else if (getXnackSetting() == TargetIDSetting::On)
      Features += ":xnack+";
  }

  StreamRep << Processor << Features;
}

std::string AMDGPUTargetID::toString() const {
  std::string Str;
  raw_string_ostream OS(Str);
  OS << *this;
  return Str;
}

unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureWavefrontSize16))
    return 16;
  if (STI->getFeatureBits().test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
  unsigned BytesPerCU = getAddressableLocalMemorySize(STI);

  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share". So the effective local memory size is doubled in
  // WGP mode on gfx10.
  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
    BytesPerCU *= 2;

  return BytesPerCU;
}

unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize32768))
    return 32768;
  if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize65536))
    return 65536;
  if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize163840))
    return 163840;
  if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize327680))
    return 327680;
  return 32768;
}

unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share".

  // GFX12.5 only supports CU mode, in which a CU contains four SIMDs.
  if (isGFX1250(*STI)) {
    assert(STI->getFeatureBits().test(FeatureCuMode));
    return 4;
  }

  // For gfx10 in CU mode the functional block is the CU, which contains
  // two SIMDs.
  if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
    return 2;

  // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP
  // contains two CUs, so a total of four SIMDs.
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize) {
  assert(FlatWorkGroupSize != 0);
  if (!STI->getTargetTriple().isAMDGCN())
    return 8;
  unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI);
  unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
  if (N == 1) {
    // Single-wave workgroups don't consume barrier resources.
    return MaxWaves;
  }

  unsigned MaxBarriers = 16;
  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
    MaxBarriers = 32;

  return std::min(MaxWaves / N, MaxBarriers);
}
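
// Illustrative example: on a pre-gfx10 target (10 waves/EU, 4 EUs/CU,
// wave64), a flat workgroup size of 256 gives N = 4 waves per group, so
// min(40 / 4, 16) = 10 workgroups fit on a CU.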

unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) { return 1; }

unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
  // FIXME: Need to take scratch memory into account.
  if (isGFX90A(*STI))
    return 8;
  if (!isGFX10Plus(*STI))
    return 10;
  return hasGFX10_3Insts(*STI) ? 16 : 20;
}

unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize) {
  return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
                    getEUsPerCU(STI));
}

unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) { return 1; }

unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize) {
  return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
}

unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return getAddressableNumSGPRs(STI);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) { return 8; }

unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 106;
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 0;

  if (WavesPerEU >= getMaxWavesPerEU(STI))
    return 0;

  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}

unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return Addressable ? AddressableNumSGPRs : 108;
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}
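
// Illustrative example (assuming TRAP_NUM_SGPRS is 16): on gfx8 with the
// trap handler enabled, 8 waves/EU gives 800 / 8 = 100 SGPRs, minus 16 trap
// SGPRs = 84, aligned down to the 16-register granule = 80, within the
// addressable limit.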

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed) {
  unsigned ExtraSGPRs = 0;
  if (VCCUsed)
    ExtraSGPRs = 2;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return ExtraSGPRs;

  if (Version.Major < 8) {
    if (FlatScrUsed)
      ExtraSGPRs = 4;
  } else {
    if (XNACKUsed)
      ExtraSGPRs = 4;

    if (FlatScrUsed ||
        STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
      ExtraSGPRs = 6;
  }

  return ExtraSGPRs;
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed) {
  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}

static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs,
                                               unsigned Granule) {
  return divideCeil(std::max(1u, NumRegs), Granule);
}

unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
  // SGPRBlocks is actual number of SGPR blocks minus 1.
  return getGranulatedNumRegisterBlocks(NumSGPRs, getSGPREncodingGranule(STI)) -
         1;
}

unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             unsigned DynamicVGPRBlockSize,
                             std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  if (DynamicVGPRBlockSize != 0)
    return DynamicVGPRBlockSize;

  bool IsWave32 = EnableWavefrontSize32
                      ? *EnableWavefrontSize32
                      : STI->getFeatureBits().test(FeatureWavefrontSize32);

  if (STI->getFeatureBits().test(Feature1_5xVGPRs))
    return IsWave32 ? 24 : 12;

  if (hasGFX10_3Insts(*STI))
    return IsWave32 ? 16 : 8;

  return IsWave32 ? 8 : 4;
}

unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  bool IsWave32 = EnableWavefrontSize32
                      ? *EnableWavefrontSize32
                      : STI->getFeatureBits().test(FeatureWavefrontSize32);

  if (STI->getFeatureBits().test(Feature1024AddressableVGPRs))
    return IsWave32 ? 16 : 8;

  return IsWave32 ? 8 : 4;
}

unsigned getArchVGPRAllocGranule() { return 4; }

unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  if (!isGFX10Plus(*STI))
    return 256;
  bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32);
  if (STI->getFeatureBits().test(Feature1_5xVGPRs))
    return IsWave32 ? 1536 : 768;
  return IsWave32 ? 1024 : 512;
}

unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI) {
  const auto &Features = STI->getFeatureBits();
  if (Features.test(Feature1024AddressableVGPRs))
    return Features.test(FeatureWavefrontSize32) ? 1024 : 512;
  return 256;
}

unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI,
                                unsigned DynamicVGPRBlockSize) {
  const auto &Features = STI->getFeatureBits();
  if (Features.test(FeatureGFX90AInsts))
    return 512;

  if (DynamicVGPRBlockSize != 0)
    // On GFX12 we can allocate at most 8 blocks of VGPRs.
    return 8 * getVGPRAllocGranule(STI, DynamicVGPRBlockSize);
  return getAddressableNumArchVGPRs(STI);
}

unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
                                      unsigned NumVGPRs,
                                      unsigned DynamicVGPRBlockSize) {
  return getNumWavesPerEUWithNumVGPRs(
      NumVGPRs, getVGPRAllocGranule(STI, DynamicVGPRBlockSize),
      getMaxWavesPerEU(STI), getTotalNumVGPRs(STI));
}

unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
                                      unsigned MaxWaves,
                                      unsigned TotalNumVGPRs) {
  if (NumVGPRs < Granule)
    return MaxWaves;
  unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
  return std::min(std::max(TotalNumVGPRs / RoundedRegs, 1u), MaxWaves);
}
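
// Illustrative example: with Granule = 4, TotalNumVGPRs = 256, and
// MaxWaves = 10, a kernel using 93 VGPRs rounds up to 96, giving
// min(max(256 / 96, 1), 10) = 2 waves per EU.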

unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
                                  AMDGPUSubtarget::Generation Gen) {
  if (Gen >= AMDGPUSubtarget::GFX10)
    return MaxWaves;

  if (Gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
    if (SGPRs <= 80)
      return 10;
    if (SGPRs <= 88)
      return 9;
    if (SGPRs <= 100)
      return 8;
    return 7;
  }
  if (SGPRs <= 48)
    return 10;
  if (SGPRs <= 56)
    return 9;
  if (SGPRs <= 64)
    return 8;
  if (SGPRs <= 72)
    return 7;
  if (SGPRs <= 80)
    return 6;
  return 5;
}

unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        unsigned DynamicVGPRBlockSize) {
  assert(WavesPerEU != 0);

  // In dynamic VGPR mode, (static) occupancy does not depend on VGPR usage,
  // so getMaxNumVGPRs does not depend on WavesPerEU, and thus we need to
  // return zero because there is no nonzero VGPR usage N where going below N
  // achieves higher (static) occupancy.
  bool DynamicVGPREnabled = (DynamicVGPRBlockSize != 0);
  if (DynamicVGPREnabled)
    return 0;

  unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
  if (WavesPerEU >= MaxWavesPerEU)
    return 0;

  unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
  unsigned AddrsableNumVGPRs =
      getAddressableNumVGPRs(STI, DynamicVGPRBlockSize);
  unsigned Granule = getVGPRAllocGranule(STI, DynamicVGPRBlockSize);
  unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);

  if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
    return 0;

  unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs,
                                                        DynamicVGPRBlockSize);
  if (WavesPerEU < MinWavesPerEU)
    return getMinNumVGPRs(STI, MinWavesPerEU, DynamicVGPRBlockSize);

  unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
  unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
  return std::min(MinNumVGPRs, AddrsableNumVGPRs);
}

unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        unsigned DynamicVGPRBlockSize) {
  assert(WavesPerEU != 0);

  // In dynamic VGPR mode, WavesPerEU does not imply a VGPR limit.
  bool DynamicVGPREnabled = (DynamicVGPRBlockSize != 0);
  unsigned MaxNumVGPRs =
      DynamicVGPREnabled
          ? getTotalNumVGPRs(STI)
          : alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
                      getVGPRAllocGranule(STI, DynamicVGPRBlockSize));
  unsigned AddressableNumVGPRs =
      getAddressableNumVGPRs(STI, DynamicVGPRBlockSize);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                                 std::optional<bool> EnableWavefrontSize32) {
  return getGranulatedNumRegisterBlocks(
             NumVGPRs, getVGPREncodingGranule(STI, EnableWavefrontSize32)) -
         1;
}

unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI,
                                   unsigned NumVGPRs,
                                   unsigned DynamicVGPRBlockSize,
                                   std::optional<bool> EnableWavefrontSize32) {
  return getGranulatedNumRegisterBlocks(
      NumVGPRs,
      getVGPRAllocGranule(STI, DynamicVGPRBlockSize, EnableWavefrontSize32));
}
} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode,
                               const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  KernelCode.amd_kernel_code_version_major = 1;
  KernelCode.amd_kernel_code_version_minor = 2;
  KernelCode.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  KernelCode.amd_machine_version_major = Version.Major;
  KernelCode.amd_machine_version_minor = Version.Minor;
  KernelCode.amd_machine_version_stepping = Version.Stepping;
  KernelCode.kernel_code_entry_byte_offset = sizeof(amd_kernel_code_t);
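  // wavefront_size is encoded as log2 of the lane count: 5 => wave32,
  // 6 => wave64.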
  if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
    KernelCode.wavefront_size = 5;
    KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
  } else {
    KernelCode.wavefront_size = 6;
  }

  // If the code object does not support indirect functions, then the value
  // must be 0xffffffff.
  KernelCode.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  KernelCode.kernarg_segment_alignment = 4;
  KernelCode.group_segment_alignment = 4;
  KernelCode.private_segment_alignment = 4;

  if (Version.Major >= 10) {
    KernelCode.compute_pgm_resource_registers |=
        S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
        S_00B848_MEM_ORDERED(1) | S_00B848_FWD_PROGRESS(1);
  }
}

bool isGroupSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV) {
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::CONSTANT_ADDRESS ||
         AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getArch() == Triple::r600;
}

static bool isValidRegPrefix(char C) {
  return C == 'v' || C == 's' || C == 'a';
}

std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef RegName) {
  char Kind = RegName.front();
  if (!isValidRegPrefix(Kind))
    return {};

  RegName = RegName.drop_front();
  if (RegName.consume_front("[")) {
    unsigned Idx, End;
    bool Failed = RegName.consumeInteger(10, Idx);
    Failed |= !RegName.consume_front(":");
    Failed |= RegName.consumeInteger(10, End);
    Failed |= !RegName.consume_back("]");
    if (!Failed) {
      unsigned NumRegs = End - Idx + 1;
      if (NumRegs > 1)
        return {Kind, Idx, NumRegs};
    }
  } else {
    unsigned Idx;
    bool Failed = RegName.getAsInteger(10, Idx);
    if (!Failed)
      return {Kind, Idx, 1};
  }

  return {};
}
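
// Illustrative examples: "v[8:11]" parses to {'v', 8, 4} and "s17" to
// {'s', 17, 1}; malformed or single-element bracketed names such as
// "v[3:3]" or "x0" yield the default-constructed (empty) tuple.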
1644
1645std::tuple<char, unsigned, unsigned>
1646parseAsmConstraintPhysReg(StringRef Constraint) {
1647 StringRef RegName = Constraint;
1648 if (!RegName.consume_front(Prefix: "{") || !RegName.consume_back(Suffix: "}"))
1649 return {};
1650 return parseAsmPhysRegName(RegName);
1651}
1652
1653std::pair<unsigned, unsigned>
1654getIntegerPairAttribute(const Function &F, StringRef Name,
1655 std::pair<unsigned, unsigned> Default,
1656 bool OnlyFirstRequired) {
1657 if (auto Attr = getIntegerPairAttribute(F, Name, OnlyFirstRequired))
1658 return {Attr->first, Attr->second.value_or(u&: Default.second)};
1659 return Default;
1660}
1661
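// Parses a string attribute holding one or two comma-separated integers,
// e.g. "amdgpu-flat-work-group-size"="1,256" yields {1, 256}. If
// \p OnlyFirstRequired is set, a value such as "128" yields
// {128, std::nullopt}.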
1662std::optional<std::pair<unsigned, std::optional<unsigned>>>
1663getIntegerPairAttribute(const Function &F, StringRef Name,
1664 bool OnlyFirstRequired) {
1665 Attribute A = F.getFnAttribute(Kind: Name);
1666 if (!A.isStringAttribute())
1667 return std::nullopt;
1668
1669 LLVMContext &Ctx = F.getContext();
1670 std::pair<unsigned, std::optional<unsigned>> Ints;
1671 std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(Separator: ',');
1672 if (Strs.first.trim().getAsInteger(Radix: 0, Result&: Ints.first)) {
1673 Ctx.emitError(ErrorStr: "can't parse first integer attribute " + Name);
1674 return std::nullopt;
1675 }
1676 unsigned Second = 0;
1677 if (Strs.second.trim().getAsInteger(Radix: 0, Result&: Second)) {
1678 if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
1679 Ctx.emitError(ErrorStr: "can't parse second integer attribute " + Name);
1680 return std::nullopt;
1681 }
1682 } else {
1683 Ints.second = Second;
1684 }
1685
1686 return Ints;
1687}
1688
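// Parses a string attribute holding exactly \p Size comma-separated integers,
// e.g. "amdgpu-max-num-workgroups"="16,8,1" with \p Size == 3 yields
// {16, 8, 1}. An absent attribute yields \p DefaultVal in every element.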
1689SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
1690 unsigned Size,
1691 unsigned DefaultVal) {
1692 std::optional<SmallVector<unsigned>> R =
1693 getIntegerVecAttribute(F, Name, Size);
1694 return R.has_value() ? *R : SmallVector<unsigned>(Size, DefaultVal);
1695}
1696
1697std::optional<SmallVector<unsigned>>
1698getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size) {
1699 assert(Size > 2);
1700 LLVMContext &Ctx = F.getContext();
1701
1702 Attribute A = F.getFnAttribute(Kind: Name);
1703 if (!A.isValid())
1704 return std::nullopt;
1705 if (!A.isStringAttribute()) {
1706 Ctx.emitError(ErrorStr: Name + " is not a string attribute");
1707 return std::nullopt;
1708 }
1709
1710 SmallVector<unsigned> Vals(Size);
1711
1712 StringRef S = A.getValueAsString();
1713 unsigned i = 0;
1714 for (; !S.empty() && i < Size; i++) {
1715 std::pair<StringRef, StringRef> Strs = S.split(Separator: ',');
1716 unsigned IntVal;
1717 if (Strs.first.trim().getAsInteger(Radix: 0, Result&: IntVal)) {
1718 Ctx.emitError(ErrorStr: "can't parse integer attribute " + Strs.first + " in " +
1719 Name);
1720 return std::nullopt;
1721 }
1722 Vals[i] = IntVal;
1723 S = Strs.second;
1724 }
1725
1726 if (!S.empty() || i < Size) {
1727 Ctx.emitError(ErrorStr: "attribute " + Name +
1728 " has incorrect number of integers; expected " +
1729 llvm::utostr(X: Size));
1730 return std::nullopt;
1731 }
1732 return Vals;
1733}
1734
1735bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val) {
1736 assert((MD.getNumOperands() % 2 == 0) && "invalid number of operands!");
1737 for (unsigned I = 0, E = MD.getNumOperands() / 2; I != E; ++I) {
1738 auto Low =
1739 mdconst::extract<ConstantInt>(MD: MD.getOperand(I: 2 * I + 0))->getValue();
1740 auto High =
1741 mdconst::extract<ConstantInt>(MD: MD.getOperand(I: 2 * I + 1))->getValue();
1742 // There are two types of [A; B) ranges:
1743 // A < B, e.g. [4; 5) which is a range that only includes 4.
1744 // A > B, e.g. [5; 4) which is a range that wraps around and includes
1745 // everything except 4.
1746 if (Low.ult(RHS: High)) {
1747 if (Low.ule(RHS: Val) && High.ugt(RHS: Val))
1748 return true;
1749 } else {
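      // A wrapped range [Low; High) contains Val iff Val >= Low or Val < High.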
1750      if (Low.ule(RHS: Val) || High.ugt(RHS: Val))
1751 return true;
1752 }
1753 }
1754
1755 return false;
1756}
1757
1758raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait) {
1759 ListSeparator LS;
1760 if (Wait.LoadCnt != ~0u)
1761 OS << LS << "LoadCnt: " << Wait.LoadCnt;
1762 if (Wait.ExpCnt != ~0u)
1763 OS << LS << "ExpCnt: " << Wait.ExpCnt;
1764 if (Wait.DsCnt != ~0u)
1765 OS << LS << "DsCnt: " << Wait.DsCnt;
1766 if (Wait.StoreCnt != ~0u)
1767 OS << LS << "StoreCnt: " << Wait.StoreCnt;
1768 if (Wait.SampleCnt != ~0u)
1769 OS << LS << "SampleCnt: " << Wait.SampleCnt;
1770 if (Wait.BvhCnt != ~0u)
1771 OS << LS << "BvhCnt: " << Wait.BvhCnt;
1772 if (Wait.KmCnt != ~0u)
1773 OS << LS << "KmCnt: " << Wait.KmCnt;
1774 if (Wait.XCnt != ~0u)
1775 OS << LS << "XCnt: " << Wait.XCnt;
1776 if (LS.unused())
1777 OS << "none";
1778 OS << '\n';
1779 return OS;
1780}
1781
1782unsigned getVmcntBitMask(const IsaVersion &Version) {
1783 return (1 << (getVmcntBitWidthLo(VersionMajor: Version.Major) +
1784 getVmcntBitWidthHi(VersionMajor: Version.Major))) -
1785 1;
1786}
1787
1788unsigned getLoadcntBitMask(const IsaVersion &Version) {
1789 return (1 << getLoadcntBitWidth(VersionMajor: Version.Major)) - 1;
1790}
1791
1792unsigned getSamplecntBitMask(const IsaVersion &Version) {
1793 return (1 << getSamplecntBitWidth(VersionMajor: Version.Major)) - 1;
1794}
1795
1796unsigned getBvhcntBitMask(const IsaVersion &Version) {
1797 return (1 << getBvhcntBitWidth(VersionMajor: Version.Major)) - 1;
1798}
1799
1800unsigned getExpcntBitMask(const IsaVersion &Version) {
1801 return (1 << getExpcntBitWidth(VersionMajor: Version.Major)) - 1;
1802}
1803
1804unsigned getLgkmcntBitMask(const IsaVersion &Version) {
1805 return (1 << getLgkmcntBitWidth(VersionMajor: Version.Major)) - 1;
1806}
1807
1808unsigned getDscntBitMask(const IsaVersion &Version) {
1809 return (1 << getDscntBitWidth(VersionMajor: Version.Major)) - 1;
1810}
1811
1812unsigned getKmcntBitMask(const IsaVersion &Version) {
1813 return (1 << getKmcntBitWidth(VersionMajor: Version.Major)) - 1;
1814}
1815
1816unsigned getXcntBitMask(const IsaVersion &Version) {
1817 return (1 << getXcntBitWidth(VersionMajor: Version.Major, VersionMinor: Version.Minor)) - 1;
1818}
1819
1820unsigned getStorecntBitMask(const IsaVersion &Version) {
1821 return (1 << getStorecntBitWidth(VersionMajor: Version.Major)) - 1;
1822}
1823
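// Per-counter upper bounds for the given ISA. Before GFX12 (no extended wait
// counts), loadcnt and dscnt take the ranges of the legacy vmcnt and lgkmcnt
// fields.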
1824HardwareLimits::HardwareLimits(const IsaVersion &IV) {
1825 bool HasExtendedWaitCounts = IV.Major >= 12;
1826 if (HasExtendedWaitCounts) {
1827 LoadcntMax = getLoadcntBitMask(Version: IV);
1828 DscntMax = getDscntBitMask(Version: IV);
1829 } else {
1830 LoadcntMax = getVmcntBitMask(Version: IV);
1831 DscntMax = getLgkmcntBitMask(Version: IV);
1832 }
1833 ExpcntMax = getExpcntBitMask(Version: IV);
1834 StorecntMax = getStorecntBitMask(Version: IV);
1835 SamplecntMax = getSamplecntBitMask(Version: IV);
1836 BvhcntMax = getBvhcntBitMask(Version: IV);
1837 KmcntMax = getKmcntBitMask(Version: IV);
1838 XcntMax = getXcntBitMask(Version: IV);
1839 VaVdstMax = DepCtr::getVaVdstBitMask();
1840 VmVsrcMax = DepCtr::getVmVsrcBitMask();
1841}
1842
1843unsigned getWaitcntBitMask(const IsaVersion &Version) {
1844 unsigned VmcntLo = getBitMask(Shift: getVmcntBitShiftLo(VersionMajor: Version.Major),
1845 Width: getVmcntBitWidthLo(VersionMajor: Version.Major));
1846 unsigned Expcnt = getBitMask(Shift: getExpcntBitShift(VersionMajor: Version.Major),
1847 Width: getExpcntBitWidth(VersionMajor: Version.Major));
1848 unsigned Lgkmcnt = getBitMask(Shift: getLgkmcntBitShift(VersionMajor: Version.Major),
1849 Width: getLgkmcntBitWidth(VersionMajor: Version.Major));
1850 unsigned VmcntHi = getBitMask(Shift: getVmcntBitShiftHi(VersionMajor: Version.Major),
1851 Width: getVmcntBitWidthHi(VersionMajor: Version.Major));
1852 return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
1853}
1854
1855unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1856 unsigned VmcntLo = unpackBits(Src: Waitcnt, Shift: getVmcntBitShiftLo(VersionMajor: Version.Major),
1857 Width: getVmcntBitWidthLo(VersionMajor: Version.Major));
1858 unsigned VmcntHi = unpackBits(Src: Waitcnt, Shift: getVmcntBitShiftHi(VersionMajor: Version.Major),
1859 Width: getVmcntBitWidthHi(VersionMajor: Version.Major));
1860 return VmcntLo | VmcntHi << getVmcntBitWidthLo(VersionMajor: Version.Major);
1861}
1862
1863unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
1864 return unpackBits(Src: Waitcnt, Shift: getExpcntBitShift(VersionMajor: Version.Major),
1865 Width: getExpcntBitWidth(VersionMajor: Version.Major));
1866}
1867
1868unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1869 return unpackBits(Src: Waitcnt, Shift: getLgkmcntBitShift(VersionMajor: Version.Major),
1870 Width: getLgkmcntBitWidth(VersionMajor: Version.Major));
1871}
1872
1873void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt,
1874 unsigned &Expcnt, unsigned &Lgkmcnt) {
1875 Vmcnt = decodeVmcnt(Version, Waitcnt);
1876 Expcnt = decodeExpcnt(Version, Waitcnt);
1877 Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
1878}
1879
1880Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
1881 Waitcnt Decoded;
1882 Decoded.set(T: LOAD_CNT, Val: decodeVmcnt(Version, Waitcnt: Encoded));
1883 Decoded.set(T: EXP_CNT, Val: decodeExpcnt(Version, Waitcnt: Encoded));
1884 Decoded.set(T: DS_CNT, Val: decodeLgkmcnt(Version, Waitcnt: Encoded));
1885 return Decoded;
1886}
1887
1888unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1889 unsigned Vmcnt) {
1890 Waitcnt = packBits(Src: Vmcnt, Dst: Waitcnt, Shift: getVmcntBitShiftLo(VersionMajor: Version.Major),
1891 Width: getVmcntBitWidthLo(VersionMajor: Version.Major));
1892 return packBits(Src: Vmcnt >> getVmcntBitWidthLo(VersionMajor: Version.Major), Dst: Waitcnt,
1893 Shift: getVmcntBitShiftHi(VersionMajor: Version.Major),
1894 Width: getVmcntBitWidthHi(VersionMajor: Version.Major));
1895}
1896
1897unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1898 unsigned Expcnt) {
1899 return packBits(Src: Expcnt, Dst: Waitcnt, Shift: getExpcntBitShift(VersionMajor: Version.Major),
1900 Width: getExpcntBitWidth(VersionMajor: Version.Major));
1901}
1902
1903unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1904 unsigned Lgkmcnt) {
1905 return packBits(Src: Lgkmcnt, Dst: Waitcnt, Shift: getLgkmcntBitShift(VersionMajor: Version.Major),
1906 Width: getLgkmcntBitWidth(VersionMajor: Version.Major));
1907}
1908
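// All counter fields are initialized to their maximal "no wait" values via
// getWaitcntBitMask() before the requested counts are packed in.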
1909unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
1910 unsigned Expcnt, unsigned Lgkmcnt) {
1911 unsigned Waitcnt = getWaitcntBitMask(Version);
1912 Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
1913 Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
1914 Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
1915 return Waitcnt;
1916}
1917
1918unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
1919 return encodeWaitcnt(Version, Vmcnt: Decoded.get(T: LOAD_CNT), Expcnt: Decoded.get(T: EXP_CNT),
1920 Lgkmcnt: Decoded.get(T: DS_CNT));
1921}
1922
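// GFX12+ packs two counters into a single immediate for the combined wait
// instructions (e.g. s_wait_loadcnt_dscnt and s_wait_storecnt_dscnt); the
// helpers below build and split that combined encoding.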
1923static unsigned getCombinedCountBitMask(const IsaVersion &Version,
1924 bool IsStore) {
1925 unsigned Dscnt = getBitMask(Shift: getDscntBitShift(VersionMajor: Version.Major),
1926 Width: getDscntBitWidth(VersionMajor: Version.Major));
1927 if (IsStore) {
1928 unsigned Storecnt = getBitMask(Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1929 Width: getStorecntBitWidth(VersionMajor: Version.Major));
1930 return Dscnt | Storecnt;
1931 }
1932 unsigned Loadcnt = getBitMask(Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1933 Width: getLoadcntBitWidth(VersionMajor: Version.Major));
1934 return Dscnt | Loadcnt;
1935}
1936
1937Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt) {
1938 Waitcnt Decoded;
1939 Decoded.set(T: LOAD_CNT, Val: unpackBits(Src: LoadcntDscnt,
1940 Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1941 Width: getLoadcntBitWidth(VersionMajor: Version.Major)));
1942 Decoded.set(T: DS_CNT, Val: unpackBits(Src: LoadcntDscnt, Shift: getDscntBitShift(VersionMajor: Version.Major),
1943 Width: getDscntBitWidth(VersionMajor: Version.Major)));
1944 return Decoded;
1945}
1946
1947Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt) {
1948 Waitcnt Decoded;
1949 Decoded.set(T: STORE_CNT, Val: unpackBits(Src: StorecntDscnt,
1950 Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1951 Width: getStorecntBitWidth(VersionMajor: Version.Major)));
1952 Decoded.set(T: DS_CNT, Val: unpackBits(Src: StorecntDscnt, Shift: getDscntBitShift(VersionMajor: Version.Major),
1953 Width: getDscntBitWidth(VersionMajor: Version.Major)));
1954 return Decoded;
1955}
1956
1957static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt,
1958 unsigned Loadcnt) {
1959 return packBits(Src: Loadcnt, Dst: Waitcnt, Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1960 Width: getLoadcntBitWidth(VersionMajor: Version.Major));
1961}
1962
1963static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt,
1964 unsigned Storecnt) {
1965 return packBits(Src: Storecnt, Dst: Waitcnt, Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1966 Width: getStorecntBitWidth(VersionMajor: Version.Major));
1967}
1968
1969static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt,
1970 unsigned Dscnt) {
1971 return packBits(Src: Dscnt, Dst: Waitcnt, Shift: getDscntBitShift(VersionMajor: Version.Major),
1972 Width: getDscntBitWidth(VersionMajor: Version.Major));
1973}
1974
1975static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt,
1976 unsigned Dscnt) {
1977 unsigned Waitcnt = getCombinedCountBitMask(Version, IsStore: false);
1978 Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt);
1979 Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
1980 return Waitcnt;
1981}
1982
1983unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded) {
1984 return encodeLoadcntDscnt(Version, Loadcnt: Decoded.get(T: LOAD_CNT),
1985 Dscnt: Decoded.get(T: DS_CNT));
1986}
1987
1988static unsigned encodeStorecntDscnt(const IsaVersion &Version,
1989 unsigned Storecnt, unsigned Dscnt) {
1990 unsigned Waitcnt = getCombinedCountBitMask(Version, IsStore: true);
1991 Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt);
1992 Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
1993 return Waitcnt;
1994}
1995
1996unsigned encodeStorecntDscnt(const IsaVersion &Version,
1997 const Waitcnt &Decoded) {
1998 return encodeStorecntDscnt(Version, Storecnt: Decoded.get(T: STORE_CNT),
1999 Dscnt: Decoded.get(T: DS_CNT));
2000}
2001
2002//===----------------------------------------------------------------------===//
2003// Custom Operand Values
2004//===----------------------------------------------------------------------===//
2005
2006static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr,
2007 int Size,
2008 const MCSubtargetInfo &STI) {
2009 unsigned Enc = 0;
2010 for (int Idx = 0; Idx < Size; ++Idx) {
2011 const auto &Op = Opr[Idx];
2012 if (Op.isSupported(STI))
2013 Enc |= Op.encode(Val: Op.Default);
2014 }
2015 return Enc;
2016}
2017
2018static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr,
2019 int Size, unsigned Code,
2020 bool &HasNonDefaultVal,
2021 const MCSubtargetInfo &STI) {
2022 unsigned UsedOprMask = 0;
2023 HasNonDefaultVal = false;
2024 for (int Idx = 0; Idx < Size; ++Idx) {
2025 const auto &Op = Opr[Idx];
2026 if (!Op.isSupported(STI))
2027 continue;
2028 UsedOprMask |= Op.getMask();
2029 unsigned Val = Op.decode(Code);
2030 if (!Op.isValid(Val))
2031 return false;
2032 HasNonDefaultVal |= (Val != Op.Default);
2033 }
2034 return (Code & ~UsedOprMask) == 0;
2035}
2036
2037static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
2038 unsigned Code, int &Idx, StringRef &Name,
2039 unsigned &Val, bool &IsDefault,
2040 const MCSubtargetInfo &STI) {
2041 while (Idx < Size) {
2042 const auto &Op = Opr[Idx++];
2043 if (Op.isSupported(STI)) {
2044 Name = Op.Name;
2045 Val = Op.decode(Code);
2046 IsDefault = (Val == Op.Default);
2047 return true;
2048 }
2049 }
2050
2051 return false;
2052}
2053
2054static int encodeCustomOperandVal(const CustomOperandVal &Op,
2055 int64_t InputVal) {
2056 if (InputVal < 0 || InputVal > Op.Max)
2057 return OPR_VAL_INVALID;
2058 return Op.encode(Val: InputVal);
2059}
2060
2061static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
2062 const StringRef Name, int64_t InputVal,
2063 unsigned &UsedOprMask,
2064 const MCSubtargetInfo &STI) {
2065 int InvalidId = OPR_ID_UNKNOWN;
2066 for (int Idx = 0; Idx < Size; ++Idx) {
2067 const auto &Op = Opr[Idx];
2068 if (Op.Name == Name) {
2069 if (!Op.isSupported(STI)) {
2070 InvalidId = OPR_ID_UNSUPPORTED;
2071 continue;
2072 }
2073 auto OprMask = Op.getMask();
2074 if (OprMask & UsedOprMask)
2075 return OPR_ID_DUPLICATE;
2076 UsedOprMask |= OprMask;
2077 return encodeCustomOperandVal(Op, InputVal);
2078 }
2079 }
2080 return InvalidId;
2081}
2082
2083//===----------------------------------------------------------------------===//
2084// DepCtr
2085//===----------------------------------------------------------------------===//
2086
2087namespace DepCtr {
2088
2089int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) {
2090 static int Default = -1;
2091 if (Default == -1)
2092 Default = getDefaultCustomOperandEncoding(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, STI);
2093 return Default;
2094}
2095
2096bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
2097 const MCSubtargetInfo &STI) {
2098 return isSymbolicCustomOperandEncoding(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, Code,
2099 HasNonDefaultVal, STI);
2100}
2101
2102bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
2103 bool &IsDefault, const MCSubtargetInfo &STI) {
2104 return decodeCustomOperand(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, Code, Idx&: Id, Name, Val,
2105 IsDefault, STI);
2106}
2107
2108int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
2109 const MCSubtargetInfo &STI) {
2110 return encodeCustomOperand(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, Name, InputVal: Val, UsedOprMask,
2111 STI);
2112}
2113
2114unsigned getVaVdstBitMask() { return (1 << getVaVdstBitWidth()) - 1; }
2115
2116unsigned getVaSdstBitMask() { return (1 << getVaSdstBitWidth()) - 1; }
2117
2118unsigned getVaSsrcBitMask() { return (1 << getVaSsrcBitWidth()) - 1; }
2119
2120unsigned getHoldCntBitMask(const IsaVersion &Version) {
2121 return (1 << getHoldCntWidth(VersionMajor: Version.Major, VersionMinor: Version.Minor)) - 1;
2122}
2123
2124unsigned getVmVsrcBitMask() { return (1 << getVmVsrcBitWidth()) - 1; }
2125
2126unsigned getVaVccBitMask() { return (1 << getVaVccBitWidth()) - 1; }
2127
2128unsigned getSaSdstBitMask() { return (1 << getSaSdstBitWidth()) - 1; }
2129
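// Field accessors for the s_waitcnt_depctr immediate. The decodeField* /
// encodeField* pairs operate on an existing encoding; the single-argument
// encodeField* overloads start from the subtarget's default encoding, so
// every other field keeps its default value.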
2130unsigned decodeFieldVmVsrc(unsigned Encoded) {
2131 return unpackBits(Src: Encoded, Shift: getVmVsrcBitShift(), Width: getVmVsrcBitWidth());
2132}
2133
2134unsigned decodeFieldVaVdst(unsigned Encoded) {
2135 return unpackBits(Src: Encoded, Shift: getVaVdstBitShift(), Width: getVaVdstBitWidth());
2136}
2137
2138unsigned decodeFieldSaSdst(unsigned Encoded) {
2139 return unpackBits(Src: Encoded, Shift: getSaSdstBitShift(), Width: getSaSdstBitWidth());
2140}
2141
2142unsigned decodeFieldVaSdst(unsigned Encoded) {
2143 return unpackBits(Src: Encoded, Shift: getVaSdstBitShift(), Width: getVaSdstBitWidth());
2144}
2145
2146unsigned decodeFieldVaVcc(unsigned Encoded) {
2147 return unpackBits(Src: Encoded, Shift: getVaVccBitShift(), Width: getVaVccBitWidth());
2148}
2149
2150unsigned decodeFieldVaSsrc(unsigned Encoded) {
2151 return unpackBits(Src: Encoded, Shift: getVaSsrcBitShift(), Width: getVaSsrcBitWidth());
2152}
2153
2154unsigned decodeFieldHoldCnt(unsigned Encoded, const IsaVersion &Version) {
2155 return unpackBits(Src: Encoded, Shift: getHoldCntBitShift(),
2156 Width: getHoldCntWidth(VersionMajor: Version.Major, VersionMinor: Version.Minor));
2157}
2158
2159unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
2160 return packBits(Src: VmVsrc, Dst: Encoded, Shift: getVmVsrcBitShift(), Width: getVmVsrcBitWidth());
2161}
2162
2163unsigned encodeFieldVmVsrc(unsigned VmVsrc, const MCSubtargetInfo &STI) {
2164 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2165 return encodeFieldVmVsrc(Encoded, VmVsrc);
2166}
2167
2168unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) {
2169 return packBits(Src: VaVdst, Dst: Encoded, Shift: getVaVdstBitShift(), Width: getVaVdstBitWidth());
2170}
2171
2172unsigned encodeFieldVaVdst(unsigned VaVdst, const MCSubtargetInfo &STI) {
2173 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2174 return encodeFieldVaVdst(Encoded, VaVdst);
2175}
2176
2177unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) {
2178 return packBits(Src: SaSdst, Dst: Encoded, Shift: getSaSdstBitShift(), Width: getSaSdstBitWidth());
2179}
2180
2181unsigned encodeFieldSaSdst(unsigned SaSdst, const MCSubtargetInfo &STI) {
2182 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2183 return encodeFieldSaSdst(Encoded, SaSdst);
2184}
2185
2186unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst) {
2187 return packBits(Src: VaSdst, Dst: Encoded, Shift: getVaSdstBitShift(), Width: getVaSdstBitWidth());
2188}
2189
2190unsigned encodeFieldVaSdst(unsigned VaSdst, const MCSubtargetInfo &STI) {
2191 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2192 return encodeFieldVaSdst(Encoded, VaSdst);
2193}
2194
2195unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc) {
2196 return packBits(Src: VaVcc, Dst: Encoded, Shift: getVaVccBitShift(), Width: getVaVccBitWidth());
2197}
2198
2199unsigned encodeFieldVaVcc(unsigned VaVcc, const MCSubtargetInfo &STI) {
2200 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2201 return encodeFieldVaVcc(Encoded, VaVcc);
2202}
2203
2204unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc) {
2205 return packBits(Src: VaSsrc, Dst: Encoded, Shift: getVaSsrcBitShift(), Width: getVaSsrcBitWidth());
2206}
2207
2208unsigned encodeFieldVaSsrc(unsigned VaSsrc, const MCSubtargetInfo &STI) {
2209 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2210 return encodeFieldVaSsrc(Encoded, VaSsrc);
2211}
2212
2213unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt,
2214 const IsaVersion &Version) {
2215 return packBits(Src: HoldCnt, Dst: Encoded, Shift: getHoldCntBitShift(),
2216 Width: getHoldCntWidth(VersionMajor: Version.Major, VersionMinor: Version.Minor));
2217}
2218
2219unsigned encodeFieldHoldCnt(unsigned HoldCnt, const MCSubtargetInfo &STI) {
2220 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2221 return encodeFieldHoldCnt(Encoded, HoldCnt, Version: getIsaVersion(GPU: STI.getCPU()));
2222}
2223
2224} // namespace DepCtr
2225
2226//===----------------------------------------------------------------------===//
2227// exp tgt
2228//===----------------------------------------------------------------------===//
2229
2230namespace Exp {
2231
2232struct ExpTgt {
2233 StringLiteral Name;
2234 unsigned Tgt;
2235 unsigned MaxIndex;
2236};
2237
2238// clang-format off
2239static constexpr ExpTgt ExpTgtInfo[] = {
2240 {.Name: {"null"}, .Tgt: ET_NULL, .MaxIndex: ET_NULL_MAX_IDX},
2241 {.Name: {"mrtz"}, .Tgt: ET_MRTZ, .MaxIndex: ET_MRTZ_MAX_IDX},
2242 {.Name: {"prim"}, .Tgt: ET_PRIM, .MaxIndex: ET_PRIM_MAX_IDX},
2243 {.Name: {"mrt"}, .Tgt: ET_MRT0, .MaxIndex: ET_MRT_MAX_IDX},
2244 {.Name: {"pos"}, .Tgt: ET_POS0, .MaxIndex: ET_POS_MAX_IDX},
2245 {.Name: {"dual_src_blend"},.Tgt: ET_DUAL_SRC_BLEND0, .MaxIndex: ET_DUAL_SRC_BLEND_MAX_IDX},
2246 {.Name: {"param"}, .Tgt: ET_PARAM0, .MaxIndex: ET_PARAM_MAX_IDX},
2247};
2248// clang-format on
2249
2250bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
2251 for (const ExpTgt &Val : ExpTgtInfo) {
2252 if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
2253 Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
2254 Name = Val.Name;
2255 return true;
2256 }
2257 }
2258 return false;
2259}
2260
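// Maps an exp target name to its id, e.g. "prim" -> ET_PRIM and "pos2" ->
// ET_POS0 + 2; unknown names yield ET_INVALID.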
2261unsigned getTgtId(const StringRef Name) {
2263  for (const ExpTgt &Val : ExpTgtInfo) {
2264 if (Val.MaxIndex == 0 && Name == Val.Name)
2265 return Val.Tgt;
2266
2267 if (Val.MaxIndex > 0 && Name.starts_with(Prefix: Val.Name)) {
2268 StringRef Suffix = Name.drop_front(N: Val.Name.size());
2269
2270 unsigned Id;
2271 if (Suffix.getAsInteger(Radix: 10, Result&: Id) || Id > Val.MaxIndex)
2272 return ET_INVALID;
2273
2274      // Reject leading zeroes, e.g. "mrt01" is not a valid target name.
2275 if (Suffix.size() > 1 && Suffix[0] == '0')
2276 return ET_INVALID;
2277
2278 return Val.Tgt + Id;
2279 }
2280 }
2281 return ET_INVALID;
2282}
2283
2284bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
2285 switch (Id) {
2286 case ET_NULL:
2287 return !isGFX11Plus(STI);
2288 case ET_POS4:
2289 case ET_PRIM:
2290 return isGFX10Plus(STI);
2291 case ET_DUAL_SRC_BLEND0:
2292 case ET_DUAL_SRC_BLEND1:
2293 return isGFX11Plus(STI);
2294 default:
2295 if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
2296 return !isGFX11Plus(STI) || isGFX13Plus(STI);
2297 return true;
2298 }
2299}
2300
2301} // namespace Exp
2302
2303//===----------------------------------------------------------------------===//
2304// MTBUF Format
2305//===----------------------------------------------------------------------===//
2306
2307namespace MTBUFFormat {
2308
2309int64_t getDfmt(const StringRef Name) {
2310 for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
2311 if (Name == DfmtSymbolic[Id])
2312 return Id;
2313 }
2314 return DFMT_UNDEF;
2315}
2316
2317StringRef getDfmtName(unsigned Id) {
2318 assert(Id <= DFMT_MAX);
2319 return DfmtSymbolic[Id];
2320}
2321
2322static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) {
2323 if (isSI(STI) || isCI(STI))
2324 return NfmtSymbolicSICI;
2325 if (isVI(STI) || isGFX9(STI))
2326 return NfmtSymbolicVI;
2327 return NfmtSymbolicGFX10;
2328}
2329
2330int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
2331 const auto *lookupTable = getNfmtLookupTable(STI);
2332 for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
2333 if (Name == lookupTable[Id])
2334 return Id;
2335 }
2336 return NFMT_UNDEF;
2337}
2338
2339StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
2340 assert(Id <= NFMT_MAX);
2341 return getNfmtLookupTable(STI)[Id];
2342}
2343
2344bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
2345 unsigned Dfmt;
2346 unsigned Nfmt;
2347 decodeDfmtNfmt(Format: Id, Dfmt, Nfmt);
2348 return isValidNfmt(Val: Nfmt, STI);
2349}
2350
2351bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
2352 return !getNfmtName(Id, STI).empty();
2353}
2354
2355int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
2356 return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
2357}
2358
2359void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
2360 Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
2361 Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
2362}
2363
2364int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) {
2365 if (isGFX11Plus(STI)) {
2366 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
2367 if (Name == UfmtSymbolicGFX11[Id])
2368 return Id;
2369 }
2370 } else {
2371 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
2372 if (Name == UfmtSymbolicGFX10[Id])
2373 return Id;
2374 }
2375 }
2376 return UFMT_UNDEF;
2377}
2378
2379StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) {
2380 if (isValidUnifiedFormat(Val: Id, STI))
2381 return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
2382 return "";
2383}
2384
2385bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
2386 return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
2387}
2388
2389int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
2390 const MCSubtargetInfo &STI) {
2391 int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
2392 if (isGFX11Plus(STI)) {
2393 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
2394 if (Fmt == DfmtNfmt2UFmtGFX11[Id])
2395 return Id;
2396 }
2397 } else {
2398 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
2399 if (Fmt == DfmtNfmt2UFmtGFX10[Id])
2400 return Id;
2401 }
2402 }
2403 return UFMT_UNDEF;
2404}
2405
2406bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
2407 return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
2408}
2409
2410unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
2411 if (isGFX10Plus(STI))
2412 return UFMT_DEFAULT;
2413 return DFMT_NFMT_DEFAULT;
2414}
2415
2416} // namespace MTBUFFormat
2417
2418//===----------------------------------------------------------------------===//
2419// SendMsg
2420//===----------------------------------------------------------------------===//
2421
2422namespace SendMsg {
2423
2424static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) {
2425 return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_;
2426}
2427
2428bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
2429 return (MsgId & ~(getMsgIdMask(STI))) == 0;
2430}
2431
2432bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
2433 bool Strict) {
2434 assert(isValidMsgId(MsgId, STI));
2435
2436 if (!Strict)
2437 return 0 <= OpId && isUInt<OP_WIDTH_>(x: OpId);
2438
2439 if (msgRequiresOp(MsgId, STI)) {
2440 if (MsgId == ID_GS_PreGFX11 && OpId == OP_GS_NOP)
2441 return false;
2442
2443 return !getMsgOpName(MsgId, Encoding: OpId, STI).empty();
2444 }
2445
2446 return OpId == OP_NONE_;
2447}
2448
2449bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
2450 const MCSubtargetInfo &STI, bool Strict) {
2451 assert(isValidMsgOp(MsgId, OpId, STI, Strict));
2452
2453 if (!Strict)
2454 return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(x: StreamId);
2455
2456 if (!isGFX11Plus(STI)) {
2457 switch (MsgId) {
2458 case ID_GS_PreGFX11:
2459 return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
2460 case ID_GS_DONE_PreGFX11:
2461 return (OpId == OP_GS_NOP)
2462 ? (StreamId == STREAM_ID_NONE_)
2463 : (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
2464 }
2465 }
2466 return StreamId == STREAM_ID_NONE_;
2467}
2468
2469bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
2470 return MsgId == ID_SYSMSG ||
2471 (!isGFX11Plus(STI) &&
2472 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
2473}
2474
2475bool msgSupportsStream(int64_t MsgId, int64_t OpId,
2476 const MCSubtargetInfo &STI) {
2477 return !isGFX11Plus(STI) &&
2478 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
2479 OpId != OP_GS_NOP;
2480}
2481
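// On GFX11+ the op and stream id are not carried in the immediate, so they
// decode as 0; only the message id is extracted.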
2482void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
2483 uint16_t &StreamId, const MCSubtargetInfo &STI) {
2484 MsgId = Val & getMsgIdMask(STI);
2485 if (isGFX11Plus(STI)) {
2486 OpId = 0;
2487 StreamId = 0;
2488 } else {
2489 OpId = (Val & OP_MASK_) >> OP_SHIFT_;
2490 StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
2491 }
2492}
2493
2494uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId) {
2495 return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
2496}
2497
2498bool msgDoesNotUseM0(int64_t MsgId, const MCSubtargetInfo &STI) {
2499 // Explicitly list message types that are known to not use m0.
2500 // This is safer than excluding only GS_ALLOC_REQ, in case new message
2501 // types are added in the future that do use m0.
2502 if (isGFX11Plus(STI)) {
2503 switch (MsgId) {
2504 case ID_DEALLOC_VGPRS_GFX11Plus:
2505 return true;
2506 default:
2507 break;
2508 }
2509 }
2510 switch (MsgId) {
2511 case ID_SAVEWAVE:
2512 case ID_STALL_WAVE_GEN:
2513 case ID_HALT_WAVES:
2514 case ID_ORDERED_PS_DONE:
2515 case ID_EARLY_PRIM_DEALLOC:
2516 case ID_GET_DOORBELL:
2517 case ID_GET_DDID:
2518 case ID_SYSMSG:
2519 return true;
2520 default:
2521 return false;
2522 }
2523}
2524
2525} // namespace SendMsg
2526
2527//===----------------------------------------------------------------------===//
2528//
2529//===----------------------------------------------------------------------===//
2530
2531unsigned getInitialPSInputAddr(const Function &F) {
2532 return F.getFnAttributeAsParsedInteger(Kind: "InitialPSInputAddr", Default: 0);
2533}
2534
2535bool getHasColorExport(const Function &F) {
2536  // As a safe default, always respond as if PS has color exports.
2537 return F.getFnAttributeAsParsedInteger(
2538 Kind: "amdgpu-color-export",
2539 Default: F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
2540}
2541
2542bool getHasDepthExport(const Function &F) {
2543 return F.getFnAttributeAsParsedInteger(Kind: "amdgpu-depth-export", Default: 0) != 0;
2544}
2545
2546unsigned getDynamicVGPRBlockSize(const Function &F) {
2547 unsigned BlockSize =
2548 F.getFnAttributeAsParsedInteger(Kind: "amdgpu-dynamic-vgpr-block-size", Default: 0);
2549
2550 if (BlockSize == 16 || BlockSize == 32)
2551 return BlockSize;
2552
2553 return 0;
2554}
2555
2556bool hasXNACK(const MCSubtargetInfo &STI) {
2557 return STI.hasFeature(Feature: AMDGPU::FeatureXNACK);
2558}
2559
2560bool hasSRAMECC(const MCSubtargetInfo &STI) {
2561 return STI.hasFeature(Feature: AMDGPU::FeatureSRAMECC);
2562}
2563
2564bool hasMIMG_R128(const MCSubtargetInfo &STI) {
2565 return STI.hasFeature(Feature: AMDGPU::FeatureMIMG_R128) &&
2566 !STI.hasFeature(Feature: AMDGPU::FeatureR128A16);
2567}
2568
2569bool hasA16(const MCSubtargetInfo &STI) {
2570 return STI.hasFeature(Feature: AMDGPU::FeatureA16);
2571}
2572
2573bool hasG16(const MCSubtargetInfo &STI) {
2574 return STI.hasFeature(Feature: AMDGPU::FeatureG16);
2575}
2576
2577bool hasPackedD16(const MCSubtargetInfo &STI) {
2578 return !STI.hasFeature(Feature: AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) &&
2579 !isSI(STI);
2580}
2581
2582bool hasGDS(const MCSubtargetInfo &STI) {
2583 return STI.hasFeature(Feature: AMDGPU::FeatureGDS);
2584}
2585
2586unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) {
2587 auto Version = getIsaVersion(GPU: STI.getCPU());
2588 if (Version.Major == 10)
2589 return Version.Minor >= 3 ? 13 : 5;
2590 if (Version.Major == 11)
2591 return 5;
2592 if (Version.Major >= 12)
2593 return HasSampler ? 4 : 5;
2594 return 0;
2595}
2596
2597unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) {
2598 if (isGFX1250Plus(STI))
2599 return 32;
2600 return 16;
2601}
2602
2603bool isSI(const MCSubtargetInfo &STI) {
2604 return STI.hasFeature(Feature: AMDGPU::FeatureSouthernIslands);
2605}
2606
2607bool isCI(const MCSubtargetInfo &STI) {
2608 return STI.hasFeature(Feature: AMDGPU::FeatureSeaIslands);
2609}
2610
2611bool isVI(const MCSubtargetInfo &STI) {
2612 return STI.hasFeature(Feature: AMDGPU::FeatureVolcanicIslands);
2613}
2614
2615bool isGFX9(const MCSubtargetInfo &STI) {
2616 return STI.hasFeature(Feature: AMDGPU::FeatureGFX9);
2617}
2618
2619bool isGFX9_GFX10(const MCSubtargetInfo &STI) {
2620 return isGFX9(STI) || isGFX10(STI);
2621}
2622
2623bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI) {
2624 return isGFX9(STI) || isGFX10(STI) || isGFX11(STI);
2625}
2626
2627bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) {
2628 return isVI(STI) || isGFX9(STI) || isGFX10(STI);
2629}
2630
2631bool isGFX8Plus(const MCSubtargetInfo &STI) {
2632 return isVI(STI) || isGFX9Plus(STI);
2633}
2634
2635bool isGFX9Plus(const MCSubtargetInfo &STI) {
2636 return isGFX9(STI) || isGFX10Plus(STI);
2637}
2638
2639bool isNotGFX9Plus(const MCSubtargetInfo &STI) { return !isGFX9Plus(STI); }
2640
2641bool isGFX10(const MCSubtargetInfo &STI) {
2642 return STI.hasFeature(Feature: AMDGPU::FeatureGFX10);
2643}
2644
2645bool isGFX10_GFX11(const MCSubtargetInfo &STI) {
2646 return isGFX10(STI) || isGFX11(STI);
2647}
2648
2649bool isGFX10Plus(const MCSubtargetInfo &STI) {
2650 return isGFX10(STI) || isGFX11Plus(STI);
2651}
2652
2653bool isGFX11(const MCSubtargetInfo &STI) {
2654 return STI.hasFeature(Feature: AMDGPU::FeatureGFX11);
2655}
2656
2657bool isGFX11Plus(const MCSubtargetInfo &STI) {
2658 return isGFX11(STI) || isGFX12Plus(STI);
2659}
2660
2661bool isGFX12(const MCSubtargetInfo &STI) {
2662 return STI.getFeatureBits()[AMDGPU::FeatureGFX12];
2663}
2664
2665bool isGFX12Plus(const MCSubtargetInfo &STI) {
2666 return isGFX12(STI) || isGFX13Plus(STI);
2667}
2668
2669bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); }
2670
2671bool isGFX1250(const MCSubtargetInfo &STI) {
2672 return STI.getFeatureBits()[AMDGPU::FeatureGFX1250Insts] && !isGFX13(STI);
2673}
2674
2675bool isGFX1250Plus(const MCSubtargetInfo &STI) {
2676 return STI.getFeatureBits()[AMDGPU::FeatureGFX1250Insts];
2677}
2678
2679bool isGFX13(const MCSubtargetInfo &STI) {
2680 return STI.getFeatureBits()[AMDGPU::FeatureGFX13];
2681}
2682
2683bool isGFX13Plus(const MCSubtargetInfo &STI) { return isGFX13(STI); }
2684
2685bool supportsWGP(const MCSubtargetInfo &STI) {
2686 if (isGFX1250(STI))
2687 return false;
2688 return isGFX10Plus(STI);
2689}
2690
2691bool isNotGFX11Plus(const MCSubtargetInfo &STI) { return !isGFX11Plus(STI); }
2692
2693bool isNotGFX10Plus(const MCSubtargetInfo &STI) {
2694 return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
2695}
2696
2697bool isGFX10Before1030(const MCSubtargetInfo &STI) {
2698 return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
2699}
2700
2701bool isGCN3Encoding(const MCSubtargetInfo &STI) {
2702 return STI.hasFeature(Feature: AMDGPU::FeatureGCN3Encoding);
2703}
2704
2705bool isGFX10_AEncoding(const MCSubtargetInfo &STI) {
2706 return STI.hasFeature(Feature: AMDGPU::FeatureGFX10_AEncoding);
2707}
2708
2709bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
2710 return STI.hasFeature(Feature: AMDGPU::FeatureGFX10_BEncoding);
2711}
2712
2713bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
2714 return STI.hasFeature(Feature: AMDGPU::FeatureGFX10_3Insts);
2715}
2716
2717bool isGFX10_3_GFX11(const MCSubtargetInfo &STI) {
2718 return isGFX10_BEncoding(STI) && !isGFX12Plus(STI);
2719}
2720
2721bool isGFX90A(const MCSubtargetInfo &STI) {
2722 return STI.hasFeature(Feature: AMDGPU::FeatureGFX90AInsts);
2723}
2724
2725bool isGFX940(const MCSubtargetInfo &STI) {
2726 return STI.hasFeature(Feature: AMDGPU::FeatureGFX940Insts);
2727}
2728
2729bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
2730 return STI.hasFeature(Feature: AMDGPU::FeatureArchitectedFlatScratch);
2731}
2732
2733bool hasMAIInsts(const MCSubtargetInfo &STI) {
2734 return STI.hasFeature(Feature: AMDGPU::FeatureMAIInsts);
2735}
2736
2737bool hasVOPD(const MCSubtargetInfo &STI) {
2738 return STI.hasFeature(Feature: AMDGPU::FeatureVOPDInsts);
2739}
2740
2741bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI) {
2742 return STI.hasFeature(Feature: AMDGPU::FeatureDPPSrc1SGPR);
2743}
2744
2745unsigned hasKernargPreload(const MCSubtargetInfo &STI) {
2746 return STI.hasFeature(Feature: AMDGPU::FeatureKernargPreload);
2747}
2748
2749int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
2750 int32_t ArgNumVGPR) {
2751 if (has90AInsts && ArgNumAGPR)
2752 return alignTo(Value: ArgNumVGPR, Align: 4) + ArgNumAGPR;
2753 return std::max(a: ArgNumVGPR, b: ArgNumAGPR);
2754}
2755
2756bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI) {
2757 const MCRegisterClass SGPRClass = TRI->getRegClass(i: AMDGPU::SReg_32RegClassID);
2758 const MCRegister FirstSubReg = TRI->getSubReg(Reg, Idx: AMDGPU::sub0);
2759 return SGPRClass.contains(Reg: FirstSubReg != 0 ? FirstSubReg : Reg) ||
2760 Reg == AMDGPU::SCC;
2761}
2762
2763bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI) {
2764 return MRI.getEncodingValue(Reg) & AMDGPU::HWEncoding::IS_HI16;
2765}
2766
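// MAP_REG2REG expands to a switch over Reg.id(). The CASE_* macros are
// (re)defined before each use, so the same table serves both getMCReg
// (pseudo register to subtarget MC register) and mc2PseudoReg (the inverse).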
2767#define MAP_REG2REG \
2768 using namespace AMDGPU; \
2769 switch (Reg.id()) { \
2770 default: \
2771 return Reg; \
2772 CASE_CI_VI(FLAT_SCR) \
2773 CASE_CI_VI(FLAT_SCR_LO) \
2774 CASE_CI_VI(FLAT_SCR_HI) \
2775 CASE_VI_GFX9PLUS(TTMP0) \
2776 CASE_VI_GFX9PLUS(TTMP1) \
2777 CASE_VI_GFX9PLUS(TTMP2) \
2778 CASE_VI_GFX9PLUS(TTMP3) \
2779 CASE_VI_GFX9PLUS(TTMP4) \
2780 CASE_VI_GFX9PLUS(TTMP5) \
2781 CASE_VI_GFX9PLUS(TTMP6) \
2782 CASE_VI_GFX9PLUS(TTMP7) \
2783 CASE_VI_GFX9PLUS(TTMP8) \
2784 CASE_VI_GFX9PLUS(TTMP9) \
2785 CASE_VI_GFX9PLUS(TTMP10) \
2786 CASE_VI_GFX9PLUS(TTMP11) \
2787 CASE_VI_GFX9PLUS(TTMP12) \
2788 CASE_VI_GFX9PLUS(TTMP13) \
2789 CASE_VI_GFX9PLUS(TTMP14) \
2790 CASE_VI_GFX9PLUS(TTMP15) \
2791 CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
2792 CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
2793 CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
2794 CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
2795 CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
2796 CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
2797 CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
2798 CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
2799 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
2800 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
2801 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
2802 CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
2803 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
2804 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
2805 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2806 CASE_VI_GFX9PLUS( \
2807 TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2808 CASE_GFXPRE11_GFX11PLUS(M0) \
2809 CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
2810 CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
2811 }
2812
2813#define CASE_CI_VI(node) \
2814 assert(!isSI(STI)); \
2815 case node: \
2816 return isCI(STI) ? node##_ci : node##_vi;
2817
2818#define CASE_VI_GFX9PLUS(node) \
2819 case node: \
2820 return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;
2821
2822#define CASE_GFXPRE11_GFX11PLUS(node) \
2823 case node: \
2824 return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;
2825
2826#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
2827 case node: \
2828 return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
2829
2830MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI) {
2831 if (STI.getTargetTriple().getArch() == Triple::r600)
2832 return Reg;
2833 MAP_REG2REG
2834}
2835
2836#undef CASE_CI_VI
2837#undef CASE_VI_GFX9PLUS
2838#undef CASE_GFXPRE11_GFX11PLUS
2839#undef CASE_GFXPRE11_GFX11PLUS_TO
2840
2841#define CASE_CI_VI(node) \
2842 case node##_ci: \
2843 case node##_vi: \
2844 return node;
2845#define CASE_VI_GFX9PLUS(node) \
2846 case node##_vi: \
2847 case node##_gfx9plus: \
2848 return node;
2849#define CASE_GFXPRE11_GFX11PLUS(node) \
2850 case node##_gfx11plus: \
2851 case node##_gfxpre11: \
2852 return node;
2853#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
2854
2855MCRegister mc2PseudoReg(MCRegister Reg) { MAP_REG2REG }
2856
2857bool isInlineValue(MCRegister Reg) {
2858 switch (Reg.id()) {
2859 case AMDGPU::SRC_SHARED_BASE_LO:
2860 case AMDGPU::SRC_SHARED_BASE:
2861 case AMDGPU::SRC_SHARED_LIMIT_LO:
2862 case AMDGPU::SRC_SHARED_LIMIT:
2863 case AMDGPU::SRC_PRIVATE_BASE_LO:
2864 case AMDGPU::SRC_PRIVATE_BASE:
2865 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2866 case AMDGPU::SRC_PRIVATE_LIMIT:
2867 case AMDGPU::SRC_FLAT_SCRATCH_BASE_LO:
2868 case AMDGPU::SRC_FLAT_SCRATCH_BASE_HI:
2869 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2870 return true;
2871 case AMDGPU::SRC_VCCZ:
2872 case AMDGPU::SRC_EXECZ:
2873 case AMDGPU::SRC_SCC:
2874 return true;
2875 case AMDGPU::SGPR_NULL:
2876 return true;
2877 default:
2878 return false;
2879 }
2880}
2881
2882#undef CASE_CI_VI
2883#undef CASE_VI_GFX9PLUS
2884#undef CASE_GFXPRE11_GFX11PLUS
2885#undef CASE_GFXPRE11_GFX11PLUS_TO
2886#undef MAP_REG2REG
2887
2888bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2889 assert(OpNo < Desc.NumOperands);
2890 unsigned OpType = Desc.operands()[OpNo].OperandType;
2891 return OpType >= AMDGPU::OPERAND_KIMM_FIRST &&
2892 OpType <= AMDGPU::OPERAND_KIMM_LAST;
2893}
2894
2895bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2896 assert(OpNo < Desc.NumOperands);
2897 unsigned OpType = Desc.operands()[OpNo].OperandType;
2898 switch (OpType) {
2899 case AMDGPU::OPERAND_REG_IMM_FP32:
2900 case AMDGPU::OPERAND_REG_IMM_FP64:
2901 case AMDGPU::OPERAND_REG_IMM_FP16:
2902 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2903 case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
2904 case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
2905 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2906 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2907 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2908 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2909 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2910 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2911 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2912 return true;
2913 default:
2914 return false;
2915 }
2916}
2917
2918bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2919 assert(OpNo < Desc.NumOperands);
2920 unsigned OpType = Desc.operands()[OpNo].OperandType;
2921 return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
2922 OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST) ||
2923 (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
2924 OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST);
2925}
2926
2927// Avoid using MCRegisterClass::getSize, since that function will go away
2928// (move from MC* level to Target* level). Return size in bits.
2929unsigned getRegBitWidth(unsigned RCID) {
2930 switch (RCID) {
2931 case AMDGPU::VGPR_16RegClassID:
2932 case AMDGPU::VGPR_16_Lo128RegClassID:
2933 case AMDGPU::SGPR_LO16RegClassID:
2934 case AMDGPU::AGPR_LO16RegClassID:
2935 return 16;
2936 case AMDGPU::SGPR_32RegClassID:
2937 case AMDGPU::VGPR_32RegClassID:
2938 case AMDGPU::VGPR_32_Lo256RegClassID:
2939 case AMDGPU::VRegOrLds_32RegClassID:
2940 case AMDGPU::AGPR_32RegClassID:
2941 case AMDGPU::VS_32RegClassID:
2942 case AMDGPU::AV_32RegClassID:
2943 case AMDGPU::SReg_32RegClassID:
2944 case AMDGPU::SReg_32_XM0RegClassID:
2945 case AMDGPU::SRegOrLds_32RegClassID:
2946 return 32;
2947 case AMDGPU::SGPR_64RegClassID:
2948 case AMDGPU::VS_64RegClassID:
2949 case AMDGPU::SReg_64RegClassID:
2950 case AMDGPU::VReg_64RegClassID:
2951 case AMDGPU::AReg_64RegClassID:
2952 case AMDGPU::SReg_64_XEXECRegClassID:
2953 case AMDGPU::VReg_64_Align2RegClassID:
2954 case AMDGPU::AReg_64_Align2RegClassID:
2955 case AMDGPU::AV_64RegClassID:
2956 case AMDGPU::AV_64_Align2RegClassID:
2957 case AMDGPU::VReg_64_Lo256_Align2RegClassID:
2958 case AMDGPU::VS_64_Lo256RegClassID:
2959 return 64;
2960 case AMDGPU::SGPR_96RegClassID:
2961 case AMDGPU::SReg_96RegClassID:
2962 case AMDGPU::VReg_96RegClassID:
2963 case AMDGPU::AReg_96RegClassID:
2964 case AMDGPU::VReg_96_Align2RegClassID:
2965 case AMDGPU::AReg_96_Align2RegClassID:
2966 case AMDGPU::AV_96RegClassID:
2967 case AMDGPU::AV_96_Align2RegClassID:
2968 case AMDGPU::VReg_96_Lo256_Align2RegClassID:
2969 return 96;
2970 case AMDGPU::SGPR_128RegClassID:
2971 case AMDGPU::SReg_128RegClassID:
2972 case AMDGPU::VReg_128RegClassID:
2973 case AMDGPU::AReg_128RegClassID:
2974 case AMDGPU::VReg_128_Align2RegClassID:
2975 case AMDGPU::AReg_128_Align2RegClassID:
2976 case AMDGPU::AV_128RegClassID:
2977 case AMDGPU::AV_128_Align2RegClassID:
2978 case AMDGPU::SReg_128_XNULLRegClassID:
2979 case AMDGPU::VReg_128_Lo256_Align2RegClassID:
2980 return 128;
2981 case AMDGPU::SGPR_160RegClassID:
2982 case AMDGPU::SReg_160RegClassID:
2983 case AMDGPU::VReg_160RegClassID:
2984 case AMDGPU::AReg_160RegClassID:
2985 case AMDGPU::VReg_160_Align2RegClassID:
2986 case AMDGPU::AReg_160_Align2RegClassID:
2987 case AMDGPU::AV_160RegClassID:
2988 case AMDGPU::AV_160_Align2RegClassID:
2989 case AMDGPU::VReg_160_Lo256_Align2RegClassID:
2990 return 160;
2991 case AMDGPU::SGPR_192RegClassID:
2992 case AMDGPU::SReg_192RegClassID:
2993 case AMDGPU::VReg_192RegClassID:
2994 case AMDGPU::AReg_192RegClassID:
2995 case AMDGPU::VReg_192_Align2RegClassID:
2996 case AMDGPU::AReg_192_Align2RegClassID:
2997 case AMDGPU::AV_192RegClassID:
2998 case AMDGPU::AV_192_Align2RegClassID:
2999 case AMDGPU::VReg_192_Lo256_Align2RegClassID:
3000 return 192;
3001 case AMDGPU::SGPR_224RegClassID:
3002 case AMDGPU::SReg_224RegClassID:
3003 case AMDGPU::VReg_224RegClassID:
3004 case AMDGPU::AReg_224RegClassID:
3005 case AMDGPU::VReg_224_Align2RegClassID:
3006 case AMDGPU::AReg_224_Align2RegClassID:
3007 case AMDGPU::AV_224RegClassID:
3008 case AMDGPU::AV_224_Align2RegClassID:
3009 case AMDGPU::VReg_224_Lo256_Align2RegClassID:
3010 return 224;
3011 case AMDGPU::SGPR_256RegClassID:
3012 case AMDGPU::SReg_256RegClassID:
3013 case AMDGPU::VReg_256RegClassID:
3014 case AMDGPU::AReg_256RegClassID:
3015 case AMDGPU::VReg_256_Align2RegClassID:
3016 case AMDGPU::AReg_256_Align2RegClassID:
3017 case AMDGPU::AV_256RegClassID:
3018 case AMDGPU::AV_256_Align2RegClassID:
3019 case AMDGPU::SReg_256_XNULLRegClassID:
3020 case AMDGPU::VReg_256_Lo256_Align2RegClassID:
3021 return 256;
3022 case AMDGPU::SGPR_288RegClassID:
3023 case AMDGPU::SReg_288RegClassID:
3024 case AMDGPU::VReg_288RegClassID:
3025 case AMDGPU::AReg_288RegClassID:
3026 case AMDGPU::VReg_288_Align2RegClassID:
3027 case AMDGPU::AReg_288_Align2RegClassID:
3028 case AMDGPU::AV_288RegClassID:
3029 case AMDGPU::AV_288_Align2RegClassID:
3030 case AMDGPU::VReg_288_Lo256_Align2RegClassID:
3031 return 288;
3032 case AMDGPU::SGPR_320RegClassID:
3033 case AMDGPU::SReg_320RegClassID:
3034 case AMDGPU::VReg_320RegClassID:
3035 case AMDGPU::AReg_320RegClassID:
3036 case AMDGPU::VReg_320_Align2RegClassID:
3037 case AMDGPU::AReg_320_Align2RegClassID:
3038 case AMDGPU::AV_320RegClassID:
3039 case AMDGPU::AV_320_Align2RegClassID:
3040 case AMDGPU::VReg_320_Lo256_Align2RegClassID:
3041 return 320;
3042 case AMDGPU::SGPR_352RegClassID:
3043 case AMDGPU::SReg_352RegClassID:
3044 case AMDGPU::VReg_352RegClassID:
3045 case AMDGPU::AReg_352RegClassID:
3046 case AMDGPU::VReg_352_Align2RegClassID:
3047 case AMDGPU::AReg_352_Align2RegClassID:
3048 case AMDGPU::AV_352RegClassID:
3049 case AMDGPU::AV_352_Align2RegClassID:
3050 case AMDGPU::VReg_352_Lo256_Align2RegClassID:
3051 return 352;
3052 case AMDGPU::SGPR_384RegClassID:
3053 case AMDGPU::SReg_384RegClassID:
3054 case AMDGPU::VReg_384RegClassID:
3055 case AMDGPU::AReg_384RegClassID:
3056 case AMDGPU::VReg_384_Align2RegClassID:
3057 case AMDGPU::AReg_384_Align2RegClassID:
3058 case AMDGPU::AV_384RegClassID:
3059 case AMDGPU::AV_384_Align2RegClassID:
3060 case AMDGPU::VReg_384_Lo256_Align2RegClassID:
3061 return 384;
3062 case AMDGPU::SGPR_512RegClassID:
3063 case AMDGPU::SReg_512RegClassID:
3064 case AMDGPU::VReg_512RegClassID:
3065 case AMDGPU::AReg_512RegClassID:
3066 case AMDGPU::VReg_512_Align2RegClassID:
3067 case AMDGPU::AReg_512_Align2RegClassID:
3068 case AMDGPU::AV_512RegClassID:
3069 case AMDGPU::AV_512_Align2RegClassID:
3070 case AMDGPU::VReg_512_Lo256_Align2RegClassID:
3071 return 512;
3072 case AMDGPU::SGPR_1024RegClassID:
3073 case AMDGPU::SReg_1024RegClassID:
3074 case AMDGPU::VReg_1024RegClassID:
3075 case AMDGPU::AReg_1024RegClassID:
3076 case AMDGPU::VReg_1024_Align2RegClassID:
3077 case AMDGPU::AReg_1024_Align2RegClassID:
3078 case AMDGPU::AV_1024RegClassID:
3079 case AMDGPU::AV_1024_Align2RegClassID:
3080 case AMDGPU::VReg_1024_Lo256_Align2RegClassID:
3081 return 1024;
3082 default:
3083 llvm_unreachable("Unexpected register class");
3084 }
3085}
3086
3087unsigned getRegBitWidth(const MCRegisterClass &RC) {
3088 return getRegBitWidth(RCID: RC.getID());
3089}
3090
3091bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
3092 if (isInlinableIntLiteral(Literal))
3093 return true;
3094
3095 uint64_t Val = static_cast<uint64_t>(Literal);
3096 return (Val == llvm::bit_cast<uint64_t>(from: 0.0)) ||
3097 (Val == llvm::bit_cast<uint64_t>(from: 1.0)) ||
3098 (Val == llvm::bit_cast<uint64_t>(from: -1.0)) ||
3099 (Val == llvm::bit_cast<uint64_t>(from: 0.5)) ||
3100 (Val == llvm::bit_cast<uint64_t>(from: -0.5)) ||
3101 (Val == llvm::bit_cast<uint64_t>(from: 2.0)) ||
3102 (Val == llvm::bit_cast<uint64_t>(from: -2.0)) ||
3103 (Val == llvm::bit_cast<uint64_t>(from: 4.0)) ||
3104 (Val == llvm::bit_cast<uint64_t>(from: -4.0)) ||
3105         (Val == 0x3fc45f306dc9c882 && HasInv2Pi); // 1.0 / (2.0 * pi)
3106}
3107
3108bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
3109 if (isInlinableIntLiteral(Literal))
3110 return true;
3111
3112 // The actual type of the operand does not seem to matter as long
3113 // as the bits match one of the inline immediate values. For example:
3114 //
3115 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
3116 // so it is a legal inline immediate.
3117 //
3118 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
3119 // floating-point, so it is a legal inline immediate.
3120
3121 uint32_t Val = static_cast<uint32_t>(Literal);
3122 return (Val == llvm::bit_cast<uint32_t>(from: 0.0f)) ||
3123 (Val == llvm::bit_cast<uint32_t>(from: 1.0f)) ||
3124 (Val == llvm::bit_cast<uint32_t>(from: -1.0f)) ||
3125 (Val == llvm::bit_cast<uint32_t>(from: 0.5f)) ||
3126 (Val == llvm::bit_cast<uint32_t>(from: -0.5f)) ||
3127 (Val == llvm::bit_cast<uint32_t>(from: 2.0f)) ||
3128 (Val == llvm::bit_cast<uint32_t>(from: -2.0f)) ||
3129 (Val == llvm::bit_cast<uint32_t>(from: 4.0f)) ||
3130 (Val == llvm::bit_cast<uint32_t>(from: -4.0f)) ||
3131         (Val == 0x3e22f983 && HasInv2Pi); // 1.0 / (2.0 * pi)
3132}
3133
3134bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
3135 if (!HasInv2Pi)
3136 return false;
3137 if (isInlinableIntLiteral(Literal))
3138 return true;
3139 uint16_t Val = static_cast<uint16_t>(Literal);
3140 return Val == 0x3F00 || // 0.5
3141 Val == 0xBF00 || // -0.5
3142 Val == 0x3F80 || // 1.0
3143 Val == 0xBF80 || // -1.0
3144 Val == 0x4000 || // 2.0
3145 Val == 0xC000 || // -2.0
3146 Val == 0x4080 || // 4.0
3147 Val == 0xC080 || // -4.0
3148 Val == 0x3E22; // 1.0 / (2.0 * pi)
3149}
3150
3151bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi) {
3152 return isInlinableLiteral32(Literal, HasInv2Pi);
3153}
3154
3155bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
3156 if (!HasInv2Pi)
3157 return false;
3158 if (isInlinableIntLiteral(Literal))
3159 return true;
3160 uint16_t Val = static_cast<uint16_t>(Literal);
3161 return Val == 0x3C00 || // 1.0
3162 Val == 0xBC00 || // -1.0
3163 Val == 0x3800 || // 0.5
3164 Val == 0xB800 || // -0.5
3165 Val == 0x4000 || // 2.0
3166 Val == 0xC000 || // -2.0
3167 Val == 0x4400 || // 4.0
3168 Val == 0xC400 || // -4.0
3169 Val == 0x3118; // 1/2pi
3170}
3171
3172std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) {
3173 // Unfortunately, the Instruction Set Architecture Reference Guide is
3174 // misleading about how the inline operands work for (packed) 16-bit
3175 // instructions. In a nutshell, the actual HW behavior is:
3176 //
3177 // - integer encodings (-16 .. 64) are always produced as sign-extended
3178 // 32-bit values
3179 // - float encodings are produced as:
3180 // - for F16 instructions: corresponding half-precision float values in
3181 // the LSBs, 0 in the MSBs
3182 // - for UI16 instructions: corresponding single-precision float value
3183 int32_t Signed = static_cast<int32_t>(Literal);
3184 if (Signed >= 0 && Signed <= 64)
3185 return 128 + Signed;
3186
3187 if (Signed >= -16 && Signed <= -1)
3188 return 192 + std::abs(x: Signed);
3189
3190 if (IsFloat) {
3191 // clang-format off
3192 switch (Literal) {
3193 case 0x3800: return 240; // 0.5
3194 case 0xB800: return 241; // -0.5
3195 case 0x3C00: return 242; // 1.0
3196 case 0xBC00: return 243; // -1.0
3197 case 0x4000: return 244; // 2.0
3198 case 0xC000: return 245; // -2.0
3199 case 0x4400: return 246; // 4.0
3200 case 0xC400: return 247; // -4.0
3201 case 0x3118: return 248; // 1.0 / (2.0 * pi)
3202 default: break;
3203 }
3204 // clang-format on
3205 } else {
3206 // clang-format off
3207 switch (Literal) {
3208 case 0x3F000000: return 240; // 0.5
3209 case 0xBF000000: return 241; // -0.5
3210 case 0x3F800000: return 242; // 1.0
3211 case 0xBF800000: return 243; // -1.0
3212 case 0x40000000: return 244; // 2.0
3213 case 0xC0000000: return 245; // -2.0
3214 case 0x40800000: return 246; // 4.0
3215 case 0xC0800000: return 247; // -4.0
3216 case 0x3E22F983: return 248; // 1.0 / (2.0 * pi)
3217 default: break;
3218 }
3219 // clang-format on
3220 }
3221
3222 return {};
3223}
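
// For example, getInlineEncodingV216(true, 0x3800) returns 240 (0.5) and
// getInlineEncodingV216(false, 0xFFFFFFF0) returns 208 (-16).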
3224
3225// Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction
3226// or nullopt.
3227std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) {
3228 return getInlineEncodingV216(IsFloat: false, Literal);
3229}
3230
3231// Encoding of the literal as an inline constant for a V_PK_*_BF16 instruction
3232// or nullopt.
3233std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal) {
3234 int32_t Signed = static_cast<int32_t>(Literal);
3235 if (Signed >= 0 && Signed <= 64)
3236 return 128 + Signed;
3237
3238 if (Signed >= -16 && Signed <= -1)
3239 return 192 + std::abs(x: Signed);
3240
3241 // clang-format off
3242 switch (Literal) {
3243 case 0x3F00: return 240; // 0.5
3244 case 0xBF00: return 241; // -0.5
3245 case 0x3F80: return 242; // 1.0
3246 case 0xBF80: return 243; // -1.0
3247 case 0x4000: return 244; // 2.0
3248 case 0xC000: return 245; // -2.0
3249 case 0x4080: return 246; // 4.0
3250 case 0xC080: return 247; // -4.0
3251 case 0x3E22: return 248; // 1.0 / (2.0 * pi)
3252 default: break;
3253 }
3254 // clang-format on
3255
3256 return std::nullopt;
3257}

// Encoding of the literal as an inline constant for a V_PK_*_F16 instruction
// or nullopt.
std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) {
  return getInlineEncodingV216(/*IsFloat=*/true, Literal);
}

// Encoding of the literal as an inline constant for a V_PK_FMAC_F16
// instruction, or nullopt. This accounts for different inline constant
// behavior:
// - Pre-GFX11: fp16 inline constants have the value in the low 16 bits, 0 in
//   the high 16 bits
// - GFX11+: fp16 inline constants are duplicated into both halves
std::optional<unsigned> getPKFMACF16InlineEncoding(uint32_t Literal,
                                                   bool IsGFX11Plus) {
  // Pre-GFX11 behavior: f16 in low bits, 0 in high bits.
  if (!IsGFX11Plus)
    return getInlineEncodingV216(/*IsFloat=*/true, Literal);

  // GFX11+ behavior: f16 duplicated in both halves.
  // First, check for sign-extended integer inline constants (-16 to 64).
  // These work the same across all generations.
  int32_t Signed = static_cast<int32_t>(Literal);
  if (Signed >= 0 && Signed <= 64)
    return 128 + Signed;

  if (Signed >= -16 && Signed <= -1)
    return 192 + std::abs(Signed);

  // For float inline constants on GFX11+, both halves must be equal.
  uint16_t Lo = static_cast<uint16_t>(Literal);
  uint16_t Hi = static_cast<uint16_t>(Literal >> 16);
  if (Lo != Hi)
    return std::nullopt;
  return getInlineEncodingV216(/*IsFloat=*/true, Lo);
}
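
// Illustrative check (hypothetical, not part of the file): fp16 1.0 is 0x3C00.
// Pre-GFX11 the inline pattern is {0, 1.0}; on GFX11+ it must be a splat:
//
//   assert(*getPKFMACF16InlineEncoding(0x00003C00, /*IsGFX11Plus=*/false)
//          == 242);
//   assert(!getPKFMACF16InlineEncoding(0x00003C00, /*IsGFX11Plus=*/true));
//   assert(*getPKFMACF16InlineEncoding(0x3C003C00, /*IsGFX11Plus=*/true)
//          == 242);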

// Whether the given literal can be inlined for a V_PK_* instruction.
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) {
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    return getInlineEncodingV216(/*IsFloat=*/false, Literal).has_value();
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return getInlineEncodingV216(/*IsFloat=*/true, Literal).has_value();
  case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
    llvm_unreachable("OPERAND_REG_IMM_V2FP16_SPLAT is not supported");
  case AMDGPU::OPERAND_REG_IMM_V2BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
    return isInlinableLiteralV2BF16(Literal);
  case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
    return false;
  default:
    llvm_unreachable("bad packed operand type");
  }
}

// Whether the given literal can be inlined for a V_PK_*_IU16 instruction.
bool isInlinableLiteralV2I16(uint32_t Literal) {
  return getInlineEncodingV2I16(Literal).has_value();
}

// Whether the given literal can be inlined for a V_PK_*_BF16 instruction.
bool isInlinableLiteralV2BF16(uint32_t Literal) {
  return getInlineEncodingV2BF16(Literal).has_value();
}

// Whether the given literal can be inlined for a V_PK_*_F16 instruction.
bool isInlinableLiteralV2F16(uint32_t Literal) {
  return getInlineEncodingV2F16(Literal).has_value();
}

// Whether the given literal can be inlined for a V_PK_FMAC_F16 instruction.
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus) {
  return getPKFMACF16InlineEncoding(Literal, IsGFX11Plus).has_value();
}

bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
  if (IsFP64)
    return !Lo_32(Val);

  return isUInt<32>(Val) || isInt<32>(Val);
}
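
// Rationale sketch (assumed semantics): a 64-bit fp operand can take a 32-bit
// literal only when the literal supplies the high dword and the low dword is
// zero, e.g. double 2.0 (0x4000000000000000) qualifies while
// 0x4000000000000001 does not:
//
//   assert(isValid32BitLiteral(0x4000000000000000ULL, /*IsFP64=*/true));
//   assert(!isValid32BitLiteral(0x4000000000000001ULL, /*IsFP64=*/true));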

int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit) {
  switch (Type) {
  default:
    break;
  case OPERAND_REG_IMM_BF16:
  case OPERAND_REG_IMM_FP16:
  case OPERAND_REG_INLINE_C_BF16:
  case OPERAND_REG_INLINE_C_FP16:
    return Imm & 0xffff;
  case OPERAND_INLINE_SPLIT_BARRIER_INT32:
  case OPERAND_REG_IMM_FP32:
  case OPERAND_REG_IMM_INT32:
  case OPERAND_REG_IMM_V2BF16:
  case OPERAND_REG_IMM_V2FP16:
  case OPERAND_REG_IMM_V2FP16_SPLAT:
  case OPERAND_REG_IMM_V2FP32:
  case OPERAND_REG_IMM_V2INT16:
  case OPERAND_REG_IMM_V2INT32:
  case OPERAND_REG_INLINE_AC_FP32:
  case OPERAND_REG_INLINE_AC_INT32:
  case OPERAND_REG_INLINE_C_FP32:
  case OPERAND_REG_INLINE_C_INT32:
    return Lo_32(Imm);
  case OPERAND_REG_IMM_FP64:
    return IsLit ? Imm : Hi_32(Imm);
  }
  return Imm;
}
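
// Worked example (hypothetical): for an OPERAND_REG_IMM_FP64 operand holding
// double 1.0 (0x3FF0000000000000), when IsLit is false only the high dword is
// kept, matching the single-dword literal the hardware expects:
//
//   assert(encode32BitLiteral(0x3FF0000000000000, OPERAND_REG_IMM_FP64,
//                             /*IsLit=*/false) == 0x3FF00000);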

bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_Gfx:
  case CallingConv::AMDGPU_CS_Chain:
  case CallingConv::AMDGPU_CS_ChainPreserve:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return A->hasAttribute(Attribute::InReg) ||
           A->hasAttribute(Attribute::ByVal);
  default:
    // TODO: treat i1 as divergent?
    return A->hasAttribute(Attribute::InReg);
  }
}

bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = CB->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_Gfx:
  case CallingConv::AMDGPU_CS_Chain:
  case CallingConv::AMDGPU_CS_ChainPreserve:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
           CB->paramHasAttr(ArgNo, Attribute::ByVal);
  default:
    return CB->paramHasAttr(ArgNo, Attribute::InReg);
  }
}
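
// IR-level illustration (hypothetical module, not from this file): given
//
//   define amdgpu_ps float @f(float inreg %s, float %v) { ... }
//
// %s is passed in an SGPR (uniform) while %v lives in a VGPR, so
// isArgPassedInSGPR returns true only for %s.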

static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
  return isGCN3Encoding(ST) || isGFX10Plus(ST);
}

bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset) {
  if (isGFX12Plus(ST))
    return isUInt<23>(EncodedOffset);

  return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
                               : isUInt<8>(EncodedOffset);
}

bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset, bool IsBuffer) {
  if (isGFX12Plus(ST)) {
    if (IsBuffer && EncodedOffset < 0)
      return false;
    return isInt<24>(EncodedOffset);
  }

  return !IsBuffer && hasSMRDSignedImmOffset(ST) && isInt<21>(EncodedOffset);
}

static bool isDwordAligned(uint64_t ByteOffset) {
  return (ByteOffset & 3) == 0;
}

uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
                                uint64_t ByteOffset) {
  if (hasSMEMByteOffset(ST))
    return ByteOffset;

  assert(isDwordAligned(ByteOffset));
  return ByteOffset >> 2;
}
std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                            int64_t ByteOffset, bool IsBuffer,
                                            bool HasSOffset) {
  // For unbuffered smem loads, it is illegal for the immediate offset to be
  // negative if the resulting (Offset + (M0 or SOffset or zero)) is negative.
  // Handle the case where SOffset is not present.
  if (!IsBuffer && !HasSOffset && ByteOffset < 0 && hasSMRDSignedImmOffset(ST))
    return std::nullopt;

  if (isGFX12Plus(ST)) // 24-bit signed offsets
    return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
                                 : std::nullopt;

  // The signed version is always a byte offset.
  if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
    assert(hasSMEMByteOffset(ST));
    return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
                                 : std::nullopt;
  }

  if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
    return std::nullopt;

  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
             ? std::optional<int64_t>(EncodedOffset)
             : std::nullopt;
}
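
// Worked example (assumed subtarget behavior; `SISubtarget` is a hypothetical
// MCSubtargetInfo for a target without SMEM byte offsets, e.g. SI): a byte
// offset of 16 must be dword-aligned and is encoded in dword units as 4,
// whereas GCN3/GFX10+ targets encode the byte offset directly:
//
//   assert(*getSMRDEncodedOffset(SISubtarget, 16, /*IsBuffer=*/false,
//                                /*HasSOffset=*/false) == 4);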

std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                     int64_t ByteOffset) {
  if (!isCI(ST) || !isDwordAligned(ByteOffset))
    return std::nullopt;

  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
                                   : std::nullopt;
}

unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) {
  if (ST.getFeatureBits().test(FeatureFlatOffsetBits12))
    return 12;
  if (ST.getFeatureBits().test(FeatureFlatOffsetBits24))
    return 24;
  return 13;
}

namespace {

struct SourceOfDivergence {
  unsigned Intr;
};
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);

struct AlwaysUniform {
  unsigned Intr;
};
const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);

#define GET_SourcesOfDivergence_IMPL
#define GET_UniformIntrinsics_IMPL
#define GET_Gfx9BufferFormat_IMPL
#define GET_Gfx10BufferFormat_IMPL
#define GET_Gfx11PlusBufferFormat_IMPL

#include "AMDGPUGenSearchableTables.inc"

} // end anonymous namespace

bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
  return lookupSourceOfDivergence(IntrID);
}

bool isIntrinsicAlwaysUniform(unsigned IntrID) {
  return lookupAlwaysUniform(IntrID);
}

const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI) {
  return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(
                                BitsPerComp, NumComponents, NumFormat)
         : isGFX10(STI)
             ? getGfx10BufferFormatInfo(BitsPerComp, NumComponents, NumFormat)
             : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat);
}

const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI) {
  return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
         : isGFX10(STI)   ? getGfx10BufferFormatInfo(Format)
                          : getGfx9BufferFormatInfo(Format);
}

const MCRegisterClass *getVGPRPhysRegClass(MCRegister Reg,
                                           const MCRegisterInfo &MRI) {
  const unsigned VGPRClasses[] = {
      AMDGPU::VGPR_16RegClassID,  AMDGPU::VGPR_32RegClassID,
      AMDGPU::VReg_64RegClassID,  AMDGPU::VReg_96RegClassID,
      AMDGPU::VReg_128RegClassID, AMDGPU::VReg_160RegClassID,
      AMDGPU::VReg_192RegClassID, AMDGPU::VReg_224RegClassID,
      AMDGPU::VReg_256RegClassID, AMDGPU::VReg_288RegClassID,
      AMDGPU::VReg_320RegClassID, AMDGPU::VReg_352RegClassID,
      AMDGPU::VReg_384RegClassID, AMDGPU::VReg_512RegClassID,
      AMDGPU::VReg_1024RegClassID};

  for (unsigned RCID : VGPRClasses) {
    const MCRegisterClass &RC = MRI.getRegClass(RCID);
    if (RC.contains(Reg))
      return &RC;
  }

  return nullptr;
}

unsigned getVGPREncodingMSBs(MCRegister Reg, const MCRegisterInfo &MRI) {
  unsigned Enc = MRI.getEncodingValue(Reg);
  unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK;
  return Idx >> 8;
}

MCRegister getVGPRWithMSBs(MCRegister Reg, unsigned MSBs,
                           const MCRegisterInfo &MRI) {
  unsigned Enc = MRI.getEncodingValue(Reg);
  unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK;
  if (Idx >= 0x100)
    return MCRegister();

  const MCRegisterClass *RC = getVGPRPhysRegClass(Reg, MRI);
  if (!RC)
    return MCRegister();

  Idx |= MSBs << 8;
  if (RC->getID() == AMDGPU::VGPR_16RegClassID) {
    // This class has 2048 registers with interleaved lo16 and hi16.
    Idx *= 2;
    if (Enc & AMDGPU::HWEncoding::IS_HI16)
      ++Idx;
  }

  return RC->getRegister(Idx);
}
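
// Numeric illustration (assumed gfx1250-style extended VGPR file): v5 has
// encoding index 5, so MSBs = 1 yields index 5 | (1 << 8) = 261, i.e. v261,
// provided the register class is large enough to contain that index.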

static std::optional<unsigned>
convertSetRegImmToVgprMSBs(unsigned Imm, unsigned Simm16,
                           bool HasSetregVGPRMSBFixup) {
  constexpr unsigned VGPRMSBShift =
      llvm::countr_zero_constexpr<unsigned>(AMDGPU::Hwreg::DST_VGPR_MSB);

  auto [HwRegId, Offset, Size] = Hwreg::HwregEncoding::decode(Simm16);
  if (HwRegId != Hwreg::ID_MODE ||
      (!HasSetregVGPRMSBFixup && (Offset + Size) <= VGPRMSBShift))
    return {};
  Imm = ((Imm >> Offset) & Hwreg::VGPR_MSB_MASK) >> VGPRMSBShift;
  if (!HasSetregVGPRMSBFixup)
    Imm &= llvm::maskTrailingOnes<unsigned>(Size);
  return llvm::rotr<uint8_t>(static_cast<uint8_t>(Imm), /*R=*/2);
}

std::optional<unsigned> convertSetRegImmToVgprMSBs(const MachineInstr &MI,
                                                   bool HasSetregVGPRMSBFixup) {
  assert(MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32);
  return convertSetRegImmToVgprMSBs(MI.getOperand(0).getImm(),
                                    MI.getOperand(1).getImm(),
                                    HasSetregVGPRMSBFixup);
}

std::optional<unsigned> convertSetRegImmToVgprMSBs(const MCInst &MI,
                                                   bool HasSetregVGPRMSBFixup) {
  assert(MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32_gfx12);
  return convertSetRegImmToVgprMSBs(MI.getOperand(0).getImm(),
                                    MI.getOperand(1).getImm(),
                                    HasSetregVGPRMSBFixup);
}

std::pair<const AMDGPU::OpName *, const AMDGPU::OpName *>
getVGPRLoweringOperandTables(const MCInstrDesc &Desc) {
  static const AMDGPU::OpName VOPOps[4] = {
      AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2,
      AMDGPU::OpName::vdst};
  static const AMDGPU::OpName VDSOps[4] = {
      AMDGPU::OpName::addr, AMDGPU::OpName::data0, AMDGPU::OpName::data1,
      AMDGPU::OpName::vdst};
  static const AMDGPU::OpName FLATOps[4] = {
      AMDGPU::OpName::vaddr, AMDGPU::OpName::vdata,
      AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdst};
  static const AMDGPU::OpName BUFOps[4] = {
      AMDGPU::OpName::vaddr, AMDGPU::OpName::NUM_OPERAND_NAMES,
      AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdata};
  static const AMDGPU::OpName VIMGOps[4] = {
      AMDGPU::OpName::vaddr0, AMDGPU::OpName::vaddr1, AMDGPU::OpName::vaddr2,
      AMDGPU::OpName::vdata};
  // For VOPD instructions, the MSB of a Y-component operand's VGPR address
  // must match that of the corresponding X-component operand; otherwise the
  // two components shall not be combined into a VOPD instruction.
  static const AMDGPU::OpName VOPDOpsX[4] = {
      AMDGPU::OpName::src0X, AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vsrc2X,
      AMDGPU::OpName::vdstX};
  static const AMDGPU::OpName VOPDOpsY[4] = {
      AMDGPU::OpName::src0Y, AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vsrc2Y,
      AMDGPU::OpName::vdstY};

  // VOP2 MADMK instructions use a src0, imm, src1 scheme.
  static const AMDGPU::OpName VOP2MADMKOps[4] = {
      AMDGPU::OpName::src0, AMDGPU::OpName::NUM_OPERAND_NAMES,
      AMDGPU::OpName::src1, AMDGPU::OpName::vdst};
  static const AMDGPU::OpName VOPDFMAMKOpsX[4] = {
      AMDGPU::OpName::src0X, AMDGPU::OpName::NUM_OPERAND_NAMES,
      AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vdstX};
  static const AMDGPU::OpName VOPDFMAMKOpsY[4] = {
      AMDGPU::OpName::src0Y, AMDGPU::OpName::NUM_OPERAND_NAMES,
      AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vdstY};

  unsigned TSFlags = Desc.TSFlags;

  if (TSFlags &
      (SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | SIInstrFlags::VOP3 |
       SIInstrFlags::VOP3P | SIInstrFlags::VOPC | SIInstrFlags::DPP)) {
    switch (Desc.getOpcode()) {
    // LD_SCALE operands ignore MSB.
    case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32:
    case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32_gfx1250:
    case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64:
    case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64_gfx1250:
      return {};
    case AMDGPU::V_FMAMK_F16:
    case AMDGPU::V_FMAMK_F16_t16:
    case AMDGPU::V_FMAMK_F16_t16_gfx12:
    case AMDGPU::V_FMAMK_F16_fake16:
    case AMDGPU::V_FMAMK_F16_fake16_gfx12:
    case AMDGPU::V_FMAMK_F32:
    case AMDGPU::V_FMAMK_F32_gfx12:
    case AMDGPU::V_FMAMK_F64:
    case AMDGPU::V_FMAMK_F64_gfx1250:
      return {VOP2MADMKOps, nullptr};
    default:
      break;
    }
    return {VOPOps, nullptr};
  }

  if (TSFlags & SIInstrFlags::DS)
    return {VDSOps, nullptr};

  if (TSFlags & SIInstrFlags::FLAT)
    return {FLATOps, nullptr};

  if (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))
    return {BUFOps, nullptr};

  if (TSFlags & SIInstrFlags::VIMAGE)
    return {VIMGOps, nullptr};

  if (AMDGPU::isVOPD(Desc.getOpcode())) {
    auto [OpX, OpY] = getVOPDComponents(Desc.getOpcode());
    return {(OpX == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsX : VOPDOpsX,
            (OpY == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsY : VOPDOpsY};
  }

  assert(!(TSFlags & SIInstrFlags::MIMG));

  if (TSFlags & (SIInstrFlags::VSAMPLE | SIInstrFlags::EXP))
    llvm_unreachable("Sample and export VGPR lowering is not implemented and"
                     " these instructions are not expected on gfx1250");

  return {};
}

bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode) {
  uint64_t TSFlags = MII.get(Opcode).TSFlags;

  if (TSFlags & SIInstrFlags::SMRD)
    return !getSMEMIsBuffer(Opcode);
  if (!(TSFlags & SIInstrFlags::FLAT))
    return false;

  // Only SV and SVS modes are supported.
  if (TSFlags & SIInstrFlags::FlatScratch)
    return hasNamedOperand(Opcode, OpName::vaddr);

  // Only GVS mode is supported.
  return hasNamedOperand(Opcode, OpName::vaddr) &&
         hasNamedOperand(Opcode, OpName::saddr);
}

bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc, const MCInstrInfo &MII,
                             const MCSubtargetInfo &ST) {
  for (auto OpName : {OpName::vdst, OpName::src0, OpName::src1, OpName::src2}) {
    int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
    if (Idx == -1)
      continue;

    const MCOperandInfo &OpInfo = OpDesc.operands()[Idx];
    int16_t RegClass = MII.getOpRegClassID(
        OpInfo, ST.getHwMode(MCSubtargetInfo::HwMode_RegInfo));
    if (RegClass == AMDGPU::VReg_64RegClassID ||
        RegClass == AMDGPU::VReg_64_Align2RegClassID)
      return true;
  }

  return false;
}

bool isDPALU_DPP32BitOpc(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::V_MUL_LO_U32_e64:
  case AMDGPU::V_MUL_LO_U32_e64_dpp:
  case AMDGPU::V_MUL_LO_U32_e64_dpp_gfx1250:
  case AMDGPU::V_MUL_HI_U32_e64:
  case AMDGPU::V_MUL_HI_U32_e64_dpp:
  case AMDGPU::V_MUL_HI_U32_e64_dpp_gfx1250:
  case AMDGPU::V_MUL_HI_I32_e64:
  case AMDGPU::V_MUL_HI_I32_e64_dpp:
  case AMDGPU::V_MUL_HI_I32_e64_dpp_gfx1250:
  case AMDGPU::V_MAD_U32_e64:
  case AMDGPU::V_MAD_U32_e64_dpp:
  case AMDGPU::V_MAD_U32_e64_dpp_gfx1250:
    return true;
  default:
    return false;
  }
}

bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII,
                 const MCSubtargetInfo &ST) {
  if (!ST.hasFeature(AMDGPU::FeatureDPALU_DPP))
    return false;

  if (isDPALU_DPP32BitOpc(OpDesc.getOpcode()))
    return ST.hasFeature(AMDGPU::FeatureGFX1250Insts);

  return hasAny64BitVGPROperands(OpDesc, MII, ST);
}
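
// In words (restating the logic above): an instruction is a DPALU DPP
// candidate only when the subtarget has FeatureDPALU_DPP, and then either it
// is one of the 32-bit multiply/mad opcodes listed above (gfx1250 only) or it
// reads or writes a 64-bit VGPR operand.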

unsigned getLdsDwGranularity(const MCSubtargetInfo &ST) {
  if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize32768))
    return 64;
  if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize65536))
    return 128;
  if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize163840))
    return 320;
  if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize327680))
    return 512;
  return 64; // In sync with getAddressableLocalMemorySize
}
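
// Quick arithmetic check (assumed mapping): a target with 65536 bytes of
// addressable LDS gets a granularity of 128 dwords, i.e. 128 * 4 = 512 bytes
// per allocation unit.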

bool isPackedFP32Inst(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::V_PK_ADD_F32:
  case AMDGPU::V_PK_ADD_F32_gfx12:
  case AMDGPU::V_PK_MUL_F32:
  case AMDGPU::V_PK_MUL_F32_gfx12:
  case AMDGPU::V_PK_FMA_F32:
  case AMDGPU::V_PK_FMA_F32_gfx12:
    return true;
  default:
    return false;
  }
}

const std::array<unsigned, 3> &ClusterDimsAttr::getDims() const {
  assert(isFixedDims() && "expect kind to be FixedDims");
  return Dims;
}

std::string ClusterDimsAttr::to_string() const {
  SmallString<10> Buffer;
  raw_svector_ostream OS(Buffer);

  switch (getKind()) {
  case Kind::Unknown:
    return "";
  case Kind::NoCluster: {
    OS << EncoNoCluster << ',' << EncoNoCluster << ',' << EncoNoCluster;
    return Buffer.c_str();
  }
  case Kind::VariableDims: {
    OS << EncoVariableDims << ',' << EncoVariableDims << ','
       << EncoVariableDims;
    return Buffer.c_str();
  }
  case Kind::FixedDims: {
    OS << Dims[0] << ',' << Dims[1] << ',' << Dims[2];
    return Buffer.c_str();
  }
  }
  llvm_unreachable("Unknown ClusterDimsAttr kind");
}

ClusterDimsAttr ClusterDimsAttr::get(const Function &F) {
  std::optional<SmallVector<unsigned>> Attr =
      getIntegerVecAttribute(F, "amdgpu-cluster-dims", /*Size=*/3);
  ClusterDimsAttr::Kind AttrKind = Kind::FixedDims;

  if (!Attr.has_value())
    AttrKind = Kind::Unknown;
  else if (all_of(*Attr, equal_to(EncoNoCluster)))
    AttrKind = Kind::NoCluster;
  else if (all_of(*Attr, equal_to(EncoVariableDims)))
    AttrKind = Kind::VariableDims;

  ClusterDimsAttr A(AttrKind);
  if (AttrKind == Kind::FixedDims)
    A.Dims = {(*Attr)[0], (*Attr)[1], (*Attr)[2]};

  return A;
}
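
// IR-level illustration (hypothetical attribute use, values assumed): a
// function carrying
//
//   attributes #0 = { "amdgpu-cluster-dims"="2,2,1" }
//
// yields a FixedDims attribute whose to_string() round-trips to "2,2,1";
// the NoCluster and VariableDims kinds are recognized when all three values
// equal the EncoNoCluster or EncoVariableDims sentinel, respectively.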

} // namespace AMDGPU

raw_ostream &operator<<(raw_ostream &OS,
                        const AMDGPU::IsaInfo::TargetIDSetting S) {
  switch (S) {
  case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported):
    OS << "Unsupported";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::Any):
    OS << "Any";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::Off):
    OS << "Off";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::On):
    OS << "On";
    break;
  }
  return OS;
}

} // namespace llvm