//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "AMDGPUAsmUtils.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/TargetParser/TargetParser.h"
#include <optional>

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion(
    "amdhsa-code-object-version", llvm::cl::Hidden,
    llvm::cl::init(llvm::AMDGPU::AMDHSA_COV6),
    llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
                   "or asm directive still take priority if present)"));
namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  unsigned Mask = getBitMask(Shift, Width);
  return ((Src << Shift) & Mask) | (Dst & ~Mask);
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}
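
// Example: packing the value 0xA into a 4-bit field at bit 8 of a zeroed
// word, then reading it back:
//   packBits(0xA, 0, /*Shift=*/8, /*Width=*/4) == 0xA00
//   unpackBits(0xA00, /*Shift=*/8, /*Width=*/4) == 0xA
// packBits preserves all bits of Dst outside the selected field.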

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 10 : 0;
}

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 6 : 4;
}

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 0 : 4;
}

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 4 : 8;
}

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 4;
}

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
}
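
// Taken together, these helpers describe the S_WAITCNT layout: before gfx11,
// Vmcnt occupies bits [3:0] (plus [15:14] on gfx9/gfx10), Expcnt bits [6:4],
// and Lgkmcnt bits [11:8] (widened to [13:8] on gfx10); from gfx11 onward,
// Expcnt moves to [2:0], Lgkmcnt to [9:4], and Vmcnt to [15:10].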

/// \returns Loadcnt bit width
unsigned getLoadcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Samplecnt bit width.
unsigned getSamplecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Bvhcnt bit width.
unsigned getBvhcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 3 : 0;
}

/// \returns Dscnt bit width.
unsigned getDscntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Dscnt bit shift in combined S_WAIT instructions.
unsigned getDscntBitShift(unsigned VersionMajor) { return 0; }

/// \returns Storecnt or Vscnt bit width, depending on VersionMajor.
unsigned getStorecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 0;
}

/// \returns Kmcnt bit width.
unsigned getKmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 5 : 0;
}

/// \returns Xcnt bit width.
unsigned getXcntBitWidth(unsigned VersionMajor, unsigned VersionMinor) {
  return VersionMajor == 12 && VersionMinor == 5 ? 6 : 0;
}

/// \returns shift for Loadcnt/Storecnt in combined S_WAIT instructions.
unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 8 : 0;
}
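
// In the gfx12 combined S_WAIT encodings (e.g. S_WAIT_LOADCNT_DSCNT and
// S_WAIT_STORECNT_DSCNT) this places Dscnt in bits [5:0] and the
// Loadcnt/Storecnt field in bits [13:8].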

/// \returns VaSdst bit width
inline unsigned getVaSdstBitWidth() { return 3; }

/// \returns VaSdst bit shift
inline unsigned getVaSdstBitShift() { return 9; }

/// \returns VmVsrc bit width
inline unsigned getVmVsrcBitWidth() { return 3; }

/// \returns VmVsrc bit shift
inline unsigned getVmVsrcBitShift() { return 2; }

/// \returns VaVdst bit width
inline unsigned getVaVdstBitWidth() { return 4; }

/// \returns VaVdst bit shift
inline unsigned getVaVdstBitShift() { return 12; }

/// \returns VaVcc bit width
inline unsigned getVaVccBitWidth() { return 1; }

/// \returns VaVcc bit shift
inline unsigned getVaVccBitShift() { return 1; }

/// \returns SaSdst bit width
inline unsigned getSaSdstBitWidth() { return 1; }

/// \returns SaSdst bit shift
inline unsigned getSaSdstBitShift() { return 0; }

/// \returns VaSsrc bit width
inline unsigned getVaSsrcBitWidth() { return 1; }

/// \returns VaSsrc bit shift
inline unsigned getVaSsrcBitShift() { return 8; }

/// \returns HoldCnt bit width
inline unsigned getHoldCntWidth(unsigned VersionMajor, unsigned VersionMinor) {
  static constexpr const unsigned MinMajor = 10;
  static constexpr const unsigned MinMinor = 3;
  return std::tie(VersionMajor, VersionMinor) >= std::tie(MinMajor, MinMinor)
             ? 1
             : 0;
}

/// \returns HoldCnt bit shift
inline unsigned getHoldCntBitShift() { return 7; }

} // end anonymous namespace

namespace llvm {

namespace AMDGPU {

iota_range<InstCounterType> inst_counter_types(InstCounterType MaxCounter) {
  return enum_seq(LOAD_CNT, MaxCounter);
}

/// \returns true if the target supports signed immediate offset for SMRD
/// instructions.
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
  return isGFX9Plus(ST);
}

/// \returns True if \p STI is AMDHSA.
bool isHsaAbi(const MCSubtargetInfo &STI) {
  return STI.getTargetTriple().getOS() == Triple::AMDHSA;
}

unsigned getAMDHSACodeObjectVersion(const Module &M) {
  if (auto *Ver = mdconst::extract_or_null<ConstantInt>(
          M.getModuleFlag("amdhsa_code_object_version"))) {
    return (unsigned)Ver->getZExtValue() / 100;
  }

  return getDefaultAMDHSACodeObjectVersion();
}
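
// The module flag stores the version scaled by 100, so, for example, a module
// containing
//   !llvm.module.flags = !{!0}
//   !0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
// reports code object version 5.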

unsigned getDefaultAMDHSACodeObjectVersion() {
  return DefaultAMDHSACodeObjectVersion;
}

unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion) {
  switch (ABIVersion) {
  case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
    return 4;
  case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
    return 5;
  case ELF::ELFABIVERSION_AMDGPU_HSA_V6:
    return 6;
  default:
    return getDefaultAMDHSACodeObjectVersion();
  }
}

uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) {
  if (T.getOS() != Triple::AMDHSA)
    return 0;

  switch (CodeObjectVersion) {
  case 4:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
  case 5:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
  case 6:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V6;
  default:
    report_fatal_error("Unsupported AMDHSA Code Object Version " +
                       Twine(CodeObjectVersion));
  }
}

unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 48;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
  }
}

// FIXME: All such magic numbers about the ABI should be in a
// central TD file.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 24;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
  }
}

unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 32;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET;
  }
}

unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 40;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET;
  }
}

#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#define GET_MIMGMIPMappingTable_IMPL
#define GET_MIMGBiasMappingTable_IMPL
#define GET_MIMGOffsetMappingTable_IMPL
#define GET_MIMGG16MappingTable_IMPL
#define GET_MAIInstInfoTable_IMPL
#define GET_WMMAInstInfoTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords) {
  const MIMGInfo *Info =
      getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding, VDataDwords, VAddrDwords);
  return Info ? Info->Opcode : -1;
}

const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
  const MIMGInfo *Info = getMIMGInfo(Opc);
  return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
}

int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
  const MIMGInfo *NewInfo =
      getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
                          NewChannels, OrigInfo->VAddrDwords);
  return NewInfo ? NewInfo->Opcode : -1;
}

unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported) {
  unsigned AddrWords = BaseOpcode->NumExtraArgs;
  unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
                            (BaseOpcode->LodOrClampOrMip ? 1 : 0);
  if (IsA16)
    AddrWords += divideCeil(AddrComponents, 2);
  else
    AddrWords += AddrComponents;

  // Note: For subtargets that support A16 but not G16, enabling A16 also
  // enables 16 bit gradients.
  // For subtargets that support A16 (operand) and G16 (done with a different
  // instruction encoding), they are independent.

  if (BaseOpcode->Gradients) {
    if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
      // There are two gradients per coordinate, we pack them separately.
      // For the 3d case,
      // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
      AddrWords += alignTo<2>(Dim->NumGradients / 2);
    else
      AddrWords += Dim->NumGradients;
  }
  return AddrWords;
}
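
// For example, a 2D sample with an LOD argument (NumCoords = 2,
// LodOrClampOrMip = 1) uses 3 address words in 32-bit mode; with A16 the
// three 16-bit components pack into divideCeil(3, 2) = 2 words, plus any
// NumExtraArgs such as bias or compare values.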

struct MUBUFInfo {
  uint32_t Opcode;
  uint32_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
  bool IsBufferInv;
  bool tfe;
};

struct MTBUFInfo {
  uint32_t Opcode;
  uint32_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
};

struct SMInfo {
  uint32_t Opcode;
  bool IsBuffer;
};

struct VOPInfo {
  uint32_t Opcode;
  bool IsSingle;
};

struct VOPC64DPPInfo {
  uint32_t Opcode;
};

struct VOPCDPPAsmOnlyInfo {
  uint32_t Opcode;
};

struct VOP3CDPPAsmOnlyInfo {
  uint32_t Opcode;
};

struct VOPDComponentInfo {
  uint16_t BaseVOP;
  uint16_t VOPDOp;
  bool CanBeVOPDX;
  bool CanBeVOPD3X;
};

struct VOPDInfo {
  uint32_t Opcode;
  uint16_t OpX;
  uint16_t OpY;
  uint16_t Subtarget;
  bool VOPD3;
};

struct VOPTrue16Info {
  uint32_t Opcode;
  bool IsTrue16;
};

#define GET_FP4FP8DstByteSelTable_DECL
#define GET_FP4FP8DstByteSelTable_IMPL

struct DPMACCInstructionInfo {
  uint32_t Opcode;
  bool IsDPMACCInstruction;
};

struct FP4FP8DstByteSelInfo {
  uint32_t Opcode;
  bool HasFP8DstByteSel;
  bool HasFP4DstByteSel;
};

#define GET_DPMACCInstructionTable_DECL
#define GET_DPMACCInstructionTable_IMPL
#define GET_MTBUFInfoTable_DECL
#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
#define GET_MUBUFInfoTable_IMPL
#define GET_SMInfoTable_DECL
#define GET_SMInfoTable_IMPL
#define GET_VOP1InfoTable_DECL
#define GET_VOP1InfoTable_IMPL
#define GET_VOP2InfoTable_DECL
#define GET_VOP2InfoTable_IMPL
#define GET_VOP3InfoTable_DECL
#define GET_VOP3InfoTable_IMPL
#define GET_VOPC64DPPTable_DECL
#define GET_VOPC64DPPTable_IMPL
#define GET_VOPC64DPP8Table_DECL
#define GET_VOPC64DPP8Table_IMPL
#define GET_VOPCAsmOnlyInfoTable_DECL
#define GET_VOPCAsmOnlyInfoTable_IMPL
#define GET_VOP3CAsmOnlyInfoTable_DECL
#define GET_VOP3CAsmOnlyInfoTable_IMPL
#define GET_VOPDComponentTable_DECL
#define GET_VOPDComponentTable_IMPL
#define GET_VOPDPairs_DECL
#define GET_VOPDPairs_IMPL
#define GET_VOPTrue16Table_DECL
#define GET_VOPTrue16Table_IMPL
#define GET_True16D16Table_IMPL
#define GET_WMMAOpcode2AddrMappingTable_DECL
#define GET_WMMAOpcode2AddrMappingTable_IMPL
#define GET_WMMAOpcode3AddrMappingTable_DECL
#define GET_WMMAOpcode3AddrMappingTable_IMPL
#define GET_getMFMA_F8F6F4_WithSize_DECL
#define GET_getMFMA_F8F6F4_WithSize_IMPL
#define GET_isMFMA_F8F6F4Table_IMPL
#define GET_isCvtScaleF32_F32F16ToF8F4Table_IMPL

#include "AMDGPUGenSearchableTables.inc"

int getMTBUFBaseOpcode(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MTBUFInfo *Info =
      getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMTBUFElements(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMTBUFHasVAddr(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info && Info->has_vaddr;
}

bool getMTBUFHasSrsrc(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info && Info->has_srsrc;
}

bool getMTBUFHasSoffset(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info && Info->has_soffset;
}

int getMUBUFBaseOpcode(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MUBUFInfo *Info =
      getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMUBUFElements(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMUBUFHasVAddr(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info && Info->has_vaddr;
}

bool getMUBUFHasSrsrc(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info && Info->has_srsrc;
}

bool getMUBUFHasSoffset(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info && Info->has_soffset;
}

bool getMUBUFIsBufferInv(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info && Info->IsBufferInv;
}

bool getMUBUFTfe(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info && Info->tfe;
}

bool getSMEMIsBuffer(unsigned Opc) {
  const SMInfo *Info = getSMEMOpcodeHelper(Opc);
  return Info && Info->IsBuffer;
}

bool getVOP1IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
  return !Info || Info->IsSingle;
}

bool getVOP2IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
  return !Info || Info->IsSingle;
}

bool getVOP3IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
  return !Info || Info->IsSingle;
}

bool isVOPC64DPP(unsigned Opc) {
  return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
}

bool isVOPCAsmOnly(unsigned Opc) { return isVOPCAsmOnlyOpcodeHelper(Opc); }

bool getMAIIsDGEMM(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info && Info->is_dgemm;
}

bool getMAIIsGFX940XDL(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info && Info->is_gfx940_xdl;
}

bool getWMMAIsXDL(unsigned Opc) {
  const WMMAInstInfo *Info = getWMMAInstInfoHelper(Opc);
  return Info ? Info->is_wmma_xdl : false;
}

uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal) {
  switch (EncodingVal) {
  case MFMAScaleFormats::FP6_E2M3:
  case MFMAScaleFormats::FP6_E3M2:
    return 6;
  case MFMAScaleFormats::FP4_E2M1:
    return 4;
  case MFMAScaleFormats::FP8_E4M3:
  case MFMAScaleFormats::FP8_E5M2:
  default:
    return 8;
  }

  llvm_unreachable("covered switch over mfma scale formats");
}

const MFMA_F8F6F4_Info *getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ,
                                                      unsigned BLGP,
                                                      unsigned F8F8Opcode) {
  uint8_t SrcANumRegs = mfmaScaleF8F6F4FormatToNumRegs(CBSZ);
  uint8_t SrcBNumRegs = mfmaScaleF8F6F4FormatToNumRegs(BLGP);
  return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode);
}

uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt) {
  switch (Fmt) {
  case WMMA::MATRIX_FMT_FP8:
  case WMMA::MATRIX_FMT_BF8:
    return 16;
  case WMMA::MATRIX_FMT_FP6:
  case WMMA::MATRIX_FMT_BF6:
    return 12;
  case WMMA::MATRIX_FMT_FP4:
    return 8;
  }

  llvm_unreachable("covered switch over wmma scale formats");
}

const MFMA_F8F6F4_Info *getWMMA_F8F6F4_WithFormatArgs(unsigned FmtA,
                                                      unsigned FmtB,
                                                      unsigned F8F8Opcode) {
  uint8_t SrcANumRegs = wmmaScaleF8F6F4FormatToNumRegs(FmtA);
  uint8_t SrcBNumRegs = wmmaScaleF8F6F4FormatToNumRegs(FmtB);
  return getMFMA_F8F6F4_InstWithNumRegs(SrcANumRegs, SrcBNumRegs, F8F8Opcode);
}

unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) {
  if (ST.hasFeature(AMDGPU::FeatureGFX13Insts))
    return SIEncodingFamily::GFX13;
  if (ST.hasFeature(AMDGPU::FeatureGFX1250Insts))
    return SIEncodingFamily::GFX1250;
  if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
    return SIEncodingFamily::GFX12;
  if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
    return SIEncodingFamily::GFX11;
  llvm_unreachable("Subtarget generation does not support VOPD!");
}

CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3) {
  bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
  Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  if (Info) {
    // Check that Opc can be used as VOPDY for this encoding. V_MOV_B32 as a
    // VOPDX is just a placeholder here, it is supported on all encodings.
    // TODO: This can be optimized by creating tables of supported VOPDY
    // opcodes per encoding.
    unsigned VOPDMov = AMDGPU::getVOPDOpcode(AMDGPU::V_MOV_B32_e32, VOPD3);
    bool CanBeVOPDX;
    if (VOPD3) {
      CanBeVOPDX = getVOPDFull(AMDGPU::getVOPDOpcode(Opc, VOPD3), VOPDMov,
                               EncodingFamily, VOPD3) != -1;
    } else {
      // The list of VOPDX opcodes is currently the same in all encoding
      // families, so we do not need a family-specific check.
      CanBeVOPDX = Info->CanBeVOPDX;
    }
    bool CanBeVOPDY = getVOPDFull(VOPDMov, AMDGPU::getVOPDOpcode(Opc, VOPD3),
                                  EncodingFamily, VOPD3) != -1;
    return {CanBeVOPDX, CanBeVOPDY};
  }

  return {false, false};
}

unsigned getVOPDOpcode(unsigned Opc, bool VOPD3) {
  bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
  Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  return Info ? Info->VOPDOp : ~0u;
}

bool isVOPD(unsigned Opc) {
  return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
}

bool isMAC(unsigned Opc) {
  return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F32_e64_vi ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F16_e64_vi ||
         Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
         Opc == AMDGPU::V_FMAC_F64_e64_gfx12 ||
         Opc == AMDGPU::V_FMAC_F64_e64_gfx13 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx13 ||
         Opc == AMDGPU::V_FMAC_F32_e64_vi ||
         Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
         Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx12 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx13 ||
         Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx13 ||
         Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
         Opc == AMDGPU::V_DOT2C_F32_BF16_e64_vi ||
         Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
         Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
         Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
}

bool isPermlane16(unsigned Opc) {
  return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
}

bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) {
  return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_BF8_fake16_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_FP8_fake16_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_BF8_t16_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_FP8_t16_e64_gfx12;
}

bool isGenericAtomic(unsigned Opc) {
  return Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB_CLAMP_U32 ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_COND_SUB_U32 ||
         Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
}

bool isAsyncStore(unsigned Opc) {
  return Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_gfx1250 ||
         Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_gfx1250 ||
         Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_gfx1250 ||
         Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_gfx1250 ||
         Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_SADDR_gfx1250 ||
         Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_SADDR_gfx1250 ||
         Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_SADDR_gfx1250 ||
         Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_SADDR_gfx1250;
}

bool isTensorStore(unsigned Opc) {
  return Opc == TENSOR_STORE_FROM_LDS_gfx1250 ||
         Opc == TENSOR_STORE_FROM_LDS_D2_gfx1250;
}
unsigned getTemporalHintType(const MCInstrDesc TID) {
  if (TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))
    return CPol::TH_TYPE_ATOMIC;
  unsigned Opc = TID.getOpcode();
  // Async and tensor stores should have the temporal hint type TH_TYPE_STORE.
  if (TID.mayStore() &&
      (isAsyncStore(Opc) || isTensorStore(Opc) || !TID.mayLoad()))
    return CPol::TH_TYPE_STORE;

  // This defaults to returning TH_TYPE_LOAD when neither the MayStore nor the
  // MayLoad flag is present, which is the case with instructions like
  // image_get_resinfo.
  return CPol::TH_TYPE_LOAD;
}

bool isTrue16Inst(unsigned Opc) {
  const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
  return Info && Info->IsTrue16;
}

FPType getFPDstSelType(unsigned Opc) {
  const FP4FP8DstByteSelInfo *Info = getFP4FP8DstByteSelHelper(Opc);
  if (!Info)
    return FPType::None;
  if (Info->HasFP8DstByteSel)
    return FPType::FP8;
  if (Info->HasFP4DstByteSel)
    return FPType::FP4;

  return FPType::None;
}

bool isDPMACCInstruction(unsigned Opc) {
  const DPMACCInstructionInfo *Info = getDPMACCInstructionHelper(Opc);
  return Info && Info->IsDPMACCInstruction;
}

unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
  return Info ? Info->Opcode3Addr : ~0u;
}

unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
  return Info ? Info->Opcode2Addr : ~0u;
}

// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
int32_t getMCOpcode(uint32_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}

unsigned getBitOp2(unsigned Opc) {
  switch (Opc) {
  default:
    return 0;
  case AMDGPU::V_AND_B32_e32:
    return 0x40;
  case AMDGPU::V_OR_B32_e32:
    return 0x54;
  case AMDGPU::V_XOR_B32_e32:
    return 0x14;
  case AMDGPU::V_XNOR_B32_e32:
    return 0x41;
  }
}

int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily,
                bool VOPD3) {
  bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(OpY) : 0;
  OpY = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : OpY;
  const VOPDInfo *Info =
      getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily, VOPD3);
  return Info ? Info->Opcode : -1;
}

std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
  const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
  assert(Info);
  const auto *OpX = getVOPDBaseFromComponent(Info->OpX);
  const auto *OpY = getVOPDBaseFromComponent(Info->OpY);
  assert(OpX && OpY);
  return {OpX->BaseVOP, OpY->BaseVOP};
}

namespace VOPD {

ComponentProps::ComponentProps(const MCInstrDesc &OpDesc, bool VOP3Layout) {
  assert(OpDesc.getNumDefs() == Component::DST_NUM);

  assert(OpDesc.getOperandConstraint(Component::SRC0, MCOI::TIED_TO) == -1);
  assert(OpDesc.getOperandConstraint(Component::SRC1, MCOI::TIED_TO) == -1);
  auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO);
  assert(TiedIdx == -1 || TiedIdx == Component::DST);
  HasSrc2Acc = TiedIdx != -1;
  Opcode = OpDesc.getOpcode();

  IsVOP3 = VOP3Layout || (OpDesc.TSFlags & SIInstrFlags::VOP3);
  SrcOperandsNum = AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2)   ? 3
                   : AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)  ? 3
                   : AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src1) ? 2
                                                                           : 1;
  assert(SrcOperandsNum <= Component::MAX_SRC_NUM);

  if (Opcode == AMDGPU::V_CNDMASK_B32_e32 ||
      Opcode == AMDGPU::V_CNDMASK_B32_e64) {
    // CNDMASK is an awkward exception: it has FP modifiers, but not FP
    // operands.
    NumVOPD3Mods = 2;
    if (IsVOP3)
      SrcOperandsNum = 3;
  } else if (isSISrcFPOperand(OpDesc,
                              getNamedOperandIdx(Opcode, OpName::src0))) {
    // All FP VOPD instructions have Neg modifiers for all operands except
    // for tied src2.
    NumVOPD3Mods = SrcOperandsNum;
    if (HasSrc2Acc)
      --NumVOPD3Mods;
  }

  if (OpDesc.TSFlags & SIInstrFlags::VOP3)
    return;

  auto OperandsNum = OpDesc.getNumOperands();
  unsigned CompOprIdx;
  for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
    if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
      MandatoryLiteralIdx = CompOprIdx;
      break;
    }
  }
}

int ComponentProps::getBitOp3OperandIdx() const {
  return getNamedOperandIdx(Opcode, OpName::bitop3);
}

unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
  assert(CompOprIdx < Component::MAX_OPR_NUM);

  if (CompOprIdx == Component::DST)
    return getIndexOfDstInParsedOperands();

  auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
  if (CompSrcIdx < getCompParsedSrcOperandsNum())
    return getIndexOfSrcInParsedOperands(CompSrcIdx);

  // The specified operand does not exist.
  return 0;
}

std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
    std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
    const MCRegisterInfo &MRI, bool SkipSrc, bool AllowSameVGPR,
    bool VOPD3) const {

  auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx,
                               CompInfo[ComponentIndex::X].isVOP3());
  auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx,
                               CompInfo[ComponentIndex::Y].isVOP3());

  const auto banksOverlap = [&MRI](MCRegister X, MCRegister Y,
                                   unsigned BanksMask) -> bool {
    MCRegister BaseX = MRI.getSubReg(X, AMDGPU::sub0);
    MCRegister BaseY = MRI.getSubReg(Y, AMDGPU::sub0);
    if (!BaseX)
      BaseX = X;
    if (!BaseY)
      BaseY = Y;
    if ((BaseX.id() & BanksMask) == (BaseY.id() & BanksMask))
      return true;
    if (BaseX != X /* This is a 64-bit register */ &&
        ((BaseX.id() + 1) & BanksMask) == (BaseY.id() & BanksMask))
      return true;
    if (BaseY != Y &&
        (BaseX.id() & BanksMask) == ((BaseY.id() + 1) & BanksMask))
      return true;

    // If both are 64-bit, a bank conflict would already have been detected
    // while checking the first subreg.
    return false;
  };

  unsigned CompOprIdx;
  for (CompOprIdx = 0; CompOprIdx < Component::MAX_OPR_NUM; ++CompOprIdx) {
    unsigned BanksMasks = VOPD3 ? VOPD3_VGPR_BANK_MASKS[CompOprIdx]
                                : VOPD_VGPR_BANK_MASKS[CompOprIdx];
    if (!OpXRegs[CompOprIdx] || !OpYRegs[CompOprIdx])
      continue;

    if (getVGPREncodingMSBs(OpXRegs[CompOprIdx], MRI) !=
        getVGPREncodingMSBs(OpYRegs[CompOprIdx], MRI))
      return CompOprIdx;

    if (SkipSrc && CompOprIdx >= Component::DST_NUM)
      continue;

    if (CompOprIdx < Component::DST_NUM) {
      // Even if we do not check vdst parity, vdst operands still shall not
      // overlap.
      if (MRI.regsOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx]))
        return CompOprIdx;
      if (VOPD3) // No need to check dst parity.
        continue;
    }

    if (banksOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx], BanksMasks) &&
        (!AllowSameVGPR || CompOprIdx < Component::DST_NUM ||
         OpXRegs[CompOprIdx] != OpYRegs[CompOprIdx]))
      return CompOprIdx;
  }

  return {};
}

// Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
// by the specified component. If an operand is unused
// or is not a VGPR, the corresponding value is 0.
//
// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
// for the specified component and MC operand. The callback must return 0
// if the operand is not a register or not a VGPR.
InstInfo::RegIndices
InstInfo::getRegIndices(unsigned CompIdx,
                        std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
                        bool VOPD3) const {
  assert(CompIdx < COMPONENTS_NUM);

  const auto &Comp = CompInfo[CompIdx];
  InstInfo::RegIndices RegIndices;

  RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());

  for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
    unsigned CompSrcIdx = CompOprIdx - DST_NUM;
    RegIndices[CompOprIdx] =
        Comp.hasRegSrcOperand(CompSrcIdx)
            ? GetRegIdx(CompIdx,
                        Comp.getIndexOfSrcInMCOperands(CompSrcIdx, VOPD3))
            : MCRegister();
  }
  return RegIndices;
}

} // namespace VOPD

VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) {
  return VOPD::InstInfo(OpX, OpY);
}

VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
                               const MCInstrInfo *InstrInfo) {
  auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
  const auto &OpXDesc = InstrInfo->get(OpX);
  const auto &OpYDesc = InstrInfo->get(OpY);
  bool VOPD3 = InstrInfo->get(VOPDOpcode).TSFlags & SIInstrFlags::VOPD3;
  VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X, VOPD3);
  VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo, VOPD3);
  return VOPD::InstInfo(OpXInfo, OpYInfo);
}

namespace IsaInfo {

AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
    : STI(STI), XnackSetting(TargetIDSetting::Any),
      SramEccSetting(TargetIDSetting::Any) {
  if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
    XnackSetting = TargetIDSetting::Unsupported;
  if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
    SramEccSetting = TargetIDSetting::Unsupported;
}

void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
  // Check if xnack or sramecc is explicitly enabled or disabled. In the
  // absence of the target features we assume we must generate code that can
  // run in any environment.
  SubtargetFeatures Features(FS);
  std::optional<bool> XnackRequested;
  std::optional<bool> SramEccRequested;

  for (const std::string &Feature : Features.getFeatures()) {
    if (Feature == "+xnack")
      XnackRequested = true;
    else if (Feature == "-xnack")
      XnackRequested = false;
    else if (Feature == "+sramecc")
      SramEccRequested = true;
    else if (Feature == "-sramecc")
      SramEccRequested = false;
  }

  bool XnackSupported = isXnackSupported();
  bool SramEccSupported = isSramEccSupported();

  if (XnackRequested) {
    if (XnackSupported) {
      XnackSetting =
          *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific xnack setting was requested and this GPU does not
      // support xnack, emit a warning. The setting remains "Unsupported".
      if (*XnackRequested) {
        errs() << "warning: xnack 'On' was requested for a processor that does "
                  "not support it!\n";
      } else {
        errs() << "warning: xnack 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }

  if (SramEccRequested) {
    if (SramEccSupported) {
      SramEccSetting =
          *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific sramecc setting was requested and this GPU does not
      // support sramecc, emit a warning. The setting remains "Unsupported".
      if (*SramEccRequested) {
        errs() << "warning: sramecc 'On' was requested for a processor that "
                  "does not support it!\n";
      } else {
        errs() << "warning: sramecc 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }
}

static TargetIDSetting
getTargetIDSettingFromFeatureString(StringRef FeatureString) {
  if (FeatureString.ends_with("-"))
    return TargetIDSetting::Off;
  if (FeatureString.ends_with("+"))
    return TargetIDSetting::On;

  llvm_unreachable("Malformed feature string");
}

void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
  SmallVector<StringRef, 3> TargetIDSplit;
  TargetID.split(TargetIDSplit, ':');

  for (const auto &FeatureString : TargetIDSplit) {
    if (FeatureString.starts_with("xnack"))
      XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
    if (FeatureString.starts_with("sramecc"))
      SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
  }
}

std::string AMDGPUTargetID::toString() const {
  std::string StringRep;
  raw_string_ostream StreamRep(StringRep);

  auto TargetTriple = STI.getTargetTriple();
  auto Version = getIsaVersion(STI.getCPU());

  StreamRep << TargetTriple.getArchName() << '-'
            << TargetTriple.getVendorName() << '-'
            << TargetTriple.getOSName() << '-'
            << TargetTriple.getEnvironmentName() << '-';

  std::string Processor;
  // TODO: Following else statement is present here because we used various
  // alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803').
  // Remove once all aliases are removed from GCNProcessors.td.
  if (Version.Major >= 9)
    Processor = STI.getCPU().str();
  else
    Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
                 Twine(Version.Stepping))
                    .str();

  std::string Features;
  if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
    // sramecc.
    if (getSramEccSetting() == TargetIDSetting::Off)
      Features += ":sramecc-";
    else if (getSramEccSetting() == TargetIDSetting::On)
      Features += ":sramecc+";
    // xnack.
    if (getXnackSetting() == TargetIDSetting::Off)
      Features += ":xnack-";
    else if (getXnackSetting() == TargetIDSetting::On)
      Features += ":xnack+";
  }

  StreamRep << Processor << Features;

  return StringRep;
}

unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureWavefrontSize16))
    return 16;
  if (STI->getFeatureBits().test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
  unsigned BytesPerCU = getAddressableLocalMemorySize(STI);

  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share". So the effective local memory size is doubled in
  // WGP mode on gfx10.
  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
    BytesPerCU *= 2;

  return BytesPerCU;
}

unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize32768))
    return 32768;
  if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize65536))
    return 65536;
  if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize163840))
    return 163840;
  if (STI->getFeatureBits().test(FeatureAddressableLocalMemorySize327680))
    return 327680;
  return 32768;
}

unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share".

  // GFX12.5 only supports CU mode, which contains four SIMDs.
  if (isGFX1250(*STI)) {
    assert(STI->getFeatureBits().test(FeatureCuMode));
    return 4;
  }

  // For gfx10 in CU mode the functional block is the CU, which contains
  // two SIMDs.
  if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
    return 2;

  // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP
  // contains two CUs, so a total of four SIMDs.
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize) {
  assert(FlatWorkGroupSize != 0);
  if (!STI->getTargetTriple().isAMDGCN())
    return 8;
  unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI);
  unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
  if (N == 1) {
    // Single-wave workgroups don't consume barrier resources.
    return MaxWaves;
  }

  unsigned MaxBarriers = 16;
  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
    MaxBarriers = 32;

  return std::min(MaxWaves / N, MaxBarriers);
}

unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) { return 1; }

unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
  // FIXME: Need to take scratch memory into account.
  if (isGFX90A(*STI))
    return 8;
  if (!isGFX10Plus(*STI))
    return 10;
  return hasGFX10_3Insts(*STI) ? 16 : 20;
}

unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize) {
  return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
                    getEUsPerCU(STI));
}

unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) { return 1; }

unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  // Some subtargets allow encoding 2048, but this isn't tested or supported.
  return 1024;
}

unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize) {
  return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
}

unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return getAddressableNumSGPRs(STI);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) { return 8; }

unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 106;
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 0;

  if (WavesPerEU >= getMaxWavesPerEU(STI))
    return 0;

  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}

unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return Addressable ? AddressableNumSGPRs : 108;
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}
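
// Worked example: on a gfx9 target without the trap handler feature, a budget
// of 10 waves/EU gives getMaxNumSGPRs = min(alignDown(800 / 10, 16), 102)
//                                     = min(80, 102) = 80 SGPRs per wave.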

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed) {
  unsigned ExtraSGPRs = 0;
  if (VCCUsed)
    ExtraSGPRs = 2;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return ExtraSGPRs;

  if (Version.Major < 8) {
    if (FlatScrUsed)
      ExtraSGPRs = 4;
  } else {
    if (XNACKUsed)
      ExtraSGPRs = 4;

    if (FlatScrUsed ||
        STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
      ExtraSGPRs = 6;
  }

  return ExtraSGPRs;
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed) {
  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}

static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs,
                                               unsigned Granule) {
  return divideCeil(std::max(1u, NumRegs), Granule);
}

unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
  // SGPRBlocks is actual number of SGPR blocks minus 1.
  return getGranulatedNumRegisterBlocks(NumSGPRs,
                                        getSGPREncodingGranule(STI)) -
         1;
}

unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             unsigned DynamicVGPRBlockSize,
                             std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  if (DynamicVGPRBlockSize != 0)
    return DynamicVGPRBlockSize;

  bool IsWave32 = EnableWavefrontSize32
                      ? *EnableWavefrontSize32
                      : STI->getFeatureBits().test(FeatureWavefrontSize32);

  if (STI->getFeatureBits().test(Feature1_5xVGPRs))
    return IsWave32 ? 24 : 12;

  if (hasGFX10_3Insts(*STI))
    return IsWave32 ? 16 : 8;

  return IsWave32 ? 8 : 4;
}

unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  bool IsWave32 = EnableWavefrontSize32
                      ? *EnableWavefrontSize32
                      : STI->getFeatureBits().test(FeatureWavefrontSize32);

  if (STI->getFeatureBits().test(Feature1024AddressableVGPRs))
    return IsWave32 ? 16 : 8;

  return IsWave32 ? 8 : 4;
}

unsigned getArchVGPRAllocGranule() { return 4; }

unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  if (!isGFX10Plus(*STI))
    return 256;
  bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32);
  if (STI->getFeatureBits().test(Feature1_5xVGPRs))
    return IsWave32 ? 1536 : 768;
  return IsWave32 ? 1024 : 512;
}

unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI) {
  const auto &Features = STI->getFeatureBits();
  if (Features.test(Feature1024AddressableVGPRs))
    return Features.test(FeatureWavefrontSize32) ? 1024 : 512;
  return 256;
}

unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI,
                                unsigned DynamicVGPRBlockSize) {
  const auto &Features = STI->getFeatureBits();
  if (Features.test(FeatureGFX90AInsts))
    return 512;

  if (DynamicVGPRBlockSize != 0)
    // On GFX12 we can allocate at most 8 blocks of VGPRs.
    return 8 * getVGPRAllocGranule(STI, DynamicVGPRBlockSize);
  return getAddressableNumArchVGPRs(STI);
}

unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
                                      unsigned NumVGPRs,
                                      unsigned DynamicVGPRBlockSize) {
  return getNumWavesPerEUWithNumVGPRs(
      NumVGPRs, getVGPRAllocGranule(STI, DynamicVGPRBlockSize),
      getMaxWavesPerEU(STI), getTotalNumVGPRs(STI));
}

unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
                                      unsigned MaxWaves,
                                      unsigned TotalNumVGPRs) {
  if (NumVGPRs < Granule)
    return MaxWaves;
  unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
  return std::min(std::max(TotalNumVGPRs / RoundedRegs, 1u), MaxWaves);
}
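
// Worked example: on gfx90a (Granule = 8, TotalNumVGPRs = 512, MaxWaves = 8),
// 100 used VGPRs round up to 104, so 512 / 104 = 4 waves fit per EU.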

unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
                                  AMDGPUSubtarget::Generation Gen) {
  if (Gen >= AMDGPUSubtarget::GFX10)
    return MaxWaves;

  if (Gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
    if (SGPRs <= 80)
      return 10;
    if (SGPRs <= 88)
      return 9;
    if (SGPRs <= 100)
      return 8;
    return 7;
  }
  if (SGPRs <= 48)
    return 10;
  if (SGPRs <= 56)
    return 9;
  if (SGPRs <= 64)
    return 8;
  if (SGPRs <= 72)
    return 7;
  if (SGPRs <= 80)
    return 6;
  return 5;
}

unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        unsigned DynamicVGPRBlockSize) {
  assert(WavesPerEU != 0);

  unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
  if (WavesPerEU >= MaxWavesPerEU)
    return 0;

  unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
  unsigned AddrsableNumVGPRs =
      getAddressableNumVGPRs(STI, DynamicVGPRBlockSize);
  unsigned Granule = getVGPRAllocGranule(STI, DynamicVGPRBlockSize);
  unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);

  if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
    return 0;

  unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs,
                                                        DynamicVGPRBlockSize);
  if (WavesPerEU < MinWavesPerEU)
    return getMinNumVGPRs(STI, MinWavesPerEU, DynamicVGPRBlockSize);

  unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
  unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
  return std::min(MinNumVGPRs, AddrsableNumVGPRs);
}

unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        unsigned DynamicVGPRBlockSize) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs =
      alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
                getVGPRAllocGranule(STI, DynamicVGPRBlockSize));
  unsigned AddressableNumVGPRs =
      getAddressableNumVGPRs(STI, DynamicVGPRBlockSize);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                                 std::optional<bool> EnableWavefrontSize32) {
  return getGranulatedNumRegisterBlocks(
             NumVGPRs, getVGPREncodingGranule(STI, EnableWavefrontSize32)) -
         1;
}

unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI,
                                   unsigned NumVGPRs,
                                   unsigned DynamicVGPRBlockSize,
                                   std::optional<bool> EnableWavefrontSize32) {
  return getGranulatedNumRegisterBlocks(
      NumVGPRs,
      getVGPRAllocGranule(STI, DynamicVGPRBlockSize, EnableWavefrontSize32));
}
} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode,
                               const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  KernelCode.amd_kernel_code_version_major = 1;
  KernelCode.amd_kernel_code_version_minor = 2;
  KernelCode.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  KernelCode.amd_machine_version_major = Version.Major;
  KernelCode.amd_machine_version_minor = Version.Minor;
  KernelCode.amd_machine_version_stepping = Version.Stepping;
  KernelCode.kernel_code_entry_byte_offset = sizeof(amd_kernel_code_t);
  if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
    KernelCode.wavefront_size = 5;
    KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
  } else {
    KernelCode.wavefront_size = 6;
  }

  // If the code object does not support indirect functions, then the value
  // must be 0xffffffff.
  KernelCode.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  KernelCode.kernarg_segment_alignment = 4;
  KernelCode.group_segment_alignment = 4;
  KernelCode.private_segment_alignment = 4;

  if (Version.Major >= 10) {
    KernelCode.compute_pgm_resource_registers |=
        S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
        S_00B848_MEM_ORDERED(1) | S_00B848_FWD_PROGRESS(1);
  }
}

bool isGroupSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV) {
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::CONSTANT_ADDRESS ||
         AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getArch() == Triple::r600;
}

static bool isValidRegPrefix(char C) {
  return C == 'v' || C == 's' || C == 'a';
}

std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef RegName) {
  char Kind = RegName.front();
  if (!isValidRegPrefix(Kind))
    return {};

  RegName = RegName.drop_front();
  if (RegName.consume_front("[")) {
    unsigned Idx, End;
    bool Failed = RegName.consumeInteger(10, Idx);
    Failed |= !RegName.consume_front(":");
    Failed |= RegName.consumeInteger(10, End);
    Failed |= !RegName.consume_back("]");
    if (!Failed) {
      unsigned NumRegs = End - Idx + 1;
      if (NumRegs > 1)
        return {Kind, Idx, NumRegs};
    }
  } else {
    unsigned Idx;
    bool Failed = RegName.getAsInteger(10, Idx);
    if (!Failed)
      return {Kind, Idx, 1};
  }

  return {};
}
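
// For example, "v17" parses to {'v', 17, 1} and "s[8:11]" to {'s', 8, 4};
// malformed names such as "x0" or single-register ranges like "v[5:5]"
// yield the empty tuple.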
1633
1634std::tuple<char, unsigned, unsigned>
1635parseAsmConstraintPhysReg(StringRef Constraint) {
1636 StringRef RegName = Constraint;
1637 if (!RegName.consume_front(Prefix: "{") || !RegName.consume_back(Suffix: "}"))
1638 return {};
1639 return parseAsmPhysRegName(RegName);
1640}
1641
1642std::pair<unsigned, unsigned>
1643getIntegerPairAttribute(const Function &F, StringRef Name,
1644 std::pair<unsigned, unsigned> Default,
1645 bool OnlyFirstRequired) {
1646 if (auto Attr = getIntegerPairAttribute(F, Name, OnlyFirstRequired))
1647 return {Attr->first, Attr->second.value_or(u&: Default.second)};
1648 return Default;
1649}
1650
1651std::optional<std::pair<unsigned, std::optional<unsigned>>>
1652getIntegerPairAttribute(const Function &F, StringRef Name,
1653 bool OnlyFirstRequired) {
1654 Attribute A = F.getFnAttribute(Kind: Name);
1655 if (!A.isStringAttribute())
1656 return std::nullopt;
1657
1658 LLVMContext &Ctx = F.getContext();
1659 std::pair<unsigned, std::optional<unsigned>> Ints;
1660 std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(Separator: ',');
1661 if (Strs.first.trim().getAsInteger(Radix: 0, Result&: Ints.first)) {
1662 Ctx.emitError(ErrorStr: "can't parse first integer attribute " + Name);
1663 return std::nullopt;
1664 }
1665 unsigned Second = 0;
1666 if (Strs.second.trim().getAsInteger(Radix: 0, Result&: Second)) {
1667 if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
1668 Ctx.emitError(ErrorStr: "can't parse second integer attribute " + Name);
1669 return std::nullopt;
1670 }
1671 } else {
1672 Ints.second = Second;
1673 }
1674
1675 return Ints;
1676}
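// Example (attribute name purely illustrative): a function carrying
// "amdgpu-flat-work-group-size"="1,256" yields {1, 256}; with
// OnlyFirstRequired set, a bare "128" yields {128, std::nullopt} rather than
// an error.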
1677
1678SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
1679 unsigned Size,
1680 unsigned DefaultVal) {
1681 std::optional<SmallVector<unsigned>> R =
1682 getIntegerVecAttribute(F, Name, Size);
1683 return R.has_value() ? *R : SmallVector<unsigned>(Size, DefaultVal);
1684}
1685
1686std::optional<SmallVector<unsigned>>
1687getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size) {
1688 assert(Size > 2);
1689 LLVMContext &Ctx = F.getContext();
1690
1691 Attribute A = F.getFnAttribute(Kind: Name);
1692 if (!A.isValid())
1693 return std::nullopt;
1694 if (!A.isStringAttribute()) {
1695 Ctx.emitError(ErrorStr: Name + " is not a string attribute");
1696 return std::nullopt;
1697 }
1698
1699 SmallVector<unsigned> Vals(Size);
1700
1701 StringRef S = A.getValueAsString();
1702 unsigned i = 0;
1703 for (; !S.empty() && i < Size; i++) {
1704 std::pair<StringRef, StringRef> Strs = S.split(Separator: ',');
1705 unsigned IntVal;
1706 if (Strs.first.trim().getAsInteger(Radix: 0, Result&: IntVal)) {
1707 Ctx.emitError(ErrorStr: "can't parse integer attribute " + Strs.first + " in " +
1708 Name);
1709 return std::nullopt;
1710 }
1711 Vals[i] = IntVal;
1712 S = Strs.second;
1713 }
1714
1715 if (!S.empty() || i < Size) {
1716 Ctx.emitError(ErrorStr: "attribute " + Name +
1717 " has incorrect number of integers; expected " +
1718 llvm::utostr(X: Size));
1719 return std::nullopt;
1720 }
1721 return Vals;
1722}
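// Example (attribute name illustrative): "amdgpu-max-num-workgroups"="16,8,4"
// parsed with Size == 3 yields {16, 8, 4}; "16,8" (too few) and "16,8,4,2"
// (too many) both emit the element-count error and return std::nullopt.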
1723
1724bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val) {
1725 assert((MD.getNumOperands() % 2 == 0) && "invalid number of operands!");
1726 for (unsigned I = 0, E = MD.getNumOperands() / 2; I != E; ++I) {
1727 auto Low =
1728 mdconst::extract<ConstantInt>(MD: MD.getOperand(I: 2 * I + 0))->getValue();
1729 auto High =
1730 mdconst::extract<ConstantInt>(MD: MD.getOperand(I: 2 * I + 1))->getValue();
1731 // There are two types of [A; B) ranges:
1732 // A < B, e.g. [4; 5) which is a range that only includes 4.
1733 // A > B, e.g. [5; 4) which is a range that wraps around and includes
1734 // everything except 4.
1735 if (Low.ult(RHS: High)) {
1736 if (Low.ule(RHS: Val) && High.ugt(RHS: Val))
1737 return true;
1738 } else {
1739 if (Low.ule(RHS: Val) || High.ugt(RHS: Val))
1740 return true;
1741 }
1742 }
1743
1744 return false;
1745}
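// Example: for the pair list {4, 8} the values 4..7 are in range and 8 is
// not; the wrapped pair {5, 4} covers every value except 4.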
1746
1747raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait) {
1748 ListSeparator LS;
1749 if (Wait.LoadCnt != ~0u)
1750 OS << LS << "LoadCnt: " << Wait.LoadCnt;
1751 if (Wait.ExpCnt != ~0u)
1752 OS << LS << "ExpCnt: " << Wait.ExpCnt;
1753 if (Wait.DsCnt != ~0u)
1754 OS << LS << "DsCnt: " << Wait.DsCnt;
1755 if (Wait.StoreCnt != ~0u)
1756 OS << LS << "StoreCnt: " << Wait.StoreCnt;
1757 if (Wait.SampleCnt != ~0u)
1758 OS << LS << "SampleCnt: " << Wait.SampleCnt;
1759 if (Wait.BvhCnt != ~0u)
1760 OS << LS << "BvhCnt: " << Wait.BvhCnt;
1761 if (Wait.KmCnt != ~0u)
1762 OS << LS << "KmCnt: " << Wait.KmCnt;
1763 if (Wait.XCnt != ~0u)
1764 OS << LS << "XCnt: " << Wait.XCnt;
1765 if (LS.unused())
1766 OS << "none";
1767 OS << '\n';
1768 return OS;
1769}
1770
1771unsigned getVmcntBitMask(const IsaVersion &Version) {
1772 return (1 << (getVmcntBitWidthLo(VersionMajor: Version.Major) +
1773 getVmcntBitWidthHi(VersionMajor: Version.Major))) -
1774 1;
1775}
1776
1777unsigned getLoadcntBitMask(const IsaVersion &Version) {
1778 return (1 << getLoadcntBitWidth(VersionMajor: Version.Major)) - 1;
1779}
1780
1781unsigned getSamplecntBitMask(const IsaVersion &Version) {
1782 return (1 << getSamplecntBitWidth(VersionMajor: Version.Major)) - 1;
1783}
1784
1785unsigned getBvhcntBitMask(const IsaVersion &Version) {
1786 return (1 << getBvhcntBitWidth(VersionMajor: Version.Major)) - 1;
1787}
1788
1789unsigned getExpcntBitMask(const IsaVersion &Version) {
1790 return (1 << getExpcntBitWidth(VersionMajor: Version.Major)) - 1;
1791}
1792
1793unsigned getLgkmcntBitMask(const IsaVersion &Version) {
1794 return (1 << getLgkmcntBitWidth(VersionMajor: Version.Major)) - 1;
1795}
1796
1797unsigned getDscntBitMask(const IsaVersion &Version) {
1798 return (1 << getDscntBitWidth(VersionMajor: Version.Major)) - 1;
1799}
1800
1801unsigned getKmcntBitMask(const IsaVersion &Version) {
1802 return (1 << getKmcntBitWidth(VersionMajor: Version.Major)) - 1;
1803}
1804
1805unsigned getXcntBitMask(const IsaVersion &Version) {
1806 return (1 << getXcntBitWidth(VersionMajor: Version.Major, VersionMinor: Version.Minor)) - 1;
1807}
1808
1809unsigned getStorecntBitMask(const IsaVersion &Version) {
1810 return (1 << getStorecntBitWidth(VersionMajor: Version.Major)) - 1;
1811}
1812
1813HardwareLimits::HardwareLimits(const IsaVersion &IV) {
1814 bool HasExtendedWaitCounts = IV.Major >= 12;
1815 if (HasExtendedWaitCounts) {
1816 LoadcntMax = getLoadcntBitMask(Version: IV);
1817 DscntMax = getDscntBitMask(Version: IV);
1818 } else {
1819 LoadcntMax = getVmcntBitMask(Version: IV);
1820 DscntMax = getLgkmcntBitMask(Version: IV);
1821 }
1822 ExpcntMax = getExpcntBitMask(Version: IV);
1823 StorecntMax = getStorecntBitMask(Version: IV);
1824 SamplecntMax = getSamplecntBitMask(Version: IV);
1825 BvhcntMax = getBvhcntBitMask(Version: IV);
1826 KmcntMax = getKmcntBitMask(Version: IV);
1827 XcntMax = getXcntBitMask(Version: IV);
1828 VaVdstMax = DepCtr::getVaVdstBitMask();
1829 VmVsrcMax = DepCtr::getVmVsrcBitMask();
1830}
1831
1832unsigned getWaitcntBitMask(const IsaVersion &Version) {
1833 unsigned VmcntLo = getBitMask(Shift: getVmcntBitShiftLo(VersionMajor: Version.Major),
1834 Width: getVmcntBitWidthLo(VersionMajor: Version.Major));
1835 unsigned Expcnt = getBitMask(Shift: getExpcntBitShift(VersionMajor: Version.Major),
1836 Width: getExpcntBitWidth(VersionMajor: Version.Major));
1837 unsigned Lgkmcnt = getBitMask(Shift: getLgkmcntBitShift(VersionMajor: Version.Major),
1838 Width: getLgkmcntBitWidth(VersionMajor: Version.Major));
1839 unsigned VmcntHi = getBitMask(Shift: getVmcntBitShiftHi(VersionMajor: Version.Major),
1840 Width: getVmcntBitWidthHi(VersionMajor: Version.Major));
1841 return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
1842}
1843
1844unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1845 unsigned VmcntLo = unpackBits(Src: Waitcnt, Shift: getVmcntBitShiftLo(VersionMajor: Version.Major),
1846 Width: getVmcntBitWidthLo(VersionMajor: Version.Major));
1847 unsigned VmcntHi = unpackBits(Src: Waitcnt, Shift: getVmcntBitShiftHi(VersionMajor: Version.Major),
1848 Width: getVmcntBitWidthHi(VersionMajor: Version.Major));
1849 return VmcntLo | VmcntHi << getVmcntBitWidthLo(VersionMajor: Version.Major);
1850}
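// Example (gfx9, using the field widths defined above): the encoding 0xC00F
// carries the Vmcnt low bits 0xF in [3:0] and the high bits 0b11 in [15:14],
// so decodeVmcnt returns 0xF | (3 << 4) == 63. On gfx11+ the field is
// contiguous and the high part has zero width.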
1851
1852unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
1853 return unpackBits(Src: Waitcnt, Shift: getExpcntBitShift(VersionMajor: Version.Major),
1854 Width: getExpcntBitWidth(VersionMajor: Version.Major));
1855}
1856
1857unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1858 return unpackBits(Src: Waitcnt, Shift: getLgkmcntBitShift(VersionMajor: Version.Major),
1859 Width: getLgkmcntBitWidth(VersionMajor: Version.Major));
1860}
1861
1862void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt,
1863 unsigned &Expcnt, unsigned &Lgkmcnt) {
1864 Vmcnt = decodeVmcnt(Version, Waitcnt);
1865 Expcnt = decodeExpcnt(Version, Waitcnt);
1866 Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
1867}
1868
1869Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
1870 Waitcnt Decoded;
1871 Decoded.set(T: LOAD_CNT, Val: decodeVmcnt(Version, Waitcnt: Encoded));
1872 Decoded.set(T: EXP_CNT, Val: decodeExpcnt(Version, Waitcnt: Encoded));
1873 Decoded.set(T: DS_CNT, Val: decodeLgkmcnt(Version, Waitcnt: Encoded));
1874 return Decoded;
1875}
1876
1877unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1878 unsigned Vmcnt) {
1879 Waitcnt = packBits(Src: Vmcnt, Dst: Waitcnt, Shift: getVmcntBitShiftLo(VersionMajor: Version.Major),
1880 Width: getVmcntBitWidthLo(VersionMajor: Version.Major));
1881 return packBits(Src: Vmcnt >> getVmcntBitWidthLo(VersionMajor: Version.Major), Dst: Waitcnt,
1882 Shift: getVmcntBitShiftHi(VersionMajor: Version.Major),
1883 Width: getVmcntBitWidthHi(VersionMajor: Version.Major));
1884}
1885
1886unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1887 unsigned Expcnt) {
1888 return packBits(Src: Expcnt, Dst: Waitcnt, Shift: getExpcntBitShift(VersionMajor: Version.Major),
1889 Width: getExpcntBitWidth(VersionMajor: Version.Major));
1890}
1891
1892unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1893 unsigned Lgkmcnt) {
1894 return packBits(Src: Lgkmcnt, Dst: Waitcnt, Shift: getLgkmcntBitShift(VersionMajor: Version.Major),
1895 Width: getLgkmcntBitWidth(VersionMajor: Version.Major));
1896}
1897
1898unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
1899 unsigned Expcnt, unsigned Lgkmcnt) {
1900 unsigned Waitcnt = getWaitcntBitMask(Version);
1901 Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
1902 Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
1903 Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
1904 return Waitcnt;
1905}
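// Worked example (gfx9): with every counter at its maximum,
// encodeWaitcnt(Version, /*Vmcnt=*/63, /*Expcnt=*/7, /*Lgkmcnt=*/15)
// yields 0xCF7F -- the conventional "wait on nothing" s_waitcnt immediate.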
1906
1907unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
1908 return encodeWaitcnt(Version, Vmcnt: Decoded.get(T: LOAD_CNT), Expcnt: Decoded.get(T: EXP_CNT),
1909 Lgkmcnt: Decoded.get(T: DS_CNT));
1910}
1911
1912static unsigned getCombinedCountBitMask(const IsaVersion &Version,
1913 bool IsStore) {
1914 unsigned Dscnt = getBitMask(Shift: getDscntBitShift(VersionMajor: Version.Major),
1915 Width: getDscntBitWidth(VersionMajor: Version.Major));
1916 if (IsStore) {
1917 unsigned Storecnt = getBitMask(Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1918 Width: getStorecntBitWidth(VersionMajor: Version.Major));
1919 return Dscnt | Storecnt;
1920 }
1921 unsigned Loadcnt = getBitMask(Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1922 Width: getLoadcntBitWidth(VersionMajor: Version.Major));
1923 return Dscnt | Loadcnt;
1924}
1925
1926Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt) {
1927 Waitcnt Decoded;
1928 Decoded.set(T: LOAD_CNT, Val: unpackBits(Src: LoadcntDscnt,
1929 Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1930 Width: getLoadcntBitWidth(VersionMajor: Version.Major)));
1931 Decoded.set(T: DS_CNT, Val: unpackBits(Src: LoadcntDscnt, Shift: getDscntBitShift(VersionMajor: Version.Major),
1932 Width: getDscntBitWidth(VersionMajor: Version.Major)));
1933 return Decoded;
1934}
1935
1936Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt) {
1937 Waitcnt Decoded;
1938 Decoded.set(T: STORE_CNT, Val: unpackBits(Src: StorecntDscnt,
1939 Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1940 Width: getStorecntBitWidth(VersionMajor: Version.Major)));
1941 Decoded.set(T: DS_CNT, Val: unpackBits(Src: StorecntDscnt, Shift: getDscntBitShift(VersionMajor: Version.Major),
1942 Width: getDscntBitWidth(VersionMajor: Version.Major)));
1943 return Decoded;
1944}
1945
1946static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt,
1947 unsigned Loadcnt) {
1948 return packBits(Src: Loadcnt, Dst: Waitcnt, Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1949 Width: getLoadcntBitWidth(VersionMajor: Version.Major));
1950}
1951
1952static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt,
1953 unsigned Storecnt) {
1954 return packBits(Src: Storecnt, Dst: Waitcnt, Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1955 Width: getStorecntBitWidth(VersionMajor: Version.Major));
1956}
1957
1958static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt,
1959 unsigned Dscnt) {
1960 return packBits(Src: Dscnt, Dst: Waitcnt, Shift: getDscntBitShift(VersionMajor: Version.Major),
1961 Width: getDscntBitWidth(VersionMajor: Version.Major));
1962}
1963
1964static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt,
1965 unsigned Dscnt) {
1966 unsigned Waitcnt = getCombinedCountBitMask(Version, IsStore: false);
1967 Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt);
1968 Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
1969 return Waitcnt;
1970}
1971
1972unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded) {
1973 return encodeLoadcntDscnt(Version, Loadcnt: Decoded.get(T: LOAD_CNT),
1974 Dscnt: Decoded.get(T: DS_CNT));
1975}
1976
1977static unsigned encodeStorecntDscnt(const IsaVersion &Version,
1978 unsigned Storecnt, unsigned Dscnt) {
1979 unsigned Waitcnt = getCombinedCountBitMask(Version, IsStore: true);
1980 Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt);
1981 Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
1982 return Waitcnt;
1983}
1984
1985unsigned encodeStorecntDscnt(const IsaVersion &Version,
1986 const Waitcnt &Decoded) {
1987 return encodeStorecntDscnt(Version, Storecnt: Decoded.get(T: STORE_CNT),
1988 Dscnt: Decoded.get(T: DS_CNT));
1989}
1990
1991//===----------------------------------------------------------------------===//
1992// Custom Operand Values
1993//===----------------------------------------------------------------------===//
1994
1995static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr,
1996 int Size,
1997 const MCSubtargetInfo &STI) {
1998 unsigned Enc = 0;
1999 for (int Idx = 0; Idx < Size; ++Idx) {
2000 const auto &Op = Opr[Idx];
2001 if (Op.isSupported(STI))
2002 Enc |= Op.encode(Val: Op.Default);
2003 }
2004 return Enc;
2005}
2006
2007static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr,
2008 int Size, unsigned Code,
2009 bool &HasNonDefaultVal,
2010 const MCSubtargetInfo &STI) {
2011 unsigned UsedOprMask = 0;
2012 HasNonDefaultVal = false;
2013 for (int Idx = 0; Idx < Size; ++Idx) {
2014 const auto &Op = Opr[Idx];
2015 if (!Op.isSupported(STI))
2016 continue;
2017 UsedOprMask |= Op.getMask();
2018 unsigned Val = Op.decode(Code);
2019 if (!Op.isValid(Val))
2020 return false;
2021 HasNonDefaultVal |= (Val != Op.Default);
2022 }
2023 return (Code & ~UsedOprMask) == 0;
2024}
2025
2026static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
2027 unsigned Code, int &Idx, StringRef &Name,
2028 unsigned &Val, bool &IsDefault,
2029 const MCSubtargetInfo &STI) {
2030 while (Idx < Size) {
2031 const auto &Op = Opr[Idx++];
2032 if (Op.isSupported(STI)) {
2033 Name = Op.Name;
2034 Val = Op.decode(Code);
2035 IsDefault = (Val == Op.Default);
2036 return true;
2037 }
2038 }
2039
2040 return false;
2041}
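// Typical use (sketch): Idx is advanced in place, so callers enumerate all
// supported fields of an encoded value, e.g. for the DepCtr table below:
//   int Id = 0; StringRef Name; unsigned Val; bool IsDefault;
//   while (decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val,
//                              IsDefault, STI)) {
//     // inspect or print Name/Val here
//   }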
2042
2043static int encodeCustomOperandVal(const CustomOperandVal &Op,
2044 int64_t InputVal) {
2045 if (InputVal < 0 || InputVal > Op.Max)
2046 return OPR_VAL_INVALID;
2047 return Op.encode(Val: InputVal);
2048}
2049
2050static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
2051 const StringRef Name, int64_t InputVal,
2052 unsigned &UsedOprMask,
2053 const MCSubtargetInfo &STI) {
2054 int InvalidId = OPR_ID_UNKNOWN;
2055 for (int Idx = 0; Idx < Size; ++Idx) {
2056 const auto &Op = Opr[Idx];
2057 if (Op.Name == Name) {
2058 if (!Op.isSupported(STI)) {
2059 InvalidId = OPR_ID_UNSUPPORTED;
2060 continue;
2061 }
2062 auto OprMask = Op.getMask();
2063 if (OprMask & UsedOprMask)
2064 return OPR_ID_DUPLICATE;
2065 UsedOprMask |= OprMask;
2066 return encodeCustomOperandVal(Op, InputVal);
2067 }
2068 }
2069 return InvalidId;
2070}
2071
2072//===----------------------------------------------------------------------===//
2073// DepCtr
2074//===----------------------------------------------------------------------===//
2075
2076namespace DepCtr {
2077
2078int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) {
2079 static int Default = -1;
2080 if (Default == -1)
2081 Default = getDefaultCustomOperandEncoding(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, STI);
2082 return Default;
2083}
2084
2085bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
2086 const MCSubtargetInfo &STI) {
2087 return isSymbolicCustomOperandEncoding(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, Code,
2088 HasNonDefaultVal, STI);
2089}
2090
2091bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
2092 bool &IsDefault, const MCSubtargetInfo &STI) {
2093 return decodeCustomOperand(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, Code, Idx&: Id, Name, Val,
2094 IsDefault, STI);
2095}
2096
2097int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
2098 const MCSubtargetInfo &STI) {
2099 return encodeCustomOperand(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, Name, InputVal: Val, UsedOprMask,
2100 STI);
2101}
2102
2103unsigned getVaVdstBitMask() { return (1 << getVaVdstBitWidth()) - 1; }
2104
2105unsigned getVaSdstBitMask() { return (1 << getVaSdstBitWidth()) - 1; }
2106
2107unsigned getVaSsrcBitMask() { return (1 << getVaSsrcBitWidth()) - 1; }
2108
2109unsigned getHoldCntBitMask(const IsaVersion &Version) {
2110 return (1 << getHoldCntWidth(VersionMajor: Version.Major, VersionMinor: Version.Minor)) - 1;
2111}
2112
2113unsigned getVmVsrcBitMask() { return (1 << getVmVsrcBitWidth()) - 1; }
2114
2115unsigned getVaVccBitMask() { return (1 << getVaVccBitWidth()) - 1; }
2116
2117unsigned getSaSdstBitMask() { return (1 << getSaSdstBitWidth()) - 1; }
2118
2119unsigned decodeFieldVmVsrc(unsigned Encoded) {
2120 return unpackBits(Src: Encoded, Shift: getVmVsrcBitShift(), Width: getVmVsrcBitWidth());
2121}
2122
2123unsigned decodeFieldVaVdst(unsigned Encoded) {
2124 return unpackBits(Src: Encoded, Shift: getVaVdstBitShift(), Width: getVaVdstBitWidth());
2125}
2126
2127unsigned decodeFieldSaSdst(unsigned Encoded) {
2128 return unpackBits(Src: Encoded, Shift: getSaSdstBitShift(), Width: getSaSdstBitWidth());
2129}
2130
2131unsigned decodeFieldVaSdst(unsigned Encoded) {
2132 return unpackBits(Src: Encoded, Shift: getVaSdstBitShift(), Width: getVaSdstBitWidth());
2133}
2134
2135unsigned decodeFieldVaVcc(unsigned Encoded) {
2136 return unpackBits(Src: Encoded, Shift: getVaVccBitShift(), Width: getVaVccBitWidth());
2137}
2138
2139unsigned decodeFieldVaSsrc(unsigned Encoded) {
2140 return unpackBits(Src: Encoded, Shift: getVaSsrcBitShift(), Width: getVaSsrcBitWidth());
2141}
2142
2143unsigned decodeFieldHoldCnt(unsigned Encoded, const IsaVersion &Version) {
2144 return unpackBits(Src: Encoded, Shift: getHoldCntBitShift(),
2145 Width: getHoldCntWidth(VersionMajor: Version.Major, VersionMinor: Version.Minor));
2146}
2147
2148unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
2149 return packBits(Src: VmVsrc, Dst: Encoded, Shift: getVmVsrcBitShift(), Width: getVmVsrcBitWidth());
2150}
2151
2152unsigned encodeFieldVmVsrc(unsigned VmVsrc, const MCSubtargetInfo &STI) {
2153 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2154 return encodeFieldVmVsrc(Encoded, VmVsrc);
2155}
2156
2157unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) {
2158 return packBits(Src: VaVdst, Dst: Encoded, Shift: getVaVdstBitShift(), Width: getVaVdstBitWidth());
2159}
2160
2161unsigned encodeFieldVaVdst(unsigned VaVdst, const MCSubtargetInfo &STI) {
2162 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2163 return encodeFieldVaVdst(Encoded, VaVdst);
2164}
2165
2166unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) {
2167 return packBits(Src: SaSdst, Dst: Encoded, Shift: getSaSdstBitShift(), Width: getSaSdstBitWidth());
2168}
2169
2170unsigned encodeFieldSaSdst(unsigned SaSdst, const MCSubtargetInfo &STI) {
2171 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2172 return encodeFieldSaSdst(Encoded, SaSdst);
2173}
2174
2175unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst) {
2176 return packBits(Src: VaSdst, Dst: Encoded, Shift: getVaSdstBitShift(), Width: getVaSdstBitWidth());
2177}
2178
2179unsigned encodeFieldVaSdst(unsigned VaSdst, const MCSubtargetInfo &STI) {
2180 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2181 return encodeFieldVaSdst(Encoded, VaSdst);
2182}
2183
2184unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc) {
2185 return packBits(Src: VaVcc, Dst: Encoded, Shift: getVaVccBitShift(), Width: getVaVccBitWidth());
2186}
2187
2188unsigned encodeFieldVaVcc(unsigned VaVcc, const MCSubtargetInfo &STI) {
2189 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2190 return encodeFieldVaVcc(Encoded, VaVcc);
2191}
2192
2193unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc) {
2194 return packBits(Src: VaSsrc, Dst: Encoded, Shift: getVaSsrcBitShift(), Width: getVaSsrcBitWidth());
2195}
2196
2197unsigned encodeFieldVaSsrc(unsigned VaSsrc, const MCSubtargetInfo &STI) {
2198 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2199 return encodeFieldVaSsrc(Encoded, VaSsrc);
2200}
2201
2202unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt,
2203 const IsaVersion &Version) {
2204 return packBits(Src: HoldCnt, Dst: Encoded, Shift: getHoldCntBitShift(),
2205 Width: getHoldCntWidth(VersionMajor: Version.Major, VersionMinor: Version.Minor));
2206}
2207
2208unsigned encodeFieldHoldCnt(unsigned HoldCnt, const MCSubtargetInfo &STI) {
2209 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2210 return encodeFieldHoldCnt(Encoded, HoldCnt, Version: getIsaVersion(GPU: STI.getCPU()));
2211}
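// Example: encodeFieldVmVsrc(0, STI) yields the default depctr encoding with
// only the VmVsrc field cleared, the immediate typically emitted as
// "s_waitcnt_depctr vm_vsrc(0)". The two-argument overloads can be chained to
// adjust several fields of one encoding:
//   unsigned Enc = getDefaultDepCtrEncoding(STI);
//   Enc = encodeFieldVmVsrc(Enc, 0);
//   Enc = encodeFieldSaSdst(Enc, 0);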
2212
2213} // namespace DepCtr
2214
2215//===----------------------------------------------------------------------===//
2216// exp tgt
2217//===----------------------------------------------------------------------===//
2218
2219namespace Exp {
2220
2221struct ExpTgt {
2222 StringLiteral Name;
2223 unsigned Tgt;
2224 unsigned MaxIndex;
2225};
2226
2227// clang-format off
2228static constexpr ExpTgt ExpTgtInfo[] = {
2229 {.Name: {"null"}, .Tgt: ET_NULL, .MaxIndex: ET_NULL_MAX_IDX},
2230 {.Name: {"mrtz"}, .Tgt: ET_MRTZ, .MaxIndex: ET_MRTZ_MAX_IDX},
2231 {.Name: {"prim"}, .Tgt: ET_PRIM, .MaxIndex: ET_PRIM_MAX_IDX},
2232 {.Name: {"mrt"}, .Tgt: ET_MRT0, .MaxIndex: ET_MRT_MAX_IDX},
2233 {.Name: {"pos"}, .Tgt: ET_POS0, .MaxIndex: ET_POS_MAX_IDX},
2234 {.Name: {"dual_src_blend"},.Tgt: ET_DUAL_SRC_BLEND0, .MaxIndex: ET_DUAL_SRC_BLEND_MAX_IDX},
2235 {.Name: {"param"}, .Tgt: ET_PARAM0, .MaxIndex: ET_PARAM_MAX_IDX},
2236};
2237// clang-format on
2238
2239bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
2240 for (const ExpTgt &Val : ExpTgtInfo) {
2241 if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
2242 Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
2243 Name = Val.Name;
2244 return true;
2245 }
2246 }
2247 return false;
2248}
2249
2250 unsigned getTgtId(const StringRef Name) {
2252 for (const ExpTgt &Val : ExpTgtInfo) {
2253 if (Val.MaxIndex == 0 && Name == Val.Name)
2254 return Val.Tgt;
2255
2256 if (Val.MaxIndex > 0 && Name.starts_with(Prefix: Val.Name)) {
2257 StringRef Suffix = Name.drop_front(N: Val.Name.size());
2258
2259 unsigned Id;
2260 if (Suffix.getAsInteger(Radix: 10, Result&: Id) || Id > Val.MaxIndex)
2261 return ET_INVALID;
2262
2263 // Reject leading zeroes.
2264 if (Suffix.size() > 1 && Suffix[0] == '0')
2265 return ET_INVALID;
2266
2267 return Val.Tgt + Id;
2268 }
2269 }
2270 return ET_INVALID;
2271}
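// Examples: getTgtId("mrtz") == ET_MRTZ and getTgtId("pos3") == ET_POS0 + 3,
// while "pos" (missing index), "mrt07" (leading zero) and "param99" (index
// out of range) all return ET_INVALID.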
2272
2273bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
2274 switch (Id) {
2275 case ET_NULL:
2276 return !isGFX11Plus(STI);
2277 case ET_POS4:
2278 case ET_PRIM:
2279 return isGFX10Plus(STI);
2280 case ET_DUAL_SRC_BLEND0:
2281 case ET_DUAL_SRC_BLEND1:
2282 return isGFX11Plus(STI);
2283 default:
2284 if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
2285 return !isGFX11Plus(STI) || isGFX13Plus(STI);
2286 return true;
2287 }
2288}
2289
2290} // namespace Exp
2291
2292//===----------------------------------------------------------------------===//
2293// MTBUF Format
2294//===----------------------------------------------------------------------===//
2295
2296namespace MTBUFFormat {
2297
2298int64_t getDfmt(const StringRef Name) {
2299 for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
2300 if (Name == DfmtSymbolic[Id])
2301 return Id;
2302 }
2303 return DFMT_UNDEF;
2304}
2305
2306StringRef getDfmtName(unsigned Id) {
2307 assert(Id <= DFMT_MAX);
2308 return DfmtSymbolic[Id];
2309}
2310
2311static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) {
2312 if (isSI(STI) || isCI(STI))
2313 return NfmtSymbolicSICI;
2314 if (isVI(STI) || isGFX9(STI))
2315 return NfmtSymbolicVI;
2316 return NfmtSymbolicGFX10;
2317}
2318
2319int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
2320 const auto *lookupTable = getNfmtLookupTable(STI);
2321 for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
2322 if (Name == lookupTable[Id])
2323 return Id;
2324 }
2325 return NFMT_UNDEF;
2326}
2327
2328StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
2329 assert(Id <= NFMT_MAX);
2330 return getNfmtLookupTable(STI)[Id];
2331}
2332
2333bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
2334 unsigned Dfmt;
2335 unsigned Nfmt;
2336 decodeDfmtNfmt(Format: Id, Dfmt, Nfmt);
2337 return isValidNfmt(Val: Nfmt, STI);
2338}
2339
2340bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
2341 return !getNfmtName(Id, STI).empty();
2342}
2343
2344int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
2345 return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
2346}
2347
2348void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
2349 Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
2350 Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
2351}
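// The two fields occupy disjoint bits of the combined immediate, so encode
// and decode round-trip for any in-range pair:
//   unsigned Dfmt, Nfmt;
//   decodeDfmtNfmt(encodeDfmtNfmt(D, N), Dfmt, Nfmt); // Dfmt == D, Nfmt == N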
2352
2353int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) {
2354 if (isGFX11Plus(STI)) {
2355 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
2356 if (Name == UfmtSymbolicGFX11[Id])
2357 return Id;
2358 }
2359 } else {
2360 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
2361 if (Name == UfmtSymbolicGFX10[Id])
2362 return Id;
2363 }
2364 }
2365 return UFMT_UNDEF;
2366}
2367
2368StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) {
2369 if (isValidUnifiedFormat(Val: Id, STI))
2370 return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
2371 return "";
2372}
2373
2374bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
2375 return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
2376}
2377
2378int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
2379 const MCSubtargetInfo &STI) {
2380 int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
2381 if (isGFX11Plus(STI)) {
2382 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
2383 if (Fmt == DfmtNfmt2UFmtGFX11[Id])
2384 return Id;
2385 }
2386 } else {
2387 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
2388 if (Fmt == DfmtNfmt2UFmtGFX10[Id])
2389 return Id;
2390 }
2391 }
2392 return UFMT_UNDEF;
2393}
2394
2395bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
2396 return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
2397}
2398
2399unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
2400 if (isGFX10Plus(STI))
2401 return UFMT_DEFAULT;
2402 return DFMT_NFMT_DEFAULT;
2403}
2404
2405} // namespace MTBUFFormat
2406
2407//===----------------------------------------------------------------------===//
2408// SendMsg
2409//===----------------------------------------------------------------------===//
2410
2411namespace SendMsg {
2412
2413static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) {
2414 return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_;
2415}
2416
2417bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
2418 return (MsgId & ~(getMsgIdMask(STI))) == 0;
2419}
2420
2421bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
2422 bool Strict) {
2423 assert(isValidMsgId(MsgId, STI));
2424
2425 if (!Strict)
2426 return 0 <= OpId && isUInt<OP_WIDTH_>(x: OpId);
2427
2428 if (msgRequiresOp(MsgId, STI)) {
2429 if (MsgId == ID_GS_PreGFX11 && OpId == OP_GS_NOP)
2430 return false;
2431
2432 return !getMsgOpName(MsgId, Encoding: OpId, STI).empty();
2433 }
2434
2435 return OpId == OP_NONE_;
2436}
2437
2438bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
2439 const MCSubtargetInfo &STI, bool Strict) {
2440 assert(isValidMsgOp(MsgId, OpId, STI, Strict));
2441
2442 if (!Strict)
2443 return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(x: StreamId);
2444
2445 if (!isGFX11Plus(STI)) {
2446 switch (MsgId) {
2447 case ID_GS_PreGFX11:
2448 return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
2449 case ID_GS_DONE_PreGFX11:
2450 return (OpId == OP_GS_NOP)
2451 ? (StreamId == STREAM_ID_NONE_)
2452 : (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
2453 }
2454 }
2455 return StreamId == STREAM_ID_NONE_;
2456}
2457
2458bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
2459 return MsgId == ID_SYSMSG ||
2460 (!isGFX11Plus(STI) &&
2461 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
2462}
2463
2464bool msgSupportsStream(int64_t MsgId, int64_t OpId,
2465 const MCSubtargetInfo &STI) {
2466 return !isGFX11Plus(STI) &&
2467 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
2468 OpId != OP_GS_NOP;
2469}
2470
2471void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
2472 uint16_t &StreamId, const MCSubtargetInfo &STI) {
2473 MsgId = Val & getMsgIdMask(STI);
2474 if (isGFX11Plus(STI)) {
2475 OpId = 0;
2476 StreamId = 0;
2477 } else {
2478 OpId = (Val & OP_MASK_) >> OP_SHIFT_;
2479 StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
2480 }
2481}
2482
2483uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId) {
2484 return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
2485}
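// Example (pre-GFX11): the immediate for "s_sendmsg sendmsg(MSG_GS,
// GS_OP_EMIT, 0)" is built as encodeMsg(ID_GS_PreGFX11, OP_GS_EMIT, 0); on
// GFX11+ the op and stream fields are unused and decodeMsg reports them as
// zero.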
2486
2487} // namespace SendMsg
2488
2489//===----------------------------------------------------------------------===//
2490//
2491//===----------------------------------------------------------------------===//
2492
2493unsigned getInitialPSInputAddr(const Function &F) {
2494 return F.getFnAttributeAsParsedInteger(Kind: "InitialPSInputAddr", Default: 0);
2495}
2496
2497bool getHasColorExport(const Function &F) {
2498 // As a safe default, always respond as if PS has color exports.
2499 return F.getFnAttributeAsParsedInteger(
2500 Kind: "amdgpu-color-export",
2501 Default: F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
2502}
2503
2504bool getHasDepthExport(const Function &F) {
2505 return F.getFnAttributeAsParsedInteger(Kind: "amdgpu-depth-export", Default: 0) != 0;
2506}
2507
2508unsigned getDynamicVGPRBlockSize(const Function &F) {
2509 unsigned BlockSize =
2510 F.getFnAttributeAsParsedInteger(Kind: "amdgpu-dynamic-vgpr-block-size", Default: 0);
2511
2512 if (BlockSize == 16 || BlockSize == 32)
2513 return BlockSize;
2514
2515 return 0;
2516}
2517
2518bool hasXNACK(const MCSubtargetInfo &STI) {
2519 return STI.hasFeature(Feature: AMDGPU::FeatureXNACK);
2520}
2521
2522bool hasSRAMECC(const MCSubtargetInfo &STI) {
2523 return STI.hasFeature(Feature: AMDGPU::FeatureSRAMECC);
2524}
2525
2526bool hasMIMG_R128(const MCSubtargetInfo &STI) {
2527 return STI.hasFeature(Feature: AMDGPU::FeatureMIMG_R128) &&
2528 !STI.hasFeature(Feature: AMDGPU::FeatureR128A16);
2529}
2530
2531bool hasA16(const MCSubtargetInfo &STI) {
2532 return STI.hasFeature(Feature: AMDGPU::FeatureA16);
2533}
2534
2535bool hasG16(const MCSubtargetInfo &STI) {
2536 return STI.hasFeature(Feature: AMDGPU::FeatureG16);
2537}
2538
2539bool hasPackedD16(const MCSubtargetInfo &STI) {
2540 return !STI.hasFeature(Feature: AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) &&
2541 !isSI(STI);
2542}
2543
2544bool hasGDS(const MCSubtargetInfo &STI) {
2545 return STI.hasFeature(Feature: AMDGPU::FeatureGDS);
2546}
2547
2548unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) {
2549 auto Version = getIsaVersion(GPU: STI.getCPU());
2550 if (Version.Major == 10)
2551 return Version.Minor >= 3 ? 13 : 5;
2552 if (Version.Major == 11)
2553 return 5;
2554 if (Version.Major >= 12)
2555 return HasSampler ? 4 : 5;
2556 return 0;
2557}
2558
2559unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) {
2560 if (isGFX1250Plus(STI))
2561 return 32;
2562 return 16;
2563}
2564
2565bool isSI(const MCSubtargetInfo &STI) {
2566 return STI.hasFeature(Feature: AMDGPU::FeatureSouthernIslands);
2567}
2568
2569bool isCI(const MCSubtargetInfo &STI) {
2570 return STI.hasFeature(Feature: AMDGPU::FeatureSeaIslands);
2571}
2572
2573bool isVI(const MCSubtargetInfo &STI) {
2574 return STI.hasFeature(Feature: AMDGPU::FeatureVolcanicIslands);
2575}
2576
2577bool isGFX9(const MCSubtargetInfo &STI) {
2578 return STI.hasFeature(Feature: AMDGPU::FeatureGFX9);
2579}
2580
2581bool isGFX9_GFX10(const MCSubtargetInfo &STI) {
2582 return isGFX9(STI) || isGFX10(STI);
2583}
2584
2585bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI) {
2586 return isGFX9(STI) || isGFX10(STI) || isGFX11(STI);
2587}
2588
2589bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) {
2590 return isVI(STI) || isGFX9(STI) || isGFX10(STI);
2591}
2592
2593bool isGFX8Plus(const MCSubtargetInfo &STI) {
2594 return isVI(STI) || isGFX9Plus(STI);
2595}
2596
2597bool isGFX9Plus(const MCSubtargetInfo &STI) {
2598 return isGFX9(STI) || isGFX10Plus(STI);
2599}
2600
2601bool isNotGFX9Plus(const MCSubtargetInfo &STI) { return !isGFX9Plus(STI); }
2602
2603bool isGFX10(const MCSubtargetInfo &STI) {
2604 return STI.hasFeature(Feature: AMDGPU::FeatureGFX10);
2605}
2606
2607bool isGFX10_GFX11(const MCSubtargetInfo &STI) {
2608 return isGFX10(STI) || isGFX11(STI);
2609}
2610
2611bool isGFX10Plus(const MCSubtargetInfo &STI) {
2612 return isGFX10(STI) || isGFX11Plus(STI);
2613}
2614
2615bool isGFX11(const MCSubtargetInfo &STI) {
2616 return STI.hasFeature(Feature: AMDGPU::FeatureGFX11);
2617}
2618
2619bool isGFX1170(const MCSubtargetInfo &STI) {
2620 return isGFX11(STI) && STI.hasFeature(Feature: AMDGPU::FeatureWMMA128bInsts);
2621}
2622
2623bool isGFX11Plus(const MCSubtargetInfo &STI) {
2624 return isGFX11(STI) || isGFX12Plus(STI);
2625}
2626
2627bool isGFX12(const MCSubtargetInfo &STI) {
2628 return STI.getFeatureBits()[AMDGPU::FeatureGFX12];
2629}
2630
2631bool isGFX12Plus(const MCSubtargetInfo &STI) {
2632 return isGFX12(STI) || isGFX13Plus(STI);
2633}
2634
2635bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); }
2636
2637bool isGFX1250(const MCSubtargetInfo &STI) {
2638 return STI.getFeatureBits()[AMDGPU::FeatureGFX1250Insts] && !isGFX13(STI);
2639}
2640
2641bool isGFX1250Plus(const MCSubtargetInfo &STI) {
2642 return STI.getFeatureBits()[AMDGPU::FeatureGFX1250Insts];
2643}
2644
2645bool isGFX13(const MCSubtargetInfo &STI) {
2646 return STI.getFeatureBits()[AMDGPU::FeatureGFX13];
2647}
2648
2649bool isGFX13Plus(const MCSubtargetInfo &STI) { return isGFX13(STI); }
2650
2651bool supportsWGP(const MCSubtargetInfo &STI) {
2652 if (isGFX1250(STI))
2653 return false;
2654 return isGFX10Plus(STI);
2655}
2656
2657bool isNotGFX11Plus(const MCSubtargetInfo &STI) { return !isGFX11Plus(STI); }
2658
2659bool isNotGFX10Plus(const MCSubtargetInfo &STI) {
2660 return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
2661}
2662
2663bool isGFX10Before1030(const MCSubtargetInfo &STI) {
2664 return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
2665}
2666
2667bool isGCN3Encoding(const MCSubtargetInfo &STI) {
2668 return STI.hasFeature(Feature: AMDGPU::FeatureGCN3Encoding);
2669}
2670
2671bool isGFX10_AEncoding(const MCSubtargetInfo &STI) {
2672 return STI.hasFeature(Feature: AMDGPU::FeatureGFX10_AEncoding);
2673}
2674
2675bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
2676 return STI.hasFeature(Feature: AMDGPU::FeatureGFX10_BEncoding);
2677}
2678
2679bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
2680 return STI.hasFeature(Feature: AMDGPU::FeatureGFX10_3Insts);
2681}
2682
2683bool isGFX10_3_GFX11(const MCSubtargetInfo &STI) {
2684 return isGFX10_BEncoding(STI) && !isGFX12Plus(STI);
2685}
2686
2687bool isGFX90A(const MCSubtargetInfo &STI) {
2688 return STI.hasFeature(Feature: AMDGPU::FeatureGFX90AInsts);
2689}
2690
2691bool isGFX940(const MCSubtargetInfo &STI) {
2692 return STI.hasFeature(Feature: AMDGPU::FeatureGFX940Insts);
2693}
2694
2695bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
2696 return STI.hasFeature(Feature: AMDGPU::FeatureArchitectedFlatScratch);
2697}
2698
2699bool hasMAIInsts(const MCSubtargetInfo &STI) {
2700 return STI.hasFeature(Feature: AMDGPU::FeatureMAIInsts);
2701}
2702
2703bool hasVOPD(const MCSubtargetInfo &STI) {
2704 return STI.hasFeature(Feature: AMDGPU::FeatureVOPDInsts);
2705}
2706
2707bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI) {
2708 return STI.hasFeature(Feature: AMDGPU::FeatureDPPSrc1SGPR);
2709}
2710
2711unsigned hasKernargPreload(const MCSubtargetInfo &STI) {
2712 return STI.hasFeature(Feature: AMDGPU::FeatureKernargPreload);
2713}
2714
2715int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
2716 int32_t ArgNumVGPR) {
2717 if (has90AInsts && ArgNumAGPR)
2718 return alignTo(Value: ArgNumVGPR, Align: 4) + ArgNumAGPR;
2719 return std::max(a: ArgNumVGPR, b: ArgNumAGPR);
2720}
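// Example: with unified registers (has90AInsts) and AGPRs in use, 10 VGPRs
// plus 8 AGPRs cost alignTo(10, 4) + 8 == 20 registers, since the AGPR block
// starts at a 4-register boundary; otherwise the cost is max(VGPR, AGPR).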
2721
2722bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI) {
2723 const MCRegisterClass SGPRClass = TRI->getRegClass(i: AMDGPU::SReg_32RegClassID);
2724 const MCRegister FirstSubReg = TRI->getSubReg(Reg, Idx: AMDGPU::sub0);
2725 return SGPRClass.contains(Reg: FirstSubReg != 0 ? FirstSubReg : Reg) ||
2726 Reg == AMDGPU::SCC;
2727}
2728
2729bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI) {
2730 return MRI.getEncodingValue(Reg) & AMDGPU::HWEncoding::IS_HI16;
2731}
2732
2733#define MAP_REG2REG \
2734 using namespace AMDGPU; \
2735 switch (Reg.id()) { \
2736 default: \
2737 return Reg; \
2738 CASE_CI_VI(FLAT_SCR) \
2739 CASE_CI_VI(FLAT_SCR_LO) \
2740 CASE_CI_VI(FLAT_SCR_HI) \
2741 CASE_VI_GFX9PLUS(TTMP0) \
2742 CASE_VI_GFX9PLUS(TTMP1) \
2743 CASE_VI_GFX9PLUS(TTMP2) \
2744 CASE_VI_GFX9PLUS(TTMP3) \
2745 CASE_VI_GFX9PLUS(TTMP4) \
2746 CASE_VI_GFX9PLUS(TTMP5) \
2747 CASE_VI_GFX9PLUS(TTMP6) \
2748 CASE_VI_GFX9PLUS(TTMP7) \
2749 CASE_VI_GFX9PLUS(TTMP8) \
2750 CASE_VI_GFX9PLUS(TTMP9) \
2751 CASE_VI_GFX9PLUS(TTMP10) \
2752 CASE_VI_GFX9PLUS(TTMP11) \
2753 CASE_VI_GFX9PLUS(TTMP12) \
2754 CASE_VI_GFX9PLUS(TTMP13) \
2755 CASE_VI_GFX9PLUS(TTMP14) \
2756 CASE_VI_GFX9PLUS(TTMP15) \
2757 CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
2758 CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
2759 CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
2760 CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
2761 CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
2762 CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
2763 CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
2764 CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
2765 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
2766 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
2767 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
2768 CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
2769 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
2770 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
2771 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2772 CASE_VI_GFX9PLUS( \
2773 TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2774 CASE_GFXPRE11_GFX11PLUS(M0) \
2775 CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
2776 CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
2777 }
2778
2779#define CASE_CI_VI(node) \
2780 assert(!isSI(STI)); \
2781 case node: \
2782 return isCI(STI) ? node##_ci : node##_vi;
2783
2784#define CASE_VI_GFX9PLUS(node) \
2785 case node: \
2786 return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;
2787
2788#define CASE_GFXPRE11_GFX11PLUS(node) \
2789 case node: \
2790 return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;
2791
2792#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
2793 case node: \
2794 return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
2795
2796MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI) {
2797 if (STI.getTargetTriple().getArch() == Triple::r600)
2798 return Reg;
2799 MAP_REG2REG
2800}
2801
2802#undef CASE_CI_VI
2803#undef CASE_VI_GFX9PLUS
2804#undef CASE_GFXPRE11_GFX11PLUS
2805#undef CASE_GFXPRE11_GFX11PLUS_TO
2806
2807#define CASE_CI_VI(node) \
2808 case node##_ci: \
2809 case node##_vi: \
2810 return node;
2811#define CASE_VI_GFX9PLUS(node) \
2812 case node##_vi: \
2813 case node##_gfx9plus: \
2814 return node;
2815#define CASE_GFXPRE11_GFX11PLUS(node) \
2816 case node##_gfx11plus: \
2817 case node##_gfxpre11: \
2818 return node;
2819#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
2820
2821MCRegister mc2PseudoReg(MCRegister Reg) { MAP_REG2REG }
2822
2823bool isInlineValue(MCRegister Reg) {
2824 switch (Reg.id()) {
2825 case AMDGPU::SRC_SHARED_BASE_LO:
2826 case AMDGPU::SRC_SHARED_BASE:
2827 case AMDGPU::SRC_SHARED_LIMIT_LO:
2828 case AMDGPU::SRC_SHARED_LIMIT:
2829 case AMDGPU::SRC_PRIVATE_BASE_LO:
2830 case AMDGPU::SRC_PRIVATE_BASE:
2831 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2832 case AMDGPU::SRC_PRIVATE_LIMIT:
2833 case AMDGPU::SRC_FLAT_SCRATCH_BASE_LO:
2834 case AMDGPU::SRC_FLAT_SCRATCH_BASE_HI:
2835 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2836 return true;
2837 case AMDGPU::SRC_VCCZ:
2838 case AMDGPU::SRC_EXECZ:
2839 case AMDGPU::SRC_SCC:
2840 return true;
2841 case AMDGPU::SGPR_NULL:
2842 return true;
2843 default:
2844 return false;
2845 }
2846}
2847
2848#undef CASE_CI_VI
2849#undef CASE_VI_GFX9PLUS
2850#undef CASE_GFXPRE11_GFX11PLUS
2851#undef CASE_GFXPRE11_GFX11PLUS_TO
2852#undef MAP_REG2REG
2853
2854bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2855 assert(OpNo < Desc.NumOperands);
2856 unsigned OpType = Desc.operands()[OpNo].OperandType;
2857 return OpType >= AMDGPU::OPERAND_KIMM_FIRST &&
2858 OpType <= AMDGPU::OPERAND_KIMM_LAST;
2859}
2860
2861bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2862 assert(OpNo < Desc.NumOperands);
2863 unsigned OpType = Desc.operands()[OpNo].OperandType;
2864 switch (OpType) {
2865 case AMDGPU::OPERAND_REG_IMM_FP32:
2866 case AMDGPU::OPERAND_REG_IMM_FP64:
2867 case AMDGPU::OPERAND_REG_IMM_FP16:
2868 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2869 case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
2870 case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
2871 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2872 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2873 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2874 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2875 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2876 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2877 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2878 return true;
2879 default:
2880 return false;
2881 }
2882}
2883
2884bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2885 assert(OpNo < Desc.NumOperands);
2886 unsigned OpType = Desc.operands()[OpNo].OperandType;
2887 return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
2888 OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST) ||
2889 (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
2890 OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST);
2891}
2892
2893// Avoid using MCRegisterClass::getSize, since that function will go away
2894// (move from MC* level to Target* level). Return size in bits.
2895unsigned getRegBitWidth(unsigned RCID) {
2896 switch (RCID) {
2897 case AMDGPU::VGPR_16RegClassID:
2898 case AMDGPU::VGPR_16_Lo128RegClassID:
2899 case AMDGPU::SGPR_LO16RegClassID:
2900 case AMDGPU::AGPR_LO16RegClassID:
2901 return 16;
2902 case AMDGPU::SGPR_32RegClassID:
2903 case AMDGPU::VGPR_32RegClassID:
2904 case AMDGPU::VGPR_32_Lo256RegClassID:
2905 case AMDGPU::VRegOrLds_32RegClassID:
2906 case AMDGPU::AGPR_32RegClassID:
2907 case AMDGPU::VS_32RegClassID:
2908 case AMDGPU::AV_32RegClassID:
2909 case AMDGPU::SReg_32RegClassID:
2910 case AMDGPU::SReg_32_XM0RegClassID:
2911 case AMDGPU::SRegOrLds_32RegClassID:
2912 return 32;
2913 case AMDGPU::SGPR_64RegClassID:
2914 case AMDGPU::VS_64RegClassID:
2915 case AMDGPU::SReg_64RegClassID:
2916 case AMDGPU::VReg_64RegClassID:
2917 case AMDGPU::AReg_64RegClassID:
2918 case AMDGPU::SReg_64_XEXECRegClassID:
2919 case AMDGPU::VReg_64_Align2RegClassID:
2920 case AMDGPU::AReg_64_Align2RegClassID:
2921 case AMDGPU::AV_64RegClassID:
2922 case AMDGPU::AV_64_Align2RegClassID:
2923 case AMDGPU::VReg_64_Lo256_Align2RegClassID:
2924 case AMDGPU::VS_64_Lo256RegClassID:
2925 return 64;
2926 case AMDGPU::SGPR_96RegClassID:
2927 case AMDGPU::SReg_96RegClassID:
2928 case AMDGPU::VReg_96RegClassID:
2929 case AMDGPU::AReg_96RegClassID:
2930 case AMDGPU::VReg_96_Align2RegClassID:
2931 case AMDGPU::AReg_96_Align2RegClassID:
2932 case AMDGPU::AV_96RegClassID:
2933 case AMDGPU::AV_96_Align2RegClassID:
2934 case AMDGPU::VReg_96_Lo256_Align2RegClassID:
2935 return 96;
2936 case AMDGPU::SGPR_128RegClassID:
2937 case AMDGPU::SReg_128RegClassID:
2938 case AMDGPU::VReg_128RegClassID:
2939 case AMDGPU::AReg_128RegClassID:
2940 case AMDGPU::VReg_128_Align2RegClassID:
2941 case AMDGPU::AReg_128_Align2RegClassID:
2942 case AMDGPU::AV_128RegClassID:
2943 case AMDGPU::AV_128_Align2RegClassID:
2944 case AMDGPU::SReg_128_XNULLRegClassID:
2945 case AMDGPU::VReg_128_Lo256_Align2RegClassID:
2946 return 128;
2947 case AMDGPU::SGPR_160RegClassID:
2948 case AMDGPU::SReg_160RegClassID:
2949 case AMDGPU::VReg_160RegClassID:
2950 case AMDGPU::AReg_160RegClassID:
2951 case AMDGPU::VReg_160_Align2RegClassID:
2952 case AMDGPU::AReg_160_Align2RegClassID:
2953 case AMDGPU::AV_160RegClassID:
2954 case AMDGPU::AV_160_Align2RegClassID:
2955 case AMDGPU::VReg_160_Lo256_Align2RegClassID:
2956 return 160;
2957 case AMDGPU::SGPR_192RegClassID:
2958 case AMDGPU::SReg_192RegClassID:
2959 case AMDGPU::VReg_192RegClassID:
2960 case AMDGPU::AReg_192RegClassID:
2961 case AMDGPU::VReg_192_Align2RegClassID:
2962 case AMDGPU::AReg_192_Align2RegClassID:
2963 case AMDGPU::AV_192RegClassID:
2964 case AMDGPU::AV_192_Align2RegClassID:
2965 case AMDGPU::VReg_192_Lo256_Align2RegClassID:
2966 return 192;
2967 case AMDGPU::SGPR_224RegClassID:
2968 case AMDGPU::SReg_224RegClassID:
2969 case AMDGPU::VReg_224RegClassID:
2970 case AMDGPU::AReg_224RegClassID:
2971 case AMDGPU::VReg_224_Align2RegClassID:
2972 case AMDGPU::AReg_224_Align2RegClassID:
2973 case AMDGPU::AV_224RegClassID:
2974 case AMDGPU::AV_224_Align2RegClassID:
2975 case AMDGPU::VReg_224_Lo256_Align2RegClassID:
2976 return 224;
2977 case AMDGPU::SGPR_256RegClassID:
2978 case AMDGPU::SReg_256RegClassID:
2979 case AMDGPU::VReg_256RegClassID:
2980 case AMDGPU::AReg_256RegClassID:
2981 case AMDGPU::VReg_256_Align2RegClassID:
2982 case AMDGPU::AReg_256_Align2RegClassID:
2983 case AMDGPU::AV_256RegClassID:
2984 case AMDGPU::AV_256_Align2RegClassID:
2985 case AMDGPU::SReg_256_XNULLRegClassID:
2986 case AMDGPU::VReg_256_Lo256_Align2RegClassID:
2987 return 256;
2988 case AMDGPU::SGPR_288RegClassID:
2989 case AMDGPU::SReg_288RegClassID:
2990 case AMDGPU::VReg_288RegClassID:
2991 case AMDGPU::AReg_288RegClassID:
2992 case AMDGPU::VReg_288_Align2RegClassID:
2993 case AMDGPU::AReg_288_Align2RegClassID:
2994 case AMDGPU::AV_288RegClassID:
2995 case AMDGPU::AV_288_Align2RegClassID:
2996 case AMDGPU::VReg_288_Lo256_Align2RegClassID:
2997 return 288;
2998 case AMDGPU::SGPR_320RegClassID:
2999 case AMDGPU::SReg_320RegClassID:
3000 case AMDGPU::VReg_320RegClassID:
3001 case AMDGPU::AReg_320RegClassID:
3002 case AMDGPU::VReg_320_Align2RegClassID:
3003 case AMDGPU::AReg_320_Align2RegClassID:
3004 case AMDGPU::AV_320RegClassID:
3005 case AMDGPU::AV_320_Align2RegClassID:
3006 case AMDGPU::VReg_320_Lo256_Align2RegClassID:
3007 return 320;
3008 case AMDGPU::SGPR_352RegClassID:
3009 case AMDGPU::SReg_352RegClassID:
3010 case AMDGPU::VReg_352RegClassID:
3011 case AMDGPU::AReg_352RegClassID:
3012 case AMDGPU::VReg_352_Align2RegClassID:
3013 case AMDGPU::AReg_352_Align2RegClassID:
3014 case AMDGPU::AV_352RegClassID:
3015 case AMDGPU::AV_352_Align2RegClassID:
3016 case AMDGPU::VReg_352_Lo256_Align2RegClassID:
3017 return 352;
3018 case AMDGPU::SGPR_384RegClassID:
3019 case AMDGPU::SReg_384RegClassID:
3020 case AMDGPU::VReg_384RegClassID:
3021 case AMDGPU::AReg_384RegClassID:
3022 case AMDGPU::VReg_384_Align2RegClassID:
3023 case AMDGPU::AReg_384_Align2RegClassID:
3024 case AMDGPU::AV_384RegClassID:
3025 case AMDGPU::AV_384_Align2RegClassID:
3026 case AMDGPU::VReg_384_Lo256_Align2RegClassID:
3027 return 384;
3028 case AMDGPU::SGPR_512RegClassID:
3029 case AMDGPU::SReg_512RegClassID:
3030 case AMDGPU::VReg_512RegClassID:
3031 case AMDGPU::AReg_512RegClassID:
3032 case AMDGPU::VReg_512_Align2RegClassID:
3033 case AMDGPU::AReg_512_Align2RegClassID:
3034 case AMDGPU::AV_512RegClassID:
3035 case AMDGPU::AV_512_Align2RegClassID:
3036 case AMDGPU::VReg_512_Lo256_Align2RegClassID:
3037 return 512;
3038 case AMDGPU::SGPR_1024RegClassID:
3039 case AMDGPU::SReg_1024RegClassID:
3040 case AMDGPU::VReg_1024RegClassID:
3041 case AMDGPU::AReg_1024RegClassID:
3042 case AMDGPU::VReg_1024_Align2RegClassID:
3043 case AMDGPU::AReg_1024_Align2RegClassID:
3044 case AMDGPU::AV_1024RegClassID:
3045 case AMDGPU::AV_1024_Align2RegClassID:
3046 case AMDGPU::VReg_1024_Lo256_Align2RegClassID:
3047 return 1024;
3048 default:
3049 llvm_unreachable("Unexpected register class");
3050 }
3051}
3052
3053unsigned getRegBitWidth(const MCRegisterClass &RC) {
3054 return getRegBitWidth(RCID: RC.getID());
3055}
3056
3057bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
3058 if (isInlinableIntLiteral(Literal))
3059 return true;
3060
3061 uint64_t Val = static_cast<uint64_t>(Literal);
3062 return (Val == llvm::bit_cast<uint64_t>(from: 0.0)) ||
3063 (Val == llvm::bit_cast<uint64_t>(from: 1.0)) ||
3064 (Val == llvm::bit_cast<uint64_t>(from: -1.0)) ||
3065 (Val == llvm::bit_cast<uint64_t>(from: 0.5)) ||
3066 (Val == llvm::bit_cast<uint64_t>(from: -0.5)) ||
3067 (Val == llvm::bit_cast<uint64_t>(from: 2.0)) ||
3068 (Val == llvm::bit_cast<uint64_t>(from: -2.0)) ||
3069 (Val == llvm::bit_cast<uint64_t>(from: 4.0)) ||
3070 (Val == llvm::bit_cast<uint64_t>(from: -4.0)) ||
3071 (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
3072}
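// Example: bit_cast<uint64_t>(1.0) == 0x3FF0000000000000 is inlinable, but
// the nearby pattern 0x3FF0000000000001 is not -- only the exact encodings
// listed above (plus the small-integer range) qualify.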
3073
3074bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
3075 if (isInlinableIntLiteral(Literal))
3076 return true;
3077
3078 // The actual type of the operand does not seem to matter as long
3079 // as the bits match one of the inline immediate values. For example:
3080 //
3081 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
3082 // so it is a legal inline immediate.
3083 //
3084 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
3085 // floating-point, so it is a legal inline immediate.
3086
3087 uint32_t Val = static_cast<uint32_t>(Literal);
3088 return (Val == llvm::bit_cast<uint32_t>(from: 0.0f)) ||
3089 (Val == llvm::bit_cast<uint32_t>(from: 1.0f)) ||
3090 (Val == llvm::bit_cast<uint32_t>(from: -1.0f)) ||
3091 (Val == llvm::bit_cast<uint32_t>(from: 0.5f)) ||
3092 (Val == llvm::bit_cast<uint32_t>(from: -0.5f)) ||
3093 (Val == llvm::bit_cast<uint32_t>(from: 2.0f)) ||
3094 (Val == llvm::bit_cast<uint32_t>(from: -2.0f)) ||
3095 (Val == llvm::bit_cast<uint32_t>(from: 4.0f)) ||
3096 (Val == llvm::bit_cast<uint32_t>(from: -4.0f)) ||
3097 (Val == 0x3e22f983 && HasInv2Pi);
3098}
3099
3100bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
3101 if (!HasInv2Pi)
3102 return false;
3103 if (isInlinableIntLiteral(Literal))
3104 return true;
3105 uint16_t Val = static_cast<uint16_t>(Literal);
3106 return Val == 0x3F00 || // 0.5
3107 Val == 0xBF00 || // -0.5
3108 Val == 0x3F80 || // 1.0
3109 Val == 0xBF80 || // -1.0
3110 Val == 0x4000 || // 2.0
3111 Val == 0xC000 || // -2.0
3112 Val == 0x4080 || // 4.0
3113 Val == 0xC080 || // -4.0
3114 Val == 0x3E22; // 1.0 / (2.0 * pi)
3115}
3116
3117bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi) {
3118 return isInlinableLiteral32(Literal, HasInv2Pi);
3119}
3120
3121bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
3122 if (!HasInv2Pi)
3123 return false;
3124 if (isInlinableIntLiteral(Literal))
3125 return true;
3126 uint16_t Val = static_cast<uint16_t>(Literal);
3127 return Val == 0x3C00 || // 1.0
3128 Val == 0xBC00 || // -1.0
3129 Val == 0x3800 || // 0.5
3130 Val == 0xB800 || // -0.5
3131 Val == 0x4000 || // 2.0
3132 Val == 0xC000 || // -2.0
3133 Val == 0x4400 || // 4.0
3134 Val == 0xC400 || // -4.0
3135 Val == 0x3118; // 1/2pi
3136}
3137
3138std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) {
3139 // Unfortunately, the Instruction Set Architecture Reference Guide is
3140 // misleading about how the inline operands work for (packed) 16-bit
3141 // instructions. In a nutshell, the actual HW behavior is:
3142 //
3143 // - integer encodings (-16 .. 64) are always produced as sign-extended
3144 // 32-bit values
3145 // - float encodings are produced as:
3146 // - for F16 instructions: corresponding half-precision float values in
3147 // the LSBs, 0 in the MSBs
3148 // - for UI16 instructions: corresponding single-precision float value
3149 int32_t Signed = static_cast<int32_t>(Literal);
3150 if (Signed >= 0 && Signed <= 64)
3151 return 128 + Signed;
3152
3153 if (Signed >= -16 && Signed <= -1)
3154 return 192 + std::abs(x: Signed);
3155
3156 if (IsFloat) {
3157 // clang-format off
3158 switch (Literal) {
3159 case 0x3800: return 240; // 0.5
3160 case 0xB800: return 241; // -0.5
3161 case 0x3C00: return 242; // 1.0
3162 case 0xBC00: return 243; // -1.0
3163 case 0x4000: return 244; // 2.0
3164 case 0xC000: return 245; // -2.0
3165 case 0x4400: return 246; // 4.0
3166 case 0xC400: return 247; // -4.0
3167 case 0x3118: return 248; // 1.0 / (2.0 * pi)
3168 default: break;
3169 }
3170 // clang-format on
3171 } else {
3172 // clang-format off
3173 switch (Literal) {
3174 case 0x3F000000: return 240; // 0.5
3175 case 0xBF000000: return 241; // -0.5
3176 case 0x3F800000: return 242; // 1.0
3177 case 0xBF800000: return 243; // -1.0
3178 case 0x40000000: return 244; // 2.0
3179 case 0xC0000000: return 245; // -2.0
3180 case 0x40800000: return 246; // 4.0
3181 case 0xC0800000: return 247; // -4.0
3182 case 0x3E22F983: return 248; // 1.0 / (2.0 * pi)
3183 default: break;
3184 }
3185 // clang-format on
3186 }
3187
3188 return {};
3189}
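// Examples: the integer 64 maps to 128 + 64 == 192, -16 (0xFFFFFFF0) maps to
// 192 + 16 == 208, and with IsFloat the half-precision 1/(2*pi) pattern
// 0x3118 maps to 248.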
3190
3191// Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction
3192// or nullopt.
3193std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) {
3194 return getInlineEncodingV216(IsFloat: false, Literal);
3195}
3196
3197// Encoding of the literal as an inline constant for a V_PK_*_BF16 instruction
3198// or nullopt.
3199std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal) {
3200 int32_t Signed = static_cast<int32_t>(Literal);
3201 if (Signed >= 0 && Signed <= 64)
3202 return 128 + Signed;
3203
3204 if (Signed >= -16 && Signed <= -1)
3205 return 192 + std::abs(x: Signed);
3206
3207 // clang-format off
3208 switch (Literal) {
3209 case 0x3F00: return 240; // 0.5
3210 case 0xBF00: return 241; // -0.5
3211 case 0x3F80: return 242; // 1.0
3212 case 0xBF80: return 243; // -1.0
3213 case 0x4000: return 244; // 2.0
3214 case 0xC000: return 245; // -2.0
3215 case 0x4080: return 246; // 4.0
3216 case 0xC080: return 247; // -4.0
3217 case 0x3E22: return 248; // 1.0 / (2.0 * pi)
3218 default: break;
3219 }
3220 // clang-format on
3221
3222 return std::nullopt;
3223}
3224
3225// Encoding of the literal as an inline constant for a V_PK_*_F16 instruction
3226// or nullopt.
3227std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) {
3228 return getInlineEncodingV216(IsFloat: true, Literal);
3229}
3230
3231 // Encoding of the literal as an inline constant for the V_PK_FMAC_F16
3232 // instruction, or nullopt. This accounts for differing inline-constant behavior:
3233// - Pre-GFX11: fp16 inline constants have the value in low 16 bits, 0 in high
3234// - GFX11+: fp16 inline constants are duplicated into both halves
3235std::optional<unsigned> getPKFMACF16InlineEncoding(uint32_t Literal,
3236 bool IsGFX11Plus) {
3237 // Pre-GFX11 behavior: f16 in low bits, 0 in high bits
3238 if (!IsGFX11Plus)
3239 return getInlineEncodingV216(/*IsFloat=*/true, Literal);
3240
3241 // GFX11+ behavior: f16 duplicated in both halves
3242 // First, check for sign-extended integer inline constants (-16 to 64)
3243 // These work the same across all generations
3244 int32_t Signed = static_cast<int32_t>(Literal);
3245 if (Signed >= 0 && Signed <= 64)
3246 return 128 + Signed;
3247
3248 if (Signed >= -16 && Signed <= -1)
3249 return 192 + std::abs(x: Signed);
3250
3251 // For float inline constants on GFX11+, both halves must be equal
3252 uint16_t Lo = static_cast<uint16_t>(Literal);
3253 uint16_t Hi = static_cast<uint16_t>(Literal >> 16);
3254 if (Lo != Hi)
3255 return std::nullopt;
3256 return getInlineEncodingV216(/*IsFloat=*/true, Literal: Lo);
3257}
3258
3259// Whether the given literal can be inlined for a V_PK_* instruction.
3260bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) {
3261 switch (OpType) {
3262 case AMDGPU::OPERAND_REG_IMM_V2INT16:
3263 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    return getInlineEncodingV216(false, Literal).has_value();
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return getInlineEncodingV216(true, Literal).has_value();
3268 case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
3269 llvm_unreachable("OPERAND_REG_IMM_V2FP16_SPLAT is not supported");
3270 case AMDGPU::OPERAND_REG_IMM_V2BF16:
3271 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
3272 return isInlinableLiteralV2BF16(Literal);
3273 case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
3274 return false;
3275 default:
3276 llvm_unreachable("bad packed operand type");
3277 }
3278}
3279
3280// Whether the given literal can be inlined for a V_PK_*_IU16 instruction.
3281bool isInlinableLiteralV2I16(uint32_t Literal) {
3282 return getInlineEncodingV2I16(Literal).has_value();
3283}
3284
3285// Whether the given literal can be inlined for a V_PK_*_BF16 instruction.
3286bool isInlinableLiteralV2BF16(uint32_t Literal) {
3287 return getInlineEncodingV2BF16(Literal).has_value();
3288}
3289
3290// Whether the given literal can be inlined for a V_PK_*_F16 instruction.
3291bool isInlinableLiteralV2F16(uint32_t Literal) {
3292 return getInlineEncodingV2F16(Literal).has_value();
3293}
3294
// Whether the given literal can be inlined for a V_PK_FMAC_F16 instruction.
3296bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus) {
3297 return getPKFMACF16InlineEncoding(Literal, IsGFX11Plus).has_value();
3298}
3299
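// Whether \p Val can be encoded as a single 32-bit literal. For a 64-bit FP
// immediate only the high 32 bits are encoded, so the low 32 bits must be
// zero.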
3300bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
3301 if (IsFP64)
    return !Lo_32(Val);

  return isUInt<32>(Val) || isInt<32>(Val);
3305}
3306
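// \returns the 32 bits of \p Imm that are emitted as the literal for operand
// \p Type: the low 16 bits for 16-bit operands, the low 32 bits for 32-bit
// operands, and the high 32 bits for a 64-bit FP immediate unless \p IsLit
// forces the full value to be kept.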
3307int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit) {
3308 switch (Type) {
3309 default:
3310 break;
3311 case OPERAND_REG_IMM_BF16:
3312 case OPERAND_REG_IMM_FP16:
3313 case OPERAND_REG_INLINE_C_BF16:
3314 case OPERAND_REG_INLINE_C_FP16:
3315 return Imm & 0xffff;
3316 case OPERAND_INLINE_SPLIT_BARRIER_INT32:
3317 case OPERAND_REG_IMM_FP32:
3318 case OPERAND_REG_IMM_INT32:
3319 case OPERAND_REG_IMM_V2BF16:
3320 case OPERAND_REG_IMM_V2FP16:
3321 case OPERAND_REG_IMM_V2FP16_SPLAT:
3322 case OPERAND_REG_IMM_V2FP32:
3323 case OPERAND_REG_IMM_V2INT16:
3324 case OPERAND_REG_IMM_V2INT32:
3325 case OPERAND_REG_INLINE_AC_FP32:
3326 case OPERAND_REG_INLINE_AC_INT32:
3327 case OPERAND_REG_INLINE_C_FP32:
3328 case OPERAND_REG_INLINE_C_INT32:
    return Lo_32(Imm);
  case OPERAND_REG_IMM_FP64:
    return IsLit ? Imm : Hi_32(Imm);
3332 }
3333 return Imm;
3334}
3335
3336bool isArgPassedInSGPR(const Argument *A) {
3337 const Function *F = A->getParent();
3338
3339 // Arguments to compute shaders are never a source of divergence.
3340 CallingConv::ID CC = F->getCallingConv();
3341 switch (CC) {
3342 case CallingConv::AMDGPU_KERNEL:
3343 case CallingConv::SPIR_KERNEL:
3344 return true;
3345 case CallingConv::AMDGPU_VS:
3346 case CallingConv::AMDGPU_LS:
3347 case CallingConv::AMDGPU_HS:
3348 case CallingConv::AMDGPU_ES:
3349 case CallingConv::AMDGPU_GS:
3350 case CallingConv::AMDGPU_PS:
3351 case CallingConv::AMDGPU_CS:
3352 case CallingConv::AMDGPU_Gfx:
3353 case CallingConv::AMDGPU_CS_Chain:
3354 case CallingConv::AMDGPU_CS_ChainPreserve:
3355 // For non-compute shaders, SGPR inputs are marked with either inreg or
3356 // byval. Everything else is in VGPRs.
    return A->hasAttribute(Attribute::InReg) ||
           A->hasAttribute(Attribute::ByVal);
  default:
    // TODO: treat i1 as divergent?
    return A->hasAttribute(Attribute::InReg);
3362 }
3363}
3364
3365bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
3366 // Arguments to compute shaders are never a source of divergence.
3367 CallingConv::ID CC = CB->getCallingConv();
3368 switch (CC) {
3369 case CallingConv::AMDGPU_KERNEL:
3370 case CallingConv::SPIR_KERNEL:
3371 return true;
3372 case CallingConv::AMDGPU_VS:
3373 case CallingConv::AMDGPU_LS:
3374 case CallingConv::AMDGPU_HS:
3375 case CallingConv::AMDGPU_ES:
3376 case CallingConv::AMDGPU_GS:
3377 case CallingConv::AMDGPU_PS:
3378 case CallingConv::AMDGPU_CS:
3379 case CallingConv::AMDGPU_Gfx:
3380 case CallingConv::AMDGPU_CS_Chain:
3381 case CallingConv::AMDGPU_CS_ChainPreserve:
3382 // For non-compute shaders, SGPR inputs are marked with either inreg or
3383 // byval. Everything else is in VGPRs.
    return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
           CB->paramHasAttr(ArgNo, Attribute::ByVal);
  default:
    return CB->paramHasAttr(ArgNo, Attribute::InReg);
3388 }
3389}
3390
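// Whether the SMEM immediate offset is encoded in bytes rather than dwords.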
3391static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
  return isGCN3Encoding(ST) || isGFX10Plus(ST);
3393}
3394
3395bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
3396 int64_t EncodedOffset) {
  if (isGFX12Plus(ST))
    return isUInt<23>(EncodedOffset);

  return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
                               : isUInt<8>(EncodedOffset);
3402}
3403
3404bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
3405 int64_t EncodedOffset, bool IsBuffer) {
  if (isGFX12Plus(ST)) {
    if (IsBuffer && EncodedOffset < 0)
      return false;
    return isInt<24>(EncodedOffset);
  }

  return !IsBuffer && hasSMRDSignedImmOffset(ST) && isInt<21>(EncodedOffset);
3413}
3414
3415static bool isDwordAligned(uint64_t ByteOffset) {
3416 return (ByteOffset & 3) == 0;
3417}
3418
3419uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
3420 uint64_t ByteOffset) {
3421 if (hasSMEMByteOffset(ST))
3422 return ByteOffset;
3423
3424 assert(isDwordAligned(ByteOffset));
3425 return ByteOffset >> 2;
3426}
3427
3428std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
3429 int64_t ByteOffset, bool IsBuffer,
3430 bool HasSOffset) {
  // For unbuffered smem loads, it is illegal for the Immediate Offset to be
  // negative if the resulting (Offset + (M0 or SOffset or zero)) is negative.
  // Handle the case where SOffset is not present.
3434 if (!IsBuffer && !HasSOffset && ByteOffset < 0 && hasSMRDSignedImmOffset(ST))
3435 return std::nullopt;
3436
  if (isGFX12Plus(ST)) // 24-bit signed offsets
    return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
                                 : std::nullopt;

  // The signed version is always a byte offset.
  if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
    assert(hasSMEMByteOffset(ST));
    return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
                                 : std::nullopt;
3446 }
3447
3448 if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
3449 return std::nullopt;
3450
3451 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
3452 return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
3453 ? std::optional<int64_t>(EncodedOffset)
3454 : std::nullopt;
3455}
3456
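// \returns \p ByteOffset converted to the dword units of the CI-only 32-bit
// literal SMRD offset, or std::nullopt if the target is not CI, the offset
// is not dword-aligned, or it does not fit in 32 bits.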
3457std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
3458 int64_t ByteOffset) {
  if (!isCI(ST) || !isDwordAligned(ByteOffset))
    return std::nullopt;

  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
                                   : std::nullopt;
3465}
3466
3467unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) {
  if (AMDGPU::isGFX10(ST))
    return 12;

  if (AMDGPU::isGFX12(ST))
3472 return 24;
3473 return 13;
3474}
3475
3476namespace {
3477
3478struct SourceOfDivergence {
3479 unsigned Intr;
3480};
3481const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
3482
3483struct AlwaysUniform {
3484 unsigned Intr;
3485};
3486const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);
3487
3488#define GET_SourcesOfDivergence_IMPL
3489#define GET_UniformIntrinsics_IMPL
3490#define GET_Gfx9BufferFormat_IMPL
3491#define GET_Gfx10BufferFormat_IMPL
3492#define GET_Gfx11PlusBufferFormat_IMPL
3493
3494#include "AMDGPUGenSearchableTables.inc"
3495
3496} // end anonymous namespace
3497
3498bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
  return lookupSourceOfDivergence(IntrID);
}

bool isIntrinsicAlwaysUniform(unsigned IntrID) {
  return lookupAlwaysUniform(IntrID);
3504}
3505
3506const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
3507 uint8_t NumComponents,
3508 uint8_t NumFormat,
3509 const MCSubtargetInfo &STI) {
3510 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(
3511 BitsPerComp, NumComponents, NumFormat)
3512 : isGFX10(STI)
3513 ? getGfx10BufferFormatInfo(BitsPerComp, NumComponents, NumFormat)
3514 : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat);
3515}
3516
3517const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
3518 const MCSubtargetInfo &STI) {
3519 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
3520 : isGFX10(STI) ? getGfx10BufferFormatInfo(Format)
3521 : getGfx9BufferFormatInfo(Format);
3522}
3523
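// \returns the VGPR register class containing \p Reg, scanning from the
// narrowest (16-bit) to the widest (1024-bit) class, or nullptr if \p Reg is
// not a VGPR.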
3524const MCRegisterClass *getVGPRPhysRegClass(MCRegister Reg,
3525 const MCRegisterInfo &MRI) {
3526 const unsigned VGPRClasses[] = {
3527 AMDGPU::VGPR_16RegClassID, AMDGPU::VGPR_32RegClassID,
3528 AMDGPU::VReg_64RegClassID, AMDGPU::VReg_96RegClassID,
3529 AMDGPU::VReg_128RegClassID, AMDGPU::VReg_160RegClassID,
3530 AMDGPU::VReg_192RegClassID, AMDGPU::VReg_224RegClassID,
3531 AMDGPU::VReg_256RegClassID, AMDGPU::VReg_288RegClassID,
3532 AMDGPU::VReg_320RegClassID, AMDGPU::VReg_352RegClassID,
3533 AMDGPU::VReg_384RegClassID, AMDGPU::VReg_512RegClassID,
3534 AMDGPU::VReg_1024RegClassID};
3535
3536 for (unsigned RCID : VGPRClasses) {
    const MCRegisterClass &RC = MRI.getRegClass(RCID);
3538 if (RC.contains(Reg))
3539 return &RC;
3540 }
3541
3542 return nullptr;
3543}
3544
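// \returns the bits of \p Reg's hardware register index above its low 8 bits.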
3545unsigned getVGPREncodingMSBs(MCRegister Reg, const MCRegisterInfo &MRI) {
3546 unsigned Enc = MRI.getEncodingValue(Reg);
3547 unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK;
3548 return Idx >> 8;
3549}
3550
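// \returns the register from \p Reg's VGPR class whose index combines the
// low 8 index bits of \p Reg with \p MSBs as the high bits, or an invalid
// register if \p Reg is not a VGPR or its index already exceeds 8 bits.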
3551MCRegister getVGPRWithMSBs(MCRegister Reg, unsigned MSBs,
3552 const MCRegisterInfo &MRI) {
3553 unsigned Enc = MRI.getEncodingValue(Reg);
3554 unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK;
3555 if (Idx >= 0x100)
3556 return MCRegister();
3557
3558 const MCRegisterClass *RC = getVGPRPhysRegClass(Reg, MRI);
3559 if (!RC)
3560 return MCRegister();
3561
3562 Idx |= MSBs << 8;
3563 if (RC->getID() == AMDGPU::VGPR_16RegClassID) {
3564 // This class has 2048 registers with interleaved lo16 and hi16.
3565 Idx *= 2;
3566 if (Enc & AMDGPU::HWEncoding::IS_HI16)
3567 ++Idx;
3568 }
3569
  return RC->getRegister(Idx);
3571}
3572
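// \returns up to two 4-entry tables of the VGPR operand names (up to three
// inputs followed by the result) relevant for VGPR MSB lowering, with
// AMDGPU::OpName::NUM_OPERAND_NAMES marking unused slots. The second table
// is only non-null for VOPD instructions, which have separate X and Y
// components.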
3573std::pair<const AMDGPU::OpName *, const AMDGPU::OpName *>
3574getVGPRLoweringOperandTables(const MCInstrDesc &Desc) {
3575 static const AMDGPU::OpName VOPOps[4] = {
3576 AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2,
3577 AMDGPU::OpName::vdst};
3578 static const AMDGPU::OpName VDSOps[4] = {
3579 AMDGPU::OpName::addr, AMDGPU::OpName::data0, AMDGPU::OpName::data1,
3580 AMDGPU::OpName::vdst};
3581 static const AMDGPU::OpName FLATOps[4] = {
3582 AMDGPU::OpName::vaddr, AMDGPU::OpName::vdata,
3583 AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdst};
3584 static const AMDGPU::OpName BUFOps[4] = {
3585 AMDGPU::OpName::vaddr, AMDGPU::OpName::NUM_OPERAND_NAMES,
3586 AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdata};
3587 static const AMDGPU::OpName VIMGOps[4] = {
3588 AMDGPU::OpName::vaddr0, AMDGPU::OpName::vaddr1, AMDGPU::OpName::vaddr2,
3589 AMDGPU::OpName::vdata};
3590
  // For VOPD instructions, the MSB of a Y component operand's VGPR address
  // must match that of the corresponding X operand; otherwise the two
  // components shall not be combined into a VOPD instruction.
3594 static const AMDGPU::OpName VOPDOpsX[4] = {
3595 AMDGPU::OpName::src0X, AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vsrc2X,
3596 AMDGPU::OpName::vdstX};
3597 static const AMDGPU::OpName VOPDOpsY[4] = {
3598 AMDGPU::OpName::src0Y, AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vsrc2Y,
3599 AMDGPU::OpName::vdstY};
3600
  // VOP2 MADMK instructions use the src0, imm, src1 operand order.
3602 static const AMDGPU::OpName VOP2MADMKOps[4] = {
3603 AMDGPU::OpName::src0, AMDGPU::OpName::NUM_OPERAND_NAMES,
3604 AMDGPU::OpName::src1, AMDGPU::OpName::vdst};
3605 static const AMDGPU::OpName VOPDFMAMKOpsX[4] = {
3606 AMDGPU::OpName::src0X, AMDGPU::OpName::NUM_OPERAND_NAMES,
3607 AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vdstX};
3608 static const AMDGPU::OpName VOPDFMAMKOpsY[4] = {
3609 AMDGPU::OpName::src0Y, AMDGPU::OpName::NUM_OPERAND_NAMES,
3610 AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vdstY};
3611
3612 unsigned TSFlags = Desc.TSFlags;
3613
3614 if (TSFlags &
3615 (SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | SIInstrFlags::VOP3 |
3616 SIInstrFlags::VOP3P | SIInstrFlags::VOPC | SIInstrFlags::DPP)) {
3617 switch (Desc.getOpcode()) {
3618 // LD_SCALE operands ignore MSB.
3619 case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32:
3620 case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32_gfx1250:
3621 case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64:
3622 case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64_gfx1250:
3623 return {};
3624 case AMDGPU::V_FMAMK_F16:
3625 case AMDGPU::V_FMAMK_F16_t16:
3626 case AMDGPU::V_FMAMK_F16_t16_gfx12:
3627 case AMDGPU::V_FMAMK_F16_fake16:
3628 case AMDGPU::V_FMAMK_F16_fake16_gfx12:
3629 case AMDGPU::V_FMAMK_F32:
3630 case AMDGPU::V_FMAMK_F32_gfx12:
3631 case AMDGPU::V_FMAMK_F64:
3632 case AMDGPU::V_FMAMK_F64_gfx1250:
3633 return {VOP2MADMKOps, nullptr};
3634 default:
3635 break;
3636 }
3637 return {VOPOps, nullptr};
3638 }
3639
3640 if (TSFlags & SIInstrFlags::DS)
3641 return {VDSOps, nullptr};
3642
3643 if (TSFlags & SIInstrFlags::FLAT)
3644 return {FLATOps, nullptr};
3645
3646 if (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))
3647 return {BUFOps, nullptr};
3648
3649 if (TSFlags & SIInstrFlags::VIMAGE)
3650 return {VIMGOps, nullptr};
3651
  if (AMDGPU::isVOPD(Desc.getOpcode())) {
    auto [OpX, OpY] = getVOPDComponents(Desc.getOpcode());
3654 return {(OpX == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsX : VOPDOpsX,
3655 (OpY == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsY : VOPDOpsY};
3656 }
3657
3658 assert(!(TSFlags & SIInstrFlags::MIMG));
3659
3660 if (TSFlags & (SIInstrFlags::VSAMPLE | SIInstrFlags::EXP))
3661 llvm_unreachable("Sample and export VGPR lowering is not implemented and"
3662 " these instructions are not expected on gfx1250");
3663
3664 return {};
3665}
3666
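// Whether \p Opcode supports a scaled offset: true for non-buffer SMEM, for
// scratch FLAT instructions with a VGPR address, and for global FLAT
// instructions with both VGPR and SGPR addresses.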
3667bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode) {
3668 uint64_t TSFlags = MII.get(Opcode).TSFlags;
3669
3670 if (TSFlags & SIInstrFlags::SMRD)
    return !getSMEMIsBuffer(Opcode);
3672 if (!(TSFlags & SIInstrFlags::FLAT))
3673 return false;
3674
3675 // Only SV and SVS modes are supported.
3676 if (TSFlags & SIInstrFlags::FlatScratch)
    return hasNamedOperand(Opcode, OpName::vaddr);
3678
3679 // Only GVS mode is supported.
  return hasNamedOperand(Opcode, OpName::vaddr) &&
         hasNamedOperand(Opcode, OpName::saddr);
3684}
3685
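// Whether any of the vdst/src0/src1/src2 operands of \p OpDesc uses a 64-bit
// VGPR register class.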
3686bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc, const MCInstrInfo &MII,
3687 const MCSubtargetInfo &ST) {
3688 for (auto OpName : {OpName::vdst, OpName::src0, OpName::src1, OpName::src2}) {
    int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
3690 if (Idx == -1)
3691 continue;
3692
3693 const MCOperandInfo &OpInfo = OpDesc.operands()[Idx];
    int16_t RegClass = MII.getOpRegClassID(
        OpInfo, ST.getHwMode(MCSubtargetInfo::HwMode_RegInfo));
3696 if (RegClass == AMDGPU::VReg_64RegClassID ||
3697 RegClass == AMDGPU::VReg_64_Align2RegClassID)
3698 return true;
3699 }
3700
3701 return false;
3702}
3703
3704bool isDPALU_DPP32BitOpc(unsigned Opc) {
3705 switch (Opc) {
3706 case AMDGPU::V_MUL_LO_U32_e64:
3707 case AMDGPU::V_MUL_LO_U32_e64_dpp:
3708 case AMDGPU::V_MUL_LO_U32_e64_dpp_gfx1250:
3709 case AMDGPU::V_MUL_HI_U32_e64:
3710 case AMDGPU::V_MUL_HI_U32_e64_dpp:
3711 case AMDGPU::V_MUL_HI_U32_e64_dpp_gfx1250:
3712 case AMDGPU::V_MUL_HI_I32_e64:
3713 case AMDGPU::V_MUL_HI_I32_e64_dpp:
3714 case AMDGPU::V_MUL_HI_I32_e64_dpp_gfx1250:
3715 case AMDGPU::V_MAD_U32_e64:
3716 case AMDGPU::V_MAD_U32_e64_dpp:
3717 case AMDGPU::V_MAD_U32_e64_dpp_gfx1250:
3718 return true;
3719 default:
3720 return false;
3721 }
3722}
3723
3724bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII,
3725 const MCSubtargetInfo &ST) {
  if (!ST.hasFeature(AMDGPU::FeatureDPALU_DPP))
    return false;

  if (isDPALU_DPP32BitOpc(OpDesc.getOpcode()))
    return ST.hasFeature(AMDGPU::FeatureGFX1250Insts);
3731
3732 return hasAny64BitVGPROperands(OpDesc, MII, ST);
3733}
3734
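// \returns the LDS granularity in dwords implied by the addressable local
// memory size, kept in sync with getAddressableLocalMemorySize.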
3735unsigned getLdsDwGranularity(const MCSubtargetInfo &ST) {
  if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize32768))
    return 64;
  if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize65536))
    return 128;
  if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize163840))
    return 320;
  if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize327680))
    return 512;
3743 return 512;
3744 return 64; // In sync with getAddressableLocalMemorySize
3745}
3746
3747bool isPackedFP32Inst(unsigned Opc) {
3748 switch (Opc) {
3749 case AMDGPU::V_PK_ADD_F32:
3750 case AMDGPU::V_PK_ADD_F32_gfx12:
3751 case AMDGPU::V_PK_MUL_F32:
3752 case AMDGPU::V_PK_MUL_F32_gfx12:
3753 case AMDGPU::V_PK_FMA_F32:
3754 case AMDGPU::V_PK_FMA_F32_gfx12:
3755 return true;
3756 default:
3757 return false;
3758 }
3759}
3760
3761const std::array<unsigned, 3> &ClusterDimsAttr::getDims() const {
3762 assert(isFixedDims() && "expect kind to be FixedDims");
3763 return Dims;
3764}
3765
3766std::string ClusterDimsAttr::to_string() const {
3767 SmallString<10> Buffer;
3768 raw_svector_ostream OS(Buffer);
3769
3770 switch (getKind()) {
3771 case Kind::Unknown:
3772 return "";
3773 case Kind::NoCluster: {
3774 OS << EncoNoCluster << ',' << EncoNoCluster << ',' << EncoNoCluster;
3775 return Buffer.c_str();
3776 }
3777 case Kind::VariableDims: {
3778 OS << EncoVariableDims << ',' << EncoVariableDims << ','
3779 << EncoVariableDims;
3780 return Buffer.c_str();
3781 }
3782 case Kind::FixedDims: {
3783 OS << Dims[0] << ',' << Dims[1] << ',' << Dims[2];
3784 return Buffer.c_str();
3785 }
3786 }
3787 llvm_unreachable("Unknown ClusterDimsAttr kind");
3788}
3789
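// Builds the attribute from the "amdgpu-cluster-dims" function attribute: a
// missing attribute yields Unknown, all-EncoNoCluster yields NoCluster,
// all-EncoVariableDims yields VariableDims, and anything else is a fixed
// 3-dimensional cluster shape.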
3790ClusterDimsAttr ClusterDimsAttr::get(const Function &F) {
3791 std::optional<SmallVector<unsigned>> Attr =
      getIntegerVecAttribute(F, "amdgpu-cluster-dims", /*Size=*/3);
3793 ClusterDimsAttr::Kind AttrKind = Kind::FixedDims;
3794
3795 if (!Attr.has_value())
3796 AttrKind = Kind::Unknown;
  else if (all_of(*Attr, equal_to(EncoNoCluster)))
    AttrKind = Kind::NoCluster;
  else if (all_of(*Attr, equal_to(EncoVariableDims)))
3800 AttrKind = Kind::VariableDims;
3801
3802 ClusterDimsAttr A(AttrKind);
3803 if (AttrKind == Kind::FixedDims)
3804 A.Dims = {(*Attr)[0], (*Attr)[1], (*Attr)[2]};
3805
3806 return A;
3807}
3808
3809} // namespace AMDGPU
3810
3811raw_ostream &operator<<(raw_ostream &OS,
3812 const AMDGPU::IsaInfo::TargetIDSetting S) {
3813 switch (S) {
3814 case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported):
3815 OS << "Unsupported";
3816 break;
3817 case (AMDGPU::IsaInfo::TargetIDSetting::Any):
3818 OS << "Any";
3819 break;
3820 case (AMDGPU::IsaInfo::TargetIDSetting::Off):
3821 OS << "Off";
3822 break;
3823 case (AMDGPU::IsaInfo::TargetIDSetting::On):
3824 OS << "On";
3825 break;
3826 }
3827 return OS;
3828}
3829
3830} // namespace llvm
3831