//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "AMDGPUAsmUtils.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/TargetParser/TargetParser.h"
#include <optional>

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion(
    "amdhsa-code-object-version", llvm::cl::Hidden,
    llvm::cl::init(llvm::AMDGPU::AMDHSA_COV6),
    llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
                   "or asm directive still take priority if present)"));

42namespace {
43
/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  unsigned Mask = getBitMask(Shift, Width);
  return ((Src << Shift) & Mask) | (Dst & ~Mask);
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}
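// A quick worked example of the helpers above (illustrative only): with
// Shift = 4 and Width = 3, getBitMask() yields 0x70; packBits(0b101, 0, 4, 3)
// places the value into bits [6:4] and returns 0x50; unpackBits(0x50, 4, 3)
// recovers 0b101.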
63
/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 10 : 0;
}

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 6 : 4;
}

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 0 : 4;
}

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 4 : 8;
}

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 4;
}

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
}
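// Putting the shifts and widths above together (derived from these helpers,
// for illustration): the legacy pre-GFX11 s_waitcnt layout is vmcnt[3:0],
// expcnt[6:4] and lgkmcnt[11:8] (lgkmcnt widens to [13:8] on GFX10, and
// GFX9/GFX10 carry the extra vmcnt bits in [15:14]); on GFX11 the fields move
// to expcnt[2:0], lgkmcnt[9:4] and vmcnt[15:10].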
99
100/// \returns Loadcnt bit width
101unsigned getLoadcntBitWidth(unsigned VersionMajor) {
102 return VersionMajor >= 12 ? 6 : 0;
103}
104
105/// \returns Samplecnt bit width.
106unsigned getSamplecntBitWidth(unsigned VersionMajor) {
107 return VersionMajor >= 12 ? 6 : 0;
108}
109
110/// \returns Bvhcnt bit width.
111unsigned getBvhcntBitWidth(unsigned VersionMajor) {
112 return VersionMajor >= 12 ? 3 : 0;
113}
114
115/// \returns Dscnt bit width.
116unsigned getDscntBitWidth(unsigned VersionMajor) {
117 return VersionMajor >= 12 ? 6 : 0;
118}
119
120/// \returns Dscnt bit shift in combined S_WAIT instructions.
121unsigned getDscntBitShift(unsigned VersionMajor) { return 0; }
122
123/// \returns Storecnt or Vscnt bit width, depending on VersionMajor.
124unsigned getStorecntBitWidth(unsigned VersionMajor) {
125 return VersionMajor >= 10 ? 6 : 0;
126}
127
128/// \returns Kmcnt bit width.
129unsigned getKmcntBitWidth(unsigned VersionMajor) {
130 return VersionMajor >= 12 ? 5 : 0;
131}
132
133/// \returns Xcnt bit width.
134unsigned getXcntBitWidth(unsigned VersionMajor, unsigned VersionMinor) {
135 return VersionMajor == 12 && VersionMinor == 5 ? 6 : 0;
136}
137
138/// \returns shift for Loadcnt/Storecnt in combined S_WAIT instructions.
139unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
140 return VersionMajor >= 12 ? 8 : 0;
141}
142
143/// \returns VaSdst bit width
144inline unsigned getVaSdstBitWidth() { return 3; }
145
146/// \returns VaSdst bit shift
147inline unsigned getVaSdstBitShift() { return 9; }
148
149/// \returns VmVsrc bit width
150inline unsigned getVmVsrcBitWidth() { return 3; }
151
152/// \returns VmVsrc bit shift
153inline unsigned getVmVsrcBitShift() { return 2; }
154
155/// \returns VaVdst bit width
156inline unsigned getVaVdstBitWidth() { return 4; }
157
158/// \returns VaVdst bit shift
159inline unsigned getVaVdstBitShift() { return 12; }
160
161/// \returns VaVcc bit width
162inline unsigned getVaVccBitWidth() { return 1; }
163
164/// \returns VaVcc bit shift
165inline unsigned getVaVccBitShift() { return 1; }
166
167/// \returns SaSdst bit width
168inline unsigned getSaSdstBitWidth() { return 1; }
169
170/// \returns SaSdst bit shift
171inline unsigned getSaSdstBitShift() { return 0; }
172
173/// \returns VaSsrc width
174inline unsigned getVaSsrcBitWidth() { return 1; }
175
176/// \returns VaSsrc bit shift
177inline unsigned getVaSsrcBitShift() { return 8; }
178
/// \returns HoldCnt bit width
inline unsigned getHoldCntWidth(unsigned VersionMajor, unsigned VersionMinor) {
  static constexpr const unsigned MinMajor = 10;
  static constexpr const unsigned MinMinor = 3;
  return std::tie(VersionMajor, VersionMinor) >= std::tie(MinMajor, MinMinor)
             ? 1
             : 0;
}

/// \returns HoldCnt bit shift
inline unsigned getHoldCntBitShift() { return 7; }
190
191} // end anonymous namespace
192
193namespace llvm {
194
195namespace AMDGPU {
196
/// \returns true if the target supports signed immediate offset for SMRD
/// instructions.
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
  return isGFX9Plus(ST);
}

/// \returns True if \p STI is AMDHSA.
bool isHsaAbi(const MCSubtargetInfo &STI) {
  return STI.getTargetTriple().getOS() == Triple::AMDHSA;
}

unsigned getAMDHSACodeObjectVersion(const Module &M) {
  if (auto *Ver = mdconst::extract_or_null<ConstantInt>(
          M.getModuleFlag("amdhsa_code_object_version"))) {
    return (unsigned)Ver->getZExtValue() / 100;
  }

  return getDefaultAMDHSACodeObjectVersion();
}

unsigned getDefaultAMDHSACodeObjectVersion() {
  return DefaultAMDHSACodeObjectVersion;
}
220
221unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion) {
222 switch (ABIVersion) {
223 case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
224 return 4;
225 case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
226 return 5;
227 case ELF::ELFABIVERSION_AMDGPU_HSA_V6:
228 return 6;
229 default:
230 return getDefaultAMDHSACodeObjectVersion();
231 }
232}
233
234uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) {
235 if (T.getOS() != Triple::AMDHSA)
236 return 0;
237
238 switch (CodeObjectVersion) {
239 case 4:
240 return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
241 case 5:
242 return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
243 case 6:
244 return ELF::ELFABIVERSION_AMDGPU_HSA_V6;
245 default:
246 report_fatal_error(reason: "Unsupported AMDHSA Code Object Version " +
247 Twine(CodeObjectVersion));
248 }
249}
250
251unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
252 switch (CodeObjectVersion) {
253 case AMDHSA_COV4:
254 return 48;
255 case AMDHSA_COV5:
256 case AMDHSA_COV6:
257 default:
258 return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
259 }
260}
261
262// FIXME: All such magic numbers about the ABI should be in a
263// central TD file.
264unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
265 switch (CodeObjectVersion) {
266 case AMDHSA_COV4:
267 return 24;
268 case AMDHSA_COV5:
269 case AMDHSA_COV6:
270 default:
271 return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
272 }
273}
274
275unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
276 switch (CodeObjectVersion) {
277 case AMDHSA_COV4:
278 return 32;
279 case AMDHSA_COV5:
280 case AMDHSA_COV6:
281 default:
282 return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET;
283 }
284}
285
286unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
287 switch (CodeObjectVersion) {
288 case AMDHSA_COV4:
289 return 40;
290 case AMDHSA_COV5:
291 case AMDHSA_COV6:
292 default:
293 return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET;
294 }
295}
296
297#define GET_MIMGBaseOpcodesTable_IMPL
298#define GET_MIMGDimInfoTable_IMPL
299#define GET_MIMGInfoTable_IMPL
300#define GET_MIMGLZMappingTable_IMPL
301#define GET_MIMGMIPMappingTable_IMPL
302#define GET_MIMGBiasMappingTable_IMPL
303#define GET_MIMGOffsetMappingTable_IMPL
304#define GET_MIMGG16MappingTable_IMPL
305#define GET_MAIInstInfoTable_IMPL
306#define GET_WMMAInstInfoTable_IMPL
307#include "AMDGPUGenSearchableTables.inc"
308
309int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
310 unsigned VDataDwords, unsigned VAddrDwords) {
311 const MIMGInfo *Info =
312 getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding, VDataDwords, VAddrDwords);
313 return Info ? Info->Opcode : -1;
314}
315
316const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
317 const MIMGInfo *Info = getMIMGInfo(Opcode: Opc);
318 return Info ? getMIMGBaseOpcodeInfo(BaseOpcode: Info->BaseOpcode) : nullptr;
319}
320
321int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
322 const MIMGInfo *OrigInfo = getMIMGInfo(Opcode: Opc);
323 const MIMGInfo *NewInfo =
324 getMIMGOpcodeHelper(BaseOpcode: OrigInfo->BaseOpcode, MIMGEncoding: OrigInfo->MIMGEncoding,
325 VDataDwords: NewChannels, VAddrDwords: OrigInfo->VAddrDwords);
326 return NewInfo ? NewInfo->Opcode : -1;
327}
328
329unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
330 const MIMGDimInfo *Dim, bool IsA16,
331 bool IsG16Supported) {
332 unsigned AddrWords = BaseOpcode->NumExtraArgs;
333 unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
334 (BaseOpcode->LodOrClampOrMip ? 1 : 0);
335 if (IsA16)
336 AddrWords += divideCeil(Numerator: AddrComponents, Denominator: 2);
337 else
338 AddrWords += AddrComponents;
339
340 // Note: For subtargets that support A16 but not G16, enabling A16 also
341 // enables 16 bit gradients.
342 // For subtargets that support A16 (operand) and G16 (done with a different
343 // instruction encoding), they are independent.
344
345 if (BaseOpcode->Gradients) {
346 if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
347 // There are two gradients per coordinate, we pack them separately.
348 // For the 3d case,
349 // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
350 AddrWords += alignTo<2>(Value: Dim->NumGradients / 2);
351 else
352 AddrWords += Dim->NumGradients;
353 }
354 return AddrWords;
355}
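// Illustrative example (numbers are hypothetical, assuming a 2D gradient
// sample with no extra args): 2 coordinates plus two gradients per coordinate
// give 2 + 4 = 6 address words; with packed 16-bit operands this shrinks to
// divideCeil(2, 2) + alignTo<2>(4 / 2) = 1 + 2 = 3 words.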
356
357struct MUBUFInfo {
358 uint16_t Opcode;
359 uint16_t BaseOpcode;
360 uint8_t elements;
361 bool has_vaddr;
362 bool has_srsrc;
363 bool has_soffset;
364 bool IsBufferInv;
365 bool tfe;
366};
367
368struct MTBUFInfo {
369 uint16_t Opcode;
370 uint16_t BaseOpcode;
371 uint8_t elements;
372 bool has_vaddr;
373 bool has_srsrc;
374 bool has_soffset;
375};
376
377struct SMInfo {
378 uint16_t Opcode;
379 bool IsBuffer;
380};
381
382struct VOPInfo {
383 uint16_t Opcode;
384 bool IsSingle;
385};
386
387struct VOPC64DPPInfo {
388 uint16_t Opcode;
389};
390
391struct VOPCDPPAsmOnlyInfo {
392 uint16_t Opcode;
393};
394
395struct VOP3CDPPAsmOnlyInfo {
396 uint16_t Opcode;
397};
398
399struct VOPDComponentInfo {
400 uint16_t BaseVOP;
401 uint16_t VOPDOp;
402 bool CanBeVOPDX;
403 bool CanBeVOPD3X;
404};
405
406struct VOPDInfo {
407 uint16_t Opcode;
408 uint16_t OpX;
409 uint16_t OpY;
410 uint16_t Subtarget;
411 bool VOPD3;
412};
413
414struct VOPTrue16Info {
415 uint16_t Opcode;
416 bool IsTrue16;
417};
418
419#define GET_FP4FP8DstByteSelTable_DECL
420#define GET_FP4FP8DstByteSelTable_IMPL
421
422struct DPMACCInstructionInfo {
423 uint16_t Opcode;
424 bool IsDPMACCInstruction;
425};
426
427struct FP4FP8DstByteSelInfo {
428 uint16_t Opcode;
429 bool HasFP8DstByteSel;
430 bool HasFP4DstByteSel;
431};
432
433#define GET_DPMACCInstructionTable_DECL
434#define GET_DPMACCInstructionTable_IMPL
435#define GET_MTBUFInfoTable_DECL
436#define GET_MTBUFInfoTable_IMPL
437#define GET_MUBUFInfoTable_DECL
438#define GET_MUBUFInfoTable_IMPL
439#define GET_SMInfoTable_DECL
440#define GET_SMInfoTable_IMPL
441#define GET_VOP1InfoTable_DECL
442#define GET_VOP1InfoTable_IMPL
443#define GET_VOP2InfoTable_DECL
444#define GET_VOP2InfoTable_IMPL
445#define GET_VOP3InfoTable_DECL
446#define GET_VOP3InfoTable_IMPL
447#define GET_VOPC64DPPTable_DECL
448#define GET_VOPC64DPPTable_IMPL
449#define GET_VOPC64DPP8Table_DECL
450#define GET_VOPC64DPP8Table_IMPL
451#define GET_VOPCAsmOnlyInfoTable_DECL
452#define GET_VOPCAsmOnlyInfoTable_IMPL
453#define GET_VOP3CAsmOnlyInfoTable_DECL
454#define GET_VOP3CAsmOnlyInfoTable_IMPL
455#define GET_VOPDComponentTable_DECL
456#define GET_VOPDComponentTable_IMPL
457#define GET_VOPDPairs_DECL
458#define GET_VOPDPairs_IMPL
459#define GET_VOPTrue16Table_DECL
460#define GET_VOPTrue16Table_IMPL
461#define GET_True16D16Table_IMPL
462#define GET_WMMAOpcode2AddrMappingTable_DECL
463#define GET_WMMAOpcode2AddrMappingTable_IMPL
464#define GET_WMMAOpcode3AddrMappingTable_DECL
465#define GET_WMMAOpcode3AddrMappingTable_IMPL
466#define GET_getMFMA_F8F6F4_WithSize_DECL
467#define GET_getMFMA_F8F6F4_WithSize_IMPL
468#define GET_isMFMA_F8F6F4Table_IMPL
469#define GET_isCvtScaleF32_F32F16ToF8F4Table_IMPL
470
471#include "AMDGPUGenSearchableTables.inc"
472
473int getMTBUFBaseOpcode(unsigned Opc) {
474 const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opcode: Opc);
475 return Info ? Info->BaseOpcode : -1;
476}
477
478int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
479 const MTBUFInfo *Info =
480 getMTBUFInfoFromBaseOpcodeAndElements(BaseOpcode: BaseOpc, elements: Elements);
481 return Info ? Info->Opcode : -1;
482}
483
484int getMTBUFElements(unsigned Opc) {
485 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opcode: Opc);
486 return Info ? Info->elements : 0;
487}
488
489bool getMTBUFHasVAddr(unsigned Opc) {
490 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opcode: Opc);
491 return Info && Info->has_vaddr;
492}
493
494bool getMTBUFHasSrsrc(unsigned Opc) {
495 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opcode: Opc);
496 return Info && Info->has_srsrc;
497}
498
499bool getMTBUFHasSoffset(unsigned Opc) {
500 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opcode: Opc);
501 return Info && Info->has_soffset;
502}
503
504int getMUBUFBaseOpcode(unsigned Opc) {
505 const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opcode: Opc);
506 return Info ? Info->BaseOpcode : -1;
507}
508
509int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
510 const MUBUFInfo *Info =
511 getMUBUFInfoFromBaseOpcodeAndElements(BaseOpcode: BaseOpc, elements: Elements);
512 return Info ? Info->Opcode : -1;
513}
514
515int getMUBUFElements(unsigned Opc) {
516 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opcode: Opc);
517 return Info ? Info->elements : 0;
518}
519
520bool getMUBUFHasVAddr(unsigned Opc) {
521 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opcode: Opc);
522 return Info && Info->has_vaddr;
523}
524
525bool getMUBUFHasSrsrc(unsigned Opc) {
526 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opcode: Opc);
527 return Info && Info->has_srsrc;
528}
529
530bool getMUBUFHasSoffset(unsigned Opc) {
531 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opcode: Opc);
532 return Info && Info->has_soffset;
533}
534
535bool getMUBUFIsBufferInv(unsigned Opc) {
536 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opcode: Opc);
537 return Info && Info->IsBufferInv;
538}
539
540bool getMUBUFTfe(unsigned Opc) {
541 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opcode: Opc);
542 return Info && Info->tfe;
543}
544
545bool getSMEMIsBuffer(unsigned Opc) {
546 const SMInfo *Info = getSMEMOpcodeHelper(Opcode: Opc);
547 return Info && Info->IsBuffer;
548}
549
550bool getVOP1IsSingle(unsigned Opc) {
551 const VOPInfo *Info = getVOP1OpcodeHelper(Opcode: Opc);
552 return !Info || Info->IsSingle;
553}
554
555bool getVOP2IsSingle(unsigned Opc) {
556 const VOPInfo *Info = getVOP2OpcodeHelper(Opcode: Opc);
557 return !Info || Info->IsSingle;
558}
559
560bool getVOP3IsSingle(unsigned Opc) {
561 const VOPInfo *Info = getVOP3OpcodeHelper(Opcode: Opc);
562 return !Info || Info->IsSingle;
563}
564
565bool isVOPC64DPP(unsigned Opc) {
566 return isVOPC64DPPOpcodeHelper(Opcode: Opc) || isVOPC64DPP8OpcodeHelper(Opcode: Opc);
567}
568
569bool isVOPCAsmOnly(unsigned Opc) { return isVOPCAsmOnlyOpcodeHelper(Opcode: Opc); }
570
571bool getMAIIsDGEMM(unsigned Opc) {
572 const MAIInstInfo *Info = getMAIInstInfoHelper(Opcode: Opc);
573 return Info && Info->is_dgemm;
574}
575
576bool getMAIIsGFX940XDL(unsigned Opc) {
577 const MAIInstInfo *Info = getMAIInstInfoHelper(Opcode: Opc);
578 return Info && Info->is_gfx940_xdl;
579}
580
581bool getWMMAIsXDL(unsigned Opc) {
582 const WMMAInstInfo *Info = getWMMAInstInfoHelper(Opcode: Opc);
583 return Info ? Info->is_wmma_xdl : false;
584}
585
586uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal) {
587 switch (EncodingVal) {
588 case MFMAScaleFormats::FP6_E2M3:
589 case MFMAScaleFormats::FP6_E3M2:
590 return 6;
591 case MFMAScaleFormats::FP4_E2M1:
592 return 4;
593 case MFMAScaleFormats::FP8_E4M3:
594 case MFMAScaleFormats::FP8_E5M2:
595 default:
596 return 8;
597 }
598
599 llvm_unreachable("covered switch over mfma scale formats");
600}
601
602const MFMA_F8F6F4_Info *getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ,
603 unsigned BLGP,
604 unsigned F8F8Opcode) {
605 uint8_t SrcANumRegs = mfmaScaleF8F6F4FormatToNumRegs(EncodingVal: CBSZ);
606 uint8_t SrcBNumRegs = mfmaScaleF8F6F4FormatToNumRegs(EncodingVal: BLGP);
607 return getMFMA_F8F6F4_InstWithNumRegs(NumRegsSrcA: SrcANumRegs, NumRegsSrcB: SrcBNumRegs, F8F8Opcode);
608}
609
610uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt) {
611 switch (Fmt) {
612 case WMMA::MATRIX_FMT_FP8:
613 case WMMA::MATRIX_FMT_BF8:
614 return 16;
615 case WMMA::MATRIX_FMT_FP6:
616 case WMMA::MATRIX_FMT_BF6:
617 return 12;
618 case WMMA::MATRIX_FMT_FP4:
619 return 8;
620 }
621
622 llvm_unreachable("covered switch over wmma scale formats");
623}
624
625const MFMA_F8F6F4_Info *getWMMA_F8F6F4_WithFormatArgs(unsigned FmtA,
626 unsigned FmtB,
627 unsigned F8F8Opcode) {
628 uint8_t SrcANumRegs = wmmaScaleF8F6F4FormatToNumRegs(Fmt: FmtA);
629 uint8_t SrcBNumRegs = wmmaScaleF8F6F4FormatToNumRegs(Fmt: FmtB);
630 return getMFMA_F8F6F4_InstWithNumRegs(NumRegsSrcA: SrcANumRegs, NumRegsSrcB: SrcBNumRegs, F8F8Opcode);
631}
632
633unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) {
634 if (ST.hasFeature(Feature: AMDGPU::FeatureGFX1250Insts))
635 return SIEncodingFamily::GFX1250;
636 if (ST.hasFeature(Feature: AMDGPU::FeatureGFX12Insts))
637 return SIEncodingFamily::GFX12;
638 if (ST.hasFeature(Feature: AMDGPU::FeatureGFX11Insts))
639 return SIEncodingFamily::GFX11;
640 llvm_unreachable("Subtarget generation does not support VOPD!");
641}
642
643CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3) {
644 bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
645 Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
646 const VOPDComponentInfo *Info = getVOPDComponentHelper(BaseVOP: Opc);
647 if (Info) {
648 // Check that Opc can be used as VOPDY for this encoding. V_MOV_B32 as a
649 // VOPDX is just a placeholder here, it is supported on all encodings.
650 // TODO: This can be optimized by creating tables of supported VOPDY
651 // opcodes per encoding.
652 unsigned VOPDMov = AMDGPU::getVOPDOpcode(Opc: AMDGPU::V_MOV_B32_e32, VOPD3);
653 bool CanBeVOPDY = getVOPDFull(OpX: VOPDMov, OpY: AMDGPU::getVOPDOpcode(Opc, VOPD3),
654 EncodingFamily, VOPD3) != -1;
655 return {.X: VOPD3 ? Info->CanBeVOPD3X : Info->CanBeVOPDX, .Y: CanBeVOPDY};
656 }
657
658 return {.X: false, .Y: false};
659}
660
661unsigned getVOPDOpcode(unsigned Opc, bool VOPD3) {
662 bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc) : 0;
663 Opc = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : Opc;
664 const VOPDComponentInfo *Info = getVOPDComponentHelper(BaseVOP: Opc);
665 return Info ? Info->VOPDOp : ~0u;
666}
667
668bool isVOPD(unsigned Opc) {
669 return AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::src0X);
670}
671
672bool isMAC(unsigned Opc) {
673 return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
674 Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
675 Opc == AMDGPU::V_MAC_F32_e64_vi ||
676 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
677 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
678 Opc == AMDGPU::V_MAC_F16_e64_vi ||
679 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
680 Opc == AMDGPU::V_FMAC_F64_e64_gfx12 ||
681 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
682 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
683 Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
684 Opc == AMDGPU::V_FMAC_F32_e64_vi ||
685 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
686 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
687 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
688 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
689 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx11 ||
690 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
691 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx12 ||
692 Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
693 Opc == AMDGPU::V_DOT2C_F32_BF16_e64_vi ||
694 Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
695 Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
696 Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
697}
698
699bool isPermlane16(unsigned Opc) {
700 return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
701 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
702 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
703 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
704 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
705 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
706 Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
707 Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
708}
709
710bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) {
711 return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
712 Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
713 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
714 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
715 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
716 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
717 Opc == AMDGPU::V_CVT_PK_F32_BF8_fake16_e64_gfx12 ||
718 Opc == AMDGPU::V_CVT_PK_F32_FP8_fake16_e64_gfx12 ||
719 Opc == AMDGPU::V_CVT_PK_F32_BF8_t16_e64_gfx12 ||
720 Opc == AMDGPU::V_CVT_PK_F32_FP8_t16_e64_gfx12;
721}
722
723bool isGenericAtomic(unsigned Opc) {
724 return Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
725 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
726 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
727 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
728 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
729 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
730 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
731 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
732 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
733 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
734 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
735 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
736 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
737 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
738 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
739 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
740 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB_CLAMP_U32 ||
741 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_COND_SUB_U32 ||
742 Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
743}
744
745bool isAsyncStore(unsigned Opc) {
746 return Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_gfx1250 ||
747 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_gfx1250 ||
748 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_gfx1250 ||
749 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_gfx1250 ||
750 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_SADDR_gfx1250 ||
751 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_SADDR_gfx1250 ||
752 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_SADDR_gfx1250 ||
753 Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_SADDR_gfx1250;
754}
755
756bool isTensorStore(unsigned Opc) {
757 return Opc == TENSOR_STORE_FROM_LDS_gfx1250 ||
758 Opc == TENSOR_STORE_FROM_LDS_D2_gfx1250;
759}
760
unsigned getTemporalHintType(const MCInstrDesc TID) {
  if (TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))
    return CPol::TH_TYPE_ATOMIC;
  unsigned Opc = TID.getOpcode();
  // Async and tensor stores should have the temporal hint type TH_TYPE_STORE.
  if (TID.mayStore() &&
      (isAsyncStore(Opc) || isTensorStore(Opc) || !TID.mayLoad()))
    return CPol::TH_TYPE_STORE;

  // This defaults to returning TH_TYPE_LOAD when neither the MayStore nor the
  // MayLoad flag is present, which is the case for instructions like
  // image_get_resinfo.
  return CPol::TH_TYPE_LOAD;
}
775
776bool isTrue16Inst(unsigned Opc) {
777 const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opcode: Opc);
778 return Info && Info->IsTrue16;
779}
780
781FPType getFPDstSelType(unsigned Opc) {
782 const FP4FP8DstByteSelInfo *Info = getFP4FP8DstByteSelHelper(Opcode: Opc);
783 if (!Info)
784 return FPType::None;
785 if (Info->HasFP8DstByteSel)
786 return FPType::FP8;
787 if (Info->HasFP4DstByteSel)
788 return FPType::FP4;
789
790 return FPType::None;
791}
792
793bool isDPMACCInstruction(unsigned Opc) {
794 const DPMACCInstructionInfo *Info = getDPMACCInstructionHelper(Opcode: Opc);
795 return Info && Info->IsDPMACCInstruction;
796}
797
798unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
799 const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opcode2Addr: Opc);
800 return Info ? Info->Opcode3Addr : ~0u;
801}
802
803unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
804 const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opcode3Addr: Opc);
805 return Info ? Info->Opcode2Addr : ~0u;
806}
807
808// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
809// header files, so we need to wrap it in a function that takes unsigned
810// instead.
811int getMCOpcode(uint16_t Opcode, unsigned Gen) {
812 return getMCOpcodeGen(Opcode, inSubtarget: static_cast<Subtarget>(Gen));
813}
814
815unsigned getBitOp2(unsigned Opc) {
816 switch (Opc) {
817 default:
818 return 0;
819 case AMDGPU::V_AND_B32_e32:
820 return 0x40;
821 case AMDGPU::V_OR_B32_e32:
822 return 0x54;
823 case AMDGPU::V_XOR_B32_e32:
824 return 0x14;
825 case AMDGPU::V_XNOR_B32_e32:
826 return 0x41;
827 }
828}
829
830int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily,
831 bool VOPD3) {
832 bool IsConvertibleToBitOp = VOPD3 ? getBitOp2(Opc: OpY) : 0;
833 OpY = IsConvertibleToBitOp ? (unsigned)AMDGPU::V_BITOP3_B32_e64 : OpY;
834 const VOPDInfo *Info =
835 getVOPDInfoFromComponentOpcodes(OpX, OpY, SubTgt: EncodingFamily, VOPD3);
836 return Info ? Info->Opcode : -1;
837}
838
839std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
840 const VOPDInfo *Info = getVOPDOpcodeHelper(Opcode: VOPDOpcode);
841 assert(Info);
842 const auto *OpX = getVOPDBaseFromComponent(VOPDOp: Info->OpX);
843 const auto *OpY = getVOPDBaseFromComponent(VOPDOp: Info->OpY);
844 assert(OpX && OpY);
845 return {OpX->BaseVOP, OpY->BaseVOP};
846}
847
848namespace VOPD {
849
850ComponentProps::ComponentProps(const MCInstrDesc &OpDesc, bool VOP3Layout) {
851 assert(OpDesc.getNumDefs() == Component::DST_NUM);
852
853 assert(OpDesc.getOperandConstraint(Component::SRC0, MCOI::TIED_TO) == -1);
854 assert(OpDesc.getOperandConstraint(Component::SRC1, MCOI::TIED_TO) == -1);
855 auto TiedIdx = OpDesc.getOperandConstraint(OpNum: Component::SRC2, Constraint: MCOI::TIED_TO);
856 assert(TiedIdx == -1 || TiedIdx == Component::DST);
857 HasSrc2Acc = TiedIdx != -1;
858 Opcode = OpDesc.getOpcode();
859
860 IsVOP3 = VOP3Layout || (OpDesc.TSFlags & SIInstrFlags::VOP3);
861 SrcOperandsNum = AMDGPU::hasNamedOperand(Opcode, NamedIdx: AMDGPU::OpName::src2) ? 3
862 : AMDGPU::hasNamedOperand(Opcode, NamedIdx: AMDGPU::OpName::imm) ? 3
863 : AMDGPU::hasNamedOperand(Opcode, NamedIdx: AMDGPU::OpName::src1) ? 2
864 : 1;
865 assert(SrcOperandsNum <= Component::MAX_SRC_NUM);
866
867 if (Opcode == AMDGPU::V_CNDMASK_B32_e32 ||
868 Opcode == AMDGPU::V_CNDMASK_B32_e64) {
869 // CNDMASK is an awkward exception, it has FP modifiers, but not FP
870 // operands.
871 NumVOPD3Mods = 2;
872 if (IsVOP3)
873 SrcOperandsNum = 3;
874 } else if (isSISrcFPOperand(Desc: OpDesc,
875 OpNo: getNamedOperandIdx(Opcode, Name: OpName::src0))) {
876 // All FP VOPD instructions have Neg modifiers for all operands except
877 // for tied src2.
878 NumVOPD3Mods = SrcOperandsNum;
879 if (HasSrc2Acc)
880 --NumVOPD3Mods;
881 }
882
883 if (OpDesc.TSFlags & SIInstrFlags::VOP3)
884 return;
885
886 auto OperandsNum = OpDesc.getNumOperands();
887 unsigned CompOprIdx;
888 for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
889 if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
890 MandatoryLiteralIdx = CompOprIdx;
891 break;
892 }
893 }
894}
895
896int ComponentProps::getBitOp3OperandIdx() const {
897 return getNamedOperandIdx(Opcode, Name: OpName::bitop3);
898}
899
900unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
901 assert(CompOprIdx < Component::MAX_OPR_NUM);
902
903 if (CompOprIdx == Component::DST)
904 return getIndexOfDstInParsedOperands();
905
906 auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
907 if (CompSrcIdx < getCompParsedSrcOperandsNum())
908 return getIndexOfSrcInParsedOperands(CompSrcIdx);
909
910 // The specified operand does not exist.
911 return 0;
912}
913
914std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
915 std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
916 const MCRegisterInfo &MRI, bool SkipSrc, bool AllowSameVGPR,
917 bool VOPD3) const {
918
919 auto OpXRegs = getRegIndices(ComponentIdx: ComponentIndex::X, GetRegIdx,
920 VOPD3: CompInfo[ComponentIndex::X].isVOP3());
921 auto OpYRegs = getRegIndices(ComponentIdx: ComponentIndex::Y, GetRegIdx,
922 VOPD3: CompInfo[ComponentIndex::Y].isVOP3());
923
924 const auto banksOverlap = [&MRI](MCRegister X, MCRegister Y,
925 unsigned BanksMask) -> bool {
926 MCRegister BaseX = MRI.getSubReg(Reg: X, Idx: AMDGPU::sub0);
927 MCRegister BaseY = MRI.getSubReg(Reg: Y, Idx: AMDGPU::sub0);
928 if (!BaseX)
929 BaseX = X;
930 if (!BaseY)
931 BaseY = Y;
932 if ((BaseX.id() & BanksMask) == (BaseY.id() & BanksMask))
933 return true;
934 if (BaseX != X /* This is 64-bit register */ &&
935 ((BaseX.id() + 1) & BanksMask) == (BaseY.id() & BanksMask))
936 return true;
937 if (BaseY != Y &&
938 (BaseX.id() & BanksMask) == ((BaseY.id() + 1) & BanksMask))
939 return true;
940
    // If both are 64-bit registers, a bank conflict will already have been
    // detected while checking the first subreg.
943 return false;
944 };
945
946 unsigned CompOprIdx;
947 for (CompOprIdx = 0; CompOprIdx < Component::MAX_OPR_NUM; ++CompOprIdx) {
948 unsigned BanksMasks = VOPD3 ? VOPD3_VGPR_BANK_MASKS[CompOprIdx]
949 : VOPD_VGPR_BANK_MASKS[CompOprIdx];
950 if (!OpXRegs[CompOprIdx] || !OpYRegs[CompOprIdx])
951 continue;
952
953 if (getVGPREncodingMSBs(Reg: OpXRegs[CompOprIdx], MRI) !=
954 getVGPREncodingMSBs(Reg: OpYRegs[CompOprIdx], MRI))
955 return CompOprIdx;
956
957 if (SkipSrc && CompOprIdx >= Component::DST_NUM)
958 continue;
959
960 if (CompOprIdx < Component::DST_NUM) {
      // Even if we do not check vdst parity, vdst operands must still not
      // overlap.
963 if (MRI.regsOverlap(RegA: OpXRegs[CompOprIdx], RegB: OpYRegs[CompOprIdx]))
964 return CompOprIdx;
965 if (VOPD3) // No need to check dst parity.
966 continue;
967 }
968
969 if (banksOverlap(OpXRegs[CompOprIdx], OpYRegs[CompOprIdx], BanksMasks) &&
970 (!AllowSameVGPR || CompOprIdx < Component::DST_NUM ||
971 OpXRegs[CompOprIdx] != OpYRegs[CompOprIdx]))
972 return CompOprIdx;
973 }
974
975 return {};
976}
977
978// Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
979// by the specified component. If an operand is unused
980// or is not a VGPR, the corresponding value is 0.
981//
982// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
983// for the specified component and MC operand. The callback must return 0
984// if the operand is not a register or not a VGPR.
985InstInfo::RegIndices
986InstInfo::getRegIndices(unsigned CompIdx,
987 std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
988 bool VOPD3) const {
989 assert(CompIdx < COMPONENTS_NUM);
990
991 const auto &Comp = CompInfo[CompIdx];
992 InstInfo::RegIndices RegIndices;
993
994 RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());
995
996 for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
997 unsigned CompSrcIdx = CompOprIdx - DST_NUM;
998 RegIndices[CompOprIdx] =
999 Comp.hasRegSrcOperand(CompSrcIdx)
1000 ? GetRegIdx(CompIdx,
1001 Comp.getIndexOfSrcInMCOperands(CompSrcIdx, VOPD3))
1002 : MCRegister();
1003 }
1004 return RegIndices;
1005}
1006
1007} // namespace VOPD
1008
1009VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) {
1010 return VOPD::InstInfo(OpX, OpY);
1011}
1012
1013VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
1014 const MCInstrInfo *InstrInfo) {
1015 auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
1016 const auto &OpXDesc = InstrInfo->get(Opcode: OpX);
1017 const auto &OpYDesc = InstrInfo->get(Opcode: OpY);
1018 bool VOPD3 = InstrInfo->get(Opcode: VOPDOpcode).TSFlags & SIInstrFlags::VOPD3;
1019 VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X, VOPD3);
1020 VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo, VOPD3);
1021 return VOPD::InstInfo(OpXInfo, OpYInfo);
1022}
1023
1024namespace IsaInfo {
1025
1026AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
1027 : STI(STI), XnackSetting(TargetIDSetting::Any),
1028 SramEccSetting(TargetIDSetting::Any) {
1029 if (!STI.getFeatureBits().test(I: FeatureSupportsXNACK))
1030 XnackSetting = TargetIDSetting::Unsupported;
1031 if (!STI.getFeatureBits().test(I: FeatureSupportsSRAMECC))
1032 SramEccSetting = TargetIDSetting::Unsupported;
1033}
1034
1035void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
1036 // Check if xnack or sramecc is explicitly enabled or disabled. In the
1037 // absence of the target features we assume we must generate code that can run
1038 // in any environment.
1039 SubtargetFeatures Features(FS);
1040 std::optional<bool> XnackRequested;
1041 std::optional<bool> SramEccRequested;
1042
1043 for (const std::string &Feature : Features.getFeatures()) {
1044 if (Feature == "+xnack")
1045 XnackRequested = true;
1046 else if (Feature == "-xnack")
1047 XnackRequested = false;
1048 else if (Feature == "+sramecc")
1049 SramEccRequested = true;
1050 else if (Feature == "-sramecc")
1051 SramEccRequested = false;
1052 }
1053
1054 bool XnackSupported = isXnackSupported();
1055 bool SramEccSupported = isSramEccSupported();
1056
1057 if (XnackRequested) {
1058 if (XnackSupported) {
1059 XnackSetting =
1060 *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
1061 } else {
      // If a specific xnack setting was requested and this GPU does not
      // support xnack, emit a warning. The setting will remain "Unsupported".
1064 if (*XnackRequested) {
1065 errs() << "warning: xnack 'On' was requested for a processor that does "
1066 "not support it!\n";
1067 } else {
1068 errs() << "warning: xnack 'Off' was requested for a processor that "
1069 "does not support it!\n";
1070 }
1071 }
1072 }
1073
1074 if (SramEccRequested) {
1075 if (SramEccSupported) {
1076 SramEccSetting =
1077 *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
1078 } else {
      // If a specific sramecc setting was requested and this GPU does not
      // support sramecc, emit a warning. The setting will remain set to
      // "Unsupported".
1082 if (*SramEccRequested) {
1083 errs() << "warning: sramecc 'On' was requested for a processor that "
1084 "does not support it!\n";
1085 } else {
1086 errs() << "warning: sramecc 'Off' was requested for a processor that "
1087 "does not support it!\n";
1088 }
1089 }
1090 }
1091}
1092
1093static TargetIDSetting
1094getTargetIDSettingFromFeatureString(StringRef FeatureString) {
1095 if (FeatureString.ends_with(Suffix: "-"))
1096 return TargetIDSetting::Off;
1097 if (FeatureString.ends_with(Suffix: "+"))
1098 return TargetIDSetting::On;
1099
1100 llvm_unreachable("Malformed feature string");
1101}
1102
1103void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
1104 SmallVector<StringRef, 3> TargetIDSplit;
1105 TargetID.split(A&: TargetIDSplit, Separator: ':');
1106
1107 for (const auto &FeatureString : TargetIDSplit) {
1108 if (FeatureString.starts_with(Prefix: "xnack"))
1109 XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
1110 if (FeatureString.starts_with(Prefix: "sramecc"))
1111 SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
1112 }
1113}
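// For example (hypothetical input): a target-id stream such as
// "gfx90a:sramecc+:xnack-" sets SramEccSetting to On and XnackSetting to Off;
// features not mentioned in the stream keep their current setting.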
1114
1115std::string AMDGPUTargetID::toString() const {
1116 std::string StringRep;
1117 raw_string_ostream StreamRep(StringRep);
1118
1119 auto TargetTriple = STI.getTargetTriple();
1120 auto Version = getIsaVersion(GPU: STI.getCPU());
1121
1122 StreamRep << TargetTriple.getArchName() << '-' << TargetTriple.getVendorName()
1123 << '-' << TargetTriple.getOSName() << '-'
1124 << TargetTriple.getEnvironmentName() << '-';
1125
1126 std::string Processor;
  // TODO: The following else statement is present here because we used
  // various alias names for GPUs up until GFX9 (e.g. 'fiji' is the same as
  // 'gfx803'). Remove once all aliases are removed from GCNProcessors.td.
1130 if (Version.Major >= 9)
1131 Processor = STI.getCPU().str();
1132 else
1133 Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
1134 Twine(Version.Stepping))
1135 .str();
1136
1137 std::string Features;
1138 if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
1139 // sramecc.
1140 if (getSramEccSetting() == TargetIDSetting::Off)
1141 Features += ":sramecc-";
1142 else if (getSramEccSetting() == TargetIDSetting::On)
1143 Features += ":sramecc+";
1144 // xnack.
1145 if (getXnackSetting() == TargetIDSetting::Off)
1146 Features += ":xnack-";
1147 else if (getXnackSetting() == TargetIDSetting::On)
1148 Features += ":xnack+";
1149 }
1150
1151 StreamRep << Processor << Features;
1152
1153 return StringRep;
1154}
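// A sketch of the resulting string (exact processor and feature names depend
// on the subtarget): something like "amdgcn-amd-amdhsa--gfx90a:sramecc+:xnack-",
// i.e. arch-vendor-os-environment- followed by the processor name and any
// sramecc/xnack suffixes.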
1155
1156unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
1157 if (STI->getFeatureBits().test(I: FeatureWavefrontSize16))
1158 return 16;
1159 if (STI->getFeatureBits().test(I: FeatureWavefrontSize32))
1160 return 32;
1161
1162 return 64;
1163}
1164
1165unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
1166 unsigned BytesPerCU = getAddressableLocalMemorySize(STI);
1167
1168 // "Per CU" really means "per whatever functional block the waves of a
1169 // workgroup must share". So the effective local memory size is doubled in
1170 // WGP mode on gfx10.
1171 if (isGFX10Plus(STI: *STI) && !STI->getFeatureBits().test(I: FeatureCuMode))
1172 BytesPerCU *= 2;
1173
1174 return BytesPerCU;
1175}
1176
1177unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) {
1178 if (STI->getFeatureBits().test(I: FeatureAddressableLocalMemorySize32768))
1179 return 32768;
1180 if (STI->getFeatureBits().test(I: FeatureAddressableLocalMemorySize65536))
1181 return 65536;
1182 if (STI->getFeatureBits().test(I: FeatureAddressableLocalMemorySize163840))
1183 return 163840;
1184 if (STI->getFeatureBits().test(I: FeatureAddressableLocalMemorySize327680))
1185 return 327680;
1186 return 32768;
1187}
1188
1189unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
1190 // "Per CU" really means "per whatever functional block the waves of a
1191 // workgroup must share".
1192
1193 // GFX12.5 only supports CU mode, which contains four SIMDs.
1194 if (isGFX1250(STI: *STI)) {
1195 assert(STI->getFeatureBits().test(FeatureCuMode));
1196 return 4;
1197 }
1198
1199 // For gfx10 in CU mode the functional block is the CU, which contains
1200 // two SIMDs.
1201 if (isGFX10Plus(STI: *STI) && STI->getFeatureBits().test(I: FeatureCuMode))
1202 return 2;
1203
1204 // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP
1205 // contains two CUs, so a total of four SIMDs.
1206 return 4;
1207}
1208
1209unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
1210 unsigned FlatWorkGroupSize) {
1211 assert(FlatWorkGroupSize != 0);
1212 if (!STI->getTargetTriple().isAMDGCN())
1213 return 8;
1214 unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI);
1215 unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
1216 if (N == 1) {
1217 // Single-wave workgroups don't consume barrier resources.
1218 return MaxWaves;
1219 }
1220
1221 unsigned MaxBarriers = 16;
1222 if (isGFX10Plus(STI: *STI) && !STI->getFeatureBits().test(I: FeatureCuMode))
1223 MaxBarriers = 32;
1224
1225 return std::min(a: MaxWaves / N, b: MaxBarriers);
1226}
1227
1228unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) { return 1; }
1229
1230unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
1231 // FIXME: Need to take scratch memory into account.
1232 if (isGFX90A(STI: *STI))
1233 return 8;
1234 if (!isGFX10Plus(STI: *STI))
1235 return 10;
1236 return hasGFX10_3Insts(STI: *STI) ? 16 : 20;
1237}
1238
1239unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
1240 unsigned FlatWorkGroupSize) {
1241 return divideCeil(Numerator: getWavesPerWorkGroup(STI, FlatWorkGroupSize),
1242 Denominator: getEUsPerCU(STI));
1243}
1244
1245unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) { return 1; }
1246
1247unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
1248 // Some subtargets allow encoding 2048, but this isn't tested or supported.
1249 return 1024;
1250}
1251
1252unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
1253 unsigned FlatWorkGroupSize) {
1254 return divideCeil(Numerator: FlatWorkGroupSize, Denominator: getWavefrontSize(STI));
1255}
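// Worked example (illustrative): a flat workgroup size of 256 launched in
// wave32 mode needs divideCeil(256, 32) = 8 waves per workgroup; with four
// SIMDs per CU (see getEUsPerCU) that is divideCeil(8, 4) = 2 waves per EU
// for getWavesPerEUForWorkGroup.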
1256
1257unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
1258 IsaVersion Version = getIsaVersion(GPU: STI->getCPU());
1259 if (Version.Major >= 10)
1260 return getAddressableNumSGPRs(STI);
1261 if (Version.Major >= 8)
1262 return 16;
1263 return 8;
1264}
1265
1266unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) { return 8; }
1267
1268unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
1269 IsaVersion Version = getIsaVersion(GPU: STI->getCPU());
1270 if (Version.Major >= 8)
1271 return 800;
1272 return 512;
1273}
1274
1275unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
1276 if (STI->getFeatureBits().test(I: FeatureSGPRInitBug))
1277 return FIXED_NUM_SGPRS_FOR_INIT_BUG;
1278
1279 IsaVersion Version = getIsaVersion(GPU: STI->getCPU());
1280 if (Version.Major >= 10)
1281 return 106;
1282 if (Version.Major >= 8)
1283 return 102;
1284 return 104;
1285}
1286
1287unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
1288 assert(WavesPerEU != 0);
1289
1290 IsaVersion Version = getIsaVersion(GPU: STI->getCPU());
1291 if (Version.Major >= 10)
1292 return 0;
1293
1294 if (WavesPerEU >= getMaxWavesPerEU(STI))
1295 return 0;
1296
1297 unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
1298 if (STI->getFeatureBits().test(I: FeatureTrapHandler))
1299 MinNumSGPRs -= std::min(a: MinNumSGPRs, b: (unsigned)TRAP_NUM_SGPRS);
1300 MinNumSGPRs = alignDown(Value: MinNumSGPRs, Align: getSGPRAllocGranule(STI)) + 1;
1301 return std::min(a: MinNumSGPRs, b: getAddressableNumSGPRs(STI));
1302}
1303
1304unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
1305 bool Addressable) {
1306 assert(WavesPerEU != 0);
1307
1308 unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
1309 IsaVersion Version = getIsaVersion(GPU: STI->getCPU());
1310 if (Version.Major >= 10)
1311 return Addressable ? AddressableNumSGPRs : 108;
1312 if (Version.Major >= 8 && !Addressable)
1313 AddressableNumSGPRs = 112;
1314 unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
1315 if (STI->getFeatureBits().test(I: FeatureTrapHandler))
1316 MaxNumSGPRs -= std::min(a: MaxNumSGPRs, b: (unsigned)TRAP_NUM_SGPRS);
1317 MaxNumSGPRs = alignDown(Value: MaxNumSGPRs, Align: getSGPRAllocGranule(STI));
1318 return std::min(a: MaxNumSGPRs, b: AddressableNumSGPRs);
1319}
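// Worked example (illustrative, assuming no trap handler): on a GFX9 target
// getTotalNumSGPRs() is 800, so at 8 waves per EU the cap is
// alignDown(800 / 8, 16) = 96 SGPRs, under the addressable limit of 102.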
1320
1321unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
1322 bool FlatScrUsed, bool XNACKUsed) {
1323 unsigned ExtraSGPRs = 0;
1324 if (VCCUsed)
1325 ExtraSGPRs = 2;
1326
1327 IsaVersion Version = getIsaVersion(GPU: STI->getCPU());
1328 if (Version.Major >= 10)
1329 return ExtraSGPRs;
1330
1331 if (Version.Major < 8) {
1332 if (FlatScrUsed)
1333 ExtraSGPRs = 4;
1334 } else {
1335 if (XNACKUsed)
1336 ExtraSGPRs = 4;
1337
1338 if (FlatScrUsed ||
1339 STI->getFeatureBits().test(I: AMDGPU::FeatureArchitectedFlatScratch))
1340 ExtraSGPRs = 6;
1341 }
1342
1343 return ExtraSGPRs;
1344}
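// For example (illustrative): on a GFX8 or GFX9 target with VCC, flat scratch
// and XNACK all in use this reserves 6 SGPRs; on GFX10+ only the VCC pair
// (2 SGPRs) is ever reserved here.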
1345
1346unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
1347 bool FlatScrUsed) {
1348 return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
1349 XNACKUsed: STI->getFeatureBits().test(I: AMDGPU::FeatureXNACK));
1350}
1351
1352static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs,
1353 unsigned Granule) {
1354 return divideCeil(Numerator: std::max(a: 1u, b: NumRegs), Denominator: Granule);
1355}
1356
1357unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
1358 // SGPRBlocks is actual number of SGPR blocks minus 1.
1359 return getGranulatedNumRegisterBlocks(NumRegs: NumSGPRs, Granule: getSGPREncodingGranule(STI)) -
1360 1;
1361}
1362
1363unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
1364 unsigned DynamicVGPRBlockSize,
1365 std::optional<bool> EnableWavefrontSize32) {
1366 if (STI->getFeatureBits().test(I: FeatureGFX90AInsts))
1367 return 8;
1368
1369 if (DynamicVGPRBlockSize != 0)
1370 return DynamicVGPRBlockSize;
1371
1372 bool IsWave32 = EnableWavefrontSize32
1373 ? *EnableWavefrontSize32
1374 : STI->getFeatureBits().test(I: FeatureWavefrontSize32);
1375
1376 if (STI->getFeatureBits().test(I: Feature1_5xVGPRs))
1377 return IsWave32 ? 24 : 12;
1378
1379 if (hasGFX10_3Insts(STI: *STI))
1380 return IsWave32 ? 16 : 8;
1381
1382 return IsWave32 ? 8 : 4;
1383}
1384
1385unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
1386 std::optional<bool> EnableWavefrontSize32) {
1387 if (STI->getFeatureBits().test(I: FeatureGFX90AInsts))
1388 return 8;
1389
1390 bool IsWave32 = EnableWavefrontSize32
1391 ? *EnableWavefrontSize32
1392 : STI->getFeatureBits().test(I: FeatureWavefrontSize32);
1393
1394 if (STI->getFeatureBits().test(I: Feature1024AddressableVGPRs))
1395 return IsWave32 ? 16 : 8;
1396
1397 return IsWave32 ? 8 : 4;
1398}
1399
1400unsigned getArchVGPRAllocGranule() { return 4; }
1401
1402unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
1403 if (STI->getFeatureBits().test(I: FeatureGFX90AInsts))
1404 return 512;
1405 if (!isGFX10Plus(STI: *STI))
1406 return 256;
1407 bool IsWave32 = STI->getFeatureBits().test(I: FeatureWavefrontSize32);
1408 if (STI->getFeatureBits().test(I: Feature1_5xVGPRs))
1409 return IsWave32 ? 1536 : 768;
1410 return IsWave32 ? 1024 : 512;
1411}
1412
1413unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI) {
1414 const auto &Features = STI->getFeatureBits();
1415 if (Features.test(I: Feature1024AddressableVGPRs))
1416 return Features.test(I: FeatureWavefrontSize32) ? 1024 : 512;
1417 return 256;
1418}
1419
1420unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI,
1421 unsigned DynamicVGPRBlockSize) {
1422 const auto &Features = STI->getFeatureBits();
1423 if (Features.test(I: FeatureGFX90AInsts))
1424 return 512;
1425
1426 if (DynamicVGPRBlockSize != 0)
1427 // On GFX12 we can allocate at most 8 blocks of VGPRs.
1428 return 8 * getVGPRAllocGranule(STI, DynamicVGPRBlockSize);
1429 return getAddressableNumArchVGPRs(STI);
1430}
1431
1432unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
1433 unsigned NumVGPRs,
1434 unsigned DynamicVGPRBlockSize) {
1435 return getNumWavesPerEUWithNumVGPRs(
1436 NumVGPRs, Granule: getVGPRAllocGranule(STI, DynamicVGPRBlockSize),
1437 MaxWaves: getMaxWavesPerEU(STI), TotalNumVGPRs: getTotalNumVGPRs(STI));
1438}
1439
1440unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
1441 unsigned MaxWaves,
1442 unsigned TotalNumVGPRs) {
1443 if (NumVGPRs < Granule)
1444 return MaxWaves;
1445 unsigned RoundedRegs = alignTo(Value: NumVGPRs, Align: Granule);
1446 return std::min(a: std::max(a: TotalNumVGPRs / RoundedRegs, b: 1u), b: MaxWaves);
1447}
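// Worked example (illustrative): with NumVGPRs = 96, a granule of 4,
// MaxWaves = 10 and 256 total VGPRs, the budget rounds to 96 registers and
// allows 256 / 96 = 2 waves per EU.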
1448
1449unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
1450 AMDGPUSubtarget::Generation Gen) {
1451 if (Gen >= AMDGPUSubtarget::GFX10)
1452 return MaxWaves;
1453
1454 if (Gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
1455 if (SGPRs <= 80)
1456 return 10;
1457 if (SGPRs <= 88)
1458 return 9;
1459 if (SGPRs <= 100)
1460 return 8;
1461 return 7;
1462 }
1463 if (SGPRs <= 48)
1464 return 10;
1465 if (SGPRs <= 56)
1466 return 9;
1467 if (SGPRs <= 64)
1468 return 8;
1469 if (SGPRs <= 72)
1470 return 7;
1471 if (SGPRs <= 80)
1472 return 6;
1473 return 5;
1474}
1475
1476unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
1477 unsigned DynamicVGPRBlockSize) {
1478 assert(WavesPerEU != 0);
1479
1480 unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
1481 if (WavesPerEU >= MaxWavesPerEU)
1482 return 0;
1483
1484 unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
1485 unsigned AddrsableNumVGPRs =
1486 getAddressableNumVGPRs(STI, DynamicVGPRBlockSize);
1487 unsigned Granule = getVGPRAllocGranule(STI, DynamicVGPRBlockSize);
1488 unsigned MaxNumVGPRs = alignDown(Value: TotNumVGPRs / WavesPerEU, Align: Granule);
1489
1490 if (MaxNumVGPRs == alignDown(Value: TotNumVGPRs / MaxWavesPerEU, Align: Granule))
1491 return 0;
1492
1493 unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, NumVGPRs: AddrsableNumVGPRs,
1494 DynamicVGPRBlockSize);
1495 if (WavesPerEU < MinWavesPerEU)
1496 return getMinNumVGPRs(STI, WavesPerEU: MinWavesPerEU, DynamicVGPRBlockSize);
1497
1498 unsigned MaxNumVGPRsNext = alignDown(Value: TotNumVGPRs / (WavesPerEU + 1), Align: Granule);
1499 unsigned MinNumVGPRs = 1 + std::min(a: MaxNumVGPRs - Granule, b: MaxNumVGPRsNext);
1500 return std::min(a: MinNumVGPRs, b: AddrsableNumVGPRs);
1501}
1502
1503unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
1504 unsigned DynamicVGPRBlockSize) {
1505 assert(WavesPerEU != 0);
1506
1507 unsigned MaxNumVGPRs =
1508 alignDown(Value: getTotalNumVGPRs(STI) / WavesPerEU,
1509 Align: getVGPRAllocGranule(STI, DynamicVGPRBlockSize));
1510 unsigned AddressableNumVGPRs =
1511 getAddressableNumVGPRs(STI, DynamicVGPRBlockSize);
1512 return std::min(a: MaxNumVGPRs, b: AddressableNumVGPRs);
1513}
1514
1515unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
1516 std::optional<bool> EnableWavefrontSize32) {
1517 return getGranulatedNumRegisterBlocks(
1518 NumRegs: NumVGPRs, Granule: getVGPREncodingGranule(STI, EnableWavefrontSize32)) -
1519 1;
1520}
1521
1522unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI,
1523 unsigned NumVGPRs,
1524 unsigned DynamicVGPRBlockSize,
1525 std::optional<bool> EnableWavefrontSize32) {
1526 return getGranulatedNumRegisterBlocks(
1527 NumRegs: NumVGPRs,
1528 Granule: getVGPRAllocGranule(STI, DynamicVGPRBlockSize, EnableWavefrontSize32));
1529}
1530} // end namespace IsaInfo
1531
1532void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode,
1533 const MCSubtargetInfo *STI) {
1534 IsaVersion Version = getIsaVersion(GPU: STI->getCPU());
1535 KernelCode.amd_kernel_code_version_major = 1;
1536 KernelCode.amd_kernel_code_version_minor = 2;
1537 KernelCode.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
1538 KernelCode.amd_machine_version_major = Version.Major;
1539 KernelCode.amd_machine_version_minor = Version.Minor;
1540 KernelCode.amd_machine_version_stepping = Version.Stepping;
1541 KernelCode.kernel_code_entry_byte_offset = sizeof(amd_kernel_code_t);
1542 if (STI->getFeatureBits().test(I: FeatureWavefrontSize32)) {
1543 KernelCode.wavefront_size = 5;
1544 KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
1545 } else {
1546 KernelCode.wavefront_size = 6;
1547 }
1548
1549 // If the code object does not support indirect functions, then the value must
1550 // be 0xffffffff.
1551 KernelCode.call_convention = -1;
1552
1553 // These alignment values are specified in powers of two, so alignment =
1554 // 2^n. The minimum alignment is 2^4 = 16.
1555 KernelCode.kernarg_segment_alignment = 4;
1556 KernelCode.group_segment_alignment = 4;
1557 KernelCode.private_segment_alignment = 4;
1558
1559 if (Version.Major >= 10) {
1560 KernelCode.compute_pgm_resource_registers |=
1561 S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
1562 S_00B848_MEM_ORDERED(1) | S_00B848_FWD_PROGRESS(1);
1563 }
1564}
1565
1566bool isGroupSegment(const GlobalValue *GV) {
1567 return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
1568}
1569
1570bool isGlobalSegment(const GlobalValue *GV) {
1571 return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
1572}
1573
1574bool isReadOnlySegment(const GlobalValue *GV) {
1575 unsigned AS = GV->getAddressSpace();
1576 return AS == AMDGPUAS::CONSTANT_ADDRESS ||
1577 AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
1578}
1579
1580bool shouldEmitConstantsToTextSection(const Triple &TT) {
1581 return TT.getArch() == Triple::r600;
1582}
1583
static bool isValidRegPrefix(char C) {
  return C == 'v' || C == 's' || C == 'a';
}

std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef RegName) {
  char Kind = RegName.front();
  if (!isValidRegPrefix(Kind))
    return {};

  RegName = RegName.drop_front();
  if (RegName.consume_front("[")) {
    unsigned Idx, End;
    bool Failed = RegName.consumeInteger(10, Idx);
    Failed |= !RegName.consume_front(":");
    Failed |= RegName.consumeInteger(10, End);
    Failed |= !RegName.consume_back("]");
    if (!Failed) {
      unsigned NumRegs = End - Idx + 1;
      if (NumRegs > 1)
        return {Kind, Idx, NumRegs};
    }
  } else {
    unsigned Idx;
    bool Failed = RegName.getAsInteger(10, Idx);
    if (!Failed)
      return {Kind, Idx, 1};
  }

  return {};
}
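// For example (hypothetical inputs): "v[0:3]" parses to {'v', 0, 4},
// "s17" parses to {'s', 17, 1}, and anything that is not a v/s/a register or
// a well-formed range returns the empty tuple.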
1614
1615std::tuple<char, unsigned, unsigned>
1616parseAsmConstraintPhysReg(StringRef Constraint) {
1617 StringRef RegName = Constraint;
1618 if (!RegName.consume_front(Prefix: "{") || !RegName.consume_back(Suffix: "}"))
1619 return {};
1620 return parseAsmPhysRegName(RegName);
1621}
1622
1623std::pair<unsigned, unsigned>
1624getIntegerPairAttribute(const Function &F, StringRef Name,
1625 std::pair<unsigned, unsigned> Default,
1626 bool OnlyFirstRequired) {
1627 if (auto Attr = getIntegerPairAttribute(F, Name, OnlyFirstRequired))
1628 return {Attr->first, Attr->second.value_or(u&: Default.second)};
1629 return Default;
1630}
1631
1632std::optional<std::pair<unsigned, std::optional<unsigned>>>
1633getIntegerPairAttribute(const Function &F, StringRef Name,
1634 bool OnlyFirstRequired) {
1635 Attribute A = F.getFnAttribute(Kind: Name);
1636 if (!A.isStringAttribute())
1637 return std::nullopt;
1638
1639 LLVMContext &Ctx = F.getContext();
1640 std::pair<unsigned, std::optional<unsigned>> Ints;
1641 std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(Separator: ',');
1642 if (Strs.first.trim().getAsInteger(Radix: 0, Result&: Ints.first)) {
1643 Ctx.emitError(ErrorStr: "can't parse first integer attribute " + Name);
1644 return std::nullopt;
1645 }
1646 unsigned Second = 0;
1647 if (Strs.second.trim().getAsInteger(Radix: 0, Result&: Second)) {
1648 if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
1649 Ctx.emitError(ErrorStr: "can't parse second integer attribute " + Name);
1650 return std::nullopt;
1651 }
1652 } else {
1653 Ints.second = Second;
1654 }
1655
1656 return Ints;
1657}
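// For example (hypothetical attribute value): a string attribute of "1,256"
// yields the pair {1, 256}; "128" alone is accepted only when
// OnlyFirstRequired is set, in which case the second value stays unset.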
1658
1659SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
1660 unsigned Size,
1661 unsigned DefaultVal) {
1662 std::optional<SmallVector<unsigned>> R =
1663 getIntegerVecAttribute(F, Name, Size);
1664 return R.has_value() ? *R : SmallVector<unsigned>(Size, DefaultVal);
1665}
1666
1667std::optional<SmallVector<unsigned>>
1668getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size) {
1669 assert(Size > 2);
1670 LLVMContext &Ctx = F.getContext();
1671
1672 Attribute A = F.getFnAttribute(Kind: Name);
1673 if (!A.isValid())
1674 return std::nullopt;
1675 if (!A.isStringAttribute()) {
1676 Ctx.emitError(ErrorStr: Name + " is not a string attribute");
1677 return std::nullopt;
1678 }
1679
1680 SmallVector<unsigned> Vals(Size);
1681
1682 StringRef S = A.getValueAsString();
1683 unsigned i = 0;
1684 for (; !S.empty() && i < Size; i++) {
1685 std::pair<StringRef, StringRef> Strs = S.split(Separator: ',');
1686 unsigned IntVal;
1687 if (Strs.first.trim().getAsInteger(Radix: 0, Result&: IntVal)) {
1688 Ctx.emitError(ErrorStr: "can't parse integer attribute " + Strs.first + " in " +
1689 Name);
1690 return std::nullopt;
1691 }
1692 Vals[i] = IntVal;
1693 S = Strs.second;
1694 }
1695
1696 if (!S.empty() || i < Size) {
1697 Ctx.emitError(ErrorStr: "attribute " + Name +
1698 " has incorrect number of integers; expected " +
1699 llvm::utostr(X: Size));
1700 return std::nullopt;
1701 }
1702 return Vals;
1703}
1704
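// MD holds pairs of ConstantInt operands, each describing a half-open
// [Low; High) range in the style of !range metadata. Returns true if Val lies
// inside any of the ranges, including wrap-around ranges where Low > High.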
1705bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val) {
1706 assert((MD.getNumOperands() % 2 == 0) && "invalid number of operands!");
1707 for (unsigned I = 0, E = MD.getNumOperands() / 2; I != E; ++I) {
1708 auto Low =
1709 mdconst::extract<ConstantInt>(MD: MD.getOperand(I: 2 * I + 0))->getValue();
1710 auto High =
1711 mdconst::extract<ConstantInt>(MD: MD.getOperand(I: 2 * I + 1))->getValue();
1712 // There are two types of [A; B) ranges:
1713 // A < B, e.g. [4; 5) which is a range that only includes 4.
1714 // A > B, e.g. [5; 4) which is a range that wraps around and includes
1715 // everything except 4.
1716 if (Low.ult(RHS: High)) {
1717 if (Low.ule(RHS: Val) && High.ugt(RHS: Val))
1718 return true;
1719 } else {
1720      if (Low.ule(RHS: Val) || High.ugt(RHS: Val))
1721 return true;
1722 }
1723 }
1724
1725 return false;
1726}
1727
1728raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait) {
1729 ListSeparator LS;
1730 if (Wait.LoadCnt != ~0u)
1731 OS << LS << "LoadCnt: " << Wait.LoadCnt;
1732 if (Wait.ExpCnt != ~0u)
1733 OS << LS << "ExpCnt: " << Wait.ExpCnt;
1734 if (Wait.DsCnt != ~0u)
1735 OS << LS << "DsCnt: " << Wait.DsCnt;
1736 if (Wait.StoreCnt != ~0u)
1737 OS << LS << "StoreCnt: " << Wait.StoreCnt;
1738 if (Wait.SampleCnt != ~0u)
1739 OS << LS << "SampleCnt: " << Wait.SampleCnt;
1740 if (Wait.BvhCnt != ~0u)
1741 OS << LS << "BvhCnt: " << Wait.BvhCnt;
1742 if (Wait.KmCnt != ~0u)
1743 OS << LS << "KmCnt: " << Wait.KmCnt;
1744 if (Wait.XCnt != ~0u)
1745 OS << LS << "XCnt: " << Wait.XCnt;
1746 if (LS.unused())
1747 OS << "none";
1748 OS << '\n';
1749 return OS;
1750}
1751
1752unsigned getVmcntBitMask(const IsaVersion &Version) {
1753 return (1 << (getVmcntBitWidthLo(VersionMajor: Version.Major) +
1754 getVmcntBitWidthHi(VersionMajor: Version.Major))) -
1755 1;
1756}
1757
1758unsigned getLoadcntBitMask(const IsaVersion &Version) {
1759 return (1 << getLoadcntBitWidth(VersionMajor: Version.Major)) - 1;
1760}
1761
1762unsigned getSamplecntBitMask(const IsaVersion &Version) {
1763 return (1 << getSamplecntBitWidth(VersionMajor: Version.Major)) - 1;
1764}
1765
1766unsigned getBvhcntBitMask(const IsaVersion &Version) {
1767 return (1 << getBvhcntBitWidth(VersionMajor: Version.Major)) - 1;
1768}
1769
1770unsigned getExpcntBitMask(const IsaVersion &Version) {
1771 return (1 << getExpcntBitWidth(VersionMajor: Version.Major)) - 1;
1772}
1773
1774unsigned getLgkmcntBitMask(const IsaVersion &Version) {
1775 return (1 << getLgkmcntBitWidth(VersionMajor: Version.Major)) - 1;
1776}
1777
1778unsigned getDscntBitMask(const IsaVersion &Version) {
1779 return (1 << getDscntBitWidth(VersionMajor: Version.Major)) - 1;
1780}
1781
1782unsigned getKmcntBitMask(const IsaVersion &Version) {
1783 return (1 << getKmcntBitWidth(VersionMajor: Version.Major)) - 1;
1784}
1785
1786unsigned getXcntBitMask(const IsaVersion &Version) {
1787 return (1 << getXcntBitWidth(VersionMajor: Version.Major, VersionMinor: Version.Minor)) - 1;
1788}
1789
1790unsigned getStorecntBitMask(const IsaVersion &Version) {
1791 return (1 << getStorecntBitWidth(VersionMajor: Version.Major)) - 1;
1792}
1793
1794HardwareLimits::HardwareLimits(const IsaVersion &IV) {
1795 bool HasExtendedWaitCounts = IV.Major >= 12;
1796 if (HasExtendedWaitCounts) {
1797 LoadcntMax = getLoadcntBitMask(Version: IV);
1798 DscntMax = getDscntBitMask(Version: IV);
1799 } else {
1800 LoadcntMax = getVmcntBitMask(Version: IV);
1801 DscntMax = getLgkmcntBitMask(Version: IV);
1802 }
1803 ExpcntMax = getExpcntBitMask(Version: IV);
1804 StorecntMax = getStorecntBitMask(Version: IV);
1805 SamplecntMax = getSamplecntBitMask(Version: IV);
1806 BvhcntMax = getBvhcntBitMask(Version: IV);
1807 KmcntMax = getKmcntBitMask(Version: IV);
1808 XcntMax = getXcntBitMask(Version: IV);
1809 VaVdstMax = DepCtr::getVaVdstBitMask();
1810 VmVsrcMax = DepCtr::getVmVsrcBitMask();
1811}
1812
1813unsigned getWaitcntBitMask(const IsaVersion &Version) {
1814 unsigned VmcntLo = getBitMask(Shift: getVmcntBitShiftLo(VersionMajor: Version.Major),
1815 Width: getVmcntBitWidthLo(VersionMajor: Version.Major));
1816 unsigned Expcnt = getBitMask(Shift: getExpcntBitShift(VersionMajor: Version.Major),
1817 Width: getExpcntBitWidth(VersionMajor: Version.Major));
1818 unsigned Lgkmcnt = getBitMask(Shift: getLgkmcntBitShift(VersionMajor: Version.Major),
1819 Width: getLgkmcntBitWidth(VersionMajor: Version.Major));
1820 unsigned VmcntHi = getBitMask(Shift: getVmcntBitShiftHi(VersionMajor: Version.Major),
1821 Width: getVmcntBitWidthHi(VersionMajor: Version.Major));
1822 return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
1823}
1824
1825unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1826 unsigned VmcntLo = unpackBits(Src: Waitcnt, Shift: getVmcntBitShiftLo(VersionMajor: Version.Major),
1827 Width: getVmcntBitWidthLo(VersionMajor: Version.Major));
1828 unsigned VmcntHi = unpackBits(Src: Waitcnt, Shift: getVmcntBitShiftHi(VersionMajor: Version.Major),
1829 Width: getVmcntBitWidthHi(VersionMajor: Version.Major));
1830 return VmcntLo | VmcntHi << getVmcntBitWidthLo(VersionMajor: Version.Major);
1831}
1832
1833unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
1834 return unpackBits(Src: Waitcnt, Shift: getExpcntBitShift(VersionMajor: Version.Major),
1835 Width: getExpcntBitWidth(VersionMajor: Version.Major));
1836}
1837
1838unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1839 return unpackBits(Src: Waitcnt, Shift: getLgkmcntBitShift(VersionMajor: Version.Major),
1840 Width: getLgkmcntBitWidth(VersionMajor: Version.Major));
1841}
1842
1843void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt,
1844 unsigned &Expcnt, unsigned &Lgkmcnt) {
1845 Vmcnt = decodeVmcnt(Version, Waitcnt);
1846 Expcnt = decodeExpcnt(Version, Waitcnt);
1847 Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
1848}
1849
1850Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
1851 Waitcnt Decoded;
1852 Decoded.LoadCnt = decodeVmcnt(Version, Waitcnt: Encoded);
1853 Decoded.ExpCnt = decodeExpcnt(Version, Waitcnt: Encoded);
1854 Decoded.DsCnt = decodeLgkmcnt(Version, Waitcnt: Encoded);
1855 return Decoded;
1856}
1857
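// Vmcnt is physically split into a low and a high field (see the bit
// shift/width helpers above). For example, on GFX9 a vmcnt of 35 (0b100011)
// is stored as 0b0011 in bits [3:0] and 0b10 in bits [15:14] of the simm16
// operand.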
1858unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1859 unsigned Vmcnt) {
1860 Waitcnt = packBits(Src: Vmcnt, Dst: Waitcnt, Shift: getVmcntBitShiftLo(VersionMajor: Version.Major),
1861 Width: getVmcntBitWidthLo(VersionMajor: Version.Major));
1862 return packBits(Src: Vmcnt >> getVmcntBitWidthLo(VersionMajor: Version.Major), Dst: Waitcnt,
1863 Shift: getVmcntBitShiftHi(VersionMajor: Version.Major),
1864 Width: getVmcntBitWidthHi(VersionMajor: Version.Major));
1865}
1866
1867unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1868 unsigned Expcnt) {
1869 return packBits(Src: Expcnt, Dst: Waitcnt, Shift: getExpcntBitShift(VersionMajor: Version.Major),
1870 Width: getExpcntBitWidth(VersionMajor: Version.Major));
1871}
1872
1873unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1874 unsigned Lgkmcnt) {
1875 return packBits(Src: Lgkmcnt, Dst: Waitcnt, Shift: getLgkmcntBitShift(VersionMajor: Version.Major),
1876 Width: getLgkmcntBitWidth(VersionMajor: Version.Major));
1877}
1878
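// Each counter value is masked to its field width when packed, so passing
// ~0u for a counter saturates its field, which the hardware treats as
// "do not wait" on that counter.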
1879unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
1880 unsigned Expcnt, unsigned Lgkmcnt) {
1881 unsigned Waitcnt = getWaitcntBitMask(Version);
1882 Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
1883 Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
1884 Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
1885 return Waitcnt;
1886}
1887
1888unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
1889 return encodeWaitcnt(Version, Vmcnt: Decoded.LoadCnt, Expcnt: Decoded.ExpCnt, Lgkmcnt: Decoded.DsCnt);
1890}
1891
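// Mask covering the Dscnt field plus either the Loadcnt or the Storecnt field
// of the combined simm16 encoding used by the GFX12+ combined wait
// instructions (e.g. s_wait_loadcnt_dscnt). encodeLoadcntDscnt and
// encodeStorecntDscnt start from this all-ones value.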
1892static unsigned getCombinedCountBitMask(const IsaVersion &Version,
1893 bool IsStore) {
1894 unsigned Dscnt = getBitMask(Shift: getDscntBitShift(VersionMajor: Version.Major),
1895 Width: getDscntBitWidth(VersionMajor: Version.Major));
1896 if (IsStore) {
1897 unsigned Storecnt = getBitMask(Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1898 Width: getStorecntBitWidth(VersionMajor: Version.Major));
1899 return Dscnt | Storecnt;
1900 }
1901 unsigned Loadcnt = getBitMask(Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1902 Width: getLoadcntBitWidth(VersionMajor: Version.Major));
1903 return Dscnt | Loadcnt;
1904}
1905
1906Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt) {
1907 Waitcnt Decoded;
1908 Decoded.LoadCnt =
1909 unpackBits(Src: LoadcntDscnt, Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1910 Width: getLoadcntBitWidth(VersionMajor: Version.Major));
1911 Decoded.DsCnt = unpackBits(Src: LoadcntDscnt, Shift: getDscntBitShift(VersionMajor: Version.Major),
1912 Width: getDscntBitWidth(VersionMajor: Version.Major));
1913 return Decoded;
1914}
1915
1916Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt) {
1917 Waitcnt Decoded;
1918 Decoded.StoreCnt =
1919 unpackBits(Src: StorecntDscnt, Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1920 Width: getStorecntBitWidth(VersionMajor: Version.Major));
1921 Decoded.DsCnt = unpackBits(Src: StorecntDscnt, Shift: getDscntBitShift(VersionMajor: Version.Major),
1922 Width: getDscntBitWidth(VersionMajor: Version.Major));
1923 return Decoded;
1924}
1925
1926static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt,
1927 unsigned Loadcnt) {
1928 return packBits(Src: Loadcnt, Dst: Waitcnt, Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1929 Width: getLoadcntBitWidth(VersionMajor: Version.Major));
1930}
1931
1932static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt,
1933 unsigned Storecnt) {
1934 return packBits(Src: Storecnt, Dst: Waitcnt, Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1935 Width: getStorecntBitWidth(VersionMajor: Version.Major));
1936}
1937
1938static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt,
1939 unsigned Dscnt) {
1940 return packBits(Src: Dscnt, Dst: Waitcnt, Shift: getDscntBitShift(VersionMajor: Version.Major),
1941 Width: getDscntBitWidth(VersionMajor: Version.Major));
1942}
1943
1944static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt,
1945 unsigned Dscnt) {
1946 unsigned Waitcnt = getCombinedCountBitMask(Version, IsStore: false);
1947 Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt);
1948 Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
1949 return Waitcnt;
1950}
1951
1952unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded) {
1953 return encodeLoadcntDscnt(Version, Loadcnt: Decoded.LoadCnt, Dscnt: Decoded.DsCnt);
1954}
1955
1956static unsigned encodeStorecntDscnt(const IsaVersion &Version,
1957 unsigned Storecnt, unsigned Dscnt) {
1958 unsigned Waitcnt = getCombinedCountBitMask(Version, IsStore: true);
1959 Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt);
1960 Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
1961 return Waitcnt;
1962}
1963
1964unsigned encodeStorecntDscnt(const IsaVersion &Version,
1965 const Waitcnt &Decoded) {
1966 return encodeStorecntDscnt(Version, Storecnt: Decoded.StoreCnt, Dscnt: Decoded.DsCnt);
1967}
1968
1969//===----------------------------------------------------------------------===//
1970// Custom Operand Values
1971//===----------------------------------------------------------------------===//
1972
1973static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr,
1974 int Size,
1975 const MCSubtargetInfo &STI) {
1976 unsigned Enc = 0;
1977 for (int Idx = 0; Idx < Size; ++Idx) {
1978 const auto &Op = Opr[Idx];
1979 if (Op.isSupported(STI))
1980 Enc |= Op.encode(Val: Op.Default);
1981 }
1982 return Enc;
1983}
1984
1985static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr,
1986 int Size, unsigned Code,
1987 bool &HasNonDefaultVal,
1988 const MCSubtargetInfo &STI) {
1989 unsigned UsedOprMask = 0;
1990 HasNonDefaultVal = false;
1991 for (int Idx = 0; Idx < Size; ++Idx) {
1992 const auto &Op = Opr[Idx];
1993 if (!Op.isSupported(STI))
1994 continue;
1995 UsedOprMask |= Op.getMask();
1996 unsigned Val = Op.decode(Code);
1997 if (!Op.isValid(Val))
1998 return false;
1999 HasNonDefaultVal |= (Val != Op.Default);
2000 }
2001 return (Code & ~UsedOprMask) == 0;
2002}
2003
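// Scans Opr from Idx for the next operand supported on STI and returns its
// name, decoded value and whether that value equals the default. Idx is
// advanced past the returned entry so repeated calls enumerate all fields;
// returns false once all entries have been visited.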
2004static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
2005 unsigned Code, int &Idx, StringRef &Name,
2006 unsigned &Val, bool &IsDefault,
2007 const MCSubtargetInfo &STI) {
2008 while (Idx < Size) {
2009 const auto &Op = Opr[Idx++];
2010 if (Op.isSupported(STI)) {
2011 Name = Op.Name;
2012 Val = Op.decode(Code);
2013 IsDefault = (Val == Op.Default);
2014 return true;
2015 }
2016 }
2017
2018 return false;
2019}
2020
2021static int encodeCustomOperandVal(const CustomOperandVal &Op,
2022 int64_t InputVal) {
2023 if (InputVal < 0 || InputVal > Op.Max)
2024 return OPR_VAL_INVALID;
2025 return Op.encode(Val: InputVal);
2026}
2027
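// Looks up Name in Opr and encodes InputVal into its field. Returns the
// encoded value on success, OPR_VAL_INVALID if the value is out of range,
// OPR_ID_DUPLICATE if the field was already written (per UsedOprMask),
// OPR_ID_UNSUPPORTED if the operand exists but is unavailable on this
// subtarget, and OPR_ID_UNKNOWN if the name is not found.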
2028static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
2029 const StringRef Name, int64_t InputVal,
2030 unsigned &UsedOprMask,
2031 const MCSubtargetInfo &STI) {
2032 int InvalidId = OPR_ID_UNKNOWN;
2033 for (int Idx = 0; Idx < Size; ++Idx) {
2034 const auto &Op = Opr[Idx];
2035 if (Op.Name == Name) {
2036 if (!Op.isSupported(STI)) {
2037 InvalidId = OPR_ID_UNSUPPORTED;
2038 continue;
2039 }
2040 auto OprMask = Op.getMask();
2041 if (OprMask & UsedOprMask)
2042 return OPR_ID_DUPLICATE;
2043 UsedOprMask |= OprMask;
2044 return encodeCustomOperandVal(Op, InputVal);
2045 }
2046 }
2047 return InvalidId;
2048}
2049
2050//===----------------------------------------------------------------------===//
2051// DepCtr
2052//===----------------------------------------------------------------------===//
2053
2054namespace DepCtr {
2055
2056int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) {
2057 static int Default = -1;
2058 if (Default == -1)
2059 Default = getDefaultCustomOperandEncoding(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, STI);
2060 return Default;
2061}
2062
2063bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
2064 const MCSubtargetInfo &STI) {
2065 return isSymbolicCustomOperandEncoding(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, Code,
2066 HasNonDefaultVal, STI);
2067}
2068
2069bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
2070 bool &IsDefault, const MCSubtargetInfo &STI) {
2071 return decodeCustomOperand(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, Code, Idx&: Id, Name, Val,
2072 IsDefault, STI);
2073}
2074
2075int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
2076 const MCSubtargetInfo &STI) {
2077 return encodeCustomOperand(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, Name, InputVal: Val, UsedOprMask,
2078 STI);
2079}
2080
2081unsigned getVaVdstBitMask() { return (1 << getVaVdstBitWidth()) - 1; }
2082
2083unsigned getVaSdstBitMask() { return (1 << getVaSdstBitWidth()) - 1; }
2084
2085unsigned getVaSsrcBitMask() { return (1 << getVaSsrcBitWidth()) - 1; }
2086
2087unsigned getHoldCntBitMask(const IsaVersion &Version) {
2088 return (1 << getHoldCntWidth(VersionMajor: Version.Major, VersionMinor: Version.Minor)) - 1;
2089}
2090
2091unsigned getVmVsrcBitMask() { return (1 << getVmVsrcBitWidth()) - 1; }
2092
2093unsigned getVaVccBitMask() { return (1 << getVaVccBitWidth()) - 1; }
2094
2095unsigned getSaSdstBitMask() { return (1 << getSaSdstBitWidth()) - 1; }
2096
2097unsigned decodeFieldVmVsrc(unsigned Encoded) {
2098 return unpackBits(Src: Encoded, Shift: getVmVsrcBitShift(), Width: getVmVsrcBitWidth());
2099}
2100
2101unsigned decodeFieldVaVdst(unsigned Encoded) {
2102 return unpackBits(Src: Encoded, Shift: getVaVdstBitShift(), Width: getVaVdstBitWidth());
2103}
2104
2105unsigned decodeFieldSaSdst(unsigned Encoded) {
2106 return unpackBits(Src: Encoded, Shift: getSaSdstBitShift(), Width: getSaSdstBitWidth());
2107}
2108
2109unsigned decodeFieldVaSdst(unsigned Encoded) {
2110 return unpackBits(Src: Encoded, Shift: getVaSdstBitShift(), Width: getVaSdstBitWidth());
2111}
2112
2113unsigned decodeFieldVaVcc(unsigned Encoded) {
2114 return unpackBits(Src: Encoded, Shift: getVaVccBitShift(), Width: getVaVccBitWidth());
2115}
2116
2117unsigned decodeFieldVaSsrc(unsigned Encoded) {
2118 return unpackBits(Src: Encoded, Shift: getVaSsrcBitShift(), Width: getVaSsrcBitWidth());
2119}
2120
2121unsigned decodeFieldHoldCnt(unsigned Encoded, const IsaVersion &Version) {
2122 return unpackBits(Src: Encoded, Shift: getHoldCntBitShift(),
2123 Width: getHoldCntWidth(VersionMajor: Version.Major, VersionMinor: Version.Minor));
2124}
2125
2126unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
2127 return packBits(Src: VmVsrc, Dst: Encoded, Shift: getVmVsrcBitShift(), Width: getVmVsrcBitWidth());
2128}
2129
2130unsigned encodeFieldVmVsrc(unsigned VmVsrc, const MCSubtargetInfo &STI) {
2131 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2132 return encodeFieldVmVsrc(Encoded, VmVsrc);
2133}
2134
2135unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) {
2136 return packBits(Src: VaVdst, Dst: Encoded, Shift: getVaVdstBitShift(), Width: getVaVdstBitWidth());
2137}
2138
2139unsigned encodeFieldVaVdst(unsigned VaVdst, const MCSubtargetInfo &STI) {
2140 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2141 return encodeFieldVaVdst(Encoded, VaVdst);
2142}
2143
2144unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) {
2145 return packBits(Src: SaSdst, Dst: Encoded, Shift: getSaSdstBitShift(), Width: getSaSdstBitWidth());
2146}
2147
2148unsigned encodeFieldSaSdst(unsigned SaSdst, const MCSubtargetInfo &STI) {
2149 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2150 return encodeFieldSaSdst(Encoded, SaSdst);
2151}
2152
2153unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst) {
2154 return packBits(Src: VaSdst, Dst: Encoded, Shift: getVaSdstBitShift(), Width: getVaSdstBitWidth());
2155}
2156
2157unsigned encodeFieldVaSdst(unsigned VaSdst, const MCSubtargetInfo &STI) {
2158 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2159 return encodeFieldVaSdst(Encoded, VaSdst);
2160}
2161
2162unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc) {
2163 return packBits(Src: VaVcc, Dst: Encoded, Shift: getVaVccBitShift(), Width: getVaVccBitWidth());
2164}
2165
2166unsigned encodeFieldVaVcc(unsigned VaVcc, const MCSubtargetInfo &STI) {
2167 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2168 return encodeFieldVaVcc(Encoded, VaVcc);
2169}
2170
2171unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc) {
2172 return packBits(Src: VaSsrc, Dst: Encoded, Shift: getVaSsrcBitShift(), Width: getVaSsrcBitWidth());
2173}
2174
2175unsigned encodeFieldVaSsrc(unsigned VaSsrc, const MCSubtargetInfo &STI) {
2176 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2177 return encodeFieldVaSsrc(Encoded, VaSsrc);
2178}
2179
2180unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt,
2181 const IsaVersion &Version) {
2182 return packBits(Src: HoldCnt, Dst: Encoded, Shift: getHoldCntBitShift(),
2183 Width: getHoldCntWidth(VersionMajor: Version.Major, VersionMinor: Version.Minor));
2184}
2185
2186unsigned encodeFieldHoldCnt(unsigned HoldCnt, const MCSubtargetInfo &STI) {
2187 unsigned Encoded = getDefaultDepCtrEncoding(STI);
2188 return encodeFieldHoldCnt(Encoded, HoldCnt, Version: getIsaVersion(GPU: STI.getCPU()));
2189}
2190
2191} // namespace DepCtr
2192
2193//===----------------------------------------------------------------------===//
2194// exp tgt
2195//===----------------------------------------------------------------------===//
2196
2197namespace Exp {
2198
2199struct ExpTgt {
2200 StringLiteral Name;
2201 unsigned Tgt;
2202 unsigned MaxIndex;
2203};
2204
2205// clang-format off
2206static constexpr ExpTgt ExpTgtInfo[] = {
2207 {.Name: {"null"}, .Tgt: ET_NULL, .MaxIndex: ET_NULL_MAX_IDX},
2208 {.Name: {"mrtz"}, .Tgt: ET_MRTZ, .MaxIndex: ET_MRTZ_MAX_IDX},
2209 {.Name: {"prim"}, .Tgt: ET_PRIM, .MaxIndex: ET_PRIM_MAX_IDX},
2210 {.Name: {"mrt"}, .Tgt: ET_MRT0, .MaxIndex: ET_MRT_MAX_IDX},
2211 {.Name: {"pos"}, .Tgt: ET_POS0, .MaxIndex: ET_POS_MAX_IDX},
2212 {.Name: {"dual_src_blend"},.Tgt: ET_DUAL_SRC_BLEND0, .MaxIndex: ET_DUAL_SRC_BLEND_MAX_IDX},
2213 {.Name: {"param"}, .Tgt: ET_PARAM0, .MaxIndex: ET_PARAM_MAX_IDX},
2214};
2215// clang-format on
2216
2217bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
2218 for (const ExpTgt &Val : ExpTgtInfo) {
2219 if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
2220 Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
2221 Name = Val.Name;
2222 return true;
2223 }
2224 }
2225 return false;
2226}
2227
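// Maps a symbolic exp target name to its id, e.g. "pos3" becomes ET_POS0 + 3.
// Suffixes with leading zeroes or an index above MaxIndex yield ET_INVALID.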
2228unsigned getTgtId(const StringRef Name) {
2229
2230 for (const ExpTgt &Val : ExpTgtInfo) {
2231 if (Val.MaxIndex == 0 && Name == Val.Name)
2232 return Val.Tgt;
2233
2234 if (Val.MaxIndex > 0 && Name.starts_with(Prefix: Val.Name)) {
2235 StringRef Suffix = Name.drop_front(N: Val.Name.size());
2236
2237 unsigned Id;
2238 if (Suffix.getAsInteger(Radix: 10, Result&: Id) || Id > Val.MaxIndex)
2239 return ET_INVALID;
2240
2241      // Reject leading zeroes.
2242 if (Suffix.size() > 1 && Suffix[0] == '0')
2243 return ET_INVALID;
2244
2245 return Val.Tgt + Id;
2246 }
2247 }
2248 return ET_INVALID;
2249}
2250
2251bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
2252 switch (Id) {
2253 case ET_NULL:
2254 return !isGFX11Plus(STI);
2255 case ET_POS4:
2256 case ET_PRIM:
2257 return isGFX10Plus(STI);
2258 case ET_DUAL_SRC_BLEND0:
2259 case ET_DUAL_SRC_BLEND1:
2260 return isGFX11Plus(STI);
2261 default:
2262 if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
2263 return !isGFX11Plus(STI);
2264 return true;
2265 }
2266}
2267
2268} // namespace Exp
2269
2270//===----------------------------------------------------------------------===//
2271// MTBUF Format
2272//===----------------------------------------------------------------------===//
2273
2274namespace MTBUFFormat {
2275
2276int64_t getDfmt(const StringRef Name) {
2277 for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
2278 if (Name == DfmtSymbolic[Id])
2279 return Id;
2280 }
2281 return DFMT_UNDEF;
2282}
2283
2284StringRef getDfmtName(unsigned Id) {
2285 assert(Id <= DFMT_MAX);
2286 return DfmtSymbolic[Id];
2287}
2288
2289static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) {
2290 if (isSI(STI) || isCI(STI))
2291 return NfmtSymbolicSICI;
2292 if (isVI(STI) || isGFX9(STI))
2293 return NfmtSymbolicVI;
2294 return NfmtSymbolicGFX10;
2295}
2296
2297int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
2298 const auto *lookupTable = getNfmtLookupTable(STI);
2299 for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
2300 if (Name == lookupTable[Id])
2301 return Id;
2302 }
2303 return NFMT_UNDEF;
2304}
2305
2306StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
2307 assert(Id <= NFMT_MAX);
2308 return getNfmtLookupTable(STI)[Id];
2309}
2310
2311bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
2312 unsigned Dfmt;
2313 unsigned Nfmt;
2314 decodeDfmtNfmt(Format: Id, Dfmt, Nfmt);
2315 return isValidNfmt(Val: Nfmt, STI);
2316}
2317
2318bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
2319 return !getNfmtName(Id, STI).empty();
2320}
2321
2322int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
2323 return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
2324}
2325
2326void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
2327 Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
2328 Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
2329}
2330
2331int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) {
2332 if (isGFX11Plus(STI)) {
2333 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
2334 if (Name == UfmtSymbolicGFX11[Id])
2335 return Id;
2336 }
2337 } else {
2338 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
2339 if (Name == UfmtSymbolicGFX10[Id])
2340 return Id;
2341 }
2342 }
2343 return UFMT_UNDEF;
2344}
2345
2346StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) {
2347 if (isValidUnifiedFormat(Val: Id, STI))
2348 return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
2349 return "";
2350}
2351
2352bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
2353 return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
2354}
2355
2356int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
2357 const MCSubtargetInfo &STI) {
2358 int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
2359 if (isGFX11Plus(STI)) {
2360 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
2361 if (Fmt == DfmtNfmt2UFmtGFX11[Id])
2362 return Id;
2363 }
2364 } else {
2365 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
2366 if (Fmt == DfmtNfmt2UFmtGFX10[Id])
2367 return Id;
2368 }
2369 }
2370 return UFMT_UNDEF;
2371}
2372
2373bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
2374 return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
2375}
2376
2377unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
2378 if (isGFX10Plus(STI))
2379 return UFMT_DEFAULT;
2380 return DFMT_NFMT_DEFAULT;
2381}
2382
2383} // namespace MTBUFFormat
2384
2385//===----------------------------------------------------------------------===//
2386// SendMsg
2387//===----------------------------------------------------------------------===//
2388
2389namespace SendMsg {
2390
2391static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) {
2392 return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_;
2393}
2394
2395bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
2396 return (MsgId & ~(getMsgIdMask(STI))) == 0;
2397}
2398
2399bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
2400 bool Strict) {
2401 assert(isValidMsgId(MsgId, STI));
2402
2403 if (!Strict)
2404 return 0 <= OpId && isUInt<OP_WIDTH_>(x: OpId);
2405
2406 if (msgRequiresOp(MsgId, STI)) {
2407 if (MsgId == ID_GS_PreGFX11 && OpId == OP_GS_NOP)
2408 return false;
2409
2410 return !getMsgOpName(MsgId, Encoding: OpId, STI).empty();
2411 }
2412
2413 return OpId == OP_NONE_;
2414}
2415
2416bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
2417 const MCSubtargetInfo &STI, bool Strict) {
2418 assert(isValidMsgOp(MsgId, OpId, STI, Strict));
2419
2420 if (!Strict)
2421 return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(x: StreamId);
2422
2423 if (!isGFX11Plus(STI)) {
2424 switch (MsgId) {
2425 case ID_GS_PreGFX11:
2426 return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
2427 case ID_GS_DONE_PreGFX11:
2428 return (OpId == OP_GS_NOP)
2429 ? (StreamId == STREAM_ID_NONE_)
2430 : (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
2431 }
2432 }
2433 return StreamId == STREAM_ID_NONE_;
2434}
2435
2436bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
2437 return MsgId == ID_SYSMSG ||
2438 (!isGFX11Plus(STI) &&
2439 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
2440}
2441
2442bool msgSupportsStream(int64_t MsgId, int64_t OpId,
2443 const MCSubtargetInfo &STI) {
2444 return !isGFX11Plus(STI) &&
2445 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
2446 OpId != OP_GS_NOP;
2447}
2448
2449void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
2450 uint16_t &StreamId, const MCSubtargetInfo &STI) {
2451 MsgId = Val & getMsgIdMask(STI);
2452 if (isGFX11Plus(STI)) {
2453 OpId = 0;
2454 StreamId = 0;
2455 } else {
2456 OpId = (Val & OP_MASK_) >> OP_SHIFT_;
2457 StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
2458 }
2459}
2460
2461uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId) {
2462 return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
2463}
2464
2465} // namespace SendMsg
2466
2467//===----------------------------------------------------------------------===//
2468//
2469//===----------------------------------------------------------------------===//
2470
2471unsigned getInitialPSInputAddr(const Function &F) {
2472 return F.getFnAttributeAsParsedInteger(Kind: "InitialPSInputAddr", Default: 0);
2473}
2474
2475bool getHasColorExport(const Function &F) {
2476 // As a safe default always respond as if PS has color exports.
2477 return F.getFnAttributeAsParsedInteger(
2478 Kind: "amdgpu-color-export",
2479 Default: F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
2480}
2481
2482bool getHasDepthExport(const Function &F) {
2483 return F.getFnAttributeAsParsedInteger(Kind: "amdgpu-depth-export", Default: 0) != 0;
2484}
2485
2486unsigned getDynamicVGPRBlockSize(const Function &F) {
2487 unsigned BlockSize =
2488 F.getFnAttributeAsParsedInteger(Kind: "amdgpu-dynamic-vgpr-block-size", Default: 0);
2489
2490 if (BlockSize == 16 || BlockSize == 32)
2491 return BlockSize;
2492
2493 return 0;
2494}
2495
2496bool hasXNACK(const MCSubtargetInfo &STI) {
2497 return STI.hasFeature(Feature: AMDGPU::FeatureXNACK);
2498}
2499
2500bool hasSRAMECC(const MCSubtargetInfo &STI) {
2501 return STI.hasFeature(Feature: AMDGPU::FeatureSRAMECC);
2502}
2503
2504bool hasMIMG_R128(const MCSubtargetInfo &STI) {
2505 return STI.hasFeature(Feature: AMDGPU::FeatureMIMG_R128) &&
2506 !STI.hasFeature(Feature: AMDGPU::FeatureR128A16);
2507}
2508
2509bool hasA16(const MCSubtargetInfo &STI) {
2510 return STI.hasFeature(Feature: AMDGPU::FeatureA16);
2511}
2512
2513bool hasG16(const MCSubtargetInfo &STI) {
2514 return STI.hasFeature(Feature: AMDGPU::FeatureG16);
2515}
2516
2517bool hasPackedD16(const MCSubtargetInfo &STI) {
2518 return !STI.hasFeature(Feature: AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) &&
2519 !isSI(STI);
2520}
2521
2522bool hasGDS(const MCSubtargetInfo &STI) {
2523 return STI.hasFeature(Feature: AMDGPU::FeatureGDS);
2524}
2525
2526unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) {
2527 auto Version = getIsaVersion(GPU: STI.getCPU());
2528 if (Version.Major == 10)
2529 return Version.Minor >= 3 ? 13 : 5;
2530 if (Version.Major == 11)
2531 return 5;
2532 if (Version.Major >= 12)
2533 return HasSampler ? 4 : 5;
2534 return 0;
2535}
2536
2537unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) {
2538 if (isGFX1250Plus(STI))
2539 return 32;
2540 return 16;
2541}
2542
2543bool isSI(const MCSubtargetInfo &STI) {
2544 return STI.hasFeature(Feature: AMDGPU::FeatureSouthernIslands);
2545}
2546
2547bool isCI(const MCSubtargetInfo &STI) {
2548 return STI.hasFeature(Feature: AMDGPU::FeatureSeaIslands);
2549}
2550
2551bool isVI(const MCSubtargetInfo &STI) {
2552 return STI.hasFeature(Feature: AMDGPU::FeatureVolcanicIslands);
2553}
2554
2555bool isGFX9(const MCSubtargetInfo &STI) {
2556 return STI.hasFeature(Feature: AMDGPU::FeatureGFX9);
2557}
2558
2559bool isGFX9_GFX10(const MCSubtargetInfo &STI) {
2560 return isGFX9(STI) || isGFX10(STI);
2561}
2562
2563bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI) {
2564 return isGFX9(STI) || isGFX10(STI) || isGFX11(STI);
2565}
2566
2567bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) {
2568 return isVI(STI) || isGFX9(STI) || isGFX10(STI);
2569}
2570
2571bool isGFX8Plus(const MCSubtargetInfo &STI) {
2572 return isVI(STI) || isGFX9Plus(STI);
2573}
2574
2575bool isGFX9Plus(const MCSubtargetInfo &STI) {
2576 return isGFX9(STI) || isGFX10Plus(STI);
2577}
2578
2579bool isNotGFX9Plus(const MCSubtargetInfo &STI) { return !isGFX9Plus(STI); }
2580
2581bool isGFX10(const MCSubtargetInfo &STI) {
2582 return STI.hasFeature(Feature: AMDGPU::FeatureGFX10);
2583}
2584
2585bool isGFX10_GFX11(const MCSubtargetInfo &STI) {
2586 return isGFX10(STI) || isGFX11(STI);
2587}
2588
2589bool isGFX10Plus(const MCSubtargetInfo &STI) {
2590 return isGFX10(STI) || isGFX11Plus(STI);
2591}
2592
2593bool isGFX11(const MCSubtargetInfo &STI) {
2594 return STI.hasFeature(Feature: AMDGPU::FeatureGFX11);
2595}
2596
2597bool isGFX11Plus(const MCSubtargetInfo &STI) {
2598 return isGFX11(STI) || isGFX12Plus(STI);
2599}
2600
2601bool isGFX12(const MCSubtargetInfo &STI) {
2602 return STI.getFeatureBits()[AMDGPU::FeatureGFX12];
2603}
2604
2605bool isGFX12Plus(const MCSubtargetInfo &STI) {
2606 return isGFX12(STI) || isGFX13Plus(STI);
2607}
2608
2609bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); }
2610
2611bool isGFX1250(const MCSubtargetInfo &STI) {
2612 return STI.getFeatureBits()[AMDGPU::FeatureGFX1250Insts] && !isGFX13(STI);
2613}
2614
2615bool isGFX1250Plus(const MCSubtargetInfo &STI) {
2616 return STI.getFeatureBits()[AMDGPU::FeatureGFX1250Insts];
2617}
2618
2619bool isGFX13(const MCSubtargetInfo &STI) {
2620 return STI.getFeatureBits()[AMDGPU::FeatureGFX13];
2621}
2622
2623bool isGFX13Plus(const MCSubtargetInfo &STI) { return isGFX13(STI); }
2624
2625bool supportsWGP(const MCSubtargetInfo &STI) {
2626 if (isGFX1250(STI))
2627 return false;
2628 return isGFX10Plus(STI);
2629}
2630
2631bool isNotGFX11Plus(const MCSubtargetInfo &STI) { return !isGFX11Plus(STI); }
2632
2633bool isNotGFX10Plus(const MCSubtargetInfo &STI) {
2634 return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
2635}
2636
2637bool isGFX10Before1030(const MCSubtargetInfo &STI) {
2638 return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
2639}
2640
2641bool isGCN3Encoding(const MCSubtargetInfo &STI) {
2642 return STI.hasFeature(Feature: AMDGPU::FeatureGCN3Encoding);
2643}
2644
2645bool isGFX10_AEncoding(const MCSubtargetInfo &STI) {
2646 return STI.hasFeature(Feature: AMDGPU::FeatureGFX10_AEncoding);
2647}
2648
2649bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
2650 return STI.hasFeature(Feature: AMDGPU::FeatureGFX10_BEncoding);
2651}
2652
2653bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
2654 return STI.hasFeature(Feature: AMDGPU::FeatureGFX10_3Insts);
2655}
2656
2657bool isGFX10_3_GFX11(const MCSubtargetInfo &STI) {
2658 return isGFX10_BEncoding(STI) && !isGFX12Plus(STI);
2659}
2660
2661bool isGFX90A(const MCSubtargetInfo &STI) {
2662 return STI.hasFeature(Feature: AMDGPU::FeatureGFX90AInsts);
2663}
2664
2665bool isGFX940(const MCSubtargetInfo &STI) {
2666 return STI.hasFeature(Feature: AMDGPU::FeatureGFX940Insts);
2667}
2668
2669bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
2670 return STI.hasFeature(Feature: AMDGPU::FeatureArchitectedFlatScratch);
2671}
2672
2673bool hasMAIInsts(const MCSubtargetInfo &STI) {
2674 return STI.hasFeature(Feature: AMDGPU::FeatureMAIInsts);
2675}
2676
2677bool hasVOPD(const MCSubtargetInfo &STI) {
2678 return STI.hasFeature(Feature: AMDGPU::FeatureVOPDInsts);
2679}
2680
2681bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI) {
2682 return STI.hasFeature(Feature: AMDGPU::FeatureDPPSrc1SGPR);
2683}
2684
2685unsigned hasKernargPreload(const MCSubtargetInfo &STI) {
2686 return STI.hasFeature(Feature: AMDGPU::FeatureKernargPreload);
2687}
2688
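// On subtargets with GFX90A instructions, AGPRs and VGPRs are allocated from
// a unified register file, so the VGPR count is rounded up to a multiple of 4
// before the AGPR count is added; otherwise the effective total is simply the
// larger of the two counts.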
2689int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
2690 int32_t ArgNumVGPR) {
2691 if (has90AInsts && ArgNumAGPR)
2692 return alignTo(Value: ArgNumVGPR, Align: 4) + ArgNumAGPR;
2693 return std::max(a: ArgNumVGPR, b: ArgNumAGPR);
2694}
2695
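// Returns true if Reg is a scalar register: its lowest 32-bit sub-register
// (or Reg itself when it has no sub-registers) must belong to SReg_32, and
// SCC is also accepted.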
2696bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI) {
2697 const MCRegisterClass SGPRClass = TRI->getRegClass(i: AMDGPU::SReg_32RegClassID);
2698 const MCRegister FirstSubReg = TRI->getSubReg(Reg, Idx: AMDGPU::sub0);
2699 return SGPRClass.contains(Reg: FirstSubReg != 0 ? FirstSubReg : Reg) ||
2700 Reg == AMDGPU::SCC;
2701}
2702
2703bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI) {
2704 return MRI.getEncodingValue(Reg) & AMDGPU::HWEncoding::IS_HI16;
2705}
2706
2707#define MAP_REG2REG \
2708 using namespace AMDGPU; \
2709 switch (Reg.id()) { \
2710 default: \
2711 return Reg; \
2712 CASE_CI_VI(FLAT_SCR) \
2713 CASE_CI_VI(FLAT_SCR_LO) \
2714 CASE_CI_VI(FLAT_SCR_HI) \
2715 CASE_VI_GFX9PLUS(TTMP0) \
2716 CASE_VI_GFX9PLUS(TTMP1) \
2717 CASE_VI_GFX9PLUS(TTMP2) \
2718 CASE_VI_GFX9PLUS(TTMP3) \
2719 CASE_VI_GFX9PLUS(TTMP4) \
2720 CASE_VI_GFX9PLUS(TTMP5) \
2721 CASE_VI_GFX9PLUS(TTMP6) \
2722 CASE_VI_GFX9PLUS(TTMP7) \
2723 CASE_VI_GFX9PLUS(TTMP8) \
2724 CASE_VI_GFX9PLUS(TTMP9) \
2725 CASE_VI_GFX9PLUS(TTMP10) \
2726 CASE_VI_GFX9PLUS(TTMP11) \
2727 CASE_VI_GFX9PLUS(TTMP12) \
2728 CASE_VI_GFX9PLUS(TTMP13) \
2729 CASE_VI_GFX9PLUS(TTMP14) \
2730 CASE_VI_GFX9PLUS(TTMP15) \
2731 CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
2732 CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
2733 CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
2734 CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
2735 CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
2736 CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
2737 CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
2738 CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
2739 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
2740 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
2741 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
2742 CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
2743 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
2744 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
2745 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2746 CASE_VI_GFX9PLUS( \
2747 TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2748 CASE_GFXPRE11_GFX11PLUS(M0) \
2749 CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
2750 CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
2751 }
2752
2753#define CASE_CI_VI(node) \
2754 assert(!isSI(STI)); \
2755 case node: \
2756 return isCI(STI) ? node##_ci : node##_vi;
2757
2758#define CASE_VI_GFX9PLUS(node) \
2759 case node: \
2760 return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;
2761
2762#define CASE_GFXPRE11_GFX11PLUS(node) \
2763 case node: \
2764 return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;
2765
2766#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
2767 case node: \
2768 return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
2769
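// Maps a generic pseudo register onto the subtarget-specific MC register that
// is actually encoded, e.g. FLAT_SCR to FLAT_SCR_ci or FLAT_SCR_vi and TTMPn
// to the _vi or _gfx9plus variant. r600 registers are returned unchanged.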
2770MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI) {
2771 if (STI.getTargetTriple().getArch() == Triple::r600)
2772 return Reg;
2773 MAP_REG2REG
2774}
2775
2776#undef CASE_CI_VI
2777#undef CASE_VI_GFX9PLUS
2778#undef CASE_GFXPRE11_GFX11PLUS
2779#undef CASE_GFXPRE11_GFX11PLUS_TO
2780
2781#define CASE_CI_VI(node) \
2782 case node##_ci: \
2783 case node##_vi: \
2784 return node;
2785#define CASE_VI_GFX9PLUS(node) \
2786 case node##_vi: \
2787 case node##_gfx9plus: \
2788 return node;
2789#define CASE_GFXPRE11_GFX11PLUS(node) \
2790 case node##_gfx11plus: \
2791 case node##_gfxpre11: \
2792 return node;
2793#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
2794
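// Inverse of getMCReg: folds the subtarget-specific _ci/_vi/_gfx9plus/
// _gfxpre11/_gfx11plus variants back to the generic pseudo register.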
2795MCRegister mc2PseudoReg(MCRegister Reg) { MAP_REG2REG }
2796
2797bool isInlineValue(MCRegister Reg) {
2798 switch (Reg.id()) {
2799 case AMDGPU::SRC_SHARED_BASE_LO:
2800 case AMDGPU::SRC_SHARED_BASE:
2801 case AMDGPU::SRC_SHARED_LIMIT_LO:
2802 case AMDGPU::SRC_SHARED_LIMIT:
2803 case AMDGPU::SRC_PRIVATE_BASE_LO:
2804 case AMDGPU::SRC_PRIVATE_BASE:
2805 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2806 case AMDGPU::SRC_PRIVATE_LIMIT:
2807 case AMDGPU::SRC_FLAT_SCRATCH_BASE_LO:
2808 case AMDGPU::SRC_FLAT_SCRATCH_BASE_HI:
2809 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2810 return true;
2811 case AMDGPU::SRC_VCCZ:
2812 case AMDGPU::SRC_EXECZ:
2813 case AMDGPU::SRC_SCC:
2814 return true;
2815 case AMDGPU::SGPR_NULL:
2816 return true;
2817 default:
2818 return false;
2819 }
2820}
2821
2822#undef CASE_CI_VI
2823#undef CASE_VI_GFX9PLUS
2824#undef CASE_GFXPRE11_GFX11PLUS
2825#undef CASE_GFXPRE11_GFX11PLUS_TO
2826#undef MAP_REG2REG
2827
2828bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2829 assert(OpNo < Desc.NumOperands);
2830 unsigned OpType = Desc.operands()[OpNo].OperandType;
2831 return OpType >= AMDGPU::OPERAND_KIMM_FIRST &&
2832 OpType <= AMDGPU::OPERAND_KIMM_LAST;
2833}
2834
2835bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2836 assert(OpNo < Desc.NumOperands);
2837 unsigned OpType = Desc.operands()[OpNo].OperandType;
2838 switch (OpType) {
2839 case AMDGPU::OPERAND_REG_IMM_FP32:
2840 case AMDGPU::OPERAND_REG_IMM_FP64:
2841 case AMDGPU::OPERAND_REG_IMM_FP16:
2842 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2843 case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
2844 case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
2845 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2846 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2847 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2848 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2849 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2850 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2851 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2852 return true;
2853 default:
2854 return false;
2855 }
2856}
2857
2858bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2859 assert(OpNo < Desc.NumOperands);
2860 unsigned OpType = Desc.operands()[OpNo].OperandType;
2861 return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
2862 OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST) ||
2863 (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
2864 OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST);
2865}
2866
2867// Avoid using MCRegisterClass::getSize, since that function will go away
2868// (move from MC* level to Target* level). Return size in bits.
2869unsigned getRegBitWidth(unsigned RCID) {
2870 switch (RCID) {
2871 case AMDGPU::VGPR_16RegClassID:
2872 case AMDGPU::VGPR_16_Lo128RegClassID:
2873 case AMDGPU::SGPR_LO16RegClassID:
2874 case AMDGPU::AGPR_LO16RegClassID:
2875 return 16;
2876 case AMDGPU::SGPR_32RegClassID:
2877 case AMDGPU::VGPR_32RegClassID:
2878 case AMDGPU::VGPR_32_Lo256RegClassID:
2879 case AMDGPU::VRegOrLds_32RegClassID:
2880 case AMDGPU::AGPR_32RegClassID:
2881 case AMDGPU::VS_32RegClassID:
2882 case AMDGPU::AV_32RegClassID:
2883 case AMDGPU::SReg_32RegClassID:
2884 case AMDGPU::SReg_32_XM0RegClassID:
2885 case AMDGPU::SRegOrLds_32RegClassID:
2886 return 32;
2887 case AMDGPU::SGPR_64RegClassID:
2888 case AMDGPU::VS_64RegClassID:
2889 case AMDGPU::SReg_64RegClassID:
2890 case AMDGPU::VReg_64RegClassID:
2891 case AMDGPU::AReg_64RegClassID:
2892 case AMDGPU::SReg_64_XEXECRegClassID:
2893 case AMDGPU::VReg_64_Align2RegClassID:
2894 case AMDGPU::AReg_64_Align2RegClassID:
2895 case AMDGPU::AV_64RegClassID:
2896 case AMDGPU::AV_64_Align2RegClassID:
2897 case AMDGPU::VReg_64_Lo256_Align2RegClassID:
2898 case AMDGPU::VS_64_Lo256RegClassID:
2899 return 64;
2900 case AMDGPU::SGPR_96RegClassID:
2901 case AMDGPU::SReg_96RegClassID:
2902 case AMDGPU::VReg_96RegClassID:
2903 case AMDGPU::AReg_96RegClassID:
2904 case AMDGPU::VReg_96_Align2RegClassID:
2905 case AMDGPU::AReg_96_Align2RegClassID:
2906 case AMDGPU::AV_96RegClassID:
2907 case AMDGPU::AV_96_Align2RegClassID:
2908 case AMDGPU::VReg_96_Lo256_Align2RegClassID:
2909 return 96;
2910 case AMDGPU::SGPR_128RegClassID:
2911 case AMDGPU::SReg_128RegClassID:
2912 case AMDGPU::VReg_128RegClassID:
2913 case AMDGPU::AReg_128RegClassID:
2914 case AMDGPU::VReg_128_Align2RegClassID:
2915 case AMDGPU::AReg_128_Align2RegClassID:
2916 case AMDGPU::AV_128RegClassID:
2917 case AMDGPU::AV_128_Align2RegClassID:
2918 case AMDGPU::SReg_128_XNULLRegClassID:
2919 case AMDGPU::VReg_128_Lo256_Align2RegClassID:
2920 return 128;
2921 case AMDGPU::SGPR_160RegClassID:
2922 case AMDGPU::SReg_160RegClassID:
2923 case AMDGPU::VReg_160RegClassID:
2924 case AMDGPU::AReg_160RegClassID:
2925 case AMDGPU::VReg_160_Align2RegClassID:
2926 case AMDGPU::AReg_160_Align2RegClassID:
2927 case AMDGPU::AV_160RegClassID:
2928 case AMDGPU::AV_160_Align2RegClassID:
2929 case AMDGPU::VReg_160_Lo256_Align2RegClassID:
2930 return 160;
2931 case AMDGPU::SGPR_192RegClassID:
2932 case AMDGPU::SReg_192RegClassID:
2933 case AMDGPU::VReg_192RegClassID:
2934 case AMDGPU::AReg_192RegClassID:
2935 case AMDGPU::VReg_192_Align2RegClassID:
2936 case AMDGPU::AReg_192_Align2RegClassID:
2937 case AMDGPU::AV_192RegClassID:
2938 case AMDGPU::AV_192_Align2RegClassID:
2939 case AMDGPU::VReg_192_Lo256_Align2RegClassID:
2940 return 192;
2941 case AMDGPU::SGPR_224RegClassID:
2942 case AMDGPU::SReg_224RegClassID:
2943 case AMDGPU::VReg_224RegClassID:
2944 case AMDGPU::AReg_224RegClassID:
2945 case AMDGPU::VReg_224_Align2RegClassID:
2946 case AMDGPU::AReg_224_Align2RegClassID:
2947 case AMDGPU::AV_224RegClassID:
2948 case AMDGPU::AV_224_Align2RegClassID:
2949 case AMDGPU::VReg_224_Lo256_Align2RegClassID:
2950 return 224;
2951 case AMDGPU::SGPR_256RegClassID:
2952 case AMDGPU::SReg_256RegClassID:
2953 case AMDGPU::VReg_256RegClassID:
2954 case AMDGPU::AReg_256RegClassID:
2955 case AMDGPU::VReg_256_Align2RegClassID:
2956 case AMDGPU::AReg_256_Align2RegClassID:
2957 case AMDGPU::AV_256RegClassID:
2958 case AMDGPU::AV_256_Align2RegClassID:
2959 case AMDGPU::SReg_256_XNULLRegClassID:
2960 case AMDGPU::VReg_256_Lo256_Align2RegClassID:
2961 return 256;
2962 case AMDGPU::SGPR_288RegClassID:
2963 case AMDGPU::SReg_288RegClassID:
2964 case AMDGPU::VReg_288RegClassID:
2965 case AMDGPU::AReg_288RegClassID:
2966 case AMDGPU::VReg_288_Align2RegClassID:
2967 case AMDGPU::AReg_288_Align2RegClassID:
2968 case AMDGPU::AV_288RegClassID:
2969 case AMDGPU::AV_288_Align2RegClassID:
2970 case AMDGPU::VReg_288_Lo256_Align2RegClassID:
2971 return 288;
2972 case AMDGPU::SGPR_320RegClassID:
2973 case AMDGPU::SReg_320RegClassID:
2974 case AMDGPU::VReg_320RegClassID:
2975 case AMDGPU::AReg_320RegClassID:
2976 case AMDGPU::VReg_320_Align2RegClassID:
2977 case AMDGPU::AReg_320_Align2RegClassID:
2978 case AMDGPU::AV_320RegClassID:
2979 case AMDGPU::AV_320_Align2RegClassID:
2980 case AMDGPU::VReg_320_Lo256_Align2RegClassID:
2981 return 320;
2982 case AMDGPU::SGPR_352RegClassID:
2983 case AMDGPU::SReg_352RegClassID:
2984 case AMDGPU::VReg_352RegClassID:
2985 case AMDGPU::AReg_352RegClassID:
2986 case AMDGPU::VReg_352_Align2RegClassID:
2987 case AMDGPU::AReg_352_Align2RegClassID:
2988 case AMDGPU::AV_352RegClassID:
2989 case AMDGPU::AV_352_Align2RegClassID:
2990 case AMDGPU::VReg_352_Lo256_Align2RegClassID:
2991 return 352;
2992 case AMDGPU::SGPR_384RegClassID:
2993 case AMDGPU::SReg_384RegClassID:
2994 case AMDGPU::VReg_384RegClassID:
2995 case AMDGPU::AReg_384RegClassID:
2996 case AMDGPU::VReg_384_Align2RegClassID:
2997 case AMDGPU::AReg_384_Align2RegClassID:
2998 case AMDGPU::AV_384RegClassID:
2999 case AMDGPU::AV_384_Align2RegClassID:
3000 case AMDGPU::VReg_384_Lo256_Align2RegClassID:
3001 return 384;
3002 case AMDGPU::SGPR_512RegClassID:
3003 case AMDGPU::SReg_512RegClassID:
3004 case AMDGPU::VReg_512RegClassID:
3005 case AMDGPU::AReg_512RegClassID:
3006 case AMDGPU::VReg_512_Align2RegClassID:
3007 case AMDGPU::AReg_512_Align2RegClassID:
3008 case AMDGPU::AV_512RegClassID:
3009 case AMDGPU::AV_512_Align2RegClassID:
3010 case AMDGPU::VReg_512_Lo256_Align2RegClassID:
3011 return 512;
3012 case AMDGPU::SGPR_1024RegClassID:
3013 case AMDGPU::SReg_1024RegClassID:
3014 case AMDGPU::VReg_1024RegClassID:
3015 case AMDGPU::AReg_1024RegClassID:
3016 case AMDGPU::VReg_1024_Align2RegClassID:
3017 case AMDGPU::AReg_1024_Align2RegClassID:
3018 case AMDGPU::AV_1024RegClassID:
3019 case AMDGPU::AV_1024_Align2RegClassID:
3020 case AMDGPU::VReg_1024_Lo256_Align2RegClassID:
3021 return 1024;
3022 default:
3023 llvm_unreachable("Unexpected register class");
3024 }
3025}
3026
3027unsigned getRegBitWidth(const MCRegisterClass &RC) {
3028 return getRegBitWidth(RCID: RC.getID());
3029}
3030
3031bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
3032 if (isInlinableIntLiteral(Literal))
3033 return true;
3034
3035 uint64_t Val = static_cast<uint64_t>(Literal);
3036 return (Val == llvm::bit_cast<uint64_t>(from: 0.0)) ||
3037 (Val == llvm::bit_cast<uint64_t>(from: 1.0)) ||
3038 (Val == llvm::bit_cast<uint64_t>(from: -1.0)) ||
3039 (Val == llvm::bit_cast<uint64_t>(from: 0.5)) ||
3040 (Val == llvm::bit_cast<uint64_t>(from: -0.5)) ||
3041 (Val == llvm::bit_cast<uint64_t>(from: 2.0)) ||
3042 (Val == llvm::bit_cast<uint64_t>(from: -2.0)) ||
3043 (Val == llvm::bit_cast<uint64_t>(from: 4.0)) ||
3044 (Val == llvm::bit_cast<uint64_t>(from: -4.0)) ||
3045 (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
3046}
3047
3048bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
3049 if (isInlinableIntLiteral(Literal))
3050 return true;
3051
3052 // The actual type of the operand does not seem to matter as long
3053 // as the bits match one of the inline immediate values. For example:
3054 //
3055 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
3056 // so it is a legal inline immediate.
3057 //
3058 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
3059 // floating-point, so it is a legal inline immediate.
3060
3061 uint32_t Val = static_cast<uint32_t>(Literal);
3062 return (Val == llvm::bit_cast<uint32_t>(from: 0.0f)) ||
3063 (Val == llvm::bit_cast<uint32_t>(from: 1.0f)) ||
3064 (Val == llvm::bit_cast<uint32_t>(from: -1.0f)) ||
3065 (Val == llvm::bit_cast<uint32_t>(from: 0.5f)) ||
3066 (Val == llvm::bit_cast<uint32_t>(from: -0.5f)) ||
3067 (Val == llvm::bit_cast<uint32_t>(from: 2.0f)) ||
3068 (Val == llvm::bit_cast<uint32_t>(from: -2.0f)) ||
3069 (Val == llvm::bit_cast<uint32_t>(from: 4.0f)) ||
3070 (Val == llvm::bit_cast<uint32_t>(from: -4.0f)) ||
3071 (Val == 0x3e22f983 && HasInv2Pi);
3072}
3073
3074bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
3075 if (!HasInv2Pi)
3076 return false;
3077 if (isInlinableIntLiteral(Literal))
3078 return true;
3079 uint16_t Val = static_cast<uint16_t>(Literal);
3080 return Val == 0x3F00 || // 0.5
3081 Val == 0xBF00 || // -0.5
3082 Val == 0x3F80 || // 1.0
3083 Val == 0xBF80 || // -1.0
3084 Val == 0x4000 || // 2.0
3085 Val == 0xC000 || // -2.0
3086 Val == 0x4080 || // 4.0
3087 Val == 0xC080 || // -4.0
3088 Val == 0x3E22; // 1.0 / (2.0 * pi)
3089}
3090
3091bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi) {
3092 return isInlinableLiteral32(Literal, HasInv2Pi);
3093}
3094
3095bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
3096 if (!HasInv2Pi)
3097 return false;
3098 if (isInlinableIntLiteral(Literal))
3099 return true;
3100 uint16_t Val = static_cast<uint16_t>(Literal);
3101 return Val == 0x3C00 || // 1.0
3102 Val == 0xBC00 || // -1.0
3103 Val == 0x3800 || // 0.5
3104 Val == 0xB800 || // -0.5
3105 Val == 0x4000 || // 2.0
3106 Val == 0xC000 || // -2.0
3107 Val == 0x4400 || // 4.0
3108 Val == 0xC400 || // -4.0
3109 Val == 0x3118; // 1/2pi
3110}
3111
3112std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) {
3113 // Unfortunately, the Instruction Set Architecture Reference Guide is
3114 // misleading about how the inline operands work for (packed) 16-bit
3115 // instructions. In a nutshell, the actual HW behavior is:
3116 //
3117 // - integer encodings (-16 .. 64) are always produced as sign-extended
3118 // 32-bit values
3119 // - float encodings are produced as:
3120 // - for F16 instructions: corresponding half-precision float values in
3121 // the LSBs, 0 in the MSBs
3122 // - for UI16 instructions: corresponding single-precision float value
3123 int32_t Signed = static_cast<int32_t>(Literal);
3124 if (Signed >= 0 && Signed <= 64)
3125 return 128 + Signed;
3126
3127 if (Signed >= -16 && Signed <= -1)
3128 return 192 + std::abs(x: Signed);
3129
3130 if (IsFloat) {
3131 // clang-format off
3132 switch (Literal) {
3133 case 0x3800: return 240; // 0.5
3134 case 0xB800: return 241; // -0.5
3135 case 0x3C00: return 242; // 1.0
3136 case 0xBC00: return 243; // -1.0
3137 case 0x4000: return 244; // 2.0
3138 case 0xC000: return 245; // -2.0
3139 case 0x4400: return 246; // 4.0
3140 case 0xC400: return 247; // -4.0
3141 case 0x3118: return 248; // 1.0 / (2.0 * pi)
3142 default: break;
3143 }
3144 // clang-format on
3145 } else {
3146 // clang-format off
3147 switch (Literal) {
3148 case 0x3F000000: return 240; // 0.5
3149 case 0xBF000000: return 241; // -0.5
3150 case 0x3F800000: return 242; // 1.0
3151 case 0xBF800000: return 243; // -1.0
3152 case 0x40000000: return 244; // 2.0
3153 case 0xC0000000: return 245; // -2.0
3154 case 0x40800000: return 246; // 4.0
3155 case 0xC0800000: return 247; // -4.0
3156 case 0x3E22F983: return 248; // 1.0 / (2.0 * pi)
3157 default: break;
3158 }
3159 // clang-format on
3160 }
3161
3162 return {};
3163}
3164
3165// Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction
3166// or nullopt.
3167std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) {
3168 return getInlineEncodingV216(IsFloat: false, Literal);
3169}
3170
3171// Encoding of the literal as an inline constant for a V_PK_*_BF16 instruction
3172// or nullopt.
3173std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal) {
3174 int32_t Signed = static_cast<int32_t>(Literal);
3175 if (Signed >= 0 && Signed <= 64)
3176 return 128 + Signed;
3177
3178 if (Signed >= -16 && Signed <= -1)
3179 return 192 + std::abs(x: Signed);
3180
3181 // clang-format off
3182 switch (Literal) {
3183 case 0x3F00: return 240; // 0.5
3184 case 0xBF00: return 241; // -0.5
3185 case 0x3F80: return 242; // 1.0
3186 case 0xBF80: return 243; // -1.0
3187 case 0x4000: return 244; // 2.0
3188 case 0xC000: return 245; // -2.0
3189 case 0x4080: return 246; // 4.0
3190 case 0xC080: return 247; // -4.0
3191 case 0x3E22: return 248; // 1.0 / (2.0 * pi)
3192 default: break;
3193 }
3194 // clang-format on
3195
3196 return std::nullopt;
3197}
3198
3199// Encoding of the literal as an inline constant for a V_PK_*_F16 instruction
3200// or nullopt.
3201std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) {
3202 return getInlineEncodingV216(IsFloat: true, Literal);
3203}
3204
3205// Encoding of the literal as an inline constant for V_PK_FMAC_F16 instruction
3206// or nullopt. This accounts for different inline constant behavior:
3207// - Pre-GFX11: fp16 inline constants have the value in low 16 bits, 0 in high
3208// - GFX11+: fp16 inline constants are duplicated into both halves
3209std::optional<unsigned> getPKFMACF16InlineEncoding(uint32_t Literal,
3210 bool IsGFX11Plus) {
3211 // Pre-GFX11 behavior: f16 in low bits, 0 in high bits
3212 if (!IsGFX11Plus)
3213 return getInlineEncodingV216(/*IsFloat=*/true, Literal);
3214
3215 // GFX11+ behavior: f16 duplicated in both halves
3216 // First, check for sign-extended integer inline constants (-16 to 64)
3217 // These work the same across all generations
3218 int32_t Signed = static_cast<int32_t>(Literal);
3219 if (Signed >= 0 && Signed <= 64)
3220 return 128 + Signed;
3221
3222 if (Signed >= -16 && Signed <= -1)
3223 return 192 + std::abs(x: Signed);
3224
3225 // For float inline constants on GFX11+, both halves must be equal
3226 uint16_t Lo = static_cast<uint16_t>(Literal);
3227 uint16_t Hi = static_cast<uint16_t>(Literal >> 16);
3228 if (Lo != Hi)
3229 return std::nullopt;
3230 return getInlineEncodingV216(/*IsFloat=*/true, Literal: Lo);
3231}
3232
3233// Whether the given literal can be inlined for a V_PK_* instruction.
3234bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) {
3235 switch (OpType) {
3236 case AMDGPU::OPERAND_REG_IMM_V2INT16:
3237 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
3238 return getInlineEncodingV216(IsFloat: false, Literal).has_value();
3239 case AMDGPU::OPERAND_REG_IMM_V2FP16:
3240 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
3241 return getInlineEncodingV216(IsFloat: true, Literal).has_value();
3242 case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
3243 llvm_unreachable("OPERAND_REG_IMM_V2FP16_SPLAT is not supported");
3244 case AMDGPU::OPERAND_REG_IMM_V2BF16:
3245 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
3246 return isInlinableLiteralV2BF16(Literal);
3247 case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
3248 return false;
3249 default:
3250 llvm_unreachable("bad packed operand type");
3251 }
3252}

// Whether the given literal can be inlined for a V_PK_*_IU16 instruction.
bool isInlinableLiteralV2I16(uint32_t Literal) {
  return getInlineEncodingV2I16(Literal).has_value();
}

// Whether the given literal can be inlined for a V_PK_*_BF16 instruction.
bool isInlinableLiteralV2BF16(uint32_t Literal) {
  return getInlineEncodingV2BF16(Literal).has_value();
}

// Whether the given literal can be inlined for a V_PK_*_F16 instruction.
bool isInlinableLiteralV2F16(uint32_t Literal) {
  return getInlineEncodingV2F16(Literal).has_value();
}

// Whether the given literal can be inlined for a V_PK_FMAC_F16 instruction.
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus) {
  return getPKFMACF16InlineEncoding(Literal, IsGFX11Plus).has_value();
}

bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
  if (IsFP64)
    return !Lo_32(Val);

  return isUInt<32>(Val) || isInt<32>(Val);
}
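
// For example, a 64-bit FP immediate is only valid as a 32-bit literal when
// its low 32 bits are zero: 1.0 (0x3FF0000000000000) qualifies, while
// 0.1 (0x3FB999999999999A) does not.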

int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit) {
  switch (Type) {
  default:
    break;
  case OPERAND_REG_IMM_BF16:
  case OPERAND_REG_IMM_FP16:
  case OPERAND_REG_INLINE_C_BF16:
  case OPERAND_REG_INLINE_C_FP16:
    return Imm & 0xffff;
  case OPERAND_INLINE_SPLIT_BARRIER_INT32:
  case OPERAND_REG_IMM_FP32:
  case OPERAND_REG_IMM_INT32:
  case OPERAND_REG_IMM_V2BF16:
  case OPERAND_REG_IMM_V2FP16:
  case OPERAND_REG_IMM_V2FP16_SPLAT:
  case OPERAND_REG_IMM_V2FP32:
  case OPERAND_REG_IMM_V2INT16:
  case OPERAND_REG_IMM_V2INT32:
  case OPERAND_REG_INLINE_AC_FP32:
  case OPERAND_REG_INLINE_AC_INT32:
  case OPERAND_REG_INLINE_C_FP32:
  case OPERAND_REG_INLINE_C_INT32:
    return Lo_32(Imm);
  case OPERAND_REG_IMM_FP64:
    return IsLit ? Imm : Hi_32(Imm);
  }
  return Imm;
}
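
// In other words, 16-bit FP/BF16 operands keep only the low 16 bits of the
// immediate, 32-bit and packed-16-bit operands keep the low 32 bits, and a
// 64-bit FP operand is emitted as its high 32 bits unless IsLit is set, in
// which case the full value is passed through (IsLit presumably corresponds
// to an explicit lit()-style modifier in the assembly source).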

bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_Gfx:
  case CallingConv::AMDGPU_CS_Chain:
  case CallingConv::AMDGPU_CS_ChainPreserve:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return A->hasAttribute(Attribute::InReg) ||
           A->hasAttribute(Attribute::ByVal);
  default:
    // TODO: treat i1 as divergent?
    return A->hasAttribute(Attribute::InReg);
  }
}
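
// For example, an i32 inreg parameter of an amdgpu_gfx function is treated as
// an SGPR argument here, while the same parameter without inreg (or byval)
// is assumed to live in a VGPR.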

bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = CB->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_Gfx:
  case CallingConv::AMDGPU_CS_Chain:
  case CallingConv::AMDGPU_CS_ChainPreserve:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
           CB->paramHasAttr(ArgNo, Attribute::ByVal);
  default:
    return CB->paramHasAttr(ArgNo, Attribute::InReg);
  }
}

static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
  return isGCN3Encoding(ST) || isGFX10Plus(ST);
}

bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset) {
  if (isGFX12Plus(ST))
    return isUInt<23>(EncodedOffset);

  return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
                               : isUInt<8>(EncodedOffset);
}

bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset, bool IsBuffer) {
  if (isGFX12Plus(ST)) {
    if (IsBuffer && EncodedOffset < 0)
      return false;
    return isInt<24>(EncodedOffset);
  }

  return !IsBuffer && hasSMRDSignedImmOffset(ST) && isInt<21>(EncodedOffset);
}

static bool isDwordAligned(uint64_t ByteOffset) {
  return (ByteOffset & 3) == 0;
}

uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
                                uint64_t ByteOffset) {
  if (hasSMEMByteOffset(ST))
    return ByteOffset;

  assert(isDwordAligned(ByteOffset));
  return ByteOffset >> 2;
}
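
// E.g. on a target without byte-granular SMEM offsets, a byte offset of 16 is
// encoded as 4 (dwords); targets with byte offsets keep the value unchanged.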

std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                            int64_t ByteOffset, bool IsBuffer,
                                            bool HasSOffset) {
  // For unbuffered smem loads, it is illegal for the immediate offset to be
  // negative if the resulting (Offset + (M0 or SOffset or zero)) is negative.
  // Handle the case where SOffset is not present.
  if (!IsBuffer && !HasSOffset && ByteOffset < 0 && hasSMRDSignedImmOffset(ST))
    return std::nullopt;

  if (isGFX12Plus(ST)) // 24 bit signed offsets
    return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
                                 : std::nullopt;

  // The signed version is always a byte offset.
  if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
    assert(hasSMEMByteOffset(ST));
    return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
                                 : std::nullopt;
  }

  if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
    return std::nullopt;

  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
             ? std::optional<int64_t>(EncodedOffset)
             : std::nullopt;
}
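
// Putting the checks above together: GFX12+ accepts a 24-bit signed byte
// offset; earlier targets with signed immediates accept a 20-bit signed byte
// offset for non-buffer loads; everything else is converted to the target's
// offset units (bytes or dwords, see convertSMRDOffsetUnits) and validated as
// an unsigned offset.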

std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                     int64_t ByteOffset) {
  if (!isCI(ST) || !isDwordAligned(ByteOffset))
    return std::nullopt;

  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
                                   : std::nullopt;
}

unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) {
  if (AMDGPU::isGFX10(ST))
    return 12;

  if (AMDGPU::isGFX12(ST))
    return 24;
  return 13;
}

namespace {

struct SourceOfDivergence {
  unsigned Intr;
};
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);

struct AlwaysUniform {
  unsigned Intr;
};
const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);

#define GET_SourcesOfDivergence_IMPL
#define GET_UniformIntrinsics_IMPL
#define GET_Gfx9BufferFormat_IMPL
#define GET_Gfx10BufferFormat_IMPL
#define GET_Gfx11PlusBufferFormat_IMPL

#include "AMDGPUGenSearchableTables.inc"

} // end anonymous namespace

bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
  return lookupSourceOfDivergence(IntrID);
}

bool isIntrinsicAlwaysUniform(unsigned IntrID) {
  return lookupAlwaysUniform(IntrID);
}

const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI) {
  return isGFX11Plus(STI)
             ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents,
                                            NumFormat)
         : isGFX10(STI)
             ? getGfx10BufferFormatInfo(BitsPerComp, NumComponents, NumFormat)
             : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat);
}

const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI) {
  return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
         : isGFX10(STI)   ? getGfx10BufferFormatInfo(Format)
                          : getGfx9BufferFormatInfo(Format);
}

const MCRegisterClass *getVGPRPhysRegClass(MCRegister Reg,
                                           const MCRegisterInfo &MRI) {
  const unsigned VGPRClasses[] = {
      AMDGPU::VGPR_16RegClassID,  AMDGPU::VGPR_32RegClassID,
      AMDGPU::VReg_64RegClassID,  AMDGPU::VReg_96RegClassID,
      AMDGPU::VReg_128RegClassID, AMDGPU::VReg_160RegClassID,
      AMDGPU::VReg_192RegClassID, AMDGPU::VReg_224RegClassID,
      AMDGPU::VReg_256RegClassID, AMDGPU::VReg_288RegClassID,
      AMDGPU::VReg_320RegClassID, AMDGPU::VReg_352RegClassID,
      AMDGPU::VReg_384RegClassID, AMDGPU::VReg_512RegClassID,
      AMDGPU::VReg_1024RegClassID};

  for (unsigned RCID : VGPRClasses) {
    const MCRegisterClass &RC = MRI.getRegClass(RCID);
    if (RC.contains(Reg))
      return &RC;
  }

  return nullptr;
}

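/// \returns the portion of \p Reg's hardware encoding index above bit 7 (zero
/// for VGPRs addressable with the classic 8-bit encoding).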
unsigned getVGPREncodingMSBs(MCRegister Reg, const MCRegisterInfo &MRI) {
  unsigned Enc = MRI.getEncodingValue(Reg);
  unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK;
  return Idx >> 8;
}

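/// \returns the VGPR from the same register class as \p Reg whose encoding
/// index combines \p Reg's low 8 index bits with \p MSBs in the bits above
/// them, or MCRegister() if \p Reg already uses the high index bits or is not
/// a VGPR.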
MCRegister getVGPRWithMSBs(MCRegister Reg, unsigned MSBs,
                           const MCRegisterInfo &MRI) {
  unsigned Enc = MRI.getEncodingValue(Reg);
  unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK;
  if (Idx >= 0x100)
    return MCRegister();

  const MCRegisterClass *RC = getVGPRPhysRegClass(Reg, MRI);
  if (!RC)
    return MCRegister();

  Idx |= MSBs << 8;
  if (RC->getID() == AMDGPU::VGPR_16RegClassID) {
    // This class has 2048 registers with interleaved lo16 and hi16.
    Idx *= 2;
    if (Enc & AMDGPU::HWEncoding::IS_HI16)
      ++Idx;
  }

  return RC->getRegister(Idx);
}

std::pair<const AMDGPU::OpName *, const AMDGPU::OpName *>
getVGPRLoweringOperandTables(const MCInstrDesc &Desc) {
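  // Each table below lists, per instruction format, the (up to three) VGPR
  // source-like operands followed by the VGPR destination/data operand;
  // OpName::NUM_OPERAND_NAMES marks a slot that has no corresponding operand
  // in that format.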
  static const AMDGPU::OpName VOPOps[4] = {
      AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2,
      AMDGPU::OpName::vdst};
  static const AMDGPU::OpName VDSOps[4] = {
      AMDGPU::OpName::addr, AMDGPU::OpName::data0, AMDGPU::OpName::data1,
      AMDGPU::OpName::vdst};
  static const AMDGPU::OpName FLATOps[4] = {
      AMDGPU::OpName::vaddr, AMDGPU::OpName::vdata,
      AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdst};
  static const AMDGPU::OpName BUFOps[4] = {
      AMDGPU::OpName::vaddr, AMDGPU::OpName::NUM_OPERAND_NAMES,
      AMDGPU::OpName::NUM_OPERAND_NAMES, AMDGPU::OpName::vdata};
  static const AMDGPU::OpName VIMGOps[4] = {
      AMDGPU::OpName::vaddr0, AMDGPU::OpName::vaddr1, AMDGPU::OpName::vaddr2,
      AMDGPU::OpName::vdata};

  // For VOPD instructions, the MSB of a Y-component operand's VGPR address
  // must match that of the corresponding X-component operand; otherwise the
  // two components cannot be combined into a VOPD instruction.
  static const AMDGPU::OpName VOPDOpsX[4] = {
      AMDGPU::OpName::src0X, AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vsrc2X,
      AMDGPU::OpName::vdstX};
  static const AMDGPU::OpName VOPDOpsY[4] = {
      AMDGPU::OpName::src0Y, AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vsrc2Y,
      AMDGPU::OpName::vdstY};

  // VOP2 MADMK instructions use the src0, imm, src1 operand order.
  static const AMDGPU::OpName VOP2MADMKOps[4] = {
      AMDGPU::OpName::src0, AMDGPU::OpName::NUM_OPERAND_NAMES,
      AMDGPU::OpName::src1, AMDGPU::OpName::vdst};
  static const AMDGPU::OpName VOPDFMAMKOpsX[4] = {
      AMDGPU::OpName::src0X, AMDGPU::OpName::NUM_OPERAND_NAMES,
      AMDGPU::OpName::vsrc1X, AMDGPU::OpName::vdstX};
  static const AMDGPU::OpName VOPDFMAMKOpsY[4] = {
      AMDGPU::OpName::src0Y, AMDGPU::OpName::NUM_OPERAND_NAMES,
      AMDGPU::OpName::vsrc1Y, AMDGPU::OpName::vdstY};

  unsigned TSFlags = Desc.TSFlags;

  if (TSFlags &
      (SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | SIInstrFlags::VOP3 |
       SIInstrFlags::VOP3P | SIInstrFlags::VOPC | SIInstrFlags::DPP)) {
    switch (Desc.getOpcode()) {
    // LD_SCALE operands ignore MSB.
    case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32:
    case AMDGPU::V_WMMA_LD_SCALE_PAIRED_B32_gfx1250:
    case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64:
    case AMDGPU::V_WMMA_LD_SCALE16_PAIRED_B64_gfx1250:
      return {};
    case AMDGPU::V_FMAMK_F16:
    case AMDGPU::V_FMAMK_F16_t16:
    case AMDGPU::V_FMAMK_F16_t16_gfx12:
    case AMDGPU::V_FMAMK_F16_fake16:
    case AMDGPU::V_FMAMK_F16_fake16_gfx12:
    case AMDGPU::V_FMAMK_F32:
    case AMDGPU::V_FMAMK_F32_gfx12:
    case AMDGPU::V_FMAMK_F64:
    case AMDGPU::V_FMAMK_F64_gfx1250:
      return {VOP2MADMKOps, nullptr};
    default:
      break;
    }
    return {VOPOps, nullptr};
  }

  if (TSFlags & SIInstrFlags::DS)
    return {VDSOps, nullptr};

  if (TSFlags & SIInstrFlags::FLAT)
    return {FLATOps, nullptr};

  if (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))
    return {BUFOps, nullptr};

  if (TSFlags & SIInstrFlags::VIMAGE)
    return {VIMGOps, nullptr};

  if (AMDGPU::isVOPD(Desc.getOpcode())) {
    auto [OpX, OpY] = getVOPDComponents(Desc.getOpcode());
    return {(OpX == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsX : VOPDOpsX,
            (OpY == AMDGPU::V_FMAMK_F32) ? VOPDFMAMKOpsY : VOPDOpsY};
  }

  assert(!(TSFlags & SIInstrFlags::MIMG));

  if (TSFlags & (SIInstrFlags::VSAMPLE | SIInstrFlags::EXP))
    llvm_unreachable("Sample and export VGPR lowering is not implemented and"
                     " these instructions are not expected on gfx1250");

  return {};
}

bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode) {
  uint64_t TSFlags = MII.get(Opcode).TSFlags;

  if (TSFlags & SIInstrFlags::SMRD)
    return !getSMEMIsBuffer(Opcode);
  if (!(TSFlags & SIInstrFlags::FLAT))
    return false;

  // Only SV and SVS modes are supported.
  if (TSFlags & SIInstrFlags::FlatScratch)
    return hasNamedOperand(Opcode, OpName::vaddr);

  // Only GVS mode is supported.
  return hasNamedOperand(Opcode, OpName::vaddr) &&
         hasNamedOperand(Opcode, OpName::saddr);
}

bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc, const MCInstrInfo &MII,
                             const MCSubtargetInfo &ST) {
  for (auto OpName : {OpName::vdst, OpName::src0, OpName::src1, OpName::src2}) {
    int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
    if (Idx == -1)
      continue;

    const MCOperandInfo &OpInfo = OpDesc.operands()[Idx];
    int16_t RegClass = MII.getOpRegClassID(
        OpInfo, ST.getHwMode(MCSubtargetInfo::HwMode_RegInfo));
    if (RegClass == AMDGPU::VReg_64RegClassID ||
        RegClass == AMDGPU::VReg_64_Align2RegClassID)
      return true;
  }

  return false;
}

bool isDPALU_DPP32BitOpc(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::V_MUL_LO_U32_e64:
  case AMDGPU::V_MUL_LO_U32_e64_dpp:
  case AMDGPU::V_MUL_LO_U32_e64_dpp_gfx1250:
  case AMDGPU::V_MUL_HI_U32_e64:
  case AMDGPU::V_MUL_HI_U32_e64_dpp:
  case AMDGPU::V_MUL_HI_U32_e64_dpp_gfx1250:
  case AMDGPU::V_MUL_HI_I32_e64:
  case AMDGPU::V_MUL_HI_I32_e64_dpp:
  case AMDGPU::V_MUL_HI_I32_e64_dpp_gfx1250:
  case AMDGPU::V_MAD_U32_e64:
  case AMDGPU::V_MAD_U32_e64_dpp:
  case AMDGPU::V_MAD_U32_e64_dpp_gfx1250:
    return true;
  default:
    return false;
  }
}

bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII,
                 const MCSubtargetInfo &ST) {
  if (!ST.hasFeature(AMDGPU::FeatureDPALU_DPP))
    return false;

  if (isDPALU_DPP32BitOpc(OpDesc.getOpcode()))
    return ST.hasFeature(AMDGPU::FeatureGFX1250Insts);

  return hasAny64BitVGPROperands(OpDesc, MII, ST);
}

unsigned getLdsDwGranularity(const MCSubtargetInfo &ST) {
  if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize32768))
    return 64;
  if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize65536))
    return 128;
  if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize163840))
    return 320;
  if (ST.getFeatureBits().test(FeatureAddressableLocalMemorySize327680))
    return 512;
  return 64; // In sync with getAddressableLocalMemorySize
}
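
// The granularity is returned in dwords; e.g. 64 dwords correspond to a
// 256-byte granule and 512 dwords to a 2 KiB granule.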

bool isPackedFP32Inst(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::V_PK_ADD_F32:
  case AMDGPU::V_PK_ADD_F32_gfx12:
  case AMDGPU::V_PK_MUL_F32:
  case AMDGPU::V_PK_MUL_F32_gfx12:
  case AMDGPU::V_PK_FMA_F32:
  case AMDGPU::V_PK_FMA_F32_gfx12:
    return true;
  default:
    return false;
  }
}

const std::array<unsigned, 3> &ClusterDimsAttr::getDims() const {
  assert(isFixedDims() && "expect kind to be FixedDims");
  return Dims;
}

std::string ClusterDimsAttr::to_string() const {
  SmallString<10> Buffer;
  raw_svector_ostream OS(Buffer);

  switch (getKind()) {
  case Kind::Unknown:
    return "";
  case Kind::NoCluster: {
    OS << EncoNoCluster << ',' << EncoNoCluster << ',' << EncoNoCluster;
    return Buffer.c_str();
  }
  case Kind::VariableDims: {
    OS << EncoVariableDims << ',' << EncoVariableDims << ','
       << EncoVariableDims;
    return Buffer.c_str();
  }
  case Kind::FixedDims: {
    OS << Dims[0] << ',' << Dims[1] << ',' << Dims[2];
    return Buffer.c_str();
  }
  }
  llvm_unreachable("Unknown ClusterDimsAttr kind");
}
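
// For example, a FixedDims attribute of {2, 1, 1} prints as "2,1,1", while an
// Unknown attribute prints as the empty string.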

ClusterDimsAttr ClusterDimsAttr::get(const Function &F) {
  std::optional<SmallVector<unsigned>> Attr =
      getIntegerVecAttribute(F, "amdgpu-cluster-dims", /*Size=*/3);
  ClusterDimsAttr::Kind AttrKind = Kind::FixedDims;

  if (!Attr.has_value())
    AttrKind = Kind::Unknown;
  else if (all_of(*Attr, equal_to(EncoNoCluster)))
    AttrKind = Kind::NoCluster;
  else if (all_of(*Attr, equal_to(EncoVariableDims)))
    AttrKind = Kind::VariableDims;

  ClusterDimsAttr A(AttrKind);
  if (AttrKind == Kind::FixedDims)
    A.Dims = {(*Attr)[0], (*Attr)[1], (*Attr)[2]};

  return A;
}
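
// The kind is derived from the "amdgpu-cluster-dims" function attribute: a
// missing attribute means Unknown, all values equal to EncoNoCluster means
// NoCluster, all values equal to EncoVariableDims means VariableDims, and
// anything else is taken as fixed {x, y, z} dimensions (presumably written as
// a comma-separated string such as "2,1,1", matching the to_string format
// above).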

} // namespace AMDGPU

raw_ostream &operator<<(raw_ostream &OS,
                        const AMDGPU::IsaInfo::TargetIDSetting S) {
  switch (S) {
  case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported):
    OS << "Unsupported";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::Any):
    OS << "Any";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::Off):
    OS << "Off";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::On):
    OS << "On";
    break;
  }
  return OS;
}

} // namespace llvm
