//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

9#include "AMDGPUBaseInfo.h"
10#include "AMDGPU.h"
11#include "AMDGPUAsmUtils.h"
12#include "AMDKernelCodeT.h"
13#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
14#include "Utils/AMDKernelCodeTUtils.h"
15#include "llvm/ADT/StringExtras.h"
16#include "llvm/BinaryFormat/ELF.h"
17#include "llvm/IR/Attributes.h"
18#include "llvm/IR/Constants.h"
19#include "llvm/IR/Function.h"
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/IntrinsicsAMDGPU.h"
22#include "llvm/IR/IntrinsicsR600.h"
23#include "llvm/IR/LLVMContext.h"
24#include "llvm/MC/MCInstrInfo.h"
25#include "llvm/MC/MCRegisterInfo.h"
26#include "llvm/MC/MCSubtargetInfo.h"
27#include "llvm/Support/CommandLine.h"
28#include "llvm/TargetParser/TargetParser.h"
29#include <optional>
30
31#define GET_INSTRINFO_NAMED_OPS
32#define GET_INSTRMAP_INFO
33#include "AMDGPUGenInstrInfo.inc"
34
static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion(
    "amdhsa-code-object-version", llvm::cl::Hidden,
    llvm::cl::init(llvm::AMDGPU::AMDHSA_COV6),
    llvm::cl::desc("Set default AMDHSA Code Object Version (a module flag "
                   "or asm directive still takes priority if present)"));
40
41namespace {
42
43/// \returns Bit mask for given bit \p Shift and bit \p Width.
44unsigned getBitMask(unsigned Shift, unsigned Width) {
45 return ((1 << Width) - 1) << Shift;
46}
47
48/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
49///
50/// \returns Packed \p Dst.
51unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
52 unsigned Mask = getBitMask(Shift, Width);
53 return ((Src << Shift) & Mask) | (Dst & ~Mask);
54}
55
56/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
57///
58/// \returns Unpacked bits.
59unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
60 return (Src & getBitMask(Shift, Width)) >> Shift;
61}
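
// Worked example (illustrative, values chosen arbitrarily): with Shift = 4 and
// Width = 3, getBitMask(4, 3) == 0x70, so
//   packBits(/*Src=*/0b101, /*Dst=*/0, /*Shift=*/4, /*Width=*/3) == 0x50
//   unpackBits(0x50, /*Shift=*/4, /*Width=*/3)                    == 0b101
// i.e. packBits followed by unpackBits with the same Shift/Width round-trips.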
62
63/// \returns Vmcnt bit shift (lower bits).
64unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
65 return VersionMajor >= 11 ? 10 : 0;
66}
67
68/// \returns Vmcnt bit width (lower bits).
69unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
70 return VersionMajor >= 11 ? 6 : 4;
71}
72
73/// \returns Expcnt bit shift.
74unsigned getExpcntBitShift(unsigned VersionMajor) {
75 return VersionMajor >= 11 ? 0 : 4;
76}
77
78/// \returns Expcnt bit width.
79unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }
80
81/// \returns Lgkmcnt bit shift.
82unsigned getLgkmcntBitShift(unsigned VersionMajor) {
83 return VersionMajor >= 11 ? 4 : 8;
84}
85
86/// \returns Lgkmcnt bit width.
87unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
88 return VersionMajor >= 10 ? 6 : 4;
89}
90
91/// \returns Vmcnt bit shift (higher bits).
92unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }
93
94/// \returns Vmcnt bit width (higher bits).
95unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
96 return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
97}
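
// For reference, the legacy S_WAITCNT field layouts implied by the width/shift
// helpers above are:
//   pre-GFX9: Vmcnt [3:0],           Expcnt [6:4], Lgkmcnt [11:8]
//   GFX9:     Vmcnt [3:0] + [15:14], Expcnt [6:4], Lgkmcnt [11:8]
//   GFX10:    Vmcnt [3:0] + [15:14], Expcnt [6:4], Lgkmcnt [13:8]
//   GFX11:    Vmcnt [15:10],         Expcnt [2:0], Lgkmcnt [9:4]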
98
/// \returns Loadcnt bit width.
100unsigned getLoadcntBitWidth(unsigned VersionMajor) {
101 return VersionMajor >= 12 ? 6 : 0;
102}
103
104/// \returns Samplecnt bit width.
105unsigned getSamplecntBitWidth(unsigned VersionMajor) {
106 return VersionMajor >= 12 ? 6 : 0;
107}
108
109/// \returns Bvhcnt bit width.
110unsigned getBvhcntBitWidth(unsigned VersionMajor) {
111 return VersionMajor >= 12 ? 3 : 0;
112}
113
114/// \returns Dscnt bit width.
115unsigned getDscntBitWidth(unsigned VersionMajor) {
116 return VersionMajor >= 12 ? 6 : 0;
117}
118
119/// \returns Dscnt bit shift in combined S_WAIT instructions.
120unsigned getDscntBitShift(unsigned VersionMajor) { return 0; }
121
122/// \returns Storecnt or Vscnt bit width, depending on VersionMajor.
123unsigned getStorecntBitWidth(unsigned VersionMajor) {
124 return VersionMajor >= 10 ? 6 : 0;
125}
126
127/// \returns Kmcnt bit width.
128unsigned getKmcntBitWidth(unsigned VersionMajor) {
129 return VersionMajor >= 12 ? 5 : 0;
130}
131
132/// \returns Xcnt bit width.
133unsigned getXcntBitWidth(unsigned VersionMajor, unsigned VersionMinor) {
134 return VersionMajor == 12 && VersionMinor == 5 ? 6 : 0;
135}
136
137/// \returns shift for Loadcnt/Storecnt in combined S_WAIT instructions.
138unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
139 return VersionMajor >= 12 ? 8 : 0;
140}
141
/// \returns VaSdst bit width.
inline unsigned getVaSdstBitWidth() { return 3; }

/// \returns VaSdst bit shift.
inline unsigned getVaSdstBitShift() { return 9; }

/// \returns VmVsrc bit width.
inline unsigned getVmVsrcBitWidth() { return 3; }

/// \returns VmVsrc bit shift.
inline unsigned getVmVsrcBitShift() { return 2; }

/// \returns VaVdst bit width.
inline unsigned getVaVdstBitWidth() { return 4; }

/// \returns VaVdst bit shift.
inline unsigned getVaVdstBitShift() { return 12; }

/// \returns VaVcc bit width.
inline unsigned getVaVccBitWidth() { return 1; }

/// \returns VaVcc bit shift.
inline unsigned getVaVccBitShift() { return 1; }

/// \returns SaSdst bit width.
inline unsigned getSaSdstBitWidth() { return 1; }

/// \returns SaSdst bit shift.
inline unsigned getSaSdstBitShift() { return 0; }

/// \returns VaSsrc bit width.
inline unsigned getVaSsrcBitWidth() { return 1; }

/// \returns VaSsrc bit shift.
inline unsigned getVaSsrcBitShift() { return 8; }

/// \returns HoldCnt bit width.
inline unsigned getHoldCntWidth() { return 1; }

/// \returns HoldCnt bit shift.
inline unsigned getHoldCntBitShift() { return 7; }
183
184} // end anonymous namespace
185
186namespace llvm {
187
188namespace AMDGPU {
189
190/// \returns true if the target supports signed immediate offset for SMRD
191/// instructions.
192bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
193 return isGFX9Plus(STI: ST);
194}
195
196/// \returns True if \p STI is AMDHSA.
197bool isHsaAbi(const MCSubtargetInfo &STI) {
198 return STI.getTargetTriple().getOS() == Triple::AMDHSA;
199}
200
201unsigned getAMDHSACodeObjectVersion(const Module &M) {
202 if (auto *Ver = mdconst::extract_or_null<ConstantInt>(
203 MD: M.getModuleFlag(Key: "amdhsa_code_object_version"))) {
204 return (unsigned)Ver->getZExtValue() / 100;
205 }
206
207 return getDefaultAMDHSACodeObjectVersion();
208}
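
// For example, a module flag value of 500 (i.e. v5.0) maps to code object
// version 5; if the flag is absent, the command-line default above is used.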
209
210unsigned getDefaultAMDHSACodeObjectVersion() {
211 return DefaultAMDHSACodeObjectVersion;
212}
213
214unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion) {
215 switch (ABIVersion) {
216 case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
217 return 4;
218 case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
219 return 5;
220 case ELF::ELFABIVERSION_AMDGPU_HSA_V6:
221 return 6;
222 default:
223 return getDefaultAMDHSACodeObjectVersion();
224 }
225}
226
227uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) {
228 if (T.getOS() != Triple::AMDHSA)
229 return 0;
230
231 switch (CodeObjectVersion) {
232 case 4:
233 return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
234 case 5:
235 return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
236 case 6:
237 return ELF::ELFABIVERSION_AMDGPU_HSA_V6;
238 default:
239 report_fatal_error(reason: "Unsupported AMDHSA Code Object Version " +
240 Twine(CodeObjectVersion));
241 }
242}
243
244unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
245 switch (CodeObjectVersion) {
246 case AMDHSA_COV4:
247 return 48;
248 case AMDHSA_COV5:
249 case AMDHSA_COV6:
250 default:
251 return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
252 }
253}
254
255// FIXME: All such magic numbers about the ABI should be in a
256// central TD file.
257unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
258 switch (CodeObjectVersion) {
259 case AMDHSA_COV4:
260 return 24;
261 case AMDHSA_COV5:
262 case AMDHSA_COV6:
263 default:
264 return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
265 }
266}
267
268unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
269 switch (CodeObjectVersion) {
270 case AMDHSA_COV4:
271 return 32;
272 case AMDHSA_COV5:
273 case AMDHSA_COV6:
274 default:
275 return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET;
276 }
277}
278
279unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
280 switch (CodeObjectVersion) {
281 case AMDHSA_COV4:
282 return 40;
283 case AMDHSA_COV5:
284 case AMDHSA_COV6:
285 default:
286 return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET;
287 }
288}
289
290#define GET_MIMGBaseOpcodesTable_IMPL
291#define GET_MIMGDimInfoTable_IMPL
292#define GET_MIMGInfoTable_IMPL
293#define GET_MIMGLZMappingTable_IMPL
294#define GET_MIMGMIPMappingTable_IMPL
295#define GET_MIMGBiasMappingTable_IMPL
296#define GET_MIMGOffsetMappingTable_IMPL
297#define GET_MIMGG16MappingTable_IMPL
298#define GET_MAIInstInfoTable_IMPL
299#include "AMDGPUGenSearchableTables.inc"
300
301int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
302 unsigned VDataDwords, unsigned VAddrDwords) {
303 const MIMGInfo *Info =
304 getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding, VDataDwords, VAddrDwords);
305 return Info ? Info->Opcode : -1;
306}
307
308const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
309 const MIMGInfo *Info = getMIMGInfo(Opcode: Opc);
310 return Info ? getMIMGBaseOpcodeInfo(BaseOpcode: Info->BaseOpcode) : nullptr;
311}
312
313int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
314 const MIMGInfo *OrigInfo = getMIMGInfo(Opcode: Opc);
315 const MIMGInfo *NewInfo =
316 getMIMGOpcodeHelper(BaseOpcode: OrigInfo->BaseOpcode, MIMGEncoding: OrigInfo->MIMGEncoding,
317 VDataDwords: NewChannels, VAddrDwords: OrigInfo->VAddrDwords);
318 return NewInfo ? NewInfo->Opcode : -1;
319}
320
321unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
322 const MIMGDimInfo *Dim, bool IsA16,
323 bool IsG16Supported) {
324 unsigned AddrWords = BaseOpcode->NumExtraArgs;
325 unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
326 (BaseOpcode->LodOrClampOrMip ? 1 : 0);
327 if (IsA16)
328 AddrWords += divideCeil(Numerator: AddrComponents, Denominator: 2);
329 else
330 AddrWords += AddrComponents;
331
332 // Note: For subtargets that support A16 but not G16, enabling A16 also
333 // enables 16 bit gradients.
334 // For subtargets that support A16 (operand) and G16 (done with a different
335 // instruction encoding), they are independent.
336
337 if (BaseOpcode->Gradients) {
338 if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
      // There are two gradients per coordinate; we pack them separately.
      // For the 3D case we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv)
      // (-, dz/dv).
342 AddrWords += alignTo<2>(Value: Dim->NumGradients / 2);
343 else
344 AddrWords += Dim->NumGradients;
345 }
346 return AddrWords;
347}
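
// Illustrative example (hypothetical operand counts): a 3-coordinate dim with
// NumExtraArgs == 0, Coordinates == true, LodOrClampOrMip == false, no
// gradients, and IsA16 == true packs the 3 coordinates into
// divideCeil(3, 2) == 2 address dwords; without A16 the same request needs 3.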
348
349struct MUBUFInfo {
350 uint16_t Opcode;
351 uint16_t BaseOpcode;
352 uint8_t elements;
353 bool has_vaddr;
354 bool has_srsrc;
355 bool has_soffset;
356 bool IsBufferInv;
357 bool tfe;
358};
359
360struct MTBUFInfo {
361 uint16_t Opcode;
362 uint16_t BaseOpcode;
363 uint8_t elements;
364 bool has_vaddr;
365 bool has_srsrc;
366 bool has_soffset;
367};
368
369struct SMInfo {
370 uint16_t Opcode;
371 bool IsBuffer;
372};
373
374struct VOPInfo {
375 uint16_t Opcode;
376 bool IsSingle;
377};
378
379struct VOPC64DPPInfo {
380 uint16_t Opcode;
381};
382
383struct VOPCDPPAsmOnlyInfo {
384 uint16_t Opcode;
385};
386
387struct VOP3CDPPAsmOnlyInfo {
388 uint16_t Opcode;
389};
390
391struct VOPDComponentInfo {
392 uint16_t BaseVOP;
393 uint16_t VOPDOp;
394 bool CanBeVOPDX;
395};
396
397struct VOPDInfo {
398 uint16_t Opcode;
399 uint16_t OpX;
400 uint16_t OpY;
401 uint16_t Subtarget;
402};
403
404struct VOPTrue16Info {
405 uint16_t Opcode;
406 bool IsTrue16;
407};
408
409#define GET_FP4FP8DstByteSelTable_DECL
410#define GET_FP4FP8DstByteSelTable_IMPL
411
412struct DPMACCInstructionInfo {
413 uint16_t Opcode;
414 bool IsDPMACCInstruction;
415};
416
417struct FP4FP8DstByteSelInfo {
418 uint16_t Opcode;
419 bool HasFP8DstByteSel;
420 bool HasFP4DstByteSel;
421};
422
423#define GET_MTBUFInfoTable_DECL
424#define GET_MTBUFInfoTable_IMPL
425#define GET_MUBUFInfoTable_DECL
426#define GET_MUBUFInfoTable_IMPL
427#define GET_SMInfoTable_DECL
428#define GET_SMInfoTable_IMPL
429#define GET_VOP1InfoTable_DECL
430#define GET_VOP1InfoTable_IMPL
431#define GET_VOP2InfoTable_DECL
432#define GET_VOP2InfoTable_IMPL
433#define GET_VOP3InfoTable_DECL
434#define GET_VOP3InfoTable_IMPL
435#define GET_VOPC64DPPTable_DECL
436#define GET_VOPC64DPPTable_IMPL
437#define GET_VOPC64DPP8Table_DECL
438#define GET_VOPC64DPP8Table_IMPL
439#define GET_VOPCAsmOnlyInfoTable_DECL
440#define GET_VOPCAsmOnlyInfoTable_IMPL
441#define GET_VOP3CAsmOnlyInfoTable_DECL
442#define GET_VOP3CAsmOnlyInfoTable_IMPL
443#define GET_VOPDComponentTable_DECL
444#define GET_VOPDComponentTable_IMPL
445#define GET_VOPDPairs_DECL
446#define GET_VOPDPairs_IMPL
447#define GET_VOPTrue16Table_DECL
448#define GET_VOPTrue16Table_IMPL
449#define GET_True16D16Table_IMPL
450#define GET_WMMAOpcode2AddrMappingTable_DECL
451#define GET_WMMAOpcode2AddrMappingTable_IMPL
452#define GET_WMMAOpcode3AddrMappingTable_DECL
453#define GET_WMMAOpcode3AddrMappingTable_IMPL
454#define GET_getMFMA_F8F6F4_WithSize_DECL
455#define GET_getMFMA_F8F6F4_WithSize_IMPL
456#define GET_isMFMA_F8F6F4Table_IMPL
457#define GET_isCvtScaleF32_F32F16ToF8F4Table_IMPL
458
459#include "AMDGPUGenSearchableTables.inc"
460
461int getMTBUFBaseOpcode(unsigned Opc) {
462 const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opcode: Opc);
463 return Info ? Info->BaseOpcode : -1;
464}
465
466int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
467 const MTBUFInfo *Info =
468 getMTBUFInfoFromBaseOpcodeAndElements(BaseOpcode: BaseOpc, elements: Elements);
469 return Info ? Info->Opcode : -1;
470}
471
472int getMTBUFElements(unsigned Opc) {
473 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opcode: Opc);
474 return Info ? Info->elements : 0;
475}
476
477bool getMTBUFHasVAddr(unsigned Opc) {
478 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opcode: Opc);
479 return Info && Info->has_vaddr;
480}
481
482bool getMTBUFHasSrsrc(unsigned Opc) {
483 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opcode: Opc);
484 return Info && Info->has_srsrc;
485}
486
487bool getMTBUFHasSoffset(unsigned Opc) {
488 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opcode: Opc);
489 return Info && Info->has_soffset;
490}
491
492int getMUBUFBaseOpcode(unsigned Opc) {
493 const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opcode: Opc);
494 return Info ? Info->BaseOpcode : -1;
495}
496
497int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
498 const MUBUFInfo *Info =
499 getMUBUFInfoFromBaseOpcodeAndElements(BaseOpcode: BaseOpc, elements: Elements);
500 return Info ? Info->Opcode : -1;
501}
502
503int getMUBUFElements(unsigned Opc) {
504 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opcode: Opc);
505 return Info ? Info->elements : 0;
506}
507
508bool getMUBUFHasVAddr(unsigned Opc) {
509 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opcode: Opc);
510 return Info && Info->has_vaddr;
511}
512
513bool getMUBUFHasSrsrc(unsigned Opc) {
514 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opcode: Opc);
515 return Info && Info->has_srsrc;
516}
517
518bool getMUBUFHasSoffset(unsigned Opc) {
519 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opcode: Opc);
520 return Info && Info->has_soffset;
521}
522
523bool getMUBUFIsBufferInv(unsigned Opc) {
524 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opcode: Opc);
525 return Info && Info->IsBufferInv;
526}
527
528bool getMUBUFTfe(unsigned Opc) {
529 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opcode: Opc);
530 return Info && Info->tfe;
531}
532
533bool getSMEMIsBuffer(unsigned Opc) {
534 const SMInfo *Info = getSMEMOpcodeHelper(Opcode: Opc);
535 return Info && Info->IsBuffer;
536}
537
538bool getVOP1IsSingle(unsigned Opc) {
539 const VOPInfo *Info = getVOP1OpcodeHelper(Opcode: Opc);
540 return !Info || Info->IsSingle;
541}
542
543bool getVOP2IsSingle(unsigned Opc) {
544 const VOPInfo *Info = getVOP2OpcodeHelper(Opcode: Opc);
545 return !Info || Info->IsSingle;
546}
547
548bool getVOP3IsSingle(unsigned Opc) {
549 const VOPInfo *Info = getVOP3OpcodeHelper(Opcode: Opc);
550 return !Info || Info->IsSingle;
551}
552
553bool isVOPC64DPP(unsigned Opc) {
554 return isVOPC64DPPOpcodeHelper(Opcode: Opc) || isVOPC64DPP8OpcodeHelper(Opcode: Opc);
555}
556
557bool isVOPCAsmOnly(unsigned Opc) { return isVOPCAsmOnlyOpcodeHelper(Opcode: Opc); }
558
559bool getMAIIsDGEMM(unsigned Opc) {
560 const MAIInstInfo *Info = getMAIInstInfoHelper(Opcode: Opc);
561 return Info && Info->is_dgemm;
562}
563
564bool getMAIIsGFX940XDL(unsigned Opc) {
565 const MAIInstInfo *Info = getMAIInstInfoHelper(Opcode: Opc);
566 return Info && Info->is_gfx940_xdl;
567}
568
569uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal) {
570 switch (EncodingVal) {
571 case MFMAScaleFormats::FP6_E2M3:
572 case MFMAScaleFormats::FP6_E3M2:
573 return 6;
574 case MFMAScaleFormats::FP4_E2M1:
575 return 4;
576 case MFMAScaleFormats::FP8_E4M3:
577 case MFMAScaleFormats::FP8_E5M2:
578 default:
579 return 8;
580 }
581
582 llvm_unreachable("covered switch over mfma scale formats");
583}
584
585const MFMA_F8F6F4_Info *getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ,
586 unsigned BLGP,
587 unsigned F8F8Opcode) {
588 uint8_t SrcANumRegs = mfmaScaleF8F6F4FormatToNumRegs(EncodingVal: CBSZ);
589 uint8_t SrcBNumRegs = mfmaScaleF8F6F4FormatToNumRegs(EncodingVal: BLGP);
590 return getMFMA_F8F6F4_InstWithNumRegs(NumRegsSrcA: SrcANumRegs, NumRegsSrcB: SrcBNumRegs, F8F8Opcode);
591}
592
593unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) {
594 if (ST.hasFeature(Feature: AMDGPU::FeatureGFX12Insts))
595 return SIEncodingFamily::GFX12;
596 if (ST.hasFeature(Feature: AMDGPU::FeatureGFX11Insts))
597 return SIEncodingFamily::GFX11;
598 llvm_unreachable("Subtarget generation does not support VOPD!");
599}
600
601CanBeVOPD getCanBeVOPD(unsigned Opc) {
602 const VOPDComponentInfo *Info = getVOPDComponentHelper(BaseVOP: Opc);
  if (Info)
    return {Info->CanBeVOPDX, true};
  return {false, false};
606}
607
608unsigned getVOPDOpcode(unsigned Opc) {
609 const VOPDComponentInfo *Info = getVOPDComponentHelper(BaseVOP: Opc);
610 return Info ? Info->VOPDOp : ~0u;
611}
612
613bool isVOPD(unsigned Opc) {
614 return AMDGPU::hasNamedOperand(Opcode: Opc, NamedIdx: AMDGPU::OpName::src0X);
615}
616
617bool isMAC(unsigned Opc) {
618 return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
619 Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
620 Opc == AMDGPU::V_MAC_F32_e64_vi ||
621 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
622 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
623 Opc == AMDGPU::V_MAC_F16_e64_vi ||
624 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
625 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
626 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
627 Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
628 Opc == AMDGPU::V_FMAC_F32_e64_vi ||
629 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
630 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
631 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
632 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
633 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx11 ||
634 Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
635 Opc == AMDGPU::V_FMAC_F16_fake16_e64_gfx12 ||
636 Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
637 Opc == AMDGPU::V_DOT2C_F32_BF16_e64_vi ||
638 Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
639 Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
640 Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
641}
642
643bool isPermlane16(unsigned Opc) {
644 return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
645 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
646 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
647 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
648 Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
649 Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
650 Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
651 Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
652}
653
654bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) {
655 return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
656 Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
657 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
658 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
659 Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
660 Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
661 Opc == AMDGPU::V_CVT_PK_F32_BF8_fake16_e64_gfx12 ||
662 Opc == AMDGPU::V_CVT_PK_F32_FP8_fake16_e64_gfx12 ||
663 Opc == AMDGPU::V_CVT_PK_F32_BF8_t16_e64_gfx12 ||
664 Opc == AMDGPU::V_CVT_PK_F32_FP8_t16_e64_gfx12;
665}
666
667bool isGenericAtomic(unsigned Opc) {
668 return Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
669 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
670 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
671 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
672 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
673 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
674 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
675 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
676 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
677 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
678 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
679 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
680 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
681 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
682 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
683 Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
684 Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
685}
686
687bool isAsyncStore(unsigned Opc) {
  return false; // Placeholder until async stores are implemented.
689}
690
691bool isTensorStore(unsigned Opc) {
692 return Opc == TENSOR_STORE_FROM_LDS_gfx1250 ||
693 Opc == TENSOR_STORE_FROM_LDS_D2_gfx1250;
694}
695
696unsigned getTemporalHintType(const MCInstrDesc TID) {
697 if (TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))
698 return CPol::TH_TYPE_ATOMIC;
699 unsigned Opc = TID.getOpcode();
  // Async and tensor stores should use the TH_TYPE_STORE temporal hint.
701 if (TID.mayStore() &&
702 (isAsyncStore(Opc) || isTensorStore(Opc) || !TID.mayLoad()))
703 return CPol::TH_TYPE_STORE;
704
  // Default to TH_TYPE_LOAD when neither the MayStore nor the MayLoad flag is
  // present, which is the case for instructions like image_get_resinfo.
708 return CPol::TH_TYPE_LOAD;
709}
710
711bool isTrue16Inst(unsigned Opc) {
712 const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opcode: Opc);
713 return Info && Info->IsTrue16;
714}
715
716FPType getFPDstSelType(unsigned Opc) {
717 const FP4FP8DstByteSelInfo *Info = getFP4FP8DstByteSelHelper(Opcode: Opc);
718 if (!Info)
719 return FPType::None;
720 if (Info->HasFP8DstByteSel)
721 return FPType::FP8;
722 if (Info->HasFP4DstByteSel)
723 return FPType::FP4;
724
725 return FPType::None;
726}
727
728unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
729 const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opcode2Addr: Opc);
730 return Info ? Info->Opcode3Addr : ~0u;
731}
732
733unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
734 const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opcode3Addr: Opc);
735 return Info ? Info->Opcode2Addr : ~0u;
736}
737
738// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
739// header files, so we need to wrap it in a function that takes unsigned
740// instead.
741int getMCOpcode(uint16_t Opcode, unsigned Gen) {
742 return getMCOpcodeGen(Opcode, inSubtarget: static_cast<Subtarget>(Gen));
743}
744
745int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily) {
746 const VOPDInfo *Info =
747 getVOPDInfoFromComponentOpcodes(OpX, OpY, SubTgt: EncodingFamily);
748 return Info ? Info->Opcode : -1;
749}
750
751std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
752 const VOPDInfo *Info = getVOPDOpcodeHelper(Opcode: VOPDOpcode);
753 assert(Info);
754 const auto *OpX = getVOPDBaseFromComponent(VOPDOp: Info->OpX);
755 const auto *OpY = getVOPDBaseFromComponent(VOPDOp: Info->OpY);
756 assert(OpX && OpY);
757 return {OpX->BaseVOP, OpY->BaseVOP};
758}
759
760namespace VOPD {
761
762ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) {
763 assert(OpDesc.getNumDefs() == Component::DST_NUM);
764
765 assert(OpDesc.getOperandConstraint(Component::SRC0, MCOI::TIED_TO) == -1);
766 assert(OpDesc.getOperandConstraint(Component::SRC1, MCOI::TIED_TO) == -1);
767 auto TiedIdx = OpDesc.getOperandConstraint(OpNum: Component::SRC2, Constraint: MCOI::TIED_TO);
768 assert(TiedIdx == -1 || TiedIdx == Component::DST);
769 HasSrc2Acc = TiedIdx != -1;
770
771 SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs();
772 assert(SrcOperandsNum <= Component::MAX_SRC_NUM);
773
774 auto OperandsNum = OpDesc.getNumOperands();
775 unsigned CompOprIdx;
776 for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
777 if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
778 MandatoryLiteralIdx = CompOprIdx;
779 break;
780 }
781 }
782}
783
784unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
785 assert(CompOprIdx < Component::MAX_OPR_NUM);
786
787 if (CompOprIdx == Component::DST)
788 return getIndexOfDstInParsedOperands();
789
790 auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
791 if (CompSrcIdx < getCompParsedSrcOperandsNum())
792 return getIndexOfSrcInParsedOperands(CompSrcIdx);
793
794 // The specified operand does not exist.
795 return 0;
796}
797
798std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
799 std::function<unsigned(unsigned, unsigned)> GetRegIdx, bool SkipSrc) const {
800
801 auto OpXRegs = getRegIndices(ComponentIdx: ComponentIndex::X, GetRegIdx);
802 auto OpYRegs = getRegIndices(ComponentIdx: ComponentIndex::Y, GetRegIdx);
803
804 const unsigned CompOprNum =
805 SkipSrc ? Component::DST_NUM : Component::MAX_OPR_NUM;
806 unsigned CompOprIdx;
807 for (CompOprIdx = 0; CompOprIdx < CompOprNum; ++CompOprIdx) {
808 unsigned BanksMasks = VOPD_VGPR_BANK_MASKS[CompOprIdx];
809 if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] &&
810 ((OpXRegs[CompOprIdx] & BanksMasks) ==
811 (OpYRegs[CompOprIdx] & BanksMasks)))
812 return CompOprIdx;
813 }
814
815 return {};
816}
817
818// Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
819// by the specified component. If an operand is unused
820// or is not a VGPR, the corresponding value is 0.
821//
822// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
823// for the specified component and MC operand. The callback must return 0
824// if the operand is not a register or not a VGPR.
825InstInfo::RegIndices InstInfo::getRegIndices(
826 unsigned CompIdx,
827 std::function<unsigned(unsigned, unsigned)> GetRegIdx) const {
828 assert(CompIdx < COMPONENTS_NUM);
829
830 const auto &Comp = CompInfo[CompIdx];
831 InstInfo::RegIndices RegIndices;
832
833 RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());
834
835 for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
836 unsigned CompSrcIdx = CompOprIdx - DST_NUM;
837 RegIndices[CompOprIdx] =
838 Comp.hasRegSrcOperand(CompSrcIdx)
839 ? GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx))
840 : 0;
841 }
842 return RegIndices;
843}
844
845} // namespace VOPD
846
847VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) {
848 return VOPD::InstInfo(OpX, OpY);
849}
850
851VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
852 const MCInstrInfo *InstrInfo) {
853 auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
854 const auto &OpXDesc = InstrInfo->get(Opcode: OpX);
855 const auto &OpYDesc = InstrInfo->get(Opcode: OpY);
856 VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X);
857 VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo);
858 return VOPD::InstInfo(OpXInfo, OpYInfo);
859}
860
861namespace IsaInfo {
862
863AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
864 : STI(STI), XnackSetting(TargetIDSetting::Any),
865 SramEccSetting(TargetIDSetting::Any) {
866 if (!STI.getFeatureBits().test(I: FeatureSupportsXNACK))
867 XnackSetting = TargetIDSetting::Unsupported;
868 if (!STI.getFeatureBits().test(I: FeatureSupportsSRAMECC))
869 SramEccSetting = TargetIDSetting::Unsupported;
870}
871
872void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
  // Check if xnack or sramecc is explicitly enabled or disabled. In the
  // absence of these target features, we assume we must generate code that
  // can run in any environment.
876 SubtargetFeatures Features(FS);
877 std::optional<bool> XnackRequested;
878 std::optional<bool> SramEccRequested;
879
880 for (const std::string &Feature : Features.getFeatures()) {
881 if (Feature == "+xnack")
882 XnackRequested = true;
883 else if (Feature == "-xnack")
884 XnackRequested = false;
885 else if (Feature == "+sramecc")
886 SramEccRequested = true;
887 else if (Feature == "-sramecc")
888 SramEccRequested = false;
889 }
890
891 bool XnackSupported = isXnackSupported();
892 bool SramEccSupported = isSramEccSupported();
893
894 if (XnackRequested) {
895 if (XnackSupported) {
896 XnackSetting =
897 *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
898 } else {
      // If a specific xnack setting was requested and this GPU does not
      // support xnack, emit a warning. The setting remains "Unsupported".
901 if (*XnackRequested) {
902 errs() << "warning: xnack 'On' was requested for a processor that does "
903 "not support it!\n";
904 } else {
905 errs() << "warning: xnack 'Off' was requested for a processor that "
906 "does not support it!\n";
907 }
908 }
909 }
910
911 if (SramEccRequested) {
912 if (SramEccSupported) {
913 SramEccSetting =
914 *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
915 } else {
      // If a specific sramecc setting was requested and this GPU does not
      // support sramecc, emit a warning. The setting remains "Unsupported".
919 if (*SramEccRequested) {
920 errs() << "warning: sramecc 'On' was requested for a processor that "
921 "does not support it!\n";
922 } else {
923 errs() << "warning: sramecc 'Off' was requested for a processor that "
924 "does not support it!\n";
925 }
926 }
927 }
928}
929
930static TargetIDSetting
931getTargetIDSettingFromFeatureString(StringRef FeatureString) {
932 if (FeatureString.ends_with(Suffix: "-"))
933 return TargetIDSetting::Off;
934 if (FeatureString.ends_with(Suffix: "+"))
935 return TargetIDSetting::On;
936
937 llvm_unreachable("Malformed feature string");
938}
939
940void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
941 SmallVector<StringRef, 3> TargetIDSplit;
942 TargetID.split(A&: TargetIDSplit, Separator: ':');
943
944 for (const auto &FeatureString : TargetIDSplit) {
945 if (FeatureString.starts_with(Prefix: "xnack"))
946 XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
947 if (FeatureString.starts_with(Prefix: "sramecc"))
948 SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
949 }
950}
951
952std::string AMDGPUTargetID::toString() const {
953 std::string StringRep;
954 raw_string_ostream StreamRep(StringRep);
955
956 auto TargetTriple = STI.getTargetTriple();
957 auto Version = getIsaVersion(GPU: STI.getCPU());
958
959 StreamRep << TargetTriple.getArchName() << '-' << TargetTriple.getVendorName()
960 << '-' << TargetTriple.getOSName() << '-'
961 << TargetTriple.getEnvironmentName() << '-';
962
963 std::string Processor;
  // TODO: The else branch below exists because we used various alias names
  // for GPUs up until GFX9 (e.g. 'fiji' is the same as 'gfx803'). Remove it
  // once all aliases are removed from GCNProcessors.td.
967 if (Version.Major >= 9)
968 Processor = STI.getCPU().str();
969 else
970 Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
971 Twine(Version.Stepping))
972 .str();
973
974 std::string Features;
975 if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
976 // sramecc.
977 if (getSramEccSetting() == TargetIDSetting::Off)
978 Features += ":sramecc-";
979 else if (getSramEccSetting() == TargetIDSetting::On)
980 Features += ":sramecc+";
981 // xnack.
982 if (getXnackSetting() == TargetIDSetting::Off)
983 Features += ":xnack-";
984 else if (getXnackSetting() == TargetIDSetting::On)
985 Features += ":xnack+";
986 }
987
988 StreamRep << Processor << Features;
989
990 return StringRep;
991}
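
// Example output (illustrative) for an HSA gfx90a target with sramecc enabled
// and xnack disabled:
//   "amdgcn-amd-amdhsa--gfx90a:sramecc+:xnack-"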
992
993unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
994 if (STI->getFeatureBits().test(I: FeatureWavefrontSize16))
995 return 16;
996 if (STI->getFeatureBits().test(I: FeatureWavefrontSize32))
997 return 32;
998
999 return 64;
1000}
1001
1002unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
1003 unsigned BytesPerCU = getAddressableLocalMemorySize(STI);
1004
1005 // "Per CU" really means "per whatever functional block the waves of a
1006 // workgroup must share". So the effective local memory size is doubled in
1007 // WGP mode on gfx10.
1008 if (isGFX10Plus(STI: *STI) && !STI->getFeatureBits().test(I: FeatureCuMode))
1009 BytesPerCU *= 2;
1010
1011 return BytesPerCU;
1012}
1013
1014unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) {
1015 if (STI->getFeatureBits().test(I: FeatureAddressableLocalMemorySize32768))
1016 return 32768;
1017 if (STI->getFeatureBits().test(I: FeatureAddressableLocalMemorySize65536))
1018 return 65536;
1019 if (STI->getFeatureBits().test(I: FeatureAddressableLocalMemorySize163840))
1020 return 163840;
1021 return 0;
1022}
1023
1024unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
1025 // "Per CU" really means "per whatever functional block the waves of a
1026 // workgroup must share". For gfx10 in CU mode this is the CU, which contains
1027 // two SIMDs.
1028 if (isGFX10Plus(STI: *STI) && STI->getFeatureBits().test(I: FeatureCuMode))
1029 return 2;
1030 // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
1031 // two CUs, so a total of four SIMDs.
1032 return 4;
1033}
1034
1035unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
1036 unsigned FlatWorkGroupSize) {
1037 assert(FlatWorkGroupSize != 0);
1038 if (!STI->getTargetTriple().isAMDGCN())
1039 return 8;
1040 unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI);
1041 unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
1042 if (N == 1) {
1043 // Single-wave workgroups don't consume barrier resources.
1044 return MaxWaves;
1045 }
1046
1047 unsigned MaxBarriers = 16;
1048 if (isGFX10Plus(STI: *STI) && !STI->getFeatureBits().test(I: FeatureCuMode))
1049 MaxBarriers = 32;
1050
1051 return std::min(a: MaxWaves / N, b: MaxBarriers);
1052}
1053
1054unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) { return 1; }
1055
1056unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
1057 // FIXME: Need to take scratch memory into account.
1058 if (isGFX90A(STI: *STI))
1059 return 8;
1060 if (!isGFX10Plus(STI: *STI))
1061 return 10;
1062 return hasGFX10_3Insts(STI: *STI) ? 16 : 20;
1063}
1064
1065unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
1066 unsigned FlatWorkGroupSize) {
1067 return divideCeil(Numerator: getWavesPerWorkGroup(STI, FlatWorkGroupSize),
1068 Denominator: getEUsPerCU(STI));
1069}
1070
1071unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) { return 1; }
1072
1073unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
1074 // Some subtargets allow encoding 2048, but this isn't tested or supported.
1075 return 1024;
1076}
1077
1078unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
1079 unsigned FlatWorkGroupSize) {
1080 return divideCeil(Numerator: FlatWorkGroupSize, Denominator: getWavefrontSize(STI));
1081}
1082
1083unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
1084 IsaVersion Version = getIsaVersion(GPU: STI->getCPU());
1085 if (Version.Major >= 10)
1086 return getAddressableNumSGPRs(STI);
1087 if (Version.Major >= 8)
1088 return 16;
1089 return 8;
1090}
1091
1092unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) { return 8; }
1093
1094unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
1095 IsaVersion Version = getIsaVersion(GPU: STI->getCPU());
1096 if (Version.Major >= 8)
1097 return 800;
1098 return 512;
1099}
1100
1101unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
1102 if (STI->getFeatureBits().test(I: FeatureSGPRInitBug))
1103 return FIXED_NUM_SGPRS_FOR_INIT_BUG;
1104
1105 IsaVersion Version = getIsaVersion(GPU: STI->getCPU());
1106 if (Version.Major >= 10)
1107 return 106;
1108 if (Version.Major >= 8)
1109 return 102;
1110 return 104;
1111}
1112
1113unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
1114 assert(WavesPerEU != 0);
1115
1116 IsaVersion Version = getIsaVersion(GPU: STI->getCPU());
1117 if (Version.Major >= 10)
1118 return 0;
1119
1120 if (WavesPerEU >= getMaxWavesPerEU(STI))
1121 return 0;
1122
1123 unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
1124 if (STI->getFeatureBits().test(I: FeatureTrapHandler))
1125 MinNumSGPRs -= std::min(a: MinNumSGPRs, b: (unsigned)TRAP_NUM_SGPRS);
1126 MinNumSGPRs = alignDown(Value: MinNumSGPRs, Align: getSGPRAllocGranule(STI)) + 1;
1127 return std::min(a: MinNumSGPRs, b: getAddressableNumSGPRs(STI));
1128}
1129
1130unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
1131 bool Addressable) {
1132 assert(WavesPerEU != 0);
1133
1134 unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
1135 IsaVersion Version = getIsaVersion(GPU: STI->getCPU());
1136 if (Version.Major >= 10)
1137 return Addressable ? AddressableNumSGPRs : 108;
1138 if (Version.Major >= 8 && !Addressable)
1139 AddressableNumSGPRs = 112;
1140 unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
1141 if (STI->getFeatureBits().test(I: FeatureTrapHandler))
1142 MaxNumSGPRs -= std::min(a: MaxNumSGPRs, b: (unsigned)TRAP_NUM_SGPRS);
1143 MaxNumSGPRs = alignDown(Value: MaxNumSGPRs, Align: getSGPRAllocGranule(STI));
1144 return std::min(a: MaxNumSGPRs, b: AddressableNumSGPRs);
1145}
1146
1147unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
1148 bool FlatScrUsed, bool XNACKUsed) {
1149 unsigned ExtraSGPRs = 0;
1150 if (VCCUsed)
1151 ExtraSGPRs = 2;
1152
1153 IsaVersion Version = getIsaVersion(GPU: STI->getCPU());
1154 if (Version.Major >= 10)
1155 return ExtraSGPRs;
1156
1157 if (Version.Major < 8) {
1158 if (FlatScrUsed)
1159 ExtraSGPRs = 4;
1160 } else {
1161 if (XNACKUsed)
1162 ExtraSGPRs = 4;
1163
1164 if (FlatScrUsed ||
1165 STI->getFeatureBits().test(I: AMDGPU::FeatureArchitectedFlatScratch))
1166 ExtraSGPRs = 6;
1167 }
1168
1169 return ExtraSGPRs;
1170}
1171
1172unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
1173 bool FlatScrUsed) {
1174 return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
1175 XNACKUsed: STI->getFeatureBits().test(I: AMDGPU::FeatureXNACK));
1176}
1177
1178static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs,
1179 unsigned Granule) {
1180 return divideCeil(Numerator: std::max(a: 1u, b: NumRegs), Denominator: Granule);
1181}
1182
1183unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
  // SGPRBlocks is the actual number of SGPR blocks minus 1.
1185 return getGranulatedNumRegisterBlocks(NumRegs: NumSGPRs, Granule: getSGPREncodingGranule(STI)) -
1186 1;
1187}
1188
1189unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
1190 unsigned DynamicVGPRBlockSize,
1191 std::optional<bool> EnableWavefrontSize32) {
1192 if (STI->getFeatureBits().test(I: FeatureGFX90AInsts))
1193 return 8;
1194
1195 if (DynamicVGPRBlockSize != 0)
1196 return DynamicVGPRBlockSize;
1197
1198 // Temporarily check the subtarget feature, until we fully switch to using
1199 // attributes.
1200 if (STI->getFeatureBits().test(I: FeatureDynamicVGPR))
1201 return STI->getFeatureBits().test(I: FeatureDynamicVGPRBlockSize32) ? 32 : 16;
1202
1203 bool IsWave32 = EnableWavefrontSize32
1204 ? *EnableWavefrontSize32
1205 : STI->getFeatureBits().test(I: FeatureWavefrontSize32);
1206
1207 if (STI->getFeatureBits().test(I: Feature1_5xVGPRs))
1208 return IsWave32 ? 24 : 12;
1209
1210 if (hasGFX10_3Insts(STI: *STI))
1211 return IsWave32 ? 16 : 8;
1212
1213 return IsWave32 ? 8 : 4;
1214}
1215
1216unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
1217 std::optional<bool> EnableWavefrontSize32) {
1218 if (STI->getFeatureBits().test(I: FeatureGFX90AInsts))
1219 return 8;
1220
1221 bool IsWave32 = EnableWavefrontSize32
1222 ? *EnableWavefrontSize32
1223 : STI->getFeatureBits().test(I: FeatureWavefrontSize32);
1224
1225 return IsWave32 ? 8 : 4;
1226}
1227
1228unsigned getArchVGPRAllocGranule() { return 4; }
1229
1230unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
1231 if (STI->getFeatureBits().test(I: FeatureGFX90AInsts))
1232 return 512;
1233 if (!isGFX10Plus(STI: *STI))
1234 return 256;
1235 bool IsWave32 = STI->getFeatureBits().test(I: FeatureWavefrontSize32);
1236 if (STI->getFeatureBits().test(I: Feature1_5xVGPRs))
1237 return IsWave32 ? 1536 : 768;
1238 return IsWave32 ? 1024 : 512;
1239}
1240
1241unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI) { return 256; }
1242
1243unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI,
1244 unsigned DynamicVGPRBlockSize) {
1245 if (STI->getFeatureBits().test(I: FeatureGFX90AInsts))
1246 return 512;
1247
1248 // Temporarily check the subtarget feature, until we fully switch to using
1249 // attributes.
1250 if (DynamicVGPRBlockSize != 0 ||
1251 STI->getFeatureBits().test(I: FeatureDynamicVGPR))
1252 // On GFX12 we can allocate at most 8 blocks of VGPRs.
1253 return 8 * getVGPRAllocGranule(STI, DynamicVGPRBlockSize);
1254 return getAddressableNumArchVGPRs(STI);
1255}
1256
1257unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
1258 unsigned NumVGPRs,
1259 unsigned DynamicVGPRBlockSize) {
1260 return getNumWavesPerEUWithNumVGPRs(
1261 NumVGPRs, Granule: getVGPRAllocGranule(STI, DynamicVGPRBlockSize),
1262 MaxWaves: getMaxWavesPerEU(STI), TotalNumVGPRs: getTotalNumVGPRs(STI));
1263}
1264
1265unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
1266 unsigned MaxWaves,
1267 unsigned TotalNumVGPRs) {
1268 if (NumVGPRs < Granule)
1269 return MaxWaves;
1270 unsigned RoundedRegs = alignTo(Value: NumVGPRs, Align: Granule);
1271 return std::min(a: std::max(a: TotalNumVGPRs / RoundedRegs, b: 1u), b: MaxWaves);
1272}
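
// Worked example (arbitrary numbers): NumVGPRs = 100, Granule = 8,
// TotalNumVGPRs = 512, MaxWaves = 10 rounds the request up to 104 VGPRs and
// yields min(max(512 / 104, 1), 10) == 4 waves per EU.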
1273
1274unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
1275 AMDGPUSubtarget::Generation Gen) {
1276 if (Gen >= AMDGPUSubtarget::GFX10)
1277 return MaxWaves;
1278
1279 if (Gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
1280 if (SGPRs <= 80)
1281 return 10;
1282 if (SGPRs <= 88)
1283 return 9;
1284 if (SGPRs <= 100)
1285 return 8;
1286 return 7;
1287 }
1288 if (SGPRs <= 48)
1289 return 10;
1290 if (SGPRs <= 56)
1291 return 9;
1292 if (SGPRs <= 64)
1293 return 8;
1294 if (SGPRs <= 72)
1295 return 7;
1296 if (SGPRs <= 80)
1297 return 6;
1298 return 5;
1299}
1300
1301unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
1302 unsigned DynamicVGPRBlockSize) {
1303 assert(WavesPerEU != 0);
1304
1305 unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
1306 if (WavesPerEU >= MaxWavesPerEU)
1307 return 0;
1308
1309 unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
1310 unsigned AddrsableNumVGPRs =
1311 getAddressableNumVGPRs(STI, DynamicVGPRBlockSize);
1312 unsigned Granule = getVGPRAllocGranule(STI, DynamicVGPRBlockSize);
1313 unsigned MaxNumVGPRs = alignDown(Value: TotNumVGPRs / WavesPerEU, Align: Granule);
1314
1315 if (MaxNumVGPRs == alignDown(Value: TotNumVGPRs / MaxWavesPerEU, Align: Granule))
1316 return 0;
1317
1318 unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, NumVGPRs: AddrsableNumVGPRs,
1319 DynamicVGPRBlockSize);
1320 if (WavesPerEU < MinWavesPerEU)
1321 return getMinNumVGPRs(STI, WavesPerEU: MinWavesPerEU, DynamicVGPRBlockSize);
1322
1323 unsigned MaxNumVGPRsNext = alignDown(Value: TotNumVGPRs / (WavesPerEU + 1), Align: Granule);
1324 unsigned MinNumVGPRs = 1 + std::min(a: MaxNumVGPRs - Granule, b: MaxNumVGPRsNext);
1325 return std::min(a: MinNumVGPRs, b: AddrsableNumVGPRs);
1326}
1327
1328unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
1329 unsigned DynamicVGPRBlockSize) {
1330 assert(WavesPerEU != 0);
1331
1332 unsigned MaxNumVGPRs =
1333 alignDown(Value: getTotalNumVGPRs(STI) / WavesPerEU,
1334 Align: getVGPRAllocGranule(STI, DynamicVGPRBlockSize));
1335 unsigned AddressableNumVGPRs =
1336 getAddressableNumVGPRs(STI, DynamicVGPRBlockSize);
1337 return std::min(a: MaxNumVGPRs, b: AddressableNumVGPRs);
1338}
1339
1340unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
1341 std::optional<bool> EnableWavefrontSize32) {
1342 return getGranulatedNumRegisterBlocks(
1343 NumRegs: NumVGPRs, Granule: getVGPREncodingGranule(STI, EnableWavefrontSize32)) -
1344 1;
1345}
1346
1347unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI,
1348 unsigned NumVGPRs,
1349 unsigned DynamicVGPRBlockSize,
1350 std::optional<bool> EnableWavefrontSize32) {
1351 return getGranulatedNumRegisterBlocks(
1352 NumRegs: NumVGPRs,
1353 Granule: getVGPRAllocGranule(STI, DynamicVGPRBlockSize, EnableWavefrontSize32));
1354}
1355} // end namespace IsaInfo
1356
1357void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode,
1358 const MCSubtargetInfo *STI) {
1359 IsaVersion Version = getIsaVersion(GPU: STI->getCPU());
1360 KernelCode.amd_kernel_code_version_major = 1;
1361 KernelCode.amd_kernel_code_version_minor = 2;
1362 KernelCode.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
1363 KernelCode.amd_machine_version_major = Version.Major;
1364 KernelCode.amd_machine_version_minor = Version.Minor;
1365 KernelCode.amd_machine_version_stepping = Version.Stepping;
1366 KernelCode.kernel_code_entry_byte_offset = sizeof(amd_kernel_code_t);
1367 if (STI->getFeatureBits().test(I: FeatureWavefrontSize32)) {
1368 KernelCode.wavefront_size = 5;
1369 KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
1370 } else {
1371 KernelCode.wavefront_size = 6;
1372 }
1373
1374 // If the code object does not support indirect functions, then the value must
1375 // be 0xffffffff.
1376 KernelCode.call_convention = -1;
1377
1378 // These alignment values are specified in powers of two, so alignment =
1379 // 2^n. The minimum alignment is 2^4 = 16.
1380 KernelCode.kernarg_segment_alignment = 4;
1381 KernelCode.group_segment_alignment = 4;
1382 KernelCode.private_segment_alignment = 4;
1383
1384 if (Version.Major >= 10) {
1385 KernelCode.compute_pgm_resource_registers |=
1386 S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
1387 S_00B848_MEM_ORDERED(1) | S_00B848_FWD_PROGRESS(1);
1388 }
1389}
1390
1391bool isGroupSegment(const GlobalValue *GV) {
1392 return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
1393}
1394
1395bool isGlobalSegment(const GlobalValue *GV) {
1396 return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
1397}
1398
1399bool isReadOnlySegment(const GlobalValue *GV) {
1400 unsigned AS = GV->getAddressSpace();
1401 return AS == AMDGPUAS::CONSTANT_ADDRESS ||
1402 AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
1403}
1404
1405bool shouldEmitConstantsToTextSection(const Triple &TT) {
1406 return TT.getArch() == Triple::r600;
1407}
1408
1409std::pair<unsigned, unsigned>
1410getIntegerPairAttribute(const Function &F, StringRef Name,
1411 std::pair<unsigned, unsigned> Default,
1412 bool OnlyFirstRequired) {
1413 if (auto Attr = getIntegerPairAttribute(F, Name, OnlyFirstRequired))
1414 return {Attr->first, Attr->second.value_or(u&: Default.second)};
1415 return Default;
1416}
1417
1418std::optional<std::pair<unsigned, std::optional<unsigned>>>
1419getIntegerPairAttribute(const Function &F, StringRef Name,
1420 bool OnlyFirstRequired) {
1421 Attribute A = F.getFnAttribute(Kind: Name);
1422 if (!A.isStringAttribute())
1423 return std::nullopt;
1424
1425 LLVMContext &Ctx = F.getContext();
1426 std::pair<unsigned, std::optional<unsigned>> Ints;
1427 std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(Separator: ',');
1428 if (Strs.first.trim().getAsInteger(Radix: 0, Result&: Ints.first)) {
1429 Ctx.emitError(ErrorStr: "can't parse first integer attribute " + Name);
1430 return std::nullopt;
1431 }
1432 unsigned Second = 0;
1433 if (Strs.second.trim().getAsInteger(Radix: 0, Result&: Second)) {
1434 if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
1435 Ctx.emitError(ErrorStr: "can't parse second integer attribute " + Name);
1436 return std::nullopt;
1437 }
1438 } else {
1439 Ints.second = Second;
1440 }
1441
1442 return Ints;
1443}
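
// For example, a string attribute of the form "1,256" yields {1, 256}, and a
// single value such as "64" with OnlyFirstRequired == true yields
// {64, Default.second}.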
1444
1445SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
1446 unsigned Size,
1447 unsigned DefaultVal) {
1448 std::optional<SmallVector<unsigned>> R =
1449 getIntegerVecAttribute(F, Name, Size);
1450 return R.has_value() ? *R : SmallVector<unsigned>(Size, DefaultVal);
1451}
1452
1453std::optional<SmallVector<unsigned>>
1454getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size) {
1455 assert(Size > 2);
1456 LLVMContext &Ctx = F.getContext();
1457
1458 Attribute A = F.getFnAttribute(Kind: Name);
1459 if (!A.isValid())
1460 return std::nullopt;
1461 if (!A.isStringAttribute()) {
1462 Ctx.emitError(ErrorStr: Name + " is not a string attribute");
1463 return std::nullopt;
1464 }
1465
1466 SmallVector<unsigned> Vals(Size);
1467
1468 StringRef S = A.getValueAsString();
1469 unsigned i = 0;
1470 for (; !S.empty() && i < Size; i++) {
1471 std::pair<StringRef, StringRef> Strs = S.split(Separator: ',');
1472 unsigned IntVal;
1473 if (Strs.first.trim().getAsInteger(Radix: 0, Result&: IntVal)) {
1474 Ctx.emitError(ErrorStr: "can't parse integer attribute " + Strs.first + " in " +
1475 Name);
1476 return std::nullopt;
1477 }
1478 Vals[i] = IntVal;
1479 S = Strs.second;
1480 }
1481
1482 if (!S.empty() || i < Size) {
1483 Ctx.emitError(ErrorStr: "attribute " + Name +
1484 " has incorrect number of integers; expected " +
1485 llvm::utostr(X: Size));
1486 return std::nullopt;
1487 }
1488 return Vals;
1489}
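
// For example, with Size == 3 an attribute string of "16,8,4" yields
// {16, 8, 4}; a missing or extra element is diagnosed as an error.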
1490
1491unsigned getVmcntBitMask(const IsaVersion &Version) {
1492 return (1 << (getVmcntBitWidthLo(VersionMajor: Version.Major) +
1493 getVmcntBitWidthHi(VersionMajor: Version.Major))) -
1494 1;
1495}
1496
1497unsigned getLoadcntBitMask(const IsaVersion &Version) {
1498 return (1 << getLoadcntBitWidth(VersionMajor: Version.Major)) - 1;
1499}
1500
1501unsigned getSamplecntBitMask(const IsaVersion &Version) {
1502 return (1 << getSamplecntBitWidth(VersionMajor: Version.Major)) - 1;
1503}
1504
1505unsigned getBvhcntBitMask(const IsaVersion &Version) {
1506 return (1 << getBvhcntBitWidth(VersionMajor: Version.Major)) - 1;
1507}
1508
1509unsigned getExpcntBitMask(const IsaVersion &Version) {
1510 return (1 << getExpcntBitWidth(VersionMajor: Version.Major)) - 1;
1511}
1512
1513unsigned getLgkmcntBitMask(const IsaVersion &Version) {
1514 return (1 << getLgkmcntBitWidth(VersionMajor: Version.Major)) - 1;
1515}
1516
1517unsigned getDscntBitMask(const IsaVersion &Version) {
1518 return (1 << getDscntBitWidth(VersionMajor: Version.Major)) - 1;
1519}
1520
1521unsigned getKmcntBitMask(const IsaVersion &Version) {
1522 return (1 << getKmcntBitWidth(VersionMajor: Version.Major)) - 1;
1523}
1524
1525unsigned getXcntBitMask(const IsaVersion &Version) {
1526 return (1 << getXcntBitWidth(VersionMajor: Version.Major, VersionMinor: Version.Minor)) - 1;
1527}
1528
1529unsigned getStorecntBitMask(const IsaVersion &Version) {
1530 return (1 << getStorecntBitWidth(VersionMajor: Version.Major)) - 1;
1531}
1532
1533unsigned getWaitcntBitMask(const IsaVersion &Version) {
1534 unsigned VmcntLo = getBitMask(Shift: getVmcntBitShiftLo(VersionMajor: Version.Major),
1535 Width: getVmcntBitWidthLo(VersionMajor: Version.Major));
1536 unsigned Expcnt = getBitMask(Shift: getExpcntBitShift(VersionMajor: Version.Major),
1537 Width: getExpcntBitWidth(VersionMajor: Version.Major));
1538 unsigned Lgkmcnt = getBitMask(Shift: getLgkmcntBitShift(VersionMajor: Version.Major),
1539 Width: getLgkmcntBitWidth(VersionMajor: Version.Major));
1540 unsigned VmcntHi = getBitMask(Shift: getVmcntBitShiftHi(VersionMajor: Version.Major),
1541 Width: getVmcntBitWidthHi(VersionMajor: Version.Major));
1542 return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
1543}
1544
1545unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1546 unsigned VmcntLo = unpackBits(Src: Waitcnt, Shift: getVmcntBitShiftLo(VersionMajor: Version.Major),
1547 Width: getVmcntBitWidthLo(VersionMajor: Version.Major));
1548 unsigned VmcntHi = unpackBits(Src: Waitcnt, Shift: getVmcntBitShiftHi(VersionMajor: Version.Major),
1549 Width: getVmcntBitWidthHi(VersionMajor: Version.Major));
1550 return VmcntLo | VmcntHi << getVmcntBitWidthLo(VersionMajor: Version.Major);
1551}
1552
1553unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
1554 return unpackBits(Src: Waitcnt, Shift: getExpcntBitShift(VersionMajor: Version.Major),
1555 Width: getExpcntBitWidth(VersionMajor: Version.Major));
1556}
1557
1558unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1559 return unpackBits(Src: Waitcnt, Shift: getLgkmcntBitShift(VersionMajor: Version.Major),
1560 Width: getLgkmcntBitWidth(VersionMajor: Version.Major));
1561}
1562
1563void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt,
1564 unsigned &Expcnt, unsigned &Lgkmcnt) {
1565 Vmcnt = decodeVmcnt(Version, Waitcnt);
1566 Expcnt = decodeExpcnt(Version, Waitcnt);
1567 Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
1568}
1569
1570Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
1571 Waitcnt Decoded;
1572 Decoded.LoadCnt = decodeVmcnt(Version, Waitcnt: Encoded);
1573 Decoded.ExpCnt = decodeExpcnt(Version, Waitcnt: Encoded);
1574 Decoded.DsCnt = decodeLgkmcnt(Version, Waitcnt: Encoded);
1575 return Decoded;
1576}
1577
1578unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1579 unsigned Vmcnt) {
1580 Waitcnt = packBits(Src: Vmcnt, Dst: Waitcnt, Shift: getVmcntBitShiftLo(VersionMajor: Version.Major),
1581 Width: getVmcntBitWidthLo(VersionMajor: Version.Major));
1582 return packBits(Src: Vmcnt >> getVmcntBitWidthLo(VersionMajor: Version.Major), Dst: Waitcnt,
1583 Shift: getVmcntBitShiftHi(VersionMajor: Version.Major),
1584 Width: getVmcntBitWidthHi(VersionMajor: Version.Major));
1585}
1586
1587unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1588 unsigned Expcnt) {
1589 return packBits(Src: Expcnt, Dst: Waitcnt, Shift: getExpcntBitShift(VersionMajor: Version.Major),
1590 Width: getExpcntBitWidth(VersionMajor: Version.Major));
1591}
1592
1593unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1594 unsigned Lgkmcnt) {
1595 return packBits(Src: Lgkmcnt, Dst: Waitcnt, Shift: getLgkmcntBitShift(VersionMajor: Version.Major),
1596 Width: getLgkmcntBitWidth(VersionMajor: Version.Major));
1597}
1598
1599unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
1600 unsigned Expcnt, unsigned Lgkmcnt) {
1601 unsigned Waitcnt = getWaitcntBitMask(Version);
1602 Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
1603 Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
1604 Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
1605 return Waitcnt;
1606}
1607
1608unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
1609 return encodeWaitcnt(Version, Vmcnt: Decoded.LoadCnt, Expcnt: Decoded.ExpCnt, Lgkmcnt: Decoded.DsCnt);
1610}
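
// Worked example for the GFX9 layout (Vmcnt [3:0] + [15:14], Expcnt [6:4],
// Lgkmcnt [11:8]):
//   encodeWaitcnt(Version, /*Vmcnt=*/1, /*Expcnt=*/2, /*Lgkmcnt=*/3) == 0x321
// and decodeWaitcnt(Version, 0x321) recovers those three values.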
1611
1612static unsigned getCombinedCountBitMask(const IsaVersion &Version,
1613 bool IsStore) {
1614 unsigned Dscnt = getBitMask(Shift: getDscntBitShift(VersionMajor: Version.Major),
1615 Width: getDscntBitWidth(VersionMajor: Version.Major));
1616 if (IsStore) {
1617 unsigned Storecnt = getBitMask(Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1618 Width: getStorecntBitWidth(VersionMajor: Version.Major));
1619 return Dscnt | Storecnt;
1620 }
1621 unsigned Loadcnt = getBitMask(Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1622 Width: getLoadcntBitWidth(VersionMajor: Version.Major));
1623 return Dscnt | Loadcnt;
1624}
1625
1626Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt) {
1627 Waitcnt Decoded;
1628 Decoded.LoadCnt =
1629 unpackBits(Src: LoadcntDscnt, Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1630 Width: getLoadcntBitWidth(VersionMajor: Version.Major));
1631 Decoded.DsCnt = unpackBits(Src: LoadcntDscnt, Shift: getDscntBitShift(VersionMajor: Version.Major),
1632 Width: getDscntBitWidth(VersionMajor: Version.Major));
1633 return Decoded;
1634}
1635
1636Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt) {
1637 Waitcnt Decoded;
1638 Decoded.StoreCnt =
1639 unpackBits(Src: StorecntDscnt, Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1640 Width: getStorecntBitWidth(VersionMajor: Version.Major));
1641 Decoded.DsCnt = unpackBits(Src: StorecntDscnt, Shift: getDscntBitShift(VersionMajor: Version.Major),
1642 Width: getDscntBitWidth(VersionMajor: Version.Major));
1643 return Decoded;
1644}
1645
1646static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt,
1647 unsigned Loadcnt) {
1648 return packBits(Src: Loadcnt, Dst: Waitcnt, Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1649 Width: getLoadcntBitWidth(VersionMajor: Version.Major));
1650}
1651
1652static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt,
1653 unsigned Storecnt) {
1654 return packBits(Src: Storecnt, Dst: Waitcnt, Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major),
1655 Width: getStorecntBitWidth(VersionMajor: Version.Major));
1656}
1657
1658static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt,
1659 unsigned Dscnt) {
1660 return packBits(Src: Dscnt, Dst: Waitcnt, Shift: getDscntBitShift(VersionMajor: Version.Major),
1661 Width: getDscntBitWidth(VersionMajor: Version.Major));
1662}
1663
1664static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt,
1665 unsigned Dscnt) {
1666 unsigned Waitcnt = getCombinedCountBitMask(Version, IsStore: false);
1667 Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt);
1668 Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
1669 return Waitcnt;
1670}
1671
1672unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded) {
1673 return encodeLoadcntDscnt(Version, Loadcnt: Decoded.LoadCnt, Dscnt: Decoded.DsCnt);
1674}
1675
1676static unsigned encodeStorecntDscnt(const IsaVersion &Version,
1677 unsigned Storecnt, unsigned Dscnt) {
1678 unsigned Waitcnt = getCombinedCountBitMask(Version, IsStore: true);
1679 Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt);
1680 Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt);
1681 return Waitcnt;
1682}
1683
1684unsigned encodeStorecntDscnt(const IsaVersion &Version,
1685 const Waitcnt &Decoded) {
1686 return encodeStorecntDscnt(Version, Storecnt: Decoded.StoreCnt, Dscnt: Decoded.DsCnt);
1687}
1688
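// Illustrative sketch: on GFX12+ the split wait counts share one immediate
// layout, with the load or store counter occupying the same bit field and the
// opcode selecting which one it is. Given an IsaVersion IV and a Waitcnt W
// with the relevant members populated:
//
//   unsigned LD = encodeLoadcntDscnt(IV, W);    // packs W.LoadCnt and W.DsCnt
//   unsigned SD = encodeStorecntDscnt(IV, W);   // packs W.StoreCnt and W.DsCnt
//   Waitcnt Back = decodeLoadcntDscnt(IV, LD);  // restores LoadCnt and DsCnt
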
1689//===----------------------------------------------------------------------===//
1690// Custom Operand Values
1691//===----------------------------------------------------------------------===//
1692
1693static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr,
1694 int Size,
1695 const MCSubtargetInfo &STI) {
1696 unsigned Enc = 0;
1697 for (int Idx = 0; Idx < Size; ++Idx) {
1698 const auto &Op = Opr[Idx];
1699 if (Op.isSupported(STI))
1700 Enc |= Op.encode(Val: Op.Default);
1701 }
1702 return Enc;
1703}
1704
1705static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr,
1706 int Size, unsigned Code,
1707 bool &HasNonDefaultVal,
1708 const MCSubtargetInfo &STI) {
1709 unsigned UsedOprMask = 0;
1710 HasNonDefaultVal = false;
1711 for (int Idx = 0; Idx < Size; ++Idx) {
1712 const auto &Op = Opr[Idx];
1713 if (!Op.isSupported(STI))
1714 continue;
1715 UsedOprMask |= Op.getMask();
1716 unsigned Val = Op.decode(Code);
1717 if (!Op.isValid(Val))
1718 return false;
1719 HasNonDefaultVal |= (Val != Op.Default);
1720 }
1721 return (Code & ~UsedOprMask) == 0;
1722}
1723
1724static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
1725 unsigned Code, int &Idx, StringRef &Name,
1726 unsigned &Val, bool &IsDefault,
1727 const MCSubtargetInfo &STI) {
1728 while (Idx < Size) {
1729 const auto &Op = Opr[Idx++];
1730 if (Op.isSupported(STI)) {
1731 Name = Op.Name;
1732 Val = Op.decode(Code);
1733 IsDefault = (Val == Op.Default);
1734 return true;
1735 }
1736 }
1737
1738 return false;
1739}
1740
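// Illustrative sketch of the iteration protocol used by decodeCustomOperand:
// Idx is an in/out cursor, so a disassembler can walk every supported field
// of an encoded value (Opr/Size as passed by the wrappers below, e.g.
// DepCtrInfo / DEP_CTR_SIZE):
//
//   int Idx = 0;
//   StringRef Name;
//   unsigned Val;
//   bool IsDefault;
//   while (decodeCustomOperand(Opr, Size, Code, Idx, Name, Val, IsDefault,
//                              STI)) {
//     // Emit "Name(Val)", optionally skipping fields still at their default.
//   }
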
1741static int encodeCustomOperandVal(const CustomOperandVal &Op,
1742 int64_t InputVal) {
1743 if (InputVal < 0 || InputVal > Op.Max)
1744 return OPR_VAL_INVALID;
1745 return Op.encode(Val: InputVal);
1746}
1747
1748static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
1749 const StringRef Name, int64_t InputVal,
1750 unsigned &UsedOprMask,
1751 const MCSubtargetInfo &STI) {
1752 int InvalidId = OPR_ID_UNKNOWN;
1753 for (int Idx = 0; Idx < Size; ++Idx) {
1754 const auto &Op = Opr[Idx];
1755 if (Op.Name == Name) {
1756 if (!Op.isSupported(STI)) {
1757 InvalidId = OPR_ID_UNSUPPORTED;
1758 continue;
1759 }
1760 auto OprMask = Op.getMask();
1761 if (OprMask & UsedOprMask)
1762 return OPR_ID_DUPLICATE;
1763 UsedOprMask |= OprMask;
1764 return encodeCustomOperandVal(Op, InputVal);
1765 }
1766 }
1767 return InvalidId;
1768}
1769
1770//===----------------------------------------------------------------------===//
1771// DepCtr
1772//===----------------------------------------------------------------------===//
1773
1774namespace DepCtr {
1775
1776int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) {
1777 static int Default = -1;
1778 if (Default == -1)
1779 Default = getDefaultCustomOperandEncoding(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, STI);
1780 return Default;
1781}
1782
1783bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
1784 const MCSubtargetInfo &STI) {
1785 return isSymbolicCustomOperandEncoding(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, Code,
1786 HasNonDefaultVal, STI);
1787}
1788
1789bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
1790 bool &IsDefault, const MCSubtargetInfo &STI) {
1791 return decodeCustomOperand(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, Code, Idx&: Id, Name, Val,
1792 IsDefault, STI);
1793}
1794
1795int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
1796 const MCSubtargetInfo &STI) {
1797 return encodeCustomOperand(Opr: DepCtrInfo, Size: DEP_CTR_SIZE, Name, InputVal: Val, UsedOprMask,
1798 STI);
1799}
1800
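// Illustrative sketch: the assembler resolves one named field at a time and
// relies on the negative OPR_* sentinels to diagnose problems. ParsedName and
// ParsedVal below are placeholders for whatever the parser produced (the
// valid names live in the DepCtrInfo table and are not repeated here).
//
//   unsigned UsedOprMask = 0;
//   int Field = encodeDepCtr(ParsedName, ParsedVal, UsedOprMask, STI);
//   if (Field < 0) {
//     // OPR_ID_UNKNOWN, OPR_ID_UNSUPPORTED, OPR_ID_DUPLICATE or
//     // OPR_VAL_INVALID; report the corresponding parse error.
//   }
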
1801unsigned decodeFieldVmVsrc(unsigned Encoded) {
1802 return unpackBits(Src: Encoded, Shift: getVmVsrcBitShift(), Width: getVmVsrcBitWidth());
1803}
1804
1805unsigned decodeFieldVaVdst(unsigned Encoded) {
1806 return unpackBits(Src: Encoded, Shift: getVaVdstBitShift(), Width: getVaVdstBitWidth());
1807}
1808
1809unsigned decodeFieldSaSdst(unsigned Encoded) {
1810 return unpackBits(Src: Encoded, Shift: getSaSdstBitShift(), Width: getSaSdstBitWidth());
1811}
1812
1813unsigned decodeFieldVaSdst(unsigned Encoded) {
1814 return unpackBits(Src: Encoded, Shift: getVaSdstBitShift(), Width: getVaSdstBitWidth());
1815}
1816
1817unsigned decodeFieldVaVcc(unsigned Encoded) {
1818 return unpackBits(Src: Encoded, Shift: getVaVccBitShift(), Width: getVaVccBitWidth());
1819}
1820
1821unsigned decodeFieldVaSsrc(unsigned Encoded) {
1822 return unpackBits(Src: Encoded, Shift: getVaSsrcBitShift(), Width: getVaSsrcBitWidth());
1823}
1824
1825unsigned decodeFieldHoldCnt(unsigned Encoded) {
1826 return unpackBits(Src: Encoded, Shift: getHoldCntBitShift(), Width: getHoldCntWidth());
1827}
1828
1829unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
1830 return packBits(Src: VmVsrc, Dst: Encoded, Shift: getVmVsrcBitShift(), Width: getVmVsrcBitWidth());
1831}
1832
1833unsigned encodeFieldVmVsrc(unsigned VmVsrc) {
1834 return encodeFieldVmVsrc(Encoded: 0xffff, VmVsrc);
1835}
1836
1837unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) {
1838 return packBits(Src: VaVdst, Dst: Encoded, Shift: getVaVdstBitShift(), Width: getVaVdstBitWidth());
1839}
1840
1841unsigned encodeFieldVaVdst(unsigned VaVdst) {
1842 return encodeFieldVaVdst(Encoded: 0xffff, VaVdst);
1843}
1844
1845unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) {
1846 return packBits(Src: SaSdst, Dst: Encoded, Shift: getSaSdstBitShift(), Width: getSaSdstBitWidth());
1847}
1848
1849unsigned encodeFieldSaSdst(unsigned SaSdst) {
1850 return encodeFieldSaSdst(Encoded: 0xffff, SaSdst);
1851}
1852
1853unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst) {
1854 return packBits(Src: VaSdst, Dst: Encoded, Shift: getVaSdstBitShift(), Width: getVaSdstBitWidth());
1855}
1856
1857unsigned encodeFieldVaSdst(unsigned VaSdst) {
1858 return encodeFieldVaSdst(Encoded: 0xffff, VaSdst);
1859}
1860
1861unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc) {
1862 return packBits(Src: VaVcc, Dst: Encoded, Shift: getVaVccBitShift(), Width: getVaVccBitWidth());
1863}
1864
1865unsigned encodeFieldVaVcc(unsigned VaVcc) {
1866 return encodeFieldVaVcc(Encoded: 0xffff, VaVcc);
1867}
1868
1869unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc) {
1870 return packBits(Src: VaSsrc, Dst: Encoded, Shift: getVaSsrcBitShift(), Width: getVaSsrcBitWidth());
1871}
1872
1873unsigned encodeFieldVaSsrc(unsigned VaSsrc) {
1874 return encodeFieldVaSsrc(Encoded: 0xffff, VaSsrc);
1875}
1876
1877unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt) {
1878 return packBits(Src: HoldCnt, Dst: Encoded, Shift: getHoldCntBitShift(), Width: getHoldCntWidth());
1879}
1880
1881unsigned encodeFieldHoldCnt(unsigned HoldCnt) {
1882 return encodeFieldHoldCnt(Encoded: 0xffff, HoldCnt);
1883}
1884
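// Illustrative sketch: the single-argument overloads above start from 0xffff
// and overwrite exactly one field, so they compose by chaining. For example,
// to build an immediate with VA_VDST and SA_SDST forced to 0 while every
// other field keeps its all-ones encoding:
//
//   unsigned Imm = encodeFieldSaSdst(encodeFieldVaVdst(0xffff, 0), 0);
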
1885} // namespace DepCtr
1886
1887//===----------------------------------------------------------------------===//
1888// exp tgt
1889//===----------------------------------------------------------------------===//
1890
1891namespace Exp {
1892
1893struct ExpTgt {
1894 StringLiteral Name;
1895 unsigned Tgt;
1896 unsigned MaxIndex;
1897};
1898
1899// clang-format off
1900static constexpr ExpTgt ExpTgtInfo[] = {
1901 {.Name: {"null"}, .Tgt: ET_NULL, .MaxIndex: ET_NULL_MAX_IDX},
1902 {.Name: {"mrtz"}, .Tgt: ET_MRTZ, .MaxIndex: ET_MRTZ_MAX_IDX},
1903 {.Name: {"prim"}, .Tgt: ET_PRIM, .MaxIndex: ET_PRIM_MAX_IDX},
1904 {.Name: {"mrt"}, .Tgt: ET_MRT0, .MaxIndex: ET_MRT_MAX_IDX},
1905 {.Name: {"pos"}, .Tgt: ET_POS0, .MaxIndex: ET_POS_MAX_IDX},
1906 {.Name: {"dual_src_blend"},.Tgt: ET_DUAL_SRC_BLEND0, .MaxIndex: ET_DUAL_SRC_BLEND_MAX_IDX},
1907 {.Name: {"param"}, .Tgt: ET_PARAM0, .MaxIndex: ET_PARAM_MAX_IDX},
1908};
1909// clang-format on
1910
1911bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
1912 for (const ExpTgt &Val : ExpTgtInfo) {
1913 if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
1914 Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
1915 Name = Val.Name;
1916 return true;
1917 }
1918 }
1919 return false;
1920}
1921
1922unsigned getTgtId(const StringRef Name) {
1924 for (const ExpTgt &Val : ExpTgtInfo) {
1925 if (Val.MaxIndex == 0 && Name == Val.Name)
1926 return Val.Tgt;
1927
1928 if (Val.MaxIndex > 0 && Name.starts_with(Prefix: Val.Name)) {
1929 StringRef Suffix = Name.drop_front(N: Val.Name.size());
1930
1931 unsigned Id;
1932 if (Suffix.getAsInteger(Radix: 10, Result&: Id) || Id > Val.MaxIndex)
1933 return ET_INVALID;
1934
1935 // Reject leading zeroes in the index suffix.
1936 if (Suffix.size() > 1 && Suffix[0] == '0')
1937 return ET_INVALID;
1938
1939 return Val.Tgt + Id;
1940 }
1941 }
1942 return ET_INVALID;
1943}
1944
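// Illustrative examples, following ExpTgtInfo and the parsing rules above:
//   getTgtId("pos3")  == ET_POS0 + 3   // indexed entry, suffix parsed as 3
//   getTgtId("pos03") == ET_INVALID    // leading zero rejected
//   Any suffix larger than the entry's MaxIndex likewise yields ET_INVALID.
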
1945bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
1946 switch (Id) {
1947 case ET_NULL:
1948 return !isGFX11Plus(STI);
1949 case ET_POS4:
1950 case ET_PRIM:
1951 return isGFX10Plus(STI);
1952 case ET_DUAL_SRC_BLEND0:
1953 case ET_DUAL_SRC_BLEND1:
1954 return isGFX11Plus(STI);
1955 default:
1956 if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
1957 return !isGFX11Plus(STI);
1958 return true;
1959 }
1960}
1961
1962} // namespace Exp
1963
1964//===----------------------------------------------------------------------===//
1965// MTBUF Format
1966//===----------------------------------------------------------------------===//
1967
1968namespace MTBUFFormat {
1969
1970int64_t getDfmt(const StringRef Name) {
1971 for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
1972 if (Name == DfmtSymbolic[Id])
1973 return Id;
1974 }
1975 return DFMT_UNDEF;
1976}
1977
1978StringRef getDfmtName(unsigned Id) {
1979 assert(Id <= DFMT_MAX);
1980 return DfmtSymbolic[Id];
1981}
1982
1983static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) {
1984 if (isSI(STI) || isCI(STI))
1985 return NfmtSymbolicSICI;
1986 if (isVI(STI) || isGFX9(STI))
1987 return NfmtSymbolicVI;
1988 return NfmtSymbolicGFX10;
1989}
1990
1991int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
1992 const auto *lookupTable = getNfmtLookupTable(STI);
1993 for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
1994 if (Name == lookupTable[Id])
1995 return Id;
1996 }
1997 return NFMT_UNDEF;
1998}
1999
2000StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
2001 assert(Id <= NFMT_MAX);
2002 return getNfmtLookupTable(STI)[Id];
2003}
2004
2005bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
2006 unsigned Dfmt;
2007 unsigned Nfmt;
2008 decodeDfmtNfmt(Format: Id, Dfmt, Nfmt);
2009 return isValidNfmt(Val: Nfmt, STI);
2010}
2011
2012bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
2013 return !getNfmtName(Id, STI).empty();
2014}
2015
2016int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
2017 return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
2018}
2019
2020void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
2021 Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
2022 Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
2023}
2024
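// Illustrative sketch: the legacy (pre-unified-format) immediate is just the
// two fields packed side by side, so encode/decode are inverses as long as
// the given Dfmt and Nfmt values fit their masks:
//
//   int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
//   unsigned D, N;
//   decodeDfmtNfmt(Fmt, D, N);   // D == Dfmt, N == Nfmt
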
2025int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) {
2026 if (isGFX11Plus(STI)) {
2027 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
2028 if (Name == UfmtSymbolicGFX11[Id])
2029 return Id;
2030 }
2031 } else {
2032 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
2033 if (Name == UfmtSymbolicGFX10[Id])
2034 return Id;
2035 }
2036 }
2037 return UFMT_UNDEF;
2038}
2039
2040StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) {
2041 if (isValidUnifiedFormat(Val: Id, STI))
2042 return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
2043 return "";
2044}
2045
2046bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
2047 return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
2048}
2049
2050int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
2051 const MCSubtargetInfo &STI) {
2052 int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
2053 if (isGFX11Plus(STI)) {
2054 for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
2055 if (Fmt == DfmtNfmt2UFmtGFX11[Id])
2056 return Id;
2057 }
2058 } else {
2059 for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
2060 if (Fmt == DfmtNfmt2UFmtGFX10[Id])
2061 return Id;
2062 }
2063 }
2064 return UFMT_UNDEF;
2065}
2066
2067bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
2068 return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
2069}
2070
2071unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
2072 if (isGFX10Plus(STI))
2073 return UFMT_DEFAULT;
2074 return DFMT_NFMT_DEFAULT;
2075}
2076
2077} // namespace MTBUFFormat
2078
2079//===----------------------------------------------------------------------===//
2080// SendMsg
2081//===----------------------------------------------------------------------===//
2082
2083namespace SendMsg {
2084
2085static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) {
2086 return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_;
2087}
2088
2089bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
2090 return (MsgId & ~(getMsgIdMask(STI))) == 0;
2091}
2092
2093bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
2094 bool Strict) {
2095 assert(isValidMsgId(MsgId, STI));
2096
2097 if (!Strict)
2098 return 0 <= OpId && isUInt<OP_WIDTH_>(x: OpId);
2099
2100 if (msgRequiresOp(MsgId, STI)) {
2101 if (MsgId == ID_GS_PreGFX11 && OpId == OP_GS_NOP)
2102 return false;
2103
2104 return !getMsgOpName(MsgId, Encoding: OpId, STI).empty();
2105 }
2106
2107 return OpId == OP_NONE_;
2108}
2109
2110bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
2111 const MCSubtargetInfo &STI, bool Strict) {
2112 assert(isValidMsgOp(MsgId, OpId, STI, Strict));
2113
2114 if (!Strict)
2115 return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(x: StreamId);
2116
2117 if (!isGFX11Plus(STI)) {
2118 switch (MsgId) {
2119 case ID_GS_PreGFX11:
2120 return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
2121 case ID_GS_DONE_PreGFX11:
2122 return (OpId == OP_GS_NOP)
2123 ? (StreamId == STREAM_ID_NONE_)
2124 : (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
2125 }
2126 }
2127 return StreamId == STREAM_ID_NONE_;
2128}
2129
2130bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
2131 return MsgId == ID_SYSMSG ||
2132 (!isGFX11Plus(STI) &&
2133 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
2134}
2135
2136bool msgSupportsStream(int64_t MsgId, int64_t OpId,
2137 const MCSubtargetInfo &STI) {
2138 return !isGFX11Plus(STI) &&
2139 (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
2140 OpId != OP_GS_NOP;
2141}
2142
2143void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
2144 uint16_t &StreamId, const MCSubtargetInfo &STI) {
2145 MsgId = Val & getMsgIdMask(STI);
2146 if (isGFX11Plus(STI)) {
2147 OpId = 0;
2148 StreamId = 0;
2149 } else {
2150 OpId = (Val & OP_MASK_) >> OP_SHIFT_;
2151 StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
2152 }
2153}
2154
2155uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId) {
2156 return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
2157}
2158
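// Illustrative sketch: pre-GFX11 the sendmsg immediate carries all three
// fields, while on GFX11+ only the message id survives decoding. Given valid
// MsgId/OpId/StreamId values and an MCSubtargetInfo STI:
//
//   uint64_t Imm = encodeMsg(MsgId, OpId, StreamId);
//   uint16_t M, O, S;
//   decodeMsg(Imm, M, O, S, STI);   // on GFX11+, O == 0 and S == 0 regardless
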
2159} // namespace SendMsg
2160
2161//===----------------------------------------------------------------------===//
2162// Miscellaneous Queries and Utilities
2163//===----------------------------------------------------------------------===//
2164
2165unsigned getInitialPSInputAddr(const Function &F) {
2166 return F.getFnAttributeAsParsedInteger(Kind: "InitialPSInputAddr", Default: 0);
2167}
2168
2169bool getHasColorExport(const Function &F) {
2170 // As a safe default always respond as if PS has color exports.
2171 return F.getFnAttributeAsParsedInteger(
2172 Kind: "amdgpu-color-export",
2173 Default: F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
2174}
2175
2176bool getHasDepthExport(const Function &F) {
2177 return F.getFnAttributeAsParsedInteger(Kind: "amdgpu-depth-export", Default: 0) != 0;
2178}
2179
2180unsigned getDynamicVGPRBlockSize(const Function &F) {
2181 unsigned BlockSize =
2182 F.getFnAttributeAsParsedInteger(Kind: "amdgpu-dynamic-vgpr-block-size", Default: 0);
2183
2184 if (BlockSize == 16 || BlockSize == 32)
2185 return BlockSize;
2186
2187 return 0;
2188}
2189
2190bool hasXNACK(const MCSubtargetInfo &STI) {
2191 return STI.hasFeature(Feature: AMDGPU::FeatureXNACK);
2192}
2193
2194bool hasSRAMECC(const MCSubtargetInfo &STI) {
2195 return STI.hasFeature(Feature: AMDGPU::FeatureSRAMECC);
2196}
2197
2198bool hasMIMG_R128(const MCSubtargetInfo &STI) {
2199 return STI.hasFeature(Feature: AMDGPU::FeatureMIMG_R128) &&
2200 !STI.hasFeature(Feature: AMDGPU::FeatureR128A16);
2201}
2202
2203bool hasA16(const MCSubtargetInfo &STI) {
2204 return STI.hasFeature(Feature: AMDGPU::FeatureA16);
2205}
2206
2207bool hasG16(const MCSubtargetInfo &STI) {
2208 return STI.hasFeature(Feature: AMDGPU::FeatureG16);
2209}
2210
2211bool hasPackedD16(const MCSubtargetInfo &STI) {
2212 return !STI.hasFeature(Feature: AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) &&
2213 !isSI(STI);
2214}
2215
2216bool hasGDS(const MCSubtargetInfo &STI) {
2217 return STI.hasFeature(Feature: AMDGPU::FeatureGDS);
2218}
2219
2220unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) {
2221 auto Version = getIsaVersion(GPU: STI.getCPU());
2222 if (Version.Major == 10)
2223 return Version.Minor >= 3 ? 13 : 5;
2224 if (Version.Major == 11)
2225 return 5;
2226 if (Version.Major >= 12)
2227 return HasSampler ? 4 : 5;
2228 return 0;
2229}
2230
2231unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) { return 16; }
2232
2233bool isSI(const MCSubtargetInfo &STI) {
2234 return STI.hasFeature(Feature: AMDGPU::FeatureSouthernIslands);
2235}
2236
2237bool isCI(const MCSubtargetInfo &STI) {
2238 return STI.hasFeature(Feature: AMDGPU::FeatureSeaIslands);
2239}
2240
2241bool isVI(const MCSubtargetInfo &STI) {
2242 return STI.hasFeature(Feature: AMDGPU::FeatureVolcanicIslands);
2243}
2244
2245bool isGFX9(const MCSubtargetInfo &STI) {
2246 return STI.hasFeature(Feature: AMDGPU::FeatureGFX9);
2247}
2248
2249bool isGFX9_GFX10(const MCSubtargetInfo &STI) {
2250 return isGFX9(STI) || isGFX10(STI);
2251}
2252
2253bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI) {
2254 return isGFX9(STI) || isGFX10(STI) || isGFX11(STI);
2255}
2256
2257bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) {
2258 return isVI(STI) || isGFX9(STI) || isGFX10(STI);
2259}
2260
2261bool isGFX8Plus(const MCSubtargetInfo &STI) {
2262 return isVI(STI) || isGFX9Plus(STI);
2263}
2264
2265bool isGFX9Plus(const MCSubtargetInfo &STI) {
2266 return isGFX9(STI) || isGFX10Plus(STI);
2267}
2268
2269bool isNotGFX9Plus(const MCSubtargetInfo &STI) { return !isGFX9Plus(STI); }
2270
2271bool isGFX10(const MCSubtargetInfo &STI) {
2272 return STI.hasFeature(Feature: AMDGPU::FeatureGFX10);
2273}
2274
2275bool isGFX10_GFX11(const MCSubtargetInfo &STI) {
2276 return isGFX10(STI) || isGFX11(STI);
2277}
2278
2279bool isGFX10Plus(const MCSubtargetInfo &STI) {
2280 return isGFX10(STI) || isGFX11Plus(STI);
2281}
2282
2283bool isGFX11(const MCSubtargetInfo &STI) {
2284 return STI.hasFeature(Feature: AMDGPU::FeatureGFX11);
2285}
2286
2287bool isGFX11Plus(const MCSubtargetInfo &STI) {
2288 return isGFX11(STI) || isGFX12Plus(STI);
2289}
2290
2291bool isGFX12(const MCSubtargetInfo &STI) {
2292 return STI.getFeatureBits()[AMDGPU::FeatureGFX12];
2293}
2294
2295bool isGFX12Plus(const MCSubtargetInfo &STI) { return isGFX12(STI); }
2296
2297bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); }
2298
2299bool isGFX1250(const MCSubtargetInfo &STI) {
2300 return STI.getFeatureBits()[AMDGPU::FeatureGFX1250Insts];
2301}
2302
2303bool isNotGFX11Plus(const MCSubtargetInfo &STI) { return !isGFX11Plus(STI); }
2304
2305bool isNotGFX10Plus(const MCSubtargetInfo &STI) {
2306 return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
2307}
2308
2309bool isGFX10Before1030(const MCSubtargetInfo &STI) {
2310 return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
2311}
2312
2313bool isGCN3Encoding(const MCSubtargetInfo &STI) {
2314 return STI.hasFeature(Feature: AMDGPU::FeatureGCN3Encoding);
2315}
2316
2317bool isGFX10_AEncoding(const MCSubtargetInfo &STI) {
2318 return STI.hasFeature(Feature: AMDGPU::FeatureGFX10_AEncoding);
2319}
2320
2321bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
2322 return STI.hasFeature(Feature: AMDGPU::FeatureGFX10_BEncoding);
2323}
2324
2325bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
2326 return STI.hasFeature(Feature: AMDGPU::FeatureGFX10_3Insts);
2327}
2328
2329bool isGFX10_3_GFX11(const MCSubtargetInfo &STI) {
2330 return isGFX10_BEncoding(STI) && !isGFX12Plus(STI);
2331}
2332
2333bool isGFX90A(const MCSubtargetInfo &STI) {
2334 return STI.hasFeature(Feature: AMDGPU::FeatureGFX90AInsts);
2335}
2336
2337bool isGFX940(const MCSubtargetInfo &STI) {
2338 return STI.hasFeature(Feature: AMDGPU::FeatureGFX940Insts);
2339}
2340
2341bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
2342 return STI.hasFeature(Feature: AMDGPU::FeatureArchitectedFlatScratch);
2343}
2344
2345bool hasMAIInsts(const MCSubtargetInfo &STI) {
2346 return STI.hasFeature(Feature: AMDGPU::FeatureMAIInsts);
2347}
2348
2349bool hasVOPD(const MCSubtargetInfo &STI) {
2350 return STI.hasFeature(Feature: AMDGPU::FeatureVOPD);
2351}
2352
2353bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI) {
2354 return STI.hasFeature(Feature: AMDGPU::FeatureDPPSrc1SGPR);
2355}
2356
2357unsigned hasKernargPreload(const MCSubtargetInfo &STI) {
2358 return STI.hasFeature(Feature: AMDGPU::FeatureKernargPreload);
2359}
2360
2361int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
2362 int32_t ArgNumVGPR) {
2363 if (has90AInsts && ArgNumAGPR)
2364 return alignTo(Value: ArgNumVGPR, Align: 4) + ArgNumAGPR;
2365 return std::max(a: ArgNumVGPR, b: ArgNumAGPR);
2366}
2367
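// Illustrative example: with GFX90A-style unified registers (has90AInsts and
// a nonzero AGPR demand), ArgNumVGPR = 6 and ArgNumAGPR = 4 give
// alignTo(6, 4) + 4 == 12 total registers; otherwise the count is simply
// max(6, 4) == 6.
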
2368bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI) {
2369 const MCRegisterClass SGPRClass = TRI->getRegClass(i: AMDGPU::SReg_32RegClassID);
2370 const MCRegister FirstSubReg = TRI->getSubReg(Reg, Idx: AMDGPU::sub0);
2371 return SGPRClass.contains(Reg: FirstSubReg != 0 ? FirstSubReg : Reg) ||
2372 Reg == AMDGPU::SCC;
2373}
2374
2375bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI) {
2376 return MRI.getEncodingValue(Reg) & AMDGPU::HWEncoding::IS_HI16;
2377}
2378
2379#define MAP_REG2REG \
2380 using namespace AMDGPU; \
2381 switch (Reg.id()) { \
2382 default: \
2383 return Reg; \
2384 CASE_CI_VI(FLAT_SCR) \
2385 CASE_CI_VI(FLAT_SCR_LO) \
2386 CASE_CI_VI(FLAT_SCR_HI) \
2387 CASE_VI_GFX9PLUS(TTMP0) \
2388 CASE_VI_GFX9PLUS(TTMP1) \
2389 CASE_VI_GFX9PLUS(TTMP2) \
2390 CASE_VI_GFX9PLUS(TTMP3) \
2391 CASE_VI_GFX9PLUS(TTMP4) \
2392 CASE_VI_GFX9PLUS(TTMP5) \
2393 CASE_VI_GFX9PLUS(TTMP6) \
2394 CASE_VI_GFX9PLUS(TTMP7) \
2395 CASE_VI_GFX9PLUS(TTMP8) \
2396 CASE_VI_GFX9PLUS(TTMP9) \
2397 CASE_VI_GFX9PLUS(TTMP10) \
2398 CASE_VI_GFX9PLUS(TTMP11) \
2399 CASE_VI_GFX9PLUS(TTMP12) \
2400 CASE_VI_GFX9PLUS(TTMP13) \
2401 CASE_VI_GFX9PLUS(TTMP14) \
2402 CASE_VI_GFX9PLUS(TTMP15) \
2403 CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
2404 CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
2405 CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
2406 CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
2407 CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
2408 CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
2409 CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
2410 CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
2411 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
2412 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
2413 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
2414 CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
2415 CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
2416 CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
2417 CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2418 CASE_VI_GFX9PLUS( \
2419 TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2420 CASE_GFXPRE11_GFX11PLUS(M0) \
2421 CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
2422 CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
2423 }
2424
2425#define CASE_CI_VI(node) \
2426 assert(!isSI(STI)); \
2427 case node: \
2428 return isCI(STI) ? node##_ci : node##_vi;
2429
2430#define CASE_VI_GFX9PLUS(node) \
2431 case node: \
2432 return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;
2433
2434#define CASE_GFXPRE11_GFX11PLUS(node) \
2435 case node: \
2436 return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;
2437
2438#define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
2439 case node: \
2440 return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
2441
2442MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI) {
2443 if (STI.getTargetTriple().getArch() == Triple::r600)
2444 return Reg;
2445 MAP_REG2REG
2446}
2447
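// Illustrative example of the mapping: on a CI subtarget,
// getMCReg(AMDGPU::FLAT_SCR, STI) yields AMDGPU::FLAT_SCR_ci, and the reverse
// mapping mc2PseudoReg(AMDGPU::FLAT_SCR_ci) below recovers AMDGPU::FLAT_SCR.
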
2448#undef CASE_CI_VI
2449#undef CASE_VI_GFX9PLUS
2450#undef CASE_GFXPRE11_GFX11PLUS
2451#undef CASE_GFXPRE11_GFX11PLUS_TO
2452
2453#define CASE_CI_VI(node) \
2454 case node##_ci: \
2455 case node##_vi: \
2456 return node;
2457#define CASE_VI_GFX9PLUS(node) \
2458 case node##_vi: \
2459 case node##_gfx9plus: \
2460 return node;
2461#define CASE_GFXPRE11_GFX11PLUS(node) \
2462 case node##_gfx11plus: \
2463 case node##_gfxpre11: \
2464 return node;
2465#define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
2466
2467MCRegister mc2PseudoReg(MCRegister Reg) { MAP_REG2REG }
2468
2469bool isInlineValue(unsigned Reg) {
2470 switch (Reg) {
2471 case AMDGPU::SRC_SHARED_BASE_LO:
2472 case AMDGPU::SRC_SHARED_BASE:
2473 case AMDGPU::SRC_SHARED_LIMIT_LO:
2474 case AMDGPU::SRC_SHARED_LIMIT:
2475 case AMDGPU::SRC_PRIVATE_BASE_LO:
2476 case AMDGPU::SRC_PRIVATE_BASE:
2477 case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2478 case AMDGPU::SRC_PRIVATE_LIMIT:
2479 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2480 return true;
2481 case AMDGPU::SRC_VCCZ:
2482 case AMDGPU::SRC_EXECZ:
2483 case AMDGPU::SRC_SCC:
2484 return true;
2485 case AMDGPU::SGPR_NULL:
2486 return true;
2487 default:
2488 return false;
2489 }
2490}
2491
2492#undef CASE_CI_VI
2493#undef CASE_VI_GFX9PLUS
2494#undef CASE_GFXPRE11_GFX11PLUS
2495#undef CASE_GFXPRE11_GFX11PLUS_TO
2496#undef MAP_REG2REG
2497
2498bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2499 assert(OpNo < Desc.NumOperands);
2500 unsigned OpType = Desc.operands()[OpNo].OperandType;
2501 return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
2502 OpType <= AMDGPU::OPERAND_SRC_LAST;
2503}
2504
2505bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2506 assert(OpNo < Desc.NumOperands);
2507 unsigned OpType = Desc.operands()[OpNo].OperandType;
2508 return OpType >= AMDGPU::OPERAND_KIMM_FIRST &&
2509 OpType <= AMDGPU::OPERAND_KIMM_LAST;
2510}
2511
2512bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2513 assert(OpNo < Desc.NumOperands);
2514 unsigned OpType = Desc.operands()[OpNo].OperandType;
2515 switch (OpType) {
2516 case AMDGPU::OPERAND_REG_IMM_FP32:
2517 case AMDGPU::OPERAND_REG_IMM_FP64:
2518 case AMDGPU::OPERAND_REG_IMM_FP16:
2519 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2520 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2521 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2522 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2523 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2524 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2525 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2526 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2527 return true;
2528 default:
2529 return false;
2530 }
2531}
2532
2533bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2534 assert(OpNo < Desc.NumOperands);
2535 unsigned OpType = Desc.operands()[OpNo].OperandType;
2536 return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
2537 OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST) ||
2538 (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
2539 OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST);
2540}
2541
2542// Avoid using MCRegisterClass::getSize, since that function will go away
2543// (it is moving from the MC* level to the Target* level). Return size in bits.
2544unsigned getRegBitWidth(unsigned RCID) {
2545 switch (RCID) {
2546 case AMDGPU::VGPR_16RegClassID:
2547 case AMDGPU::VGPR_16_Lo128RegClassID:
2548 case AMDGPU::SGPR_LO16RegClassID:
2549 case AMDGPU::AGPR_LO16RegClassID:
2550 return 16;
2551 case AMDGPU::SGPR_32RegClassID:
2552 case AMDGPU::VGPR_32RegClassID:
2553 case AMDGPU::VRegOrLds_32RegClassID:
2554 case AMDGPU::AGPR_32RegClassID:
2555 case AMDGPU::VS_32RegClassID:
2556 case AMDGPU::AV_32RegClassID:
2557 case AMDGPU::SReg_32RegClassID:
2558 case AMDGPU::SReg_32_XM0RegClassID:
2559 case AMDGPU::SRegOrLds_32RegClassID:
2560 return 32;
2561 case AMDGPU::SGPR_64RegClassID:
2562 case AMDGPU::VS_64RegClassID:
2563 case AMDGPU::SReg_64RegClassID:
2564 case AMDGPU::VReg_64RegClassID:
2565 case AMDGPU::AReg_64RegClassID:
2566 case AMDGPU::SReg_64_XEXECRegClassID:
2567 case AMDGPU::VReg_64_Align2RegClassID:
2568 case AMDGPU::AReg_64_Align2RegClassID:
2569 case AMDGPU::AV_64RegClassID:
2570 case AMDGPU::AV_64_Align2RegClassID:
2571 return 64;
2572 case AMDGPU::SGPR_96RegClassID:
2573 case AMDGPU::SReg_96RegClassID:
2574 case AMDGPU::VReg_96RegClassID:
2575 case AMDGPU::AReg_96RegClassID:
2576 case AMDGPU::VReg_96_Align2RegClassID:
2577 case AMDGPU::AReg_96_Align2RegClassID:
2578 case AMDGPU::AV_96RegClassID:
2579 case AMDGPU::AV_96_Align2RegClassID:
2580 return 96;
2581 case AMDGPU::SGPR_128RegClassID:
2582 case AMDGPU::SReg_128RegClassID:
2583 case AMDGPU::VReg_128RegClassID:
2584 case AMDGPU::AReg_128RegClassID:
2585 case AMDGPU::VReg_128_Align2RegClassID:
2586 case AMDGPU::AReg_128_Align2RegClassID:
2587 case AMDGPU::AV_128RegClassID:
2588 case AMDGPU::AV_128_Align2RegClassID:
2589 case AMDGPU::SReg_128_XNULLRegClassID:
2590 return 128;
2591 case AMDGPU::SGPR_160RegClassID:
2592 case AMDGPU::SReg_160RegClassID:
2593 case AMDGPU::VReg_160RegClassID:
2594 case AMDGPU::AReg_160RegClassID:
2595 case AMDGPU::VReg_160_Align2RegClassID:
2596 case AMDGPU::AReg_160_Align2RegClassID:
2597 case AMDGPU::AV_160RegClassID:
2598 case AMDGPU::AV_160_Align2RegClassID:
2599 return 160;
2600 case AMDGPU::SGPR_192RegClassID:
2601 case AMDGPU::SReg_192RegClassID:
2602 case AMDGPU::VReg_192RegClassID:
2603 case AMDGPU::AReg_192RegClassID:
2604 case AMDGPU::VReg_192_Align2RegClassID:
2605 case AMDGPU::AReg_192_Align2RegClassID:
2606 case AMDGPU::AV_192RegClassID:
2607 case AMDGPU::AV_192_Align2RegClassID:
2608 return 192;
2609 case AMDGPU::SGPR_224RegClassID:
2610 case AMDGPU::SReg_224RegClassID:
2611 case AMDGPU::VReg_224RegClassID:
2612 case AMDGPU::AReg_224RegClassID:
2613 case AMDGPU::VReg_224_Align2RegClassID:
2614 case AMDGPU::AReg_224_Align2RegClassID:
2615 case AMDGPU::AV_224RegClassID:
2616 case AMDGPU::AV_224_Align2RegClassID:
2617 return 224;
2618 case AMDGPU::SGPR_256RegClassID:
2619 case AMDGPU::SReg_256RegClassID:
2620 case AMDGPU::VReg_256RegClassID:
2621 case AMDGPU::AReg_256RegClassID:
2622 case AMDGPU::VReg_256_Align2RegClassID:
2623 case AMDGPU::AReg_256_Align2RegClassID:
2624 case AMDGPU::AV_256RegClassID:
2625 case AMDGPU::AV_256_Align2RegClassID:
2626 case AMDGPU::SReg_256_XNULLRegClassID:
2627 return 256;
2628 case AMDGPU::SGPR_288RegClassID:
2629 case AMDGPU::SReg_288RegClassID:
2630 case AMDGPU::VReg_288RegClassID:
2631 case AMDGPU::AReg_288RegClassID:
2632 case AMDGPU::VReg_288_Align2RegClassID:
2633 case AMDGPU::AReg_288_Align2RegClassID:
2634 case AMDGPU::AV_288RegClassID:
2635 case AMDGPU::AV_288_Align2RegClassID:
2636 return 288;
2637 case AMDGPU::SGPR_320RegClassID:
2638 case AMDGPU::SReg_320RegClassID:
2639 case AMDGPU::VReg_320RegClassID:
2640 case AMDGPU::AReg_320RegClassID:
2641 case AMDGPU::VReg_320_Align2RegClassID:
2642 case AMDGPU::AReg_320_Align2RegClassID:
2643 case AMDGPU::AV_320RegClassID:
2644 case AMDGPU::AV_320_Align2RegClassID:
2645 return 320;
2646 case AMDGPU::SGPR_352RegClassID:
2647 case AMDGPU::SReg_352RegClassID:
2648 case AMDGPU::VReg_352RegClassID:
2649 case AMDGPU::AReg_352RegClassID:
2650 case AMDGPU::VReg_352_Align2RegClassID:
2651 case AMDGPU::AReg_352_Align2RegClassID:
2652 case AMDGPU::AV_352RegClassID:
2653 case AMDGPU::AV_352_Align2RegClassID:
2654 return 352;
2655 case AMDGPU::SGPR_384RegClassID:
2656 case AMDGPU::SReg_384RegClassID:
2657 case AMDGPU::VReg_384RegClassID:
2658 case AMDGPU::AReg_384RegClassID:
2659 case AMDGPU::VReg_384_Align2RegClassID:
2660 case AMDGPU::AReg_384_Align2RegClassID:
2661 case AMDGPU::AV_384RegClassID:
2662 case AMDGPU::AV_384_Align2RegClassID:
2663 return 384;
2664 case AMDGPU::SGPR_512RegClassID:
2665 case AMDGPU::SReg_512RegClassID:
2666 case AMDGPU::VReg_512RegClassID:
2667 case AMDGPU::AReg_512RegClassID:
2668 case AMDGPU::VReg_512_Align2RegClassID:
2669 case AMDGPU::AReg_512_Align2RegClassID:
2670 case AMDGPU::AV_512RegClassID:
2671 case AMDGPU::AV_512_Align2RegClassID:
2672 return 512;
2673 case AMDGPU::SGPR_1024RegClassID:
2674 case AMDGPU::SReg_1024RegClassID:
2675 case AMDGPU::VReg_1024RegClassID:
2676 case AMDGPU::AReg_1024RegClassID:
2677 case AMDGPU::VReg_1024_Align2RegClassID:
2678 case AMDGPU::AReg_1024_Align2RegClassID:
2679 case AMDGPU::AV_1024RegClassID:
2680 case AMDGPU::AV_1024_Align2RegClassID:
2681 return 1024;
2682 default:
2683 llvm_unreachable("Unexpected register class");
2684 }
2685}
2686
2687unsigned getRegBitWidth(const MCRegisterClass &RC) {
2688 return getRegBitWidth(RCID: RC.getID());
2689}
2690
2691unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
2692 unsigned OpNo) {
2693 assert(OpNo < Desc.NumOperands);
2694 unsigned RCID = Desc.operands()[OpNo].RegClass;
2695 return getRegBitWidth(RCID) / 8;
2696}
2697
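// Illustrative example: an operand constrained to VReg_96 reports
// getRegBitWidth(AMDGPU::VReg_96RegClassID) / 8 == 12 bytes.
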
2698bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
2699 if (isInlinableIntLiteral(Literal))
2700 return true;
2701
2702 uint64_t Val = static_cast<uint64_t>(Literal);
2703 return (Val == llvm::bit_cast<uint64_t>(from: 0.0)) ||
2704 (Val == llvm::bit_cast<uint64_t>(from: 1.0)) ||
2705 (Val == llvm::bit_cast<uint64_t>(from: -1.0)) ||
2706 (Val == llvm::bit_cast<uint64_t>(from: 0.5)) ||
2707 (Val == llvm::bit_cast<uint64_t>(from: -0.5)) ||
2708 (Val == llvm::bit_cast<uint64_t>(from: 2.0)) ||
2709 (Val == llvm::bit_cast<uint64_t>(from: -2.0)) ||
2710 (Val == llvm::bit_cast<uint64_t>(from: 4.0)) ||
2711 (Val == llvm::bit_cast<uint64_t>(from: -4.0)) ||
2712 (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
2713}
2714
2715bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
2716 if (isInlinableIntLiteral(Literal))
2717 return true;
2718
2719 // The actual type of the operand does not seem to matter as long
2720 // as the bits match one of the inline immediate values. For example:
2721 //
2722 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
2723 // so it is a legal inline immediate.
2724 //
2725 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
2726 // floating-point, so it is a legal inline immediate.
2727
2728 uint32_t Val = static_cast<uint32_t>(Literal);
2729 return (Val == llvm::bit_cast<uint32_t>(from: 0.0f)) ||
2730 (Val == llvm::bit_cast<uint32_t>(from: 1.0f)) ||
2731 (Val == llvm::bit_cast<uint32_t>(from: -1.0f)) ||
2732 (Val == llvm::bit_cast<uint32_t>(from: 0.5f)) ||
2733 (Val == llvm::bit_cast<uint32_t>(from: -0.5f)) ||
2734 (Val == llvm::bit_cast<uint32_t>(from: 2.0f)) ||
2735 (Val == llvm::bit_cast<uint32_t>(from: -2.0f)) ||
2736 (Val == llvm::bit_cast<uint32_t>(from: 4.0f)) ||
2737 (Val == llvm::bit_cast<uint32_t>(from: -4.0f)) ||
2738 (Val == 0x3e22f983 && HasInv2Pi);
2739}
2740
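// Illustrative examples, pure bit-pattern checks against the list above:
//   isInlinableLiteral32(0x3F800000, false) -> true   (1.0f)
//   isInlinableLiteral32(0x3E22F983, false) -> false  (1/2pi requires Inv2Pi)
//   isInlinableLiteral32(0x3E22F983, true)  -> true
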
2741bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
2742 if (!HasInv2Pi)
2743 return false;
2744 if (isInlinableIntLiteral(Literal))
2745 return true;
2746 uint16_t Val = static_cast<uint16_t>(Literal);
2747 return Val == 0x3F00 || // 0.5
2748 Val == 0xBF00 || // -0.5
2749 Val == 0x3F80 || // 1.0
2750 Val == 0xBF80 || // -1.0
2751 Val == 0x4000 || // 2.0
2752 Val == 0xC000 || // -2.0
2753 Val == 0x4080 || // 4.0
2754 Val == 0xC080 || // -4.0
2755 Val == 0x3E22; // 1.0 / (2.0 * pi)
2756}
2757
2758bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi) {
2759 return isInlinableLiteral32(Literal, HasInv2Pi);
2760}
2761
2762bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
2763 if (!HasInv2Pi)
2764 return false;
2765 if (isInlinableIntLiteral(Literal))
2766 return true;
2767 uint16_t Val = static_cast<uint16_t>(Literal);
2768 return Val == 0x3C00 || // 1.0
2769 Val == 0xBC00 || // -1.0
2770 Val == 0x3800 || // 0.5
2771 Val == 0xB800 || // -0.5
2772 Val == 0x4000 || // 2.0
2773 Val == 0xC000 || // -2.0
2774 Val == 0x4400 || // 4.0
2775 Val == 0xC400 || // -4.0
2776 Val == 0x3118; // 1/2pi
2777}
2778
2779std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) {
2780 // Unfortunately, the Instruction Set Architecture Reference Guide is
2781 // misleading about how the inline operands work for (packed) 16-bit
2782 // instructions. In a nutshell, the actual HW behavior is:
2783 //
2784 // - integer encodings (-16 .. 64) are always produced as sign-extended
2785 // 32-bit values
2786 // - float encodings are produced as:
2787 // - for F16 instructions: corresponding half-precision float values in
2788 // the LSBs, 0 in the MSBs
2789 // - for UI16 instructions: corresponding single-precision float value
2790 int32_t Signed = static_cast<int32_t>(Literal);
2791 if (Signed >= 0 && Signed <= 64)
2792 return 128 + Signed;
2793
2794 if (Signed >= -16 && Signed <= -1)
2795 return 192 + std::abs(x: Signed);
2796
2797 if (IsFloat) {
2798 // clang-format off
2799 switch (Literal) {
2800 case 0x3800: return 240; // 0.5
2801 case 0xB800: return 241; // -0.5
2802 case 0x3C00: return 242; // 1.0
2803 case 0xBC00: return 243; // -1.0
2804 case 0x4000: return 244; // 2.0
2805 case 0xC000: return 245; // -2.0
2806 case 0x4400: return 246; // 4.0
2807 case 0xC400: return 247; // -4.0
2808 case 0x3118: return 248; // 1.0 / (2.0 * pi)
2809 default: break;
2810 }
2811 // clang-format on
2812 } else {
2813 // clang-format off
2814 switch (Literal) {
2815 case 0x3F000000: return 240; // 0.5
2816 case 0xBF000000: return 241; // -0.5
2817 case 0x3F800000: return 242; // 1.0
2818 case 0xBF800000: return 243; // -1.0
2819 case 0x40000000: return 244; // 2.0
2820 case 0xC0000000: return 245; // -2.0
2821 case 0x40800000: return 246; // 4.0
2822 case 0xC0800000: return 247; // -4.0
2823 case 0x3E22F983: return 248; // 1.0 / (2.0 * pi)
2824 default: break;
2825 }
2826 // clang-format on
2827 }
2828
2829 return {};
2830}
2831
2832// Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction
2833// or nullopt.
2834std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) {
2835 return getInlineEncodingV216(IsFloat: false, Literal);
2836}
2837
2838// Encoding of the literal as an inline constant for a V_PK_*_BF16 instruction
2839// or nullopt.
2840std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal) {
2841 int32_t Signed = static_cast<int32_t>(Literal);
2842 if (Signed >= 0 && Signed <= 64)
2843 return 128 + Signed;
2844
2845 if (Signed >= -16 && Signed <= -1)
2846 return 192 + std::abs(x: Signed);
2847
2848 // clang-format off
2849 switch (Literal) {
2850 case 0x3F00: return 240; // 0.5
2851 case 0xBF00: return 241; // -0.5
2852 case 0x3F80: return 242; // 1.0
2853 case 0xBF80: return 243; // -1.0
2854 case 0x4000: return 244; // 2.0
2855 case 0xC000: return 245; // -2.0
2856 case 0x4080: return 246; // 4.0
2857 case 0xC080: return 247; // -4.0
2858 case 0x3E22: return 248; // 1.0 / (2.0 * pi)
2859 default: break;
2860 }
2861 // clang-format on
2862
2863 return std::nullopt;
2864}
2865
2866// Encoding of the literal as an inline constant for a V_PK_*_F16 instruction
2867// or nullopt.
2868std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) {
2869 return getInlineEncodingV216(IsFloat: true, Literal);
2870}
2871
2872// Whether the given literal can be inlined for a V_PK_* instruction.
2873bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) {
2874 switch (OpType) {
2875 case AMDGPU::OPERAND_REG_IMM_V2INT16:
2876 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2877 return getInlineEncodingV216(IsFloat: false, Literal).has_value();
2878 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2879 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2880 return getInlineEncodingV216(IsFloat: true, Literal).has_value();
2881 case AMDGPU::OPERAND_REG_IMM_V2BF16:
2882 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
2883 return isInlinableLiteralV2BF16(Literal);
2884 default:
2885 llvm_unreachable("bad packed operand type");
2886 }
2887}
2888
2889// Whether the given literal can be inlined for a V_PK_*_IU16 instruction.
2890bool isInlinableLiteralV2I16(uint32_t Literal) {
2891 return getInlineEncodingV2I16(Literal).has_value();
2892}
2893
2894// Whether the given literal can be inlined for a V_PK_*_BF16 instruction.
2895bool isInlinableLiteralV2BF16(uint32_t Literal) {
2896 return getInlineEncodingV2BF16(Literal).has_value();
2897}
2898
2899// Whether the given literal can be inlined for a V_PK_*_F16 instruction.
2900bool isInlinableLiteralV2F16(uint32_t Literal) {
2901 return getInlineEncodingV2F16(Literal).has_value();
2902}
2903
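// Illustrative examples, read off the switch tables in getInlineEncodingV216:
//   getInlineEncodingV2I16(5)          -> 133      (integer 5 -> 128 + 5)
//   getInlineEncodingV2F16(0x3C00)     -> 242      (+1.0 half in the LSBs)
//   getInlineEncodingV2F16(0x3C003C00) -> nullopt  (both halves set; no inline
//                                                   encoding exists)
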
2904bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
2905 if (IsFP64)
2906 return !(Val & 0xffffffffu);
2907
2908 return isUInt<32>(x: Val) || isInt<32>(x: Val);
2909}
2910
2911bool isArgPassedInSGPR(const Argument *A) {
2912 const Function *F = A->getParent();
2913
2914 // Arguments to compute shaders are never a source of divergence.
2915 CallingConv::ID CC = F->getCallingConv();
2916 switch (CC) {
2917 case CallingConv::AMDGPU_KERNEL:
2918 case CallingConv::SPIR_KERNEL:
2919 return true;
2920 case CallingConv::AMDGPU_VS:
2921 case CallingConv::AMDGPU_LS:
2922 case CallingConv::AMDGPU_HS:
2923 case CallingConv::AMDGPU_ES:
2924 case CallingConv::AMDGPU_GS:
2925 case CallingConv::AMDGPU_PS:
2926 case CallingConv::AMDGPU_CS:
2927 case CallingConv::AMDGPU_Gfx:
2928 case CallingConv::AMDGPU_CS_Chain:
2929 case CallingConv::AMDGPU_CS_ChainPreserve:
2930 // For non-compute shaders, SGPR inputs are marked with either inreg or
2931 // byval. Everything else is in VGPRs.
2932 return A->hasAttribute(Kind: Attribute::InReg) ||
2933 A->hasAttribute(Kind: Attribute::ByVal);
2934 default:
2935 // TODO: treat i1 as divergent?
2936 return A->hasAttribute(Kind: Attribute::InReg);
2937 }
2938}
2939
2940bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
2941 // Arguments to compute shaders are never a source of divergence.
2942 CallingConv::ID CC = CB->getCallingConv();
2943 switch (CC) {
2944 case CallingConv::AMDGPU_KERNEL:
2945 case CallingConv::SPIR_KERNEL:
2946 return true;
2947 case CallingConv::AMDGPU_VS:
2948 case CallingConv::AMDGPU_LS:
2949 case CallingConv::AMDGPU_HS:
2950 case CallingConv::AMDGPU_ES:
2951 case CallingConv::AMDGPU_GS:
2952 case CallingConv::AMDGPU_PS:
2953 case CallingConv::AMDGPU_CS:
2954 case CallingConv::AMDGPU_Gfx:
2955 case CallingConv::AMDGPU_CS_Chain:
2956 case CallingConv::AMDGPU_CS_ChainPreserve:
2957 // For non-compute shaders, SGPR inputs are marked with either inreg or
2958 // byval. Everything else is in VGPRs.
2959 return CB->paramHasAttr(ArgNo, Kind: Attribute::InReg) ||
2960 CB->paramHasAttr(ArgNo, Kind: Attribute::ByVal);
2961 default:
2962 return CB->paramHasAttr(ArgNo, Kind: Attribute::InReg);
2963 }
2964}
2965
2966static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
2967 return isGCN3Encoding(STI: ST) || isGFX10Plus(STI: ST);
2968}
2969
2970bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
2971 int64_t EncodedOffset) {
2972 if (isGFX12Plus(STI: ST))
2973 return isUInt<23>(x: EncodedOffset);
2974
2975 return hasSMEMByteOffset(ST) ? isUInt<20>(x: EncodedOffset)
2976 : isUInt<8>(x: EncodedOffset);
2977}
2978
2979bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
2980 int64_t EncodedOffset, bool IsBuffer) {
2981 if (isGFX12Plus(STI: ST))
2982 return isInt<24>(x: EncodedOffset);
2983
2984 return !IsBuffer && hasSMRDSignedImmOffset(ST) && isInt<21>(x: EncodedOffset);
2985}
2986
2987static bool isDwordAligned(uint64_t ByteOffset) {
2988 return (ByteOffset & 3) == 0;
2989}
2990
2991uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
2992 uint64_t ByteOffset) {
2993 if (hasSMEMByteOffset(ST))
2994 return ByteOffset;
2995
2996 assert(isDwordAligned(ByteOffset));
2997 return ByteOffset >> 2;
2998}
2999
3000std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
3001 int64_t ByteOffset, bool IsBuffer,
3002 bool HasSOffset) {
3003 // For unbuffered smem loads, it is illegal for the Immediate Offset to be
3004 // negative if the resulting (Offset + (M0, SOffset, or zero)) is negative.
3005 // Handle case where SOffset is not present.
3006 if (!IsBuffer && !HasSOffset && ByteOffset < 0 && hasSMRDSignedImmOffset(ST))
3007 return std::nullopt;
3008
3009 if (isGFX12Plus(STI: ST)) // 24 bit signed offsets
3010 return isInt<24>(x: ByteOffset) ? std::optional<int64_t>(ByteOffset)
3011 : std::nullopt;
3012
3013 // The signed version is always a byte offset.
3014 if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
3015 assert(hasSMEMByteOffset(ST));
3016 return isInt<20>(x: ByteOffset) ? std::optional<int64_t>(ByteOffset)
3017 : std::nullopt;
3018 }
3019
3020 if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
3021 return std::nullopt;
3022
3023 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
3024 return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
3025 ? std::optional<int64_t>(EncodedOffset)
3026 : std::nullopt;
3027}
3028
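// Illustrative sketch: callers typically probe for an encodable immediate and
// fall back to materializing the offset in a register when std::nullopt is
// returned. ST and ByteOffset are assumed to be in scope.
//
//   if (std::optional<int64_t> Enc = getSMRDEncodedOffset(
//           ST, ByteOffset, /*IsBuffer=*/false, /*HasSOffset=*/false)) {
//     // Use *Enc as the instruction's immediate offset.
//   } else {
//     // Not encodable on this subtarget; use a register offset instead.
//   }
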
3029std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
3030 int64_t ByteOffset) {
3031 if (!isCI(STI: ST) || !isDwordAligned(ByteOffset))
3032 return std::nullopt;
3033
3034 int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
3035 return isUInt<32>(x: EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
3036 : std::nullopt;
3037}
3038
3039unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) {
3040 if (AMDGPU::isGFX10(STI: ST))
3041 return 12;
3042
3043 if (AMDGPU::isGFX12(STI: ST))
3044 return 24;
3045 return 13;
3046}
3047
3048namespace {
3049
3050struct SourceOfDivergence {
3051 unsigned Intr;
3052};
3053const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
3054
3055struct AlwaysUniform {
3056 unsigned Intr;
3057};
3058const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);
3059
3060#define GET_SourcesOfDivergence_IMPL
3061#define GET_UniformIntrinsics_IMPL
3062#define GET_Gfx9BufferFormat_IMPL
3063#define GET_Gfx10BufferFormat_IMPL
3064#define GET_Gfx11PlusBufferFormat_IMPL
3065
3066#include "AMDGPUGenSearchableTables.inc"
3067
3068} // end anonymous namespace
3069
3070bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
3071 return lookupSourceOfDivergence(Intr: IntrID);
3072}
3073
3074bool isIntrinsicAlwaysUniform(unsigned IntrID) {
3075 return lookupAlwaysUniform(Intr: IntrID);
3076}
3077
3078const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
3079 uint8_t NumComponents,
3080 uint8_t NumFormat,
3081 const MCSubtargetInfo &STI) {
3082 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(
3083 BitsPerComp, NumComponents, NumFormat)
3084 : isGFX10(STI)
3085 ? getGfx10BufferFormatInfo(BitsPerComp, NumComponents, NumFormat)
3086 : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat);
3087}
3088
3089const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
3090 const MCSubtargetInfo &STI) {
3091 return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
3092 : isGFX10(STI) ? getGfx10BufferFormatInfo(Format)
3093 : getGfx9BufferFormatInfo(Format);
3094}
3095
3096bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) {
3097 for (auto OpName : {OpName::vdst, OpName::src0, OpName::src1, OpName::src2}) {
3098 int Idx = getNamedOperandIdx(Opcode: OpDesc.getOpcode(), Name: OpName);
3099 if (Idx == -1)
3100 continue;
3101
3102 if (OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64RegClassID ||
3103 OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64_Align2RegClassID)
3104 return true;
3105 }
3106
3107 return false;
3108}
3109
3110bool isDPALU_DPP(const MCInstrDesc &OpDesc) {
3111 return hasAny64BitVGPROperands(OpDesc);
3112}
3113
3114unsigned getLdsDwGranularity(const MCSubtargetInfo &ST) {
3115 // Currently this is 128 for all subtargets.
3116 return 128;
3117}
3118
3119} // namespace AMDGPU
3120
3121raw_ostream &operator<<(raw_ostream &OS,
3122 const AMDGPU::IsaInfo::TargetIDSetting S) {
3123 switch (S) {
3124 case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported):
3125 OS << "Unsupported";
3126 break;
3127 case (AMDGPU::IsaInfo::TargetIDSetting::Any):
3128 OS << "Any";
3129 break;
3130 case (AMDGPU::IsaInfo::TargetIDSetting::Off):
3131 OS << "Off";
3132 break;
3133 case (AMDGPU::IsaInfo::TargetIDSetting::On):
3134 OS << "On";
3135 break;
3136 }
3137 return OS;
3138}
3139
3140} // namespace llvm
3141