1//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11
12#include "AMDGPUSubtarget.h"
13#include "SIDefines.h"
14#include "llvm/IR/CallingConv.h"
15#include "llvm/IR/InstrTypes.h"
16#include "llvm/IR/Module.h"
17#include "llvm/Support/Alignment.h"
18#include <array>
19#include <functional>
20#include <utility>
21
22// Pull in OpName enum definition and getNamedOperandIdx() declaration.
23#define GET_INSTRINFO_OPERAND_ENUM
24#include "AMDGPUGenInstrInfo.inc"
25
26struct amd_kernel_code_t;
27
28namespace llvm {
29
30struct Align;
31class Argument;
32class Function;
33class GlobalValue;
34class MCInstrInfo;
35class MCRegisterClass;
36class MCRegisterInfo;
37class MCSubtargetInfo;
38class MDNode;
39class StringRef;
40class Triple;
41class raw_ostream;
42
43namespace AMDGPU {
44
45struct AMDGPUMCKernelCodeT;
46struct IsaVersion;
47
/// Generic target versions emitted by this version of LLVM.
///
/// These numbers are incremented every time a codegen breaking change occurs
/// within a generic family.
namespace GenericVersion {
// All generic families are currently at their initial version.
static constexpr unsigned GFX9 = 1;
static constexpr unsigned GFX9_4 = 1;
static constexpr unsigned GFX10_1 = 1;
static constexpr unsigned GFX10_3 = 1;
static constexpr unsigned GFX11 = 1;
static constexpr unsigned GFX12 = 1;
} // namespace GenericVersion
60
// Supported AMDHSA code object (ABI) versions.
enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 };
62
// FP type classification used by getFPDstSelType(); None if not applicable.
enum class FPType { None, FP4, FP8 };
64
65/// \returns True if \p STI is AMDHSA.
66bool isHsaAbi(const MCSubtargetInfo &STI);
67
68/// \returns Code object version from the IR module flag.
69unsigned getAMDHSACodeObjectVersion(const Module &M);
70
71/// \returns Code object version from ELF's e_ident[EI_ABIVERSION].
72unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion);
73
74/// \returns The default HSA code object version. This should only be used when
75/// we lack a more accurate CodeObjectVersion value (e.g. from the IR module
76/// flag or a .amdhsa_code_object_version directive)
77unsigned getDefaultAMDHSACodeObjectVersion();
78
79/// \returns ABIVersion suitable for use in ELF's e_ident[EI_ABIVERSION]. \param
80/// CodeObjectVersion is a value returned by getAMDHSACodeObjectVersion().
81uint8_t getELFABIVersion(const Triple &OS, unsigned CodeObjectVersion);
82
83/// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr
84unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV);
85
86/// \returns The offset of the hostcall pointer argument from implicitarg_ptr
87unsigned getHostcallImplicitArgPosition(unsigned COV);
88
89unsigned getDefaultQueueImplicitArgPosition(unsigned COV);
90unsigned getCompletionActionImplicitArgPosition(unsigned COV);
91
// One entry describing a GCN buffer format; looked up via
// getGcnBufferFormatInfo() by either the combined format or its parts.
struct GcnBufferFormatInfo {
  unsigned Format;        // Combined format encoding.
  unsigned BitsPerComp;   // Bits per component.
  unsigned NumComponents; // Number of components.
  unsigned NumFormat;     // Numeric-format field encoding.
  unsigned DataFormat;    // Data-format field encoding.
};
99
// Per-opcode MAI (matrix) instruction properties from the generated
// MAIInstInfoTable; queried by getMAIIsDGEMM()/getMAIIsGFX940XDL().
struct MAIInstInfo {
  uint32_t Opcode;    // Instruction opcode.
  bool is_dgemm;      // True for double-precision GEMM operations.
  bool is_gfx940_xdl; // True for gfx940 XDL operations.
};
105
// Table entry relating an MFMA F8F6F4 opcode to its F8F8 counterpart and
// the source register counts; see getMFMA_F8F6F4_WithFormatArgs().
struct MFMA_F8F6F4_Info {
  unsigned Opcode;
  unsigned F8F8Opcode;  // Corresponding F8F8 form of the opcode.
  uint8_t NumRegsSrcA;  // Number of registers used by source A.
  uint8_t NumRegsSrcB;  // Number of registers used by source B.
};
112
// Table entry keying scaled F32/F16 -> F8/F4 conversion opcodes
// (generated isCvtScaleF32_F32F16ToF8F4 table).
struct CvtScaleF32_F32F16ToF8F4_Info {
  unsigned Opcode;
};
116
// Table entry mapping a True16 opcode to its D16 Hi/Lo counterparts
// (generated True16D16Table).
struct True16D16Info {
  unsigned T16Op; // True16 opcode.
  unsigned HiOp;  // D16 high-half opcode.
  unsigned LoOp;  // D16 low-half opcode.
};
122
// Per-opcode WMMA instruction properties (generated WMMAInstInfoTable);
// queried by getWMMAIsXDL().
struct WMMAInstInfo {
  uint32_t Opcode;  // Instruction opcode.
  bool is_wmma_xdl; // True for WMMA XDL operations.
};
127
128#define GET_MIMGBaseOpcode_DECL
129#define GET_MIMGDim_DECL
130#define GET_MIMGEncoding_DECL
131#define GET_MIMGLZMapping_DECL
132#define GET_MIMGMIPMapping_DECL
133#define GET_MIMGBiASMapping_DECL
134#define GET_MAIInstInfoTable_DECL
135#define GET_isMFMA_F8F6F4Table_DECL
136#define GET_isCvtScaleF32_F32F16ToF8F4Table_DECL
137#define GET_True16D16Table_DECL
138#define GET_WMMAInstInfoTable_DECL
139#include "AMDGPUGenSearchableTables.inc"
140
141namespace IsaInfo {
142
enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  // SGPRs set aside for the trap handler — assumption from the name; confirm.
  TRAP_NUM_SGPRS = 16
};
149
// State of an optional target feature (xnack / sramecc) in a target ID.
enum class TargetIDSetting { Unsupported, Any, Off, On };
151
152class AMDGPUTargetID {
153private:
154 const MCSubtargetInfo &STI;
155 TargetIDSetting XnackSetting;
156 TargetIDSetting SramEccSetting;
157
158public:
159 explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
160 ~AMDGPUTargetID() = default;
161
162 /// \return True if the current xnack setting is not "Unsupported".
163 bool isXnackSupported() const {
164 return XnackSetting != TargetIDSetting::Unsupported;
165 }
166
167 /// \returns True if the current xnack setting is "On" or "Any".
168 bool isXnackOnOrAny() const {
169 return XnackSetting == TargetIDSetting::On ||
170 XnackSetting == TargetIDSetting::Any;
171 }
172
173 /// \returns True if current xnack setting is "On" or "Off",
174 /// false otherwise.
175 bool isXnackOnOrOff() const {
176 return getXnackSetting() == TargetIDSetting::On ||
177 getXnackSetting() == TargetIDSetting::Off;
178 }
179
180 /// \returns The current xnack TargetIDSetting, possible options are
181 /// "Unsupported", "Any", "Off", and "On".
182 TargetIDSetting getXnackSetting() const { return XnackSetting; }
183
184 /// Sets xnack setting to \p NewXnackSetting.
185 void setXnackSetting(TargetIDSetting NewXnackSetting) {
186 XnackSetting = NewXnackSetting;
187 }
188
189 /// \return True if the current sramecc setting is not "Unsupported".
190 bool isSramEccSupported() const {
191 return SramEccSetting != TargetIDSetting::Unsupported;
192 }
193
194 /// \returns True if the current sramecc setting is "On" or "Any".
195 bool isSramEccOnOrAny() const {
196 return SramEccSetting == TargetIDSetting::On ||
197 SramEccSetting == TargetIDSetting::Any;
198 }
199
200 /// \returns True if current sramecc setting is "On" or "Off",
201 /// false otherwise.
202 bool isSramEccOnOrOff() const {
203 return getSramEccSetting() == TargetIDSetting::On ||
204 getSramEccSetting() == TargetIDSetting::Off;
205 }
206
207 /// \returns The current sramecc TargetIDSetting, possible options are
208 /// "Unsupported", "Any", "Off", and "On".
209 TargetIDSetting getSramEccSetting() const { return SramEccSetting; }
210
211 /// Sets sramecc setting to \p NewSramEccSetting.
212 void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
213 SramEccSetting = NewSramEccSetting;
214 }
215
216 void setTargetIDFromFeaturesString(StringRef FS);
217 void setTargetIDFromTargetIDStream(StringRef TargetID);
218
219 /// \returns String representation of an object.
220 std::string toString() const;
221};
222
223/// \returns Wavefront size for given subtarget \p STI.
224unsigned getWavefrontSize(const MCSubtargetInfo *STI);
225
226/// \returns Local memory size in bytes for given subtarget \p STI.
227unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
228
229/// \returns Maximum addressable local memory size in bytes for given subtarget
230/// \p STI.
231unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI);
232
233/// \returns Number of execution units per compute unit for given subtarget \p
234/// STI.
235unsigned getEUsPerCU(const MCSubtargetInfo *STI);
236
237/// \returns Maximum number of work groups per compute unit for given subtarget
238/// \p STI and limited by given \p FlatWorkGroupSize.
239unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
240 unsigned FlatWorkGroupSize);
241
242/// \returns Minimum number of waves per execution unit for given subtarget \p
243/// STI.
244unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
245
246/// \returns Maximum number of waves per execution unit for given subtarget \p
247/// STI without any kind of limitation.
248unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
249
250/// \returns Number of waves per execution unit required to support the given \p
251/// FlatWorkGroupSize.
252unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
253 unsigned FlatWorkGroupSize);
254
255/// \returns Minimum flat work group size for given subtarget \p STI.
256unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
257
258/// \returns Maximum flat work group size for given subtarget \p STI.
259unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
260
261/// \returns Number of waves per work group for given subtarget \p STI and
262/// \p FlatWorkGroupSize.
263unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
264 unsigned FlatWorkGroupSize);
265
266/// \returns SGPR allocation granularity for given subtarget \p STI.
267unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
268
269/// \returns SGPR encoding granularity for given subtarget \p STI.
270unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
271
272/// \returns Total number of SGPRs for given subtarget \p STI.
273unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
274
275/// \returns Addressable number of SGPRs for given subtarget \p STI.
276unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
277
278/// \returns Minimum number of SGPRs that meets the given number of waves per
279/// execution unit requirement for given subtarget \p STI.
280unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
281
282/// \returns Maximum number of SGPRs that meets the given number of waves per
283/// execution unit requirement for given subtarget \p STI.
284unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
285 bool Addressable);
286
287/// \returns Number of extra SGPRs implicitly required by given subtarget \p
288/// STI when the given special registers are used.
289unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
290 bool FlatScrUsed, bool XNACKUsed);
291
292/// \returns Number of extra SGPRs implicitly required by given subtarget \p
293/// STI when the given special registers are used. XNACK is inferred from
294/// \p STI.
295unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
296 bool FlatScrUsed);
297
298/// \returns Number of SGPR blocks needed for given subtarget \p STI when
299/// \p NumSGPRs are used. \p NumSGPRs should already include any special
300/// register counts.
301unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
302
303/// \returns VGPR allocation granularity for given subtarget \p STI.
304///
305/// For subtargets which support it, \p EnableWavefrontSize32 should match
306/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
307unsigned
308getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize,
309 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
310
311/// \returns VGPR encoding granularity for given subtarget \p STI.
312///
313/// For subtargets which support it, \p EnableWavefrontSize32 should match
314/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
315unsigned getVGPREncodingGranule(
316 const MCSubtargetInfo *STI,
317 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
318
319/// For subtargets with a unified VGPR file and mixed ArchVGPR/AGPR usage,
320/// returns the allocation granule for ArchVGPRs.
321unsigned getArchVGPRAllocGranule();
322
323/// \returns Total number of VGPRs for given subtarget \p STI.
324unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
325
326/// \returns Addressable number of architectural VGPRs for a given subtarget \p
327/// STI.
328unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI);
329
330/// \returns Addressable number of VGPRs for given subtarget \p STI.
331unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI,
332 unsigned DynamicVGPRBlockSize);
333
334/// \returns Minimum number of VGPRs that meets given number of waves per
335/// execution unit requirement for given subtarget \p STI.
336unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
337 unsigned DynamicVGPRBlockSize);
338
339/// \returns Maximum number of VGPRs that meets given number of waves per
340/// execution unit requirement for given subtarget \p STI.
341unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
342 unsigned DynamicVGPRBlockSize);
343
344/// \returns Number of waves reachable for a given \p NumVGPRs usage for given
345/// subtarget \p STI.
346unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
347 unsigned NumVGPRs,
348 unsigned DynamicVGPRBlockSize);
349
350/// \returns Number of waves reachable for a given \p NumVGPRs usage, \p Granule
351/// size, \p MaxWaves possible, and \p TotalNumVGPRs available.
352unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
353 unsigned MaxWaves,
354 unsigned TotalNumVGPRs);
355
356/// \returns Occupancy for a given \p SGPRs usage, \p MaxWaves possible, and \p
357/// Gen.
358unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
359 AMDGPUSubtarget::Generation Gen);
360
361/// \returns Number of VGPR blocks needed for given subtarget \p STI when
362/// \p NumVGPRs are used. We actually return the number of blocks -1, since
363/// that's what we encode.
364///
365/// For subtargets which support it, \p EnableWavefrontSize32 should match the
366/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
367unsigned getEncodedNumVGPRBlocks(
368 const MCSubtargetInfo *STI, unsigned NumVGPRs,
369 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
370
371/// \returns Number of VGPR blocks that need to be allocated for the given
372/// subtarget \p STI when \p NumVGPRs are used.
373unsigned getAllocatedNumVGPRBlocks(
374 const MCSubtargetInfo *STI, unsigned NumVGPRs,
375 unsigned DynamicVGPRBlockSize,
376 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
377
378} // end namespace IsaInfo
379
// Represents a field occupying bits [HighBit:LowBit] of an encoded value,
// with compile-time default D.
template <unsigned HighBit, unsigned LowBit, unsigned D = 0>
struct EncodingField {
  static_assert(HighBit >= LowBit, "Invalid bit range!");

  using ValueType = unsigned;

  // Bit position of the field's least significant bit.
  static constexpr unsigned Offset = LowBit;
  // Number of bits the field occupies.
  static constexpr unsigned Width = HighBit - LowBit + 1;
  // Value used when the field is left unspecified.
  static constexpr ValueType Default = D;

  ValueType Value;

  constexpr EncodingField(ValueType Value) : Value(Value) {}

  // Raw (unshifted) encoding; EncodingFields applies the Offset shift.
  constexpr uint64_t encode() const { return Value; }
  static ValueType decode(uint64_t Encoded) { return Encoded; }
};
396
// Represents a single bit in an encoded value: a one-bit-wide EncodingField.
template <unsigned Bit, unsigned D = 0>
using EncodingBit = EncodingField<Bit, Bit, D>;
400
// A helper for encoding and decoding multiple fields.
template <typename... Fields> struct EncodingFields {
  // OR together each field's raw encoding shifted to its bit offset.
  static constexpr uint64_t encode(Fields... Values) {
    return ((Values.encode() << Values.Offset) | ...);
  }

  // Extract each field's bits (masked to its width), decode them, and
  // return the values as a tuple in field order.
  static std::tuple<typename Fields::ValueType...> decode(uint64_t Encoded) {
    return {Fields::decode((Encoded >> Fields::Offset) &
                           maxUIntN(Fields::Width))...};
  }
};
412
413LLVM_READONLY
414inline bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx) {
415 return getNamedOperandIdx(Opcode, Name: NamedIdx) != -1;
416}
417
418LLVM_READONLY
419int32_t getSOPPWithRelaxation(uint32_t Opcode);
420
// Static properties of a MIMG base opcode (generated table entry).
// NOTE(review): field meanings below are inferred from names; confirm
// against the AMDGPUGenSearchableTables definitions.
struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode;
  bool Store;   // Image store operation.
  bool Atomic;  // Image atomic operation.
  bool AtomicX2;
  bool Sampler; // Uses a sampler descriptor.
  bool Gather4;

  uint8_t NumExtraArgs;
  bool Gradients; // Takes explicit derivative operands.
  bool G16;
  bool Coordinates;
  bool LodOrClampOrMip;
  bool HasD16;
  bool MSAA;
  bool BVH;
  bool A16;
  bool NoReturn;
  bool PointSampleAccel;
};
441
442LLVM_READONLY
443const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc);
444
445LLVM_READONLY
446const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
447
// Properties of a MIMG dimension; looked up by enum, hardware encoding,
// or assembler suffix via the getMIMGDimInfo* helpers below.
struct MIMGDimInfo {
  MIMGDim Dim;
  uint8_t NumCoords;    // Number of coordinate operands.
  uint8_t NumGradients; // Number of gradient operands.
  bool MSAA;
  bool DA;              // presumably the "DA" (array) bit — confirm.
  uint8_t Encoding;     // Hardware encoding of the dimension.
  const char *AsmSuffix;
};
457
458LLVM_READONLY
459const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
460
461LLVM_READONLY
462const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
463
464LLVM_READONLY
465const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
466
// Maps an explicit-LOD (L) MIMG base opcode to its LOD-zero (LZ) variant.
struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;
  MIMGBaseOpcode LZ;
};
471
// Maps a MIP MIMG base opcode to its non-MIP variant.
struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;
  MIMGBaseOpcode NONMIP;
};
476
// Maps a bias MIMG base opcode to its no-bias variant.
struct MIMGBiasMappingInfo {
  MIMGBaseOpcode Bias;
  MIMGBaseOpcode NoBias;
};
481
// Maps an offset MIMG base opcode to its no-offset variant.
struct MIMGOffsetMappingInfo {
  MIMGBaseOpcode Offset;
  MIMGBaseOpcode NoOffset;
};
486
// Maps a gradient (G) MIMG base opcode to its 16-bit-gradient (G16) variant.
struct MIMGG16MappingInfo {
  MIMGBaseOpcode G;
  MIMGBaseOpcode G16;
};
491
492LLVM_READONLY
493const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
494
// Maps between the 2-address and 3-address forms of a WMMA opcode
// (see mapWMMA2AddrTo3AddrOpcode).
struct WMMAOpcodeMappingInfo {
  unsigned Opcode2Addr;
  unsigned Opcode3Addr;
};
499
500LLVM_READONLY
501const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
502
503LLVM_READONLY
504const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);
505
506LLVM_READONLY
507const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset);
508
509LLVM_READONLY
510const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);
511
512LLVM_READONLY
513int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
514 unsigned VDataDwords, unsigned VAddrDwords);
515
516LLVM_READONLY
517int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
518
519LLVM_READONLY
520unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
521 const MIMGDimInfo *Dim, bool IsA16,
522 bool IsG16Supported);
523
// Per-opcode MIMG instruction properties (generated table entry),
// retrieved via getMIMGInfo().
struct MIMGInfo {
  uint32_t Opcode;
  uint32_t BaseOpcode;   // Associated MIMG base opcode.
  uint8_t MIMGEncoding;  // Encoding family.
  uint8_t VDataDwords;   // Dwords of vdata.
  uint8_t VAddrDwords;   // Dwords of vaddr.
  uint8_t VAddrOperands; // Number of vaddr operands.
};
532
533LLVM_READONLY
534const MIMGInfo *getMIMGInfo(unsigned Opc);
535
536LLVM_READONLY
537int getMTBUFBaseOpcode(unsigned Opc);
538
539LLVM_READONLY
540int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
541
542LLVM_READONLY
543int getMTBUFElements(unsigned Opc);
544
545LLVM_READONLY
546bool getMTBUFHasVAddr(unsigned Opc);
547
548LLVM_READONLY
549bool getMTBUFHasSrsrc(unsigned Opc);
550
551LLVM_READONLY
552bool getMTBUFHasSoffset(unsigned Opc);
553
554LLVM_READONLY
555int getMUBUFBaseOpcode(unsigned Opc);
556
557LLVM_READONLY
558int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);
559
560LLVM_READONLY
561int getMUBUFElements(unsigned Opc);
562
563LLVM_READONLY
564bool getMUBUFHasVAddr(unsigned Opc);
565
566LLVM_READONLY
567bool getMUBUFHasSrsrc(unsigned Opc);
568
569LLVM_READONLY
570bool getMUBUFHasSoffset(unsigned Opc);
571
572LLVM_READONLY
573bool getMUBUFIsBufferInv(unsigned Opc);
574
575LLVM_READONLY
576bool getMUBUFTfe(unsigned Opc);
577
578LLVM_READONLY
579bool getSMEMIsBuffer(unsigned Opc);
580
581LLVM_READONLY
582bool getVOP1IsSingle(unsigned Opc);
583
584LLVM_READONLY
585bool getVOP2IsSingle(unsigned Opc);
586
587LLVM_READONLY
588bool getVOP3IsSingle(unsigned Opc);
589
590LLVM_READONLY
591bool isVOPC64DPP(unsigned Opc);
592
593LLVM_READONLY
594bool isVOPCAsmOnly(unsigned Opc);
595
596/// Returns true if MAI operation is a double precision GEMM.
597LLVM_READONLY
598bool getMAIIsDGEMM(unsigned Opc);
599
600LLVM_READONLY
601bool getMAIIsGFX940XDL(unsigned Opc);
602
603LLVM_READONLY
604bool getWMMAIsXDL(unsigned Opc);
605
606// Get an equivalent BitOp3 for a binary logical \p Opc.
607// \returns BitOp3 modifier for the logical operation or zero.
608// Used in VOPD3 conversion.
609unsigned getBitOp2(unsigned Opc);
610
// Whether an opcode may appear as the X and/or Y component of a VOPD
// instruction; returned by getCanBeVOPD().
struct CanBeVOPD {
  bool X;
  bool Y;
};
615
616/// \returns SIEncodingFamily used for VOPD encoding on a \p ST.
617LLVM_READONLY
618unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST);
619
620LLVM_READONLY
621CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3);
622
623LLVM_READNONE
624uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal);
625
626LLVM_READONLY
627const MFMA_F8F6F4_Info *getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ,
628 unsigned BLGP,
629 unsigned F8F8Opcode);
630
631LLVM_READNONE
632uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt);
633
634LLVM_READONLY
635const MFMA_F8F6F4_Info *getWMMA_F8F6F4_WithFormatArgs(unsigned FmtA,
636 unsigned FmtB,
637 unsigned F8F8Opcode);
638
639LLVM_READONLY
640const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
641 uint8_t NumComponents,
642 uint8_t NumFormat,
643 const MCSubtargetInfo &STI);
644LLVM_READONLY
645const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
646 const MCSubtargetInfo &STI);
647
648LLVM_READONLY
649int32_t getMCOpcode(uint32_t Opcode, unsigned Gen);
650
651LLVM_READONLY
652unsigned getVOPDOpcode(unsigned Opc, bool VOPD3);
653
654LLVM_READONLY
655int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily,
656 bool VOPD3);
657
658LLVM_READONLY
659bool isVOPD(unsigned Opc);
660
661LLVM_READNONE
662bool isMAC(unsigned Opc);
663
664LLVM_READNONE
665bool isPermlane16(unsigned Opc);
666
667LLVM_READNONE
668bool isGenericAtomic(unsigned Opc);
669
670LLVM_READNONE
671bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc);
672
673namespace VOPD {
674
// Component operand indices of a VOPD component: one dst followed by up to
// three sources.
enum Component : unsigned {
  DST = 0,
  SRC0,
  SRC1,
  SRC2,

  DST_NUM = 1,     // Number of dst operands per component.
  MAX_SRC_NUM = 3, // Maximum number of src operands per component.
  MAX_OPR_NUM = DST_NUM + MAX_SRC_NUM // Max total operands per component.
};
685
// LSB mask for VGPR banks per VOPD component operand.
// 4 banks result in a mask 3, setting 2 lower bits.
// Presumably indexed by component operand index (DST, SRC0, SRC1, SRC2) —
// the arrays have MAX_OPR_NUM entries; confirm against users.
constexpr unsigned VOPD_VGPR_BANK_MASKS[] = {1, 3, 3, 1};
constexpr unsigned VOPD3_VGPR_BANK_MASKS[] = {1, 3, 3, 3};
690
// The two components of a VOPD instruction: X and Y.
enum ComponentIndex : unsigned { X = 0, Y = 1 };
constexpr unsigned COMPONENTS[] = {ComponentIndex::X, ComponentIndex::Y};
constexpr unsigned COMPONENTS_NUM = 2;
694
695// Properties of VOPD components.
696class ComponentProps {
697private:
698 unsigned SrcOperandsNum = 0;
699 unsigned MandatoryLiteralIdx = ~0u;
700 bool HasSrc2Acc = false;
701 unsigned NumVOPD3Mods = 0;
702 unsigned Opcode = 0;
703 bool IsVOP3 = false;
704
705public:
706 ComponentProps() = default;
707 ComponentProps(const MCInstrDesc &OpDesc, bool VOP3Layout = false);
708
709 // Return the total number of src operands this component has.
710 unsigned getCompSrcOperandsNum() const { return SrcOperandsNum; }
711
712 // Return the number of src operands of this component visible to the parser.
713 unsigned getCompParsedSrcOperandsNum() const {
714 return SrcOperandsNum - HasSrc2Acc;
715 }
716
717 // Return true iif this component has a mandatory literal.
718 bool hasMandatoryLiteral() const { return MandatoryLiteralIdx != ~0u; }
719
720 // If this component has a mandatory literal, return component operand
721 // index of this literal (i.e. either Component::SRC1 or Component::SRC2).
722 unsigned getMandatoryLiteralCompOperandIndex() const {
723 assert(hasMandatoryLiteral());
724 return MandatoryLiteralIdx;
725 }
726
727 // Return true iif this component has operand
728 // with component index CompSrcIdx and this operand may be a register.
729 bool hasRegSrcOperand(unsigned CompSrcIdx) const {
730 assert(CompSrcIdx < Component::MAX_SRC_NUM);
731 return SrcOperandsNum > CompSrcIdx && !hasMandatoryLiteralAt(CompSrcIdx);
732 }
733
734 // Return true iif this component has tied src2.
735 bool hasSrc2Acc() const { return HasSrc2Acc; }
736
737 // Return a number of source modifiers if instruction is used in VOPD3.
738 unsigned getCompVOPD3ModsNum() const { return NumVOPD3Mods; }
739
740 // Return opcode of the component.
741 unsigned getOpcode() const { return Opcode; }
742
743 // Returns if component opcode is in VOP3 encoding.
744 unsigned isVOP3() const { return IsVOP3; }
745
746 // Return index of BitOp3 operand or -1.
747 int getBitOp3OperandIdx() const;
748
749private:
750 bool hasMandatoryLiteralAt(unsigned CompSrcIdx) const {
751 assert(CompSrcIdx < Component::MAX_SRC_NUM);
752 return MandatoryLiteralIdx == Component::DST_NUM + CompSrcIdx;
753 }
754};
755
// Kind of instruction a ComponentInfo/ComponentLayout describes.
enum ComponentKind : unsigned {
  SINGLE = 0,  // A single VOP1 or VOP2 instruction which may be used in VOPD.
  COMPONENT_X, // A VOPD instruction, X component.
  COMPONENT_Y, // A VOPD instruction, Y component.
  MAX = COMPONENT_Y
};
762
// Interface functions of this class map VOPD component operand indices
// to indices of operands in MachineInstr/MCInst or parsed operands array.
//
// Note that this class operates with 3 kinds of indices:
// - VOPD component operand indices (Component::DST, Component::SRC0, etc.);
// - MC operand indices (they refer operands in a MachineInstr/MCInst);
// - parsed operand indices (they refer operands in parsed operands array).
//
// For SINGLE components mapping between these indices is trivial.
// But things get more complicated for COMPONENT_X and
// COMPONENT_Y because these components share the same
// MachineInstr/MCInst and the same parsed operands array.
// Below is an example of component operand to parsed operand
// mapping for the following instruction:
//
//   v_dual_add_f32 v255, v4, v5 :: v_dual_mov_b32 v6, v1
//
//                          PARSED        COMPONENT        PARSED
// COMPONENT               OPERANDS     OPERAND INDEX    OPERAND INDEX
// -------------------------------------------------------------------
//                     "v_dual_add_f32"                        0
// v_dual_add_f32            v255          0 (DST)    -->      1
//                           v4            1 (SRC0)   -->      2
//                           v5            2 (SRC1)   -->      3
//                           "::"                              4
//                     "v_dual_mov_b32"                        5
// v_dual_mov_b32            v6            0 (DST)    -->      6
//                           v1            1 (SRC0)   -->      7
// -------------------------------------------------------------------
//
class ComponentLayout {
private:
  // Regular MachineInstr/MCInst operands are ordered as follows:
  //   dst, src0 [, other src operands]
  // VOPD MachineInstr/MCInst operands are ordered as follows:
  //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
  // Each ComponentKind has operand indices defined below.
  // Indexed by ComponentKind (SINGLE, COMPONENT_X, COMPONENT_Y).
  static constexpr unsigned MC_DST_IDX[] = {0, 0, 1};

  // VOPD3 instructions may have 2 or 3 source modifiers, src2 modifier is not
  // used if there is tied accumulator. Indexing of this array:
  // MC_SRC_IDX[VOPD3ModsNum][SrcNo]. This returns an index for a SINGLE
  // instruction layout, add 1 for COMPONENT_X or COMPONENT_Y. For the second
  // component add OpX.MCSrcNum + OpX.VOPD3ModsNum.
  // For VOPD1/VOPD2 use column with zero modifiers.
  static constexpr unsigned SINGLE_MC_SRC_IDX[4][3] = {
      {1, 2, 3}, {2, 3, 4}, {2, 4, 5}, {2, 4, 6}};

  // Parsed operands of regular instructions are ordered as follows:
  //   Mnemo dst src0 [vsrc1 ...]
  // Parsed VOPD operands are ordered as follows:
  //   OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
  //   OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
  // Each ComponentKind has operand indices defined below.
  static constexpr unsigned PARSED_DST_IDX[] = {1, 1,
                                                4 /* + OpX.ParsedSrcNum */};
  static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {
      2, 2, 5 /* + OpX.ParsedSrcNum */};

private:
  const ComponentKind Kind;
  // Properties of the X component; only meaningful for COMPONENT_Y
  // (default-constructed, i.e. zero source operands, otherwise).
  const ComponentProps PrevComp;
  const unsigned VOPD3ModsNum;
  const int BitOp3Idx; // Index of bitop3 operand or -1

public:
  // Create layout for COMPONENT_X or SINGLE component.
  ComponentLayout(ComponentKind Kind, unsigned VOPD3ModsNum, int BitOp3Idx)
      : Kind(Kind), VOPD3ModsNum(VOPD3ModsNum), BitOp3Idx(BitOp3Idx) {
    assert(Kind == ComponentKind::SINGLE || Kind == ComponentKind::COMPONENT_X);
  }

  // Create layout for COMPONENT_Y which depends on COMPONENT_X layout.
  ComponentLayout(const ComponentProps &OpXProps, unsigned VOPD3ModsNum,
                  int BitOp3Idx)
      : Kind(ComponentKind::COMPONENT_Y), PrevComp(OpXProps),
        VOPD3ModsNum(VOPD3ModsNum), BitOp3Idx(BitOp3Idx) {}

public:
  // Return the index of dst operand in MCInst operands.
  unsigned getIndexOfDstInMCOperands() const { return MC_DST_IDX[Kind]; }

  // Return the index of the specified src operand in MCInst operands.
  unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx, bool VOPD3) const {
    assert(CompSrcIdx < Component::MAX_SRC_NUM);

    // A SINGLE instruction's src2 may actually be the BitOp3 operand.
    if (Kind == SINGLE && CompSrcIdx == 2 && BitOp3Idx != -1)
      return BitOp3Idx;

    if (VOPD3) {
      return SINGLE_MC_SRC_IDX[VOPD3ModsNum][CompSrcIdx] + getPrevCompSrcNum() +
             getPrevCompVOPD3ModsNum() + (Kind != SINGLE ? 1 : 0);
    }

    // VOPD1/VOPD2: zero-modifier column, offset past the X component's
    // sources and the extra dst for COMPONENT_X/COMPONENT_Y.
    return SINGLE_MC_SRC_IDX[0][CompSrcIdx] + getPrevCompSrcNum() +
           (Kind != SINGLE ? 1 : 0);
  }

  // Return the index of dst operand in the parsed operands array.
  unsigned getIndexOfDstInParsedOperands() const {
    return PARSED_DST_IDX[Kind] + getPrevCompParsedSrcNum();
  }

  // Return the index of the specified src operand in the parsed operands array.
  unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const {
    assert(CompSrcIdx < Component::MAX_SRC_NUM);
    return FIRST_PARSED_SRC_IDX[Kind] + getPrevCompParsedSrcNum() + CompSrcIdx;
  }

private:
  // Source-operand counts of the preceding (X) component; zero for
  // SINGLE/COMPONENT_X since PrevComp is default-constructed.
  unsigned getPrevCompSrcNum() const {
    return PrevComp.getCompSrcOperandsNum();
  }
  unsigned getPrevCompParsedSrcNum() const {
    return PrevComp.getCompParsedSrcOperandsNum();
  }
  unsigned getPrevCompVOPD3ModsNum() const {
    return PrevComp.getCompVOPD3ModsNum();
  }
};
883
// Layout and properties of VOPD components.
// ComponentProps is constructed first (base-class order), so its accessors
// may safely be used to initialize the ComponentLayout base.
class ComponentInfo : public ComponentProps, public ComponentLayout {
public:
  // Create ComponentInfo for COMPONENT_X or SINGLE component.
  ComponentInfo(const MCInstrDesc &OpDesc,
                ComponentKind Kind = ComponentKind::SINGLE,
                bool VOP3Layout = false)
      : ComponentProps(OpDesc, VOP3Layout),
        ComponentLayout(Kind, getCompVOPD3ModsNum(), getBitOp3OperandIdx()) {}

  // Create ComponentInfo for COMPONENT_Y which depends on COMPONENT_X layout.
  ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps,
                bool VOP3Layout = false)
      : ComponentProps(OpDesc, VOP3Layout),
        ComponentLayout(OpXProps, getCompVOPD3ModsNum(),
                        getBitOp3OperandIdx()) {}

  // Map component operand index to parsed operand index.
  // Return 0 if the specified operand does not exist.
  unsigned getIndexInParsedOperands(unsigned CompOprIdx) const;
};
905
// Properties of VOPD instructions.
class InstInfo {
private:
  // Component info for the X and Y components, in that order.
  const ComponentInfo CompInfo[COMPONENTS_NUM];

public:
  // Register indices for one component's operands (DST, SRC0..SRC2).
  using RegIndices = std::array<MCRegister, Component::MAX_OPR_NUM>;

  InstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
      : CompInfo{OpX, OpY} {}

  InstInfo(const ComponentInfo &OprInfoX, const ComponentInfo &OprInfoY)
      : CompInfo{OprInfoX, OprInfoY} {}

  // Access the component info by ComponentIndex (X or Y).
  const ComponentInfo &operator[](size_t ComponentIdx) const {
    assert(ComponentIdx < COMPONENTS_NUM);
    return CompInfo[ComponentIdx];
  }

  // Check VOPD operands constraints.
  // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
  // for the specified component and MC operand. The callback must return 0
  // if the operand is not a register or not a VGPR.
  // If \p SkipSrc is set to true then constraints for source operands are not
  // checked.
  // If \p AllowSameVGPR is set then same VGPRs are allowed for X and Y sources
  // even though it violates requirement to be from different banks.
  // If \p VOPD3 is set to true both dst registers allowed to be either odd
  // or even and instruction may have real src2 as opposed to tied accumulator.
  bool
  hasInvalidOperand(std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
                    const MCRegisterInfo &MRI, bool SkipSrc = false,
                    bool AllowSameVGPR = false, bool VOPD3 = false) const {
    return getInvalidCompOperandIndex(GetRegIdx, MRI, SkipSrc, AllowSameVGPR,
                                      VOPD3)
        .has_value();
  }

  // Check VOPD operands constraints.
  // Return the index of an invalid component operand, if any.
  // If \p SkipSrc is set to true then constraints for source operands are not
  // checked except for being from the same halves of VGPR file on gfx1250.
  // If \p AllowSameVGPR is set then same VGPRs are allowed for X and Y sources
  // even though it violates requirement to be from different banks.
  // If \p VOPD3 is set to true both dst registers allowed to be either odd
  // or even and instruction may have real src2 as opposed to tied accumulator.
  std::optional<unsigned> getInvalidCompOperandIndex(
      std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
      const MCRegisterInfo &MRI, bool SkipSrc = false,
      bool AllowSameVGPR = false, bool VOPD3 = false) const;

private:
  // Collect the VGPR indices of one component's operands via GetRegIdx.
  RegIndices
  getRegIndices(unsigned ComponentIdx,
                std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
                bool VOPD3) const;
};
963
964} // namespace VOPD
965
966LLVM_READONLY
967std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode);
968
969LLVM_READONLY
970// Get properties of 2 single VOP1/VOP2 instructions
971// used as components to create a VOPD instruction.
972VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY);
973
974LLVM_READONLY
975// Get properties of VOPD X and Y components.
976VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
977 const MCInstrInfo *InstrInfo);
978
979LLVM_READONLY
980bool isAsyncStore(unsigned Opc);
981LLVM_READONLY
982bool isTensorStore(unsigned Opc);
983LLVM_READONLY
984unsigned getTemporalHintType(const MCInstrDesc TID);
985
986LLVM_READONLY
987bool isTrue16Inst(unsigned Opc);
988
989LLVM_READONLY
990FPType getFPDstSelType(unsigned Opc);
991
992LLVM_READONLY
993bool isInvalidSingleUseConsumerInst(unsigned Opc);
994
995LLVM_READONLY
996bool isInvalidSingleUseProducerInst(unsigned Opc);
997
998bool isDPMACCInstruction(unsigned Opc);
999
1000LLVM_READONLY
1001unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);
1002
1003LLVM_READONLY
1004unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);
1005
1006void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &Header,
1007 const MCSubtargetInfo *STI);
1008
1009bool isGroupSegment(const GlobalValue *GV);
1010bool isGlobalSegment(const GlobalValue *GV);
1011bool isReadOnlySegment(const GlobalValue *GV);
1012
1013/// \returns True if constants should be emitted to .text section for given
1014/// target triple \p TT, false otherwise.
1015bool shouldEmitConstantsToTextSection(const Triple &TT);
1016
1017/// Returns a valid charcode or 0 in the first entry if this is a valid physical
1018/// register name. Followed by the start register number, and the register
1019/// width. Does not validate the number of registers exists in the class. Unlike
1020/// parseAsmConstraintPhysReg, this does not expect the name to be wrapped in
1021/// "{}".
1022std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef TupleString);
1023
1024/// Returns a valid charcode or 0 in the first entry if this is a valid physical
1025/// register constraint. Followed by the start register number, and the register
1026/// width. Does not validate the number of registers exists in the class.
1027std::tuple<char, unsigned, unsigned>
1028parseAsmConstraintPhysReg(StringRef Constraint);
1029
1030/// \returns Integer value requested using \p F's \p Name attribute.
1031///
1032/// \returns \p Default if attribute is not present.
1033///
1034/// \returns \p Default and emits error if requested value cannot be converted
1035/// to integer.
1036int getIntegerAttribute(const Function &F, StringRef Name, int Default);
1037
1038/// \returns A pair of integer values requested using \p F's \p Name attribute
1039/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
1040/// is false).
1041///
1042/// \returns \p Default if attribute is not present.
1043///
1044/// \returns \p Default and emits error if one of the requested values cannot be
1045/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
1046/// not present.
1047std::pair<unsigned, unsigned>
1048getIntegerPairAttribute(const Function &F, StringRef Name,
1049 std::pair<unsigned, unsigned> Default,
1050 bool OnlyFirstRequired = false);
1051
1052/// \returns A pair of integer values requested using \p F's \p Name attribute
1053/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
1054/// is false).
1055///
1056/// \returns \p std::nullopt if attribute is not present.
1057///
1058/// \returns \p std::nullopt and emits error if one of the requested values
1059/// cannot be converted to integer, or \p OnlyFirstRequired is false and
1060/// "second" value is not present.
1061std::optional<std::pair<unsigned, std::optional<unsigned>>>
1062getIntegerPairAttribute(const Function &F, StringRef Name,
1063 bool OnlyFirstRequired = false);
1064
1065/// \returns Generate a vector of integer values requested using \p F's \p Name
1066/// attribute.
1067/// \returns A vector of size \p Size, with all elements set to \p DefaultVal,
1068/// if any error occurs. The corresponding error will also be emitted.
1069SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
1070 unsigned Size,
1071 unsigned DefaultVal);
1072/// Similar to the function above, but returns std::nullopt if any error occurs.
1073std::optional<SmallVector<unsigned>>
1074getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size);
1075
1076/// Checks if \p Val is inside \p MD, a !range-like metadata.
1077bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val);
1078
// Hardware wait-counter kinds. The enumerators are grouped into three tiers,
// delimited by the *_CNTS sentinel values: normal counters, extended (gfx12+)
// counters, and expert-scheduling-mode counters.
enum InstCounterType {
  LOAD_CNT = 0, // VMcnt prior to gfx12.
  DS_CNT,       // LGKMcnt prior to gfx12.
  EXP_CNT,      //
  STORE_CNT,    // VScnt in gfx10/gfx11.
  NUM_NORMAL_INST_CNTS,
  SAMPLE_CNT = NUM_NORMAL_INST_CNTS, // gfx12+ only.
  BVH_CNT,                           // gfx12+ only.
  KM_CNT,                            // gfx12+ only.
  X_CNT,                             // gfx1250.
  NUM_EXTENDED_INST_CNTS,
  VA_VDST = NUM_EXTENDED_INST_CNTS, // gfx12+ expert mode only.
  VM_VSRC,                          // gfx12+ expert mode only.
  NUM_EXPERT_INST_CNTS,
  NUM_INST_CNTS = NUM_EXPERT_INST_CNTS
};
1095
1096// Return an iterator over all counters between LOAD_CNT (the first counter)
1097// and \c MaxCounter (exclusive, default value yields an enumeration over
1098// all counters).
1099iota_range<InstCounterType>
1100inst_counter_types(InstCounterType MaxCounter = NUM_INST_CNTS);
1101
1102} // namespace AMDGPU
1103
// Opt InstCounterType into llvm::enum_seq / iota_range iteration
// (used by inst_counter_types() above).
template <> struct enum_iteration_traits<AMDGPU::InstCounterType> {
  static constexpr bool is_iterable = true;
};
1107
1108namespace AMDGPU {
1109
1110/// Represents the counter values to wait for in an s_waitcnt instruction.
1111///
1112/// Large values (including the maximum possible integer) can be used to
1113/// represent "don't care" waits.
1114class Waitcnt {
1115 unsigned LoadCnt = ~0u; // Corresponds to Vmcnt prior to gfx12.
1116 unsigned ExpCnt = ~0u;
1117 unsigned DsCnt = ~0u; // Corresponds to LGKMcnt prior to gfx12.
1118 unsigned StoreCnt = ~0u; // Corresponds to VScnt on gfx10/gfx11.
1119 unsigned SampleCnt = ~0u; // gfx12+ only.
1120 unsigned BvhCnt = ~0u; // gfx12+ only.
1121 unsigned KmCnt = ~0u; // gfx12+ only.
1122 unsigned XCnt = ~0u; // gfx1250.
1123 unsigned VaVdst = ~0u; // gfx12+ expert scheduling mode only.
1124 unsigned VmVsrc = ~0u; // gfx12+ expert scheduling mode only.
1125
1126public:
1127 unsigned get(InstCounterType T) const {
1128 switch (T) {
1129 case LOAD_CNT:
1130 return LoadCnt;
1131 case EXP_CNT:
1132 return ExpCnt;
1133 case DS_CNT:
1134 return DsCnt;
1135 case STORE_CNT:
1136 return StoreCnt;
1137 case SAMPLE_CNT:
1138 return SampleCnt;
1139 case BVH_CNT:
1140 return BvhCnt;
1141 case KM_CNT:
1142 return KmCnt;
1143 case X_CNT:
1144 return XCnt;
1145 case VA_VDST:
1146 return VaVdst;
1147 case VM_VSRC:
1148 return VmVsrc;
1149 default:
1150 llvm_unreachable("bad InstCounterType");
1151 }
1152 }
1153 void set(InstCounterType T, unsigned Val) {
1154 switch (T) {
1155 case LOAD_CNT:
1156 LoadCnt = Val;
1157 break;
1158 case EXP_CNT:
1159 ExpCnt = Val;
1160 break;
1161 case DS_CNT:
1162 DsCnt = Val;
1163 break;
1164 case STORE_CNT:
1165 StoreCnt = Val;
1166 break;
1167 case SAMPLE_CNT:
1168 SampleCnt = Val;
1169 break;
1170 case BVH_CNT:
1171 BvhCnt = Val;
1172 break;
1173 case KM_CNT:
1174 KmCnt = Val;
1175 break;
1176 case X_CNT:
1177 XCnt = Val;
1178 break;
1179 case VA_VDST:
1180 VaVdst = Val;
1181 break;
1182 case VM_VSRC:
1183 VmVsrc = Val;
1184 break;
1185 default:
1186 llvm_unreachable("bad InstCounterType");
1187 }
1188 }
1189
1190 Waitcnt() = default;
1191 // Pre-gfx12 constructor.
1192 Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
1193 : LoadCnt(VmCnt), ExpCnt(ExpCnt), DsCnt(LgkmCnt), StoreCnt(VsCnt) {}
1194
1195 // gfx12+ constructor.
1196 Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
1197 unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt,
1198 unsigned VaVdst, unsigned VmVsrc)
1199 : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt),
1200 SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt), XCnt(XCnt),
1201 VaVdst(VaVdst), VmVsrc(VmVsrc) {}
1202
1203 bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); }
1204
1205 bool hasWaitExceptStoreCnt() const {
1206 return LoadCnt != ~0u || ExpCnt != ~0u || DsCnt != ~0u ||
1207 SampleCnt != ~0u || BvhCnt != ~0u || KmCnt != ~0u || XCnt != ~0u ||
1208 VaVdst != ~0u || VmVsrc != ~0u;
1209 }
1210
1211 bool hasWaitStoreCnt() const { return StoreCnt != ~0u; }
1212
1213 bool hasWaitDepctr() const { return VaVdst != ~0u || VmVsrc != ~0u; }
1214
1215 Waitcnt combined(const Waitcnt &Other) const {
1216 // Does the right thing provided self and Other are either both pre-gfx12
1217 // or both gfx12+.
1218 return Waitcnt(
1219 std::min(a: LoadCnt, b: Other.LoadCnt), std::min(a: ExpCnt, b: Other.ExpCnt),
1220 std::min(a: DsCnt, b: Other.DsCnt), std::min(a: StoreCnt, b: Other.StoreCnt),
1221 std::min(a: SampleCnt, b: Other.SampleCnt), std::min(a: BvhCnt, b: Other.BvhCnt),
1222 std::min(a: KmCnt, b: Other.KmCnt), std::min(a: XCnt, b: Other.XCnt),
1223 std::min(a: VaVdst, b: Other.VaVdst), std::min(a: VmVsrc, b: Other.VmVsrc));
1224 }
1225
1226 friend raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait);
1227};
1228
1229/// Represents the hardware counter limits for different wait count types.
1230struct HardwareLimits {
1231 unsigned LoadcntMax; // Corresponds to Vmcnt prior to gfx12.
1232 unsigned ExpcntMax;
1233 unsigned DscntMax; // Corresponds to LGKMcnt prior to gfx12.
1234 unsigned StorecntMax; // Corresponds to VScnt in gfx10/gfx11.
1235 unsigned SamplecntMax; // gfx12+ only.
1236 unsigned BvhcntMax; // gfx12+ only.
1237 unsigned KmcntMax; // gfx12+ only.
1238 unsigned XcntMax; // gfx1250.
1239 unsigned VaVdstMax; // gfx12+ expert mode only.
1240 unsigned VmVsrcMax; // gfx12+ expert mode only.
1241
1242 HardwareLimits() = default;
1243
1244 /// Initializes hardware limits from ISA version.
1245 HardwareLimits(const IsaVersion &IV);
1246};
1247
1248// The following methods are only meaningful on targets that support
1249// S_WAITCNT.
1250
1251/// \returns Vmcnt bit mask for given isa \p Version.
1252unsigned getVmcntBitMask(const IsaVersion &Version);
1253
1254/// \returns Expcnt bit mask for given isa \p Version.
1255unsigned getExpcntBitMask(const IsaVersion &Version);
1256
1257/// \returns Lgkmcnt bit mask for given isa \p Version.
1258unsigned getLgkmcntBitMask(const IsaVersion &Version);
1259
1260/// \returns Waitcnt bit mask for given isa \p Version.
1261unsigned getWaitcntBitMask(const IsaVersion &Version);
1262
1263/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
1264unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
1265
1266/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
1267unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
1268
1269/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
1270unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
1271
1272/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
1273/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
1274/// \p Lgkmcnt respectively. Should not be used on gfx12+, the instruction
1275/// which needs it is deprecated
1276///
1277/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
1278/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9)
1279/// \p Vmcnt = \p Waitcnt[15:14,3:0] (gfx9,10)
1280/// \p Vmcnt = \p Waitcnt[15:10] (gfx11)
1281/// \p Expcnt = \p Waitcnt[6:4] (pre-gfx11)
1282/// \p Expcnt = \p Waitcnt[2:0] (gfx11)
1283/// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10)
1284/// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10)
1285/// \p Lgkmcnt = \p Waitcnt[9:4] (gfx11)
1286///
1287void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt,
1288 unsigned &Expcnt, unsigned &Lgkmcnt);
1289
1290Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
1291
1292/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
1293unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1294 unsigned Vmcnt);
1295
1296/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
1297unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1298 unsigned Expcnt);
1299
1300/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
1301unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1302 unsigned Lgkmcnt);
1303
1304/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
1305/// \p Version. Should not be used on gfx12+, the instruction which needs
1306/// it is deprecated
1307///
1308/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
1309/// Waitcnt[2:0] = \p Expcnt (gfx11+)
1310/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9)
1311/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9,10)
1312/// Waitcnt[6:4] = \p Expcnt (pre-gfx11)
1313/// Waitcnt[9:4] = \p Lgkmcnt (gfx11)
1314/// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10)
1315/// Waitcnt[13:8] = \p Lgkmcnt (gfx10)
1316/// Waitcnt[15:10] = \p Vmcnt (gfx11)
1317/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9,10)
1318///
1319/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
1320/// isa \p Version.
1321///
1322unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
1323 unsigned Expcnt, unsigned Lgkmcnt);
1324
1325unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
1326
1327// The following methods are only meaningful on targets that support
1328// S_WAIT_*CNT, introduced with gfx12.
1329
1330/// \returns Loadcnt bit mask for given isa \p Version.
1331/// Returns 0 for versions that do not support LOADcnt
1332unsigned getLoadcntBitMask(const IsaVersion &Version);
1333
1334/// \returns Samplecnt bit mask for given isa \p Version.
1335/// Returns 0 for versions that do not support SAMPLEcnt
1336unsigned getSamplecntBitMask(const IsaVersion &Version);
1337
1338/// \returns Bvhcnt bit mask for given isa \p Version.
1339/// Returns 0 for versions that do not support BVHcnt
1340unsigned getBvhcntBitMask(const IsaVersion &Version);
1341
1342/// \returns Dscnt bit mask for given isa \p Version.
1343/// Returns 0 for versions that do not support DScnt
1344unsigned getDscntBitMask(const IsaVersion &Version);
1345
/// \returns Kmcnt bit mask for given isa \p Version.
1347/// Returns 0 for versions that do not support KMcnt
1348unsigned getKmcntBitMask(const IsaVersion &Version);
1349
1350/// \returns Xcnt bit mask for given isa \p Version.
1351/// Returns 0 for versions that do not support Xcnt.
1352unsigned getXcntBitMask(const IsaVersion &Version);
1353
1354/// \return STOREcnt or VScnt bit mask for given isa \p Version.
1355/// returns 0 for versions that do not support STOREcnt or VScnt.
1356/// STOREcnt and VScnt are the same counter, the name used
1357/// depends on the ISA version.
1358unsigned getStorecntBitMask(const IsaVersion &Version);
1359
1360// The following are only meaningful on targets that support
1361// S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT.
1362
1363/// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given
1364/// isa \p Version.
1365Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt);
1366
1367/// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given
1368/// isa \p Version.
1369Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt);
1370
1371/// \returns \p Loadcnt and \p Dscnt components of \p Decoded encoded as an
1372/// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa
1373/// \p Version.
1374unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
1375
1376/// \returns \p Storecnt and \p Dscnt components of \p Decoded encoded as an
1377/// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa
1378/// \p Version.
1379unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
1380
1381namespace Hwreg {
1382
1383using HwregId = EncodingField<5, 0>;
1384using HwregOffset = EncodingField<10, 6>;
1385
// Size field of a hwreg encoding. The size is stored biased by one:
// an encoded field value of N denotes a size of N + 1 bits.
struct HwregSize : EncodingField<15, 11, 32> {
  using EncodingField::EncodingField;
  constexpr uint64_t encode() const { return Value - 1; }
  static ValueType decode(uint64_t Encoded) { return Encoded + 1; }
};
1391
1392using HwregEncoding = EncodingFields<HwregId, HwregOffset, HwregSize>;
1393
1394} // namespace Hwreg
1395
1396namespace DepCtr {
1397
1398int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI);
1399int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
1400 const MCSubtargetInfo &STI);
1401bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
1402 const MCSubtargetInfo &STI);
1403bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
1404 bool &IsDefault, const MCSubtargetInfo &STI);
1405
1406/// \returns Maximum VaVdst value that can be encoded.
1407unsigned getVaVdstBitMask();
1408
1409/// \returns Maximum VaSdst value that can be encoded.
1410unsigned getVaSdstBitMask();
1411
1412/// \returns Maximum VaSsrc value that can be encoded.
1413unsigned getVaSsrcBitMask();
1414
1415/// \returns Maximum HoldCnt value that can be encoded.
1416unsigned getHoldCntBitMask(const IsaVersion &Version);
1417
1418/// \returns Maximum VmVsrc value that can be encoded.
1419unsigned getVmVsrcBitMask();
1420
1421/// \returns Maximum VaVcc value that can be encoded.
1422unsigned getVaVccBitMask();
1423
1424/// \returns Maximum SaSdst value that can be encoded.
1425unsigned getSaSdstBitMask();
1426
1427/// \returns Decoded VaVdst from given immediate \p Encoded.
1428unsigned decodeFieldVaVdst(unsigned Encoded);
1429
1430/// \returns Decoded VmVsrc from given immediate \p Encoded.
1431unsigned decodeFieldVmVsrc(unsigned Encoded);
1432
1433/// \returns Decoded SaSdst from given immediate \p Encoded.
1434unsigned decodeFieldSaSdst(unsigned Encoded);
1435
1436/// \returns Decoded VaSdst from given immediate \p Encoded.
1437unsigned decodeFieldVaSdst(unsigned Encoded);
1438
1439/// \returns Decoded VaVcc from given immediate \p Encoded.
1440unsigned decodeFieldVaVcc(unsigned Encoded);
1441
/// \returns Decoded VaSsrc from given immediate \p Encoded.
1443unsigned decodeFieldVaSsrc(unsigned Encoded);
1444
1445/// \returns Decoded HoldCnt from given immediate \p Encoded.
1446unsigned decodeFieldHoldCnt(unsigned Encoded, const IsaVersion &Version);
1447
1448/// \returns \p VmVsrc as an encoded Depctr immediate.
1449unsigned encodeFieldVmVsrc(unsigned VmVsrc, const MCSubtargetInfo &STI);
1450
1451/// \returns \p Encoded combined with encoded \p VmVsrc.
1452unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc);
1453
1454/// \returns \p VaVdst as an encoded Depctr immediate.
1455unsigned encodeFieldVaVdst(unsigned VaVdst, const MCSubtargetInfo &STI);
1456
1457/// \returns \p Encoded combined with encoded \p VaVdst.
1458unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst);
1459
1460/// \returns \p SaSdst as an encoded Depctr immediate.
1461unsigned encodeFieldSaSdst(unsigned SaSdst, const MCSubtargetInfo &STI);
1462
1463/// \returns \p Encoded combined with encoded \p SaSdst.
1464unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst);
1465
1466/// \returns \p VaSdst as an encoded Depctr immediate.
1467unsigned encodeFieldVaSdst(unsigned VaSdst, const MCSubtargetInfo &STI);
1468
1469/// \returns \p Encoded combined with encoded \p VaSdst.
1470unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst);
1471
1472/// \returns \p VaVcc as an encoded Depctr immediate.
1473unsigned encodeFieldVaVcc(unsigned VaVcc, const MCSubtargetInfo &STI);
1474
1475/// \returns \p Encoded combined with encoded \p VaVcc.
1476unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc);
1477
1478/// \returns \p HoldCnt as an encoded Depctr immediate.
1479unsigned encodeFieldHoldCnt(unsigned HoldCnt, const MCSubtargetInfo &STI);
1480
1481/// \returns \p Encoded combined with encoded \p HoldCnt.
1482unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt,
1483 const IsaVersion &Version);
1484
1485/// \returns \p VaSsrc as an encoded Depctr immediate.
1486unsigned encodeFieldVaSsrc(unsigned VaSsrc, const MCSubtargetInfo &STI);
1487
1488/// \returns \p Encoded combined with encoded \p VaSsrc.
1489unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc);
1490
1491} // namespace DepCtr
1492
1493namespace Exp {
1494
1495bool getTgtName(unsigned Id, StringRef &Name, int &Index);
1496
1497LLVM_READONLY
1498unsigned getTgtId(const StringRef Name);
1499
1500LLVM_READNONE
1501bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);
1502
1503} // namespace Exp
1504
1505namespace MTBUFFormat {
1506
1507LLVM_READNONE
1508int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);
1509
1510void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);
1511
1512int64_t getDfmt(const StringRef Name);
1513
1514StringRef getDfmtName(unsigned Id);
1515
1516int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);
1517
1518StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);
1519
1520bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);
1521
1522bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);
1523
1524int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI);
1525
1526StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI);
1527
1528bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI);
1529
1530int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
1531 const MCSubtargetInfo &STI);
1532
1533bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);
1534
1535unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);
1536
1537} // namespace MTBUFFormat
1538
1539namespace SendMsg {
1540
1541LLVM_READNONE
1542bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI);
1543
1544LLVM_READNONE
1545bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
1546 bool Strict = true);
1547
1548LLVM_READNONE
1549bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
1550 const MCSubtargetInfo &STI, bool Strict = true);
1551
1552LLVM_READNONE
1553bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI);
1554
1555LLVM_READNONE
1556bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);
1557
1558void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
1559 uint16_t &StreamId, const MCSubtargetInfo &STI);
1560
1561LLVM_READNONE
1562uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId);
1563
1564} // namespace SendMsg
1565
1566unsigned getInitialPSInputAddr(const Function &F);
1567
1568bool getHasColorExport(const Function &F);
1569
1570bool getHasDepthExport(const Function &F);
1571
1572bool hasDynamicVGPR(const Function &F);
1573
1574// Returns the value of the "amdgpu-dynamic-vgpr-block-size" attribute, or 0 if
1575// the attribute is missing or its value is invalid.
1576unsigned getDynamicVGPRBlockSize(const Function &F);
1577
/// \returns True if \p CC is an AMDGPU shader calling convention. This
/// includes the chain calling conventions and AMDGPU_CS.
LLVM_READNONE
constexpr bool isShader(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS_Chain:
  case CallingConv::AMDGPU_CS_ChainPreserve:
  case CallingConv::AMDGPU_CS:
    return true;
  default:
    return false;
  }
}
1595
/// \returns True if \p CC is a graphics calling convention: any shader CC
/// plus AMDGPU_Gfx and AMDGPU_Gfx_WholeWave.
LLVM_READNONE
constexpr bool isGraphics(CallingConv::ID CC) {
  return isShader(CC) || CC == CallingConv::AMDGPU_Gfx ||
         CC == CallingConv::AMDGPU_Gfx_WholeWave;
}
1601
/// \returns True if \p CC is a compute calling convention. Note that
/// AMDGPU_CS counts as both graphics (a shader CC) and compute.
LLVM_READNONE
constexpr bool isCompute(CallingConv::ID CC) {
  return !isGraphics(CC) || CC == CallingConv::AMDGPU_CS;
}
1606
/// \returns True if \p CC is a calling convention entered directly by the
/// hardware (kernels and hardware shader stages). Note that the chain
/// calling conventions are not entry functions.
LLVM_READNONE
constexpr bool isEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_LS:
    return true;
  default:
    return false;
  }
}
1624
1625LLVM_READNONE
1626constexpr bool isChainCC(CallingConv::ID CC) {
1627 switch (CC) {
1628 case CallingConv::AMDGPU_CS_Chain:
1629 case CallingConv::AMDGPU_CS_ChainPreserve:
1630 return true;
1631 default:
1632 return false;
1633 }
1634}
1635
1636// These functions are considered entrypoints into the current module, i.e. they
1637// are allowed to be called from outside the current module. This is different
1638// from isEntryFunctionCC, which is only true for functions that are entered by
1639// the hardware. Module entry points include all entry functions but also
1640// include functions that can be called from other functions inside or outside
1641// the current module. Module entry functions are allowed to allocate LDS.
1642LLVM_READNONE
1643constexpr bool isModuleEntryFunctionCC(CallingConv::ID CC) {
1644 switch (CC) {
1645 case CallingConv::AMDGPU_Gfx:
1646 return true;
1647 default:
1648 return isEntryFunctionCC(CC) || isChainCC(CC);
1649 }
1650}
1651
/// \returns True if \p CC is a kernel calling convention.
LLVM_READNONE
constexpr inline bool isKernel(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  default:
    return false;
  }
}
1662
1663inline bool isKernel(const Function &F) { return isKernel(CC: F.getCallingConv()); }
1664
/// \returns True if tail-call optimization can be guaranteed for \p CC
/// (only CallingConv::Fast).
LLVM_READNONE
constexpr bool canGuaranteeTCO(CallingConv::ID CC) {
  return CC == CallingConv::Fast;
}
1669
1670/// Return true if we might ever do TCO for calls with this calling convention.
1671LLVM_READNONE
1672constexpr bool mayTailCallThisCC(CallingConv::ID CC) {
1673 switch (CC) {
1674 case CallingConv::C:
1675 case CallingConv::AMDGPU_Gfx:
1676 case CallingConv::AMDGPU_Gfx_WholeWave:
1677 return true;
1678 default:
1679 return canGuaranteeTCO(CC);
1680 }
1681}
1682
1683bool hasXNACK(const MCSubtargetInfo &STI);
1684bool hasSRAMECC(const MCSubtargetInfo &STI);
1685bool hasMIMG_R128(const MCSubtargetInfo &STI);
1686bool hasA16(const MCSubtargetInfo &STI);
1687bool hasG16(const MCSubtargetInfo &STI);
1688bool hasPackedD16(const MCSubtargetInfo &STI);
1689bool hasGDS(const MCSubtargetInfo &STI);
1690unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler = false);
1691unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI);
1692
1693bool isSI(const MCSubtargetInfo &STI);
1694bool isCI(const MCSubtargetInfo &STI);
1695bool isVI(const MCSubtargetInfo &STI);
1696bool isGFX9(const MCSubtargetInfo &STI);
1697bool isGFX9_GFX10(const MCSubtargetInfo &STI);
1698bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI);
1699bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI);
1700bool isGFX8Plus(const MCSubtargetInfo &STI);
1701bool isGFX9Plus(const MCSubtargetInfo &STI);
1702bool isNotGFX9Plus(const MCSubtargetInfo &STI);
1703bool isGFX10(const MCSubtargetInfo &STI);
1704bool isGFX10_GFX11(const MCSubtargetInfo &STI);
1705bool isGFX10Plus(const MCSubtargetInfo &STI);
1706bool isNotGFX10Plus(const MCSubtargetInfo &STI);
1707bool isGFX10Before1030(const MCSubtargetInfo &STI);
1708bool isGFX11(const MCSubtargetInfo &STI);
1709bool isGFX1170(const MCSubtargetInfo &STI);
1710bool isGFX11Plus(const MCSubtargetInfo &STI);
1711bool isGFX12(const MCSubtargetInfo &STI);
1712bool isGFX12Plus(const MCSubtargetInfo &STI);
1713bool isGFX1250(const MCSubtargetInfo &STI);
1714bool isGFX1250Plus(const MCSubtargetInfo &STI);
1715bool isGFX13(const MCSubtargetInfo &STI);
1716bool isGFX13Plus(const MCSubtargetInfo &STI);
1717bool supportsWGP(const MCSubtargetInfo &STI);
1718bool isNotGFX12Plus(const MCSubtargetInfo &STI);
1719bool isNotGFX11Plus(const MCSubtargetInfo &STI);
1720bool isGCN3Encoding(const MCSubtargetInfo &STI);
1721bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
1722bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
1723bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
1724bool isGFX10_3_GFX11(const MCSubtargetInfo &STI);
1725bool isGFX90A(const MCSubtargetInfo &STI);
1726bool isGFX940(const MCSubtargetInfo &STI);
1727bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
1728bool hasMAIInsts(const MCSubtargetInfo &STI);
1729bool hasVOPD(const MCSubtargetInfo &STI);
1730bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI);
1731
/// \returns True if the target supports wave32: gfx10 and later, with the
/// exception of gfx1250.
inline bool supportsWave32(const MCSubtargetInfo &STI) {
  return AMDGPU::isGFX10Plus(STI) && !AMDGPU::isGFX1250(STI);
}
1735
1736int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
1737unsigned hasKernargPreload(const MCSubtargetInfo &STI);
1738bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST);
1739
1740/// Is Reg - scalar register
1741bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI);
1742
1743/// \returns if \p Reg occupies the high 16-bits of a 32-bit register.
1744bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI);
1745
1746/// If \p Reg is a pseudo reg, return the correct hardware register given
1747/// \p STI otherwise return \p Reg.
1748MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI);
1749
1750/// Convert hardware register \p Reg to a pseudo register
1751LLVM_READNONE
1752MCRegister mc2PseudoReg(MCRegister Reg);
1753
1754LLVM_READNONE
1755bool isInlineValue(MCRegister Reg);
1756
/// Is this an AMDGPU specific source operand? These include registers,
/// inline constants, literals and mandatory literals (KImm).
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo) {
  // Source operand types occupy a contiguous range of the operand-type enum.
  return OpInfo.OperandType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpInfo.OperandType <= AMDGPU::OPERAND_SRC_LAST;
}
1763
1764inline bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
1765 return isSISrcOperand(OpInfo: Desc.operands()[OpNo]);
1766}
1767
1768/// Is this a KImm operand?
1769bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo);
1770
1771/// Is this floating-point operand?
1772bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
1773
1774/// Does this operand support only inlinable literals?
1775bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
1776
1777/// Get the size in bits of a register from the register class \p RC.
1778unsigned getRegBitWidth(unsigned RCID);
1779
1780/// Get the size in bits of a register from the register class \p RC.
1781unsigned getRegBitWidth(const MCRegisterClass &RC);
1782
LLVM_READNONE
/// \returns The size in bytes of the immediate/literal encoding slot for
/// operand type \p OpInfo.OperandType: 4, 8 or 2.
inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
  switch (OpInfo.OperandType) {
  // 32-bit (and packed 2x16 / split-barrier) operands occupy 4 bytes.
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_KIMM32:
  case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
  case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
    return 4;

  // 64-bit operands occupy 8 bytes.
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
  case AMDGPU::OPERAND_KIMM64:
    return 8;

  // 16-bit operands occupy 2 bytes.
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_BF16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2BF16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
  case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
    return 2;

  default:
    llvm_unreachable("unhandled operand type");
  }
}
1827
1828LLVM_READNONE
1829inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
1830 return getOperandSize(OpInfo: Desc.operands()[OpNo]);
1831}
1832
1833/// Is this literal inlinable, and not one of the values intended for floating
1834/// point values.
1835LLVM_READNONE
inline bool isInlinableIntLiteral(int64_t Literal) {
  // Integer inline constants cover the closed range [-16, 64].
  return !(Literal < -16 || Literal > 64);
}
1839
1840/// Is this literal inlinable
1841LLVM_READNONE
1842bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
1843
1844LLVM_READNONE
1845bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
1846
1847LLVM_READNONE
1848bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
1849
1850LLVM_READNONE
1851bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);
1852
1853LLVM_READNONE
1854bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi);
1855
1856LLVM_READNONE
1857std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal);
1858
1859LLVM_READNONE
1860std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal);
1861
1862LLVM_READNONE
1863std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal);
1864
1865LLVM_READNONE
1866std::optional<unsigned> getPKFMACF16InlineEncoding(uint32_t Literal,
1867 bool IsGFX11Plus);
1868
1869LLVM_READNONE
1870bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType);
1871
1872LLVM_READNONE
1873bool isInlinableLiteralV2I16(uint32_t Literal);
1874
1875LLVM_READNONE
1876bool isInlinableLiteralV2BF16(uint32_t Literal);
1877
1878LLVM_READNONE
1879bool isInlinableLiteralV2F16(uint32_t Literal);
1880
1881LLVM_READNONE
1882bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus);
1883
1884LLVM_READNONE
1885bool isValid32BitLiteral(uint64_t Val, bool IsFP64);
1886
1887LLVM_READNONE
1888int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit);
1889
1890bool isArgPassedInSGPR(const Argument *Arg);
1891
1892bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo);
1893
1894LLVM_READONLY bool isPackedFP32Inst(unsigned Opc);
1895
1896LLVM_READONLY
1897bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
1898 int64_t EncodedOffset);
1899
1900LLVM_READONLY
1901bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
1902 int64_t EncodedOffset, bool IsBuffer);
1903
1904/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
1905/// offsets.
1906uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);
1907
1908/// \returns The encoding that will be used for \p ByteOffset in the
1909/// SMRD offset field, or std::nullopt if it won't fit. On GFX9 and GFX10
1910/// S_LOAD instructions have a signed offset, on other subtargets it is
1911/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
1912std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
1913 int64_t ByteOffset, bool IsBuffer,
1914 bool HasSOffset = false);
1915
/// \return The encoding that can be used for a 32-bit literal offset in an
/// SMRD instruction. This is only useful on CI.
1918std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
1919 int64_t ByteOffset);
1920
1921/// For pre-GFX12 FLAT instructions the offset must be positive;
1922/// MSB is ignored and forced to zero.
1923///
1924/// \return The number of bits available for the signed offset field in flat
1925/// instructions. Note that some forms of the instruction disallow negative
1926/// offsets.
1927unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST);
1928
1929/// \returns true if this offset is small enough to fit in the SMRD
1930/// offset field. \p ByteOffset should be the offset in bytes and
1931/// not the encoded offset.
1932bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
1933
1934LLVM_READNONE
1935inline bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC) {
1936 if (isGFX12(STI: ST))
1937 return DC >= DPP::ROW_SHARE_FIRST && DC <= DPP::ROW_SHARE_LAST;
1938 if (isGFX90A(STI: ST))
1939 return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
1940 return false;
1941}
1942
1943/// \returns true if an instruction may have a 64-bit VGPR operand.
1944bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc,
1945 const MCSubtargetInfo &ST);
1946
1947/// \returns true if an instruction is a DP ALU DPP without any 64-bit operands.
1948bool isDPALU_DPP32BitOpc(unsigned Opc);
1949
1950/// \returns true if an instruction is a DP ALU DPP.
1951bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII,
1952 const MCSubtargetInfo &ST);
1953
1954/// \returns true if the intrinsic is divergent
1955bool isIntrinsicSourceOfDivergence(unsigned IntrID);
1956
1957/// \returns true if the intrinsic is uniform
1958bool isIntrinsicAlwaysUniform(unsigned IntrID);
1959
1960/// \returns a register class for the physical register \p Reg if it is a VGPR
1961/// or nullptr otherwise.
1962const MCRegisterClass *getVGPRPhysRegClass(MCRegister Reg,
1963 const MCRegisterInfo &MRI);
1964
1965/// \returns the MODE bits which have to be set by the S_SET_VGPR_MSB for the
1966/// physical register \p Reg.
1967unsigned getVGPREncodingMSBs(MCRegister Reg, const MCRegisterInfo &MRI);
1968
1969/// If \p Reg is a low VGPR return a corresponding high VGPR with \p MSBs set.
1970MCRegister getVGPRWithMSBs(MCRegister Reg, unsigned MSBs,
1971 const MCRegisterInfo &MRI);
1972
1973// Returns a table for the opcode with a given \p Desc to map the VGPR MSB
1974// set by the S_SET_VGPR_MSB to one of 4 sources. In case of VOPD returns 2
1975// maps, one for X and one for Y component.
1976std::pair<const AMDGPU::OpName *, const AMDGPU::OpName *>
1977getVGPRLoweringOperandTables(const MCInstrDesc &Desc);
1978
1979/// \returns true if a memory instruction supports scale_offset modifier.
1980bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode);
1981
/// \returns LDS block size in terms of dwords.
/// This is used to calculate the LDS size encoded for PAL metadata 3.0+,
/// which must be defined in terms of bytes.
1985unsigned getLdsDwGranularity(const MCSubtargetInfo &ST);
1986
/// Helper for querying a function's cluster-dimensions attribute.
///
/// An attribute is in exactly one of four states: Unknown (not yet
/// determined), NoCluster (no cluster is used), VariableDims (clustered but
/// the dimensions are not compile-time constants), or FixedDims (clustered
/// with constant dimensions stored in Dims).
class ClusterDimsAttr {
public:
  enum class Kind { Unknown, NoCluster, VariableDims, FixedDims };

  ClusterDimsAttr() = default;

  /// \returns the state this attribute is in.
  Kind getKind() const { return AttrKind; }

  bool isUnknown() const { return getKind() == Kind::Unknown; }

  bool isNoCluster() const { return getKind() == Kind::NoCluster; }

  bool isFixedDims() const { return getKind() == Kind::FixedDims; }

  bool isVariableDims() const { return getKind() == Kind::VariableDims; }

  // Note: each setter assigns a freshly constructed attribute, which also
  // resets Dims back to {0, 0, 0}.
  void setUnknown() { *this = ClusterDimsAttr(Kind::Unknown); }

  void setNoCluster() { *this = ClusterDimsAttr(Kind::NoCluster); }

  void setVariableDims() { *this = ClusterDimsAttr(Kind::VariableDims); }

  /// \returns the dims stored. Note that this function can only be called if
  /// the kind is \p Kind::FixedDims.
  const std::array<unsigned, 3> &getDims() const;

  bool operator==(const ClusterDimsAttr &RHS) const {
    return AttrKind == RHS.AttrKind && Dims == RHS.Dims;
  }

  /// \returns a string representation of this attribute.
  std::string to_string() const;

  /// \returns the cluster-dims attribute derived from function \p F.
  static ClusterDimsAttr get(const Function &F);

private:
  // Sentinel values, presumably used when the attribute is encoded as an
  // integer — NOTE(review): confirm against the implementation (.cpp).
  enum Encoding { EncoNoCluster = 0, EncoVariableDims = 1024 };

  ClusterDimsAttr(Kind AttrKind) : AttrKind(AttrKind) {}

  // Cluster dimensions; only meaningful when AttrKind == Kind::FixedDims.
  std::array<unsigned, 3> Dims = {0, 0, 0};

  Kind AttrKind = Kind::Unknown;
};
2030
2031} // namespace AMDGPU
2032
2033raw_ostream &operator<<(raw_ostream &OS,
2034 const AMDGPU::IsaInfo::TargetIDSetting S);
2035
2036} // end namespace llvm
2037
2038#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
2039