1//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11
12#include "AMDGPUSubtarget.h"
13#include "SIDefines.h"
14#include "llvm/ADT/StringExtras.h"
15#include "llvm/IR/CallingConv.h"
16#include "llvm/IR/InstrTypes.h"
17#include "llvm/IR/Module.h"
18#include "llvm/Support/Alignment.h"
19#include <array>
20#include <functional>
21#include <utility>
22
23// Pull in OpName enum definition and getNamedOperandIdx() declaration.
24#define GET_INSTRINFO_OPERAND_ENUM
25#include "AMDGPUGenInstrInfo.inc"
26
27struct amd_kernel_code_t;
28
29namespace llvm {
30
31struct Align;
32class Argument;
33class Function;
34class GlobalValue;
35class MachineInstr;
36class MCInstrInfo;
37class MCRegisterClass;
38class MCRegisterInfo;
39class MCSubtargetInfo;
40class MDNode;
41class StringRef;
42class Triple;
43class raw_ostream;
44
45namespace AMDGPU {
46
47struct AMDGPUMCKernelCodeT;
48struct IsaVersion;
49
/// Generic target versions emitted by this version of LLVM.
///
/// These numbers are incremented every time a codegen breaking change occurs
/// within a generic family.
namespace GenericVersion {
static constexpr unsigned GFX9 = 1;
static constexpr unsigned GFX9_4 = 1;
static constexpr unsigned GFX10_1 = 1;
static constexpr unsigned GFX10_3 = 1;
static constexpr unsigned GFX11 = 1;
static constexpr unsigned GFX12 = 1;
static constexpr unsigned GFX12_5 = 1;
} // namespace GenericVersion

// AMDHSA code-object (ABI) version numbers; values returned by
// getAMDHSACodeObjectVersion() below.
enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 };

// Small floating-point format classification (none / FP4 / FP8).
enum class FPType { None, FP4, FP8 };
67
68/// \returns True if \p STI is AMDHSA.
69bool isHsaAbi(const MCSubtargetInfo &STI);
70
71/// \returns Code object version from the IR module flag.
72unsigned getAMDHSACodeObjectVersion(const Module &M);
73
74/// \returns Code object version from ELF's e_ident[EI_ABIVERSION].
75unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion);
76
77/// \returns The default HSA code object version. This should only be used when
78/// we lack a more accurate CodeObjectVersion value (e.g. from the IR module
79/// flag or a .amdhsa_code_object_version directive)
80unsigned getDefaultAMDHSACodeObjectVersion();
81
82/// \returns ABIVersion suitable for use in ELF's e_ident[EI_ABIVERSION]. \param
83/// CodeObjectVersion is a value returned by getAMDHSACodeObjectVersion().
84uint8_t getELFABIVersion(const Triple &OS, unsigned CodeObjectVersion);
85
86/// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr
87unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV);
88
89/// \returns The offset of the hostcall pointer argument from implicitarg_ptr
90unsigned getHostcallImplicitArgPosition(unsigned COV);
91
92unsigned getDefaultQueueImplicitArgPosition(unsigned COV);
93unsigned getCompletionActionImplicitArgPosition(unsigned COV);
94
// The structs below are row types of the TableGen-generated searchable
// tables declared via the GET_*_DECL includes further down; field meanings
// follow the field names and the lookup helpers that consume them. Their
// layouts must match the generated tables — do not reorder members.

// One GCN buffer format entry: the combined format value plus its
// component/numeric/data format breakdown (see getGcnBufferFormatInfo()).
struct GcnBufferFormatInfo {
  unsigned Format;
  unsigned BitsPerComp;
  unsigned NumComponents;
  unsigned NumFormat;
  unsigned DataFormat;
};

// Per-opcode MAI (matrix) instruction properties.
struct MAIInstInfo {
  uint32_t Opcode;
  bool is_dgemm;      // Double-precision GEMM; queried by getMAIIsDGEMM().
  bool is_gfx940_xdl; // Queried by getMAIIsGFX940XDL().
};

// MFMA opcode paired with its F8F8 variant and the register footprint of
// the A/B source operands (see getMFMA_F8F6F4_WithFormatArgs()).
struct MFMA_F8F6F4_Info {
  unsigned Opcode;
  unsigned F8F8Opcode;
  uint8_t NumRegsSrcA;
  uint8_t NumRegsSrcB;
};

// Row type for the CvtScaleF32_F32F16ToF8F4 searchable table.
struct CvtScaleF32_F32F16ToF8F4_Info {
  unsigned Opcode;
};

// Maps a true16 opcode to its hi/lo D16 counterparts (per the field names;
// consumed via the True16D16 table declared below).
struct True16D16Info {
  unsigned T16Op;
  unsigned HiOp;
  unsigned LoOp;
};

// Per-opcode WMMA instruction properties.
struct WMMAInstInfo {
  uint32_t Opcode;
  bool is_wmma_xdl; // Queried by getWMMAIsXDL().
};
130
131#define GET_MIMGBaseOpcode_DECL
132#define GET_MIMGDim_DECL
133#define GET_MIMGEncoding_DECL
134#define GET_MIMGLZMapping_DECL
135#define GET_MIMGMIPMapping_DECL
136#define GET_MIMGBiASMapping_DECL
137#define GET_MAIInstInfoTable_DECL
138#define GET_isMFMA_F8F6F4Table_DECL
139#define GET_isCvtScaleF32_F32F16ToF8F4Table_DECL
140#define GET_True16D16Table_DECL
141#define GET_WMMAInstInfoTable_DECL
142#include "AMDGPUGenSearchableTables.inc"
143
144namespace IsaInfo {
145
enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  // SGPR count associated with the trap handler (per the name; confirm at
  // use sites).
  TRAP_NUM_SGPRS = 16
};

// State of a dynamic target-ID feature (xnack / sramecc): either the
// subtarget does not support it at all, or it is unspecified/off/on.
// See AMDGPUTargetID below.
enum class TargetIDSetting { Unsupported, Any, Off, On };
154
155class AMDGPUTargetID {
156private:
157 const MCSubtargetInfo &STI;
158 TargetIDSetting XnackSetting;
159 TargetIDSetting SramEccSetting;
160
161public:
162 explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
163 ~AMDGPUTargetID() = default;
164
165 /// \return True if the current xnack setting is not "Unsupported".
166 bool isXnackSupported() const {
167 return XnackSetting != TargetIDSetting::Unsupported;
168 }
169
170 /// \returns True if the current xnack setting is "On" or "Any".
171 bool isXnackOnOrAny() const {
172 return XnackSetting == TargetIDSetting::On ||
173 XnackSetting == TargetIDSetting::Any;
174 }
175
176 /// \returns True if current xnack setting is "On" or "Off",
177 /// false otherwise.
178 bool isXnackOnOrOff() const {
179 return getXnackSetting() == TargetIDSetting::On ||
180 getXnackSetting() == TargetIDSetting::Off;
181 }
182
183 /// \returns The current xnack TargetIDSetting, possible options are
184 /// "Unsupported", "Any", "Off", and "On".
185 TargetIDSetting getXnackSetting() const { return XnackSetting; }
186
187 /// Sets xnack setting to \p NewXnackSetting.
188 void setXnackSetting(TargetIDSetting NewXnackSetting) {
189 XnackSetting = NewXnackSetting;
190 }
191
192 /// \return True if the current sramecc setting is not "Unsupported".
193 bool isSramEccSupported() const {
194 return SramEccSetting != TargetIDSetting::Unsupported;
195 }
196
197 /// \returns True if the current sramecc setting is "On" or "Any".
198 bool isSramEccOnOrAny() const {
199 return SramEccSetting == TargetIDSetting::On ||
200 SramEccSetting == TargetIDSetting::Any;
201 }
202
203 /// \returns True if current sramecc setting is "On" or "Off",
204 /// false otherwise.
205 bool isSramEccOnOrOff() const {
206 return getSramEccSetting() == TargetIDSetting::On ||
207 getSramEccSetting() == TargetIDSetting::Off;
208 }
209
210 /// \returns The current sramecc TargetIDSetting, possible options are
211 /// "Unsupported", "Any", "Off", and "On".
212 TargetIDSetting getSramEccSetting() const { return SramEccSetting; }
213
214 /// Sets sramecc setting to \p NewSramEccSetting.
215 void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
216 SramEccSetting = NewSramEccSetting;
217 }
218
219 void setTargetIDFromFeaturesString(StringRef FS);
220 void setTargetIDFromTargetIDStream(StringRef TargetID);
221
222 /// Write string representation to \p OS
223 void print(raw_ostream &OS) const;
224
225 /// \returns String representation of an object.
226 std::string toString() const;
227};
228
/// Streams the textual form of \p TargetID to \p OS via AMDGPUTargetID::print.
inline raw_ostream &operator<<(raw_ostream &OS,
                               const AMDGPUTargetID &TargetID) {
  TargetID.print(OS);
  return OS;
}
234
235/// \returns Wavefront size for given subtarget \p STI.
236unsigned getWavefrontSize(const MCSubtargetInfo *STI);
237
238/// \returns Local memory size in bytes for given subtarget \p STI.
239unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
240
241/// \returns Maximum addressable local memory size in bytes for given subtarget
242/// \p STI.
243unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI);
244
245/// \returns Number of execution units per compute unit for given subtarget \p
246/// STI.
247unsigned getEUsPerCU(const MCSubtargetInfo *STI);
248
249/// \returns Maximum number of work groups per compute unit for given subtarget
250/// \p STI and limited by given \p FlatWorkGroupSize.
251unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
252 unsigned FlatWorkGroupSize);
253
254/// \returns Minimum number of waves per execution unit for given subtarget \p
255/// STI.
256unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
257
258/// \returns Maximum number of waves per execution unit for given subtarget \p
259/// STI without any kind of limitation.
260unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
261
262/// \returns Number of waves per execution unit required to support the given \p
263/// FlatWorkGroupSize.
264unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
265 unsigned FlatWorkGroupSize);
266
267/// \returns Minimum flat work group size for given subtarget \p STI.
268unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
269
/// \returns Maximum flat work group size
constexpr unsigned getMaxFlatWorkGroupSize() {
  // Although some subtargets can encode up to 2048, that configuration is
  // neither tested nor supported, so the limit stays at 1024.
  constexpr unsigned MaxSize = 1024;
  return MaxSize;
}
275
276/// \returns Number of waves per work group for given subtarget \p STI and
277/// \p FlatWorkGroupSize.
278unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
279 unsigned FlatWorkGroupSize);
280
281/// \returns SGPR allocation granularity for given subtarget \p STI.
282unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
283
284/// \returns SGPR encoding granularity for given subtarget \p STI.
285unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
286
287/// \returns Total number of SGPRs for given subtarget \p STI.
288unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
289
290/// \returns Addressable number of SGPRs for given subtarget \p STI.
291unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
292
293/// \returns Minimum number of SGPRs that meets the given number of waves per
294/// execution unit requirement for given subtarget \p STI.
295unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
296
297/// \returns Maximum number of SGPRs that meets the given number of waves per
298/// execution unit requirement for given subtarget \p STI.
299unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
300 bool Addressable);
301
302/// \returns Number of extra SGPRs implicitly required by given subtarget \p
303/// STI when the given special registers are used.
304unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
305 bool FlatScrUsed, bool XNACKUsed);
306
307/// \returns Number of extra SGPRs implicitly required by given subtarget \p
308/// STI when the given special registers are used. XNACK is inferred from
309/// \p STI.
310unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
311 bool FlatScrUsed);
312
313/// \returns Number of SGPR blocks needed for given subtarget \p STI when
314/// \p NumSGPRs are used. \p NumSGPRs should already include any special
315/// register counts.
316unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
317
318/// \returns VGPR allocation granularity for given subtarget \p STI.
319///
320/// For subtargets which support it, \p EnableWavefrontSize32 should match
321/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
322unsigned
323getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize,
324 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
325
326/// \returns VGPR encoding granularity for given subtarget \p STI.
327///
328/// For subtargets which support it, \p EnableWavefrontSize32 should match
329/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
330unsigned getVGPREncodingGranule(
331 const MCSubtargetInfo *STI,
332 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
333
334/// For subtargets with a unified VGPR file and mixed ArchVGPR/AGPR usage,
335/// returns the allocation granule for ArchVGPRs.
336unsigned getArchVGPRAllocGranule();
337
338/// \returns Total number of VGPRs for given subtarget \p STI.
339unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
340
341/// \returns Addressable number of architectural VGPRs for a given subtarget \p
342/// STI.
343unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI);
344
345/// \returns Addressable number of VGPRs for given subtarget \p STI.
346unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI,
347 unsigned DynamicVGPRBlockSize);
348
349/// \returns Minimum number of VGPRs that meets given number of waves per
350/// execution unit requirement for given subtarget \p STI.
351unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
352 unsigned DynamicVGPRBlockSize);
353
354/// \returns Maximum number of VGPRs that meets given number of waves per
355/// execution unit requirement for given subtarget \p STI.
356unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
357 unsigned DynamicVGPRBlockSize);
358
359/// \returns Number of waves reachable for a given \p NumVGPRs usage for given
360/// subtarget \p STI.
361unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
362 unsigned NumVGPRs,
363 unsigned DynamicVGPRBlockSize);
364
365/// \returns Number of waves reachable for a given \p NumVGPRs usage, \p Granule
366/// size, \p MaxWaves possible, and \p TotalNumVGPRs available.
367unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
368 unsigned MaxWaves,
369 unsigned TotalNumVGPRs);
370
371/// \returns Occupancy for a given \p SGPRs usage, \p MaxWaves possible, and \p
372/// Gen.
373unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
374 AMDGPUSubtarget::Generation Gen);
375
376/// \returns Number of VGPR blocks needed for given subtarget \p STI when
377/// \p NumVGPRs are used. We actually return the number of blocks -1, since
378/// that's what we encode.
379///
380/// For subtargets which support it, \p EnableWavefrontSize32 should match the
381/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
382unsigned getEncodedNumVGPRBlocks(
383 const MCSubtargetInfo *STI, unsigned NumVGPRs,
384 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
385
386/// \returns Number of VGPR blocks that need to be allocated for the given
387/// subtarget \p STI when \p NumVGPRs are used.
388unsigned getAllocatedNumVGPRBlocks(
389 const MCSubtargetInfo *STI, unsigned NumVGPRs,
390 unsigned DynamicVGPRBlockSize,
391 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
392
393} // end namespace IsaInfo
394
// Describes a single bit-field [HighBit:LowBit] within an encoded value,
// with compile-time default D. encode()/decode() deal in raw (unshifted)
// field values; shifting and masking are performed by EncodingFields below.
template <unsigned HighBit, unsigned LowBit, unsigned D = 0>
struct EncodingField {
  static_assert(HighBit >= LowBit, "Invalid bit range!");
  static constexpr unsigned Offset = LowBit;
  static constexpr unsigned Width = 1 + HighBit - LowBit;

  using ValueType = unsigned;
  static constexpr ValueType Default = D;

  ValueType Value;
  constexpr EncodingField(ValueType V) : Value(V) {}

  // Raw field value; caller shifts it into place.
  constexpr uint64_t encode() const { return Value; }
  // Inverse of encode(); caller has already shifted/masked the bits down.
  static ValueType decode(uint64_t Encoded) { return Encoded; }
};
411
// Represents a single bit in an encoded value.
template <unsigned Bit, unsigned D = 0>
using EncodingBit = EncodingField<Bit, Bit, D>;

// A helper for encoding and decoding multiple fields.
template <typename... Fields> struct EncodingFields {
  // OR together every field shifted to its own offset. Fields are assumed
  // not to overlap for the result to be meaningful.
  static constexpr uint64_t encode(Fields... Values) {
    return ((Values.encode() << Values.Offset) | ...);
  }

  // For each field: shift its bits down to position 0, mask to the field's
  // width, then let the field type interpret the raw value.
  static std::tuple<typename Fields::ValueType...> decode(uint64_t Encoded) {
    return {Fields::decode((Encoded >> Fields::Offset) &
                           maxUIntN(Fields::Width))...};
  }
};
427
428LLVM_READONLY
429inline bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx) {
430 return getNamedOperandIdx(Opcode, Name: NamedIdx) != -1;
431}
432
433LLVM_READONLY
434int32_t getSOPPWithRelaxation(uint32_t Opcode);
435
// Per-base-opcode properties of a MIMG (image) instruction, as emitted into
// the TableGen searchable tables included above; retrieved via
// getMIMGBaseOpcode() / getMIMGBaseOpcodeInfo() below. Field meanings
// follow the field names; layout must match the generated table.
struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode;
  bool Store;
  bool Atomic;
  bool AtomicX2;
  bool Sampler;
  bool Gather4;

  uint8_t NumExtraArgs;
  bool Gradients;
  bool G16;
  bool Coordinates;
  bool LodOrClampOrMip;
  bool HasD16;
  bool MSAA;
  bool BVH;
  bool A16;
  bool NoReturn;
  bool PointSampleAccel;
};
456
457LLVM_READONLY
458const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc);
459
460LLVM_READONLY
461const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
462
// Properties of a MIMG addressing dimension; looked up by enum value,
// hardware encoding, or assembly suffix via the getMIMGDimInfo* helpers
// below. Layout must match the generated table.
struct MIMGDimInfo {
  MIMGDim Dim;
  uint8_t NumCoords;    // Number of coordinate components.
  uint8_t NumGradients; // Number of gradient components.
  bool MSAA;
  bool DA;
  uint8_t Encoding;      // Hardware encoding; key of getMIMGDimInfoByEncoding().
  const char *AsmSuffix; // Key of getMIMGDimInfoByAsmSuffix().
};
472
473LLVM_READONLY
474const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
475
476LLVM_READONLY
477const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
478
479LLVM_READONLY
480const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
481
// Each *MappingInfo struct below pairs a MIMG base opcode with a related
// variant; they are rows of TableGen-generated mapping tables, looked up
// via the getMIMG*MappingInfo() helpers declared further down. Variant
// semantics are per the member names — confirm against the .td definitions.
struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;  // Variant with an explicit LOD argument.
  MIMGBaseOpcode LZ; // Corresponding LOD-zero variant.
};

struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;
  MIMGBaseOpcode NONMIP;
};

struct MIMGBiasMappingInfo {
  MIMGBaseOpcode Bias;
  MIMGBaseOpcode NoBias;
};

struct MIMGOffsetMappingInfo {
  MIMGBaseOpcode Offset;
  MIMGBaseOpcode NoOffset;
};

struct MIMGG16MappingInfo {
  MIMGBaseOpcode G;
  MIMGBaseOpcode G16; // 16-bit gradient variant.
};
506
507LLVM_READONLY
508const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
509
// Pairs the 2-address and 3-address forms of a WMMA opcode.
struct WMMAOpcodeMappingInfo {
  unsigned Opcode2Addr;
  unsigned Opcode3Addr;
};
514
515LLVM_READONLY
516const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
517
518LLVM_READONLY
519const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);
520
521LLVM_READONLY
522const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset);
523
524LLVM_READONLY
525const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);
526
527LLVM_READONLY
528int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
529 unsigned VDataDwords, unsigned VAddrDwords);
530
531LLVM_READONLY
532int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
533
534LLVM_READONLY
535unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
536 const MIMGDimInfo *Dim, bool IsA16,
537 bool IsG16Supported);
538
// Encoding-level properties of a concrete MIMG opcode (one row of the
// generated MIMG table); retrieved via getMIMGInfo() below.
struct MIMGInfo {
  uint32_t Opcode;
  uint32_t BaseOpcode; // Corresponding MIMGBaseOpcode value.
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
  uint8_t VAddrOperands;
};
547
548LLVM_READONLY
549const MIMGInfo *getMIMGInfo(unsigned Opc);
550
551LLVM_READONLY
552int getMTBUFBaseOpcode(unsigned Opc);
553
554LLVM_READONLY
555int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
556
557LLVM_READONLY
558int getMTBUFElements(unsigned Opc);
559
560LLVM_READONLY
561bool getMTBUFHasVAddr(unsigned Opc);
562
563LLVM_READONLY
564bool getMTBUFHasSrsrc(unsigned Opc);
565
566LLVM_READONLY
567bool getMTBUFHasSoffset(unsigned Opc);
568
569LLVM_READONLY
570int getMUBUFBaseOpcode(unsigned Opc);
571
572LLVM_READONLY
573int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);
574
575LLVM_READONLY
576int getMUBUFElements(unsigned Opc);
577
578LLVM_READONLY
579bool getMUBUFHasVAddr(unsigned Opc);
580
581LLVM_READONLY
582bool getMUBUFHasSrsrc(unsigned Opc);
583
584LLVM_READONLY
585bool getMUBUFHasSoffset(unsigned Opc);
586
587LLVM_READONLY
588bool getMUBUFIsBufferInv(unsigned Opc);
589
590LLVM_READONLY
591bool getMUBUFTfe(unsigned Opc);
592
593LLVM_READONLY
594bool getSMEMIsBuffer(unsigned Opc);
595
596LLVM_READONLY
597bool getVOP1IsSingle(unsigned Opc);
598
599LLVM_READONLY
600bool getVOP2IsSingle(unsigned Opc);
601
602LLVM_READONLY
603bool getVOP3IsSingle(unsigned Opc);
604
605LLVM_READONLY
606bool isVOPC64DPP(unsigned Opc);
607
608LLVM_READONLY
609bool isVOPCAsmOnly(unsigned Opc);
610
611/// Returns true if MAI operation is a double precision GEMM.
612LLVM_READONLY
613bool getMAIIsDGEMM(unsigned Opc);
614
615LLVM_READONLY
616bool getMAIIsGFX940XDL(unsigned Opc);
617
618LLVM_READONLY
619bool getWMMAIsXDL(unsigned Opc);
620
621// Get an equivalent BitOp3 for a binary logical \p Opc.
622// \returns BitOp3 modifier for the logical operation or zero.
623// Used in VOPD3 conversion.
624unsigned getBitOp2(unsigned Opc);
625
// Whether an opcode may serve as the X and/or Y component of a VOPD
// instruction; produced by getCanBeVOPD() below.
struct CanBeVOPD {
  bool X; // Usable as the X component.
  bool Y; // Usable as the Y component.
};
630
631/// \returns SIEncodingFamily used for VOPD encoding on a \p ST.
632LLVM_READONLY
633unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST);
634
635LLVM_READONLY
636CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3);
637
638LLVM_READNONE
639uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal);
640
641LLVM_READONLY
642const MFMA_F8F6F4_Info *getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ,
643 unsigned BLGP,
644 unsigned F8F8Opcode);
645
646LLVM_READNONE
647uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt);
648
649LLVM_READONLY
650const MFMA_F8F6F4_Info *getWMMA_F8F6F4_WithFormatArgs(unsigned FmtA,
651 unsigned FmtB,
652 unsigned F8F8Opcode);
653
654LLVM_READONLY
655const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
656 uint8_t NumComponents,
657 uint8_t NumFormat,
658 const MCSubtargetInfo &STI);
659LLVM_READONLY
660const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
661 const MCSubtargetInfo &STI);
662
663LLVM_READONLY
664int32_t getMCOpcode(uint32_t Opcode, unsigned Gen);
665
666LLVM_READONLY
667unsigned getVOPDOpcode(unsigned Opc, bool VOPD3);
668
669LLVM_READONLY
670int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily,
671 bool VOPD3);
672
673LLVM_READONLY
674bool isVOPD(unsigned Opc);
675
676LLVM_READNONE
677bool isMAC(unsigned Opc);
678
679LLVM_READNONE
680bool isPermlane16(unsigned Opc);
681
682LLVM_READNONE
683bool isGenericAtomic(unsigned Opc);
684
685LLVM_READNONE
686bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc);
687
688namespace VOPD {
689
// Component operand indices of a VOPD half: one destination followed by up
// to three sources.
enum Component : unsigned {
  DST = 0,
  SRC0,
  SRC1,
  SRC2,

  DST_NUM = 1,     // Destinations per component.
  MAX_SRC_NUM = 3, // Maximum sources per component.
  MAX_OPR_NUM = DST_NUM + MAX_SRC_NUM
};

// LSB mask for VGPR banks per VOPD component operand.
// 4 banks result in a mask 3, setting 2 lower bits.
// Indexed by Component (DST, SRC0, SRC1, SRC2).
constexpr unsigned VOPD_VGPR_BANK_MASKS[] = {1, 3, 3, 1};
constexpr unsigned VOPD3_VGPR_BANK_MASKS[] = {1, 3, 3, 3};

// The two halves of a VOPD instruction.
enum ComponentIndex : unsigned { X = 0, Y = 1 };
constexpr unsigned COMPONENTS[] = {ComponentIndex::X, ComponentIndex::Y};
constexpr unsigned COMPONENTS_NUM = 2;
709
710// Properties of VOPD components.
711class ComponentProps {
712private:
713 unsigned SrcOperandsNum = 0;
714 unsigned MandatoryLiteralIdx = ~0u;
715 bool HasSrc2Acc = false;
716 unsigned NumVOPD3Mods = 0;
717 unsigned Opcode = 0;
718 bool IsVOP3 = false;
719
720public:
721 ComponentProps() = default;
722 ComponentProps(const MCInstrDesc &OpDesc, bool VOP3Layout = false);
723
724 // Return the total number of src operands this component has.
725 unsigned getCompSrcOperandsNum() const { return SrcOperandsNum; }
726
727 // Return the number of src operands of this component visible to the parser.
728 unsigned getCompParsedSrcOperandsNum() const {
729 return SrcOperandsNum - HasSrc2Acc;
730 }
731
732 // Return true iif this component has a mandatory literal.
733 bool hasMandatoryLiteral() const { return MandatoryLiteralIdx != ~0u; }
734
735 // If this component has a mandatory literal, return component operand
736 // index of this literal (i.e. either Component::SRC1 or Component::SRC2).
737 unsigned getMandatoryLiteralCompOperandIndex() const {
738 assert(hasMandatoryLiteral());
739 return MandatoryLiteralIdx;
740 }
741
742 // Return true iif this component has operand
743 // with component index CompSrcIdx and this operand may be a register.
744 bool hasRegSrcOperand(unsigned CompSrcIdx) const {
745 assert(CompSrcIdx < Component::MAX_SRC_NUM);
746 return SrcOperandsNum > CompSrcIdx && !hasMandatoryLiteralAt(CompSrcIdx);
747 }
748
749 // Return true iif this component has tied src2.
750 bool hasSrc2Acc() const { return HasSrc2Acc; }
751
752 // Return a number of source modifiers if instruction is used in VOPD3.
753 unsigned getCompVOPD3ModsNum() const { return NumVOPD3Mods; }
754
755 // Return opcode of the component.
756 unsigned getOpcode() const { return Opcode; }
757
758 // Returns if component opcode is in VOP3 encoding.
759 unsigned isVOP3() const { return IsVOP3; }
760
761 // Return index of BitOp3 operand or -1.
762 int getBitOp3OperandIdx() const;
763
764private:
765 bool hasMandatoryLiteralAt(unsigned CompSrcIdx) const {
766 assert(CompSrcIdx < Component::MAX_SRC_NUM);
767 return MandatoryLiteralIdx == Component::DST_NUM + CompSrcIdx;
768 }
769};
770
// Kind of layout a component uses; indexes the MC_DST_IDX / PARSED_DST_IDX
// style tables in ComponentLayout below.
enum ComponentKind : unsigned {
  SINGLE = 0, // A single VOP1 or VOP2 instruction which may be used in VOPD.
  COMPONENT_X, // A VOPD instruction, X component.
  COMPONENT_Y, // A VOPD instruction, Y component.
  MAX = COMPONENT_Y
};
777
778// Interface functions of this class map VOPD component operand indices
779// to indices of operands in MachineInstr/MCInst or parsed operands array.
780//
781// Note that this class operates with 3 kinds of indices:
782// - VOPD component operand indices (Component::DST, Component::SRC0, etc.);
783// - MC operand indices (they refer operands in a MachineInstr/MCInst);
784// - parsed operand indices (they refer operands in parsed operands array).
785//
786// For SINGLE components mapping between these indices is trivial.
787// But things get more complicated for COMPONENT_X and
788// COMPONENT_Y because these components share the same
789// MachineInstr/MCInst and the same parsed operands array.
790// Below is an example of component operand to parsed operand
791// mapping for the following instruction:
792//
793// v_dual_add_f32 v255, v4, v5 :: v_dual_mov_b32 v6, v1
794//
795// PARSED COMPONENT PARSED
796// COMPONENT OPERANDS OPERAND INDEX OPERAND INDEX
797// -------------------------------------------------------------------
798// "v_dual_add_f32" 0
799// v_dual_add_f32 v255 0 (DST) --> 1
800// v4 1 (SRC0) --> 2
801// v5 2 (SRC1) --> 3
802// "::" 4
803// "v_dual_mov_b32" 5
804// v_dual_mov_b32 v6 0 (DST) --> 6
805// v1 1 (SRC0) --> 7
806// -------------------------------------------------------------------
807//
// Maps VOPD component operand indices (Component::DST etc.) to MC operand
// indices and parsed-operand indices; see the worked example above.
class ComponentLayout {
private:
  // Regular MachineInstr/MCInst operands are ordered as follows:
  //   dst, src0 [, other src operands]
  // VOPD MachineInstr/MCInst operands are ordered as follows:
  //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
  // Each ComponentKind has operand indices defined below.
  static constexpr unsigned MC_DST_IDX[] = {0, 0, 1};

  // VOPD3 instructions may have 2 or 3 source modifiers, src2 modifier is not
  // used if there is tied accumulator. Indexing of this array:
  // MC_SRC_IDX[VOPD3ModsNum][SrcNo]. This returns an index for a SINGLE
  // instruction layout, add 1 for COMPONENT_X or COMPONENT_Y. For the second
  // component add OpX.MCSrcNum + OpX.VOPD3ModsNum.
  // For VOPD1/VOPD2 use column with zero modifiers.
  static constexpr unsigned SINGLE_MC_SRC_IDX[4][3] = {
      {1, 2, 3}, {2, 3, 4}, {2, 4, 5}, {2, 4, 6}};

  // Parsed operands of regular instructions are ordered as follows:
  //   Mnemo dst src0 [vsrc1 ...]
  // Parsed VOPD operands are ordered as follows:
  //   OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
  //   OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
  // Each ComponentKind has operand indices defined below.
  static constexpr unsigned PARSED_DST_IDX[] = {1, 1,
                                                4 /* + OpX.ParsedSrcNum */};
  static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {
      2, 2, 5 /* + OpX.ParsedSrcNum */};

private:
  const ComponentKind Kind;
  // Properties of the preceding (X) component; only meaningful for
  // COMPONENT_Y, otherwise default-constructed with zero operand counts.
  const ComponentProps PrevComp;
  const unsigned VOPD3ModsNum;
  const int BitOp3Idx; // Index of bitop3 operand or -1

public:
  // Create layout for COMPONENT_X or SINGLE component. PrevComp is left
  // default-constructed, so the getPrevComp* helpers contribute 0 here.
  ComponentLayout(ComponentKind Kind, unsigned VOPD3ModsNum, int BitOp3Idx)
      : Kind(Kind), VOPD3ModsNum(VOPD3ModsNum), BitOp3Idx(BitOp3Idx) {
    assert(Kind == ComponentKind::SINGLE || Kind == ComponentKind::COMPONENT_X);
  }

  // Create layout for COMPONENT_Y which depends on COMPONENT_X layout.
  ComponentLayout(const ComponentProps &OpXProps, unsigned VOPD3ModsNum,
                  int BitOp3Idx)
      : Kind(ComponentKind::COMPONENT_Y), PrevComp(OpXProps),
        VOPD3ModsNum(VOPD3ModsNum), BitOp3Idx(BitOp3Idx) {}

public:
  // Return the index of dst operand in MCInst operands.
  unsigned getIndexOfDstInMCOperands() const { return MC_DST_IDX[Kind]; }

  // Return the index of the specified src operand in MCInst operands.
  unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx, bool VOPD3) const {
    assert(CompSrcIdx < Component::MAX_SRC_NUM);

    // A SINGLE instruction that has a BitOp3 operand reports that operand's
    // actual index for src2.
    if (Kind == SINGLE && CompSrcIdx == 2 && BitOp3Idx != -1)
      return BitOp3Idx;

    if (VOPD3) {
      // Skip the preceding component's sources and modifiers, plus the
      // extra dst of a two-component instruction (Kind != SINGLE).
      return SINGLE_MC_SRC_IDX[VOPD3ModsNum][CompSrcIdx] + getPrevCompSrcNum() +
             getPrevCompVOPD3ModsNum() + (Kind != SINGLE ? 1 : 0);
    }

    // VOPD1/VOPD2: use the zero-modifier column of the table.
    return SINGLE_MC_SRC_IDX[0][CompSrcIdx] + getPrevCompSrcNum() +
           (Kind != SINGLE ? 1 : 0);
  }

  // Return the index of dst operand in the parsed operands array.
  unsigned getIndexOfDstInParsedOperands() const {
    return PARSED_DST_IDX[Kind] + getPrevCompParsedSrcNum();
  }

  // Return the index of the specified src operand in the parsed operands array.
  unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const {
    assert(CompSrcIdx < Component::MAX_SRC_NUM);
    return FIRST_PARSED_SRC_IDX[Kind] + getPrevCompParsedSrcNum() + CompSrcIdx;
  }

private:
  // The three helpers below return 0 for SINGLE/COMPONENT_X layouts, since
  // PrevComp is default-constructed in that case.
  unsigned getPrevCompSrcNum() const {
    return PrevComp.getCompSrcOperandsNum();
  }
  unsigned getPrevCompParsedSrcNum() const {
    return PrevComp.getCompParsedSrcOperandsNum();
  }
  unsigned getPrevCompVOPD3ModsNum() const {
    return PrevComp.getCompVOPD3ModsNum();
  }
};
898
// Layout and properties of VOPD components: combines ComponentProps (which
// operands the opcode has) with ComponentLayout (where those operands live
// in the MCInst / parsed-operand arrays).
class ComponentInfo : public ComponentProps, public ComponentLayout {
public:
  // Create ComponentInfo for COMPONENT_X or SINGLE component.
  // ComponentProps is constructed first (base-class declaration order), so
  // calling getCompVOPD3ModsNum()/getBitOp3OperandIdx() here is safe.
  ComponentInfo(const MCInstrDesc &OpDesc,
                ComponentKind Kind = ComponentKind::SINGLE,
                bool VOP3Layout = false)
      : ComponentProps(OpDesc, VOP3Layout),
        ComponentLayout(Kind, getCompVOPD3ModsNum(), getBitOp3OperandIdx()) {}

  // Create ComponentInfo for COMPONENT_Y which depends on COMPONENT_X layout.
  ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps,
                bool VOP3Layout = false)
      : ComponentProps(OpDesc, VOP3Layout),
        ComponentLayout(OpXProps, getCompVOPD3ModsNum(),
                        getBitOp3OperandIdx()) {}

  // Map component operand index to parsed operand index.
  // Return 0 if the specified operand does not exist.
  unsigned getIndexInParsedOperands(unsigned CompOprIdx) const;
};
920
// Properties of VOPD instructions: the pair of X and Y component infos plus
// operand-constraint checks over them.
class InstInfo {
private:
  // Indexed by ComponentIndex (X = 0, Y = 1).
  const ComponentInfo CompInfo[COMPONENTS_NUM];

public:
  // One VGPR register per component operand (DST, SRC0..SRC2).
  using RegIndices = std::array<MCRegister, Component::MAX_OPR_NUM>;

  InstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
      : CompInfo{OpX, OpY} {}

  InstInfo(const ComponentInfo &OprInfoX, const ComponentInfo &OprInfoY)
      : CompInfo{OprInfoX, OprInfoY} {}

  // Access the info of component \p ComponentIdx (ComponentIndex::X or Y).
  const ComponentInfo &operator[](size_t ComponentIdx) const {
    assert(ComponentIdx < COMPONENTS_NUM);
    return CompInfo[ComponentIdx];
  }

  // Check VOPD operands constraints.
  // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
  // for the specified component and MC operand. The callback must return 0
  // if the operand is not a register or not a VGPR.
  // If \p SkipSrc is set to true then constraints for source operands are not
  // checked.
  // If \p AllowSameVGPR is set then same VGPRs are allowed for X and Y sources
  // even though it violates requirement to be from different banks.
  // If \p VOPD3 is set to true both dst registers allowed to be either odd
  // or even and instruction may have real src2 as opposed to tied accumulator.
  bool
  hasInvalidOperand(std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
                    const MCRegisterInfo &MRI, bool SkipSrc = false,
                    bool AllowSameVGPR = false, bool VOPD3 = false) const {
    // Thin wrapper: valid iff no component operand index is reported invalid.
    return getInvalidCompOperandIndex(GetRegIdx, MRI, SkipSrc, AllowSameVGPR,
                                      VOPD3)
        .has_value();
  }

  // Check VOPD operands constraints.
  // Return the index of an invalid component operand, if any.
  // If \p SkipSrc is set to true then constraints for source operands are not
  // checked except for being from the same halves of VGPR file on gfx1250.
  // If \p AllowSameVGPR is set then same VGPRs are allowed for X and Y sources
  // even though it violates requirement to be from different banks.
  // If \p VOPD3 is set to true both dst registers allowed to be either odd
  // or even and instruction may have real src2 as opposed to tied accumulator.
  std::optional<unsigned> getInvalidCompOperandIndex(
      std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
      const MCRegisterInfo &MRI, bool SkipSrc = false,
      bool AllowSameVGPR = false, bool VOPD3 = false) const;

private:
  // Collect the VGPR indices of every operand of component \p ComponentIdx
  // via the \p GetRegIdx callback.
  RegIndices
  getRegIndices(unsigned ComponentIdx,
                std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
                bool VOPD3) const;
};
978
979} // namespace VOPD
980
981LLVM_READONLY
982std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode);
983
984LLVM_READONLY
985// Get properties of 2 single VOP1/VOP2 instructions
986// used as components to create a VOPD instruction.
987VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY);
988
989LLVM_READONLY
990// Get properties of VOPD X and Y components.
991VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
992 const MCInstrInfo *InstrInfo);
993
994LLVM_READONLY
995bool isAsyncStore(unsigned Opc);
996LLVM_READONLY
997bool isTensorStore(unsigned Opc);
998LLVM_READONLY
999unsigned getTemporalHintType(const MCInstrDesc TID);
1000
1001LLVM_READONLY
1002bool isTrue16Inst(unsigned Opc);
1003
1004LLVM_READONLY
1005FPType getFPDstSelType(unsigned Opc);
1006
1007LLVM_READONLY
1008bool isInvalidSingleUseConsumerInst(unsigned Opc);
1009
1010LLVM_READONLY
1011bool isInvalidSingleUseProducerInst(unsigned Opc);
1012
1013bool isDPMACCInstruction(unsigned Opc);
1014
1015LLVM_READONLY
1016unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);
1017
1018LLVM_READONLY
1019unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);
1020
1021void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &Header,
1022 const MCSubtargetInfo *STI);
1023
1024bool isGroupSegment(const GlobalValue *GV);
1025bool isGlobalSegment(const GlobalValue *GV);
1026bool isReadOnlySegment(const GlobalValue *GV);
1027
1028/// \returns True if constants should be emitted to .text section for given
1029/// target triple \p TT, false otherwise.
1030bool shouldEmitConstantsToTextSection(const Triple &TT);
1031
1032/// Returns a valid charcode or 0 in the first entry if this is a valid physical
1033/// register name. Followed by the start register number, and the register
1034/// width. Does not validate the number of registers exists in the class. Unlike
1035/// parseAsmConstraintPhysReg, this does not expect the name to be wrapped in
1036/// "{}".
1037std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef TupleString);
1038
1039/// Returns a valid charcode or 0 in the first entry if this is a valid physical
1040/// register constraint. Followed by the start register number, and the register
1041/// width. Does not validate the number of registers exists in the class.
1042std::tuple<char, unsigned, unsigned>
1043parseAsmConstraintPhysReg(StringRef Constraint);
1044
1045/// \returns Integer value requested using \p F's \p Name attribute.
1046///
1047/// \returns \p Default if attribute is not present.
1048///
1049/// \returns \p Default and emits error if requested value cannot be converted
1050/// to integer.
1051int getIntegerAttribute(const Function &F, StringRef Name, int Default);
1052
1053/// \returns A pair of integer values requested using \p F's \p Name attribute
1054/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
1055/// is false).
1056///
1057/// \returns \p Default if attribute is not present.
1058///
1059/// \returns \p Default and emits error if one of the requested values cannot be
1060/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
1061/// not present.
1062std::pair<unsigned, unsigned>
1063getIntegerPairAttribute(const Function &F, StringRef Name,
1064 std::pair<unsigned, unsigned> Default,
1065 bool OnlyFirstRequired = false);
1066
1067/// \returns A pair of integer values requested using \p F's \p Name attribute
1068/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
1069/// is false).
1070///
1071/// \returns \p std::nullopt if attribute is not present.
1072///
1073/// \returns \p std::nullopt and emits error if one of the requested values
1074/// cannot be converted to integer, or \p OnlyFirstRequired is false and
1075/// "second" value is not present.
1076std::optional<std::pair<unsigned, std::optional<unsigned>>>
1077getIntegerPairAttribute(const Function &F, StringRef Name,
1078 bool OnlyFirstRequired = false);
1079
1080/// \returns Generate a vector of integer values requested using \p F's \p Name
1081/// attribute.
1082/// \returns A vector of size \p Size, with all elements set to \p DefaultVal,
1083/// if any error occurs. The corresponding error will also be emitted.
1084SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
1085 unsigned Size,
1086 unsigned DefaultVal);
1087/// Similar to the function above, but returns std::nullopt if any error occurs.
1088std::optional<SmallVector<unsigned>>
1089getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size);
1090
1091/// Checks if \p Val is inside \p MD, a !range-like metadata.
1092bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val);
1093
// Instruction counter types tracked by s_waitcnt-style instructions.
// Enumerators are ordered so that contiguous sub-ranges can be iterated:
// normal counters, then extended (gfx12+) counters, then expert-mode ones.
enum InstCounterType {
  LOAD_CNT = 0, // VMcnt prior to gfx12.
  DS_CNT,       // LGKMcnt prior to gfx12.
  EXP_CNT,      //
  STORE_CNT,    // VScnt in gfx10/gfx11.
  NUM_NORMAL_INST_CNTS,
  SAMPLE_CNT = NUM_NORMAL_INST_CNTS, // gfx12+ only.
  BVH_CNT,                           // gfx12+ only.
  KM_CNT,                            // gfx12+ only.
  X_CNT,                             // gfx1250.
  ASYNC_CNT,                         // gfx1250.
  NUM_EXTENDED_INST_CNTS,
  VA_VDST = NUM_EXTENDED_INST_CNTS, // gfx12+ expert mode only.
  VM_VSRC,                          // gfx12+ expert mode only.
  NUM_EXPERT_INST_CNTS,
  NUM_INST_CNTS = NUM_EXPERT_INST_CNTS
};
1111
1112StringLiteral getInstCounterName(InstCounterType T);
1113
1114// Return an iterator over all counters between LOAD_CNT (the first counter)
1115// and \c MaxCounter (exclusive, default value yields an enumeration over
1116// all counters).
1117iota_range<InstCounterType>
1118inst_counter_types(InstCounterType MaxCounter = NUM_INST_CNTS);
1119
1120} // namespace AMDGPU
1121
// Opt InstCounterType into llvm::enum_seq / iota_range iteration (used by
// inst_counter_types() above).
template <> struct enum_iteration_traits<AMDGPU::InstCounterType> {
  static constexpr bool is_iterable = true;
};
1125
1126namespace AMDGPU {
1127
1128/// Represents the counter values to wait for in an s_waitcnt instruction.
1129///
1130/// Large values (including the maximum possible integer) can be used to
1131/// represent "don't care" waits.
1132class Waitcnt {
1133 std::array<unsigned, NUM_INST_CNTS> Cnt;
1134
1135public:
1136 unsigned get(InstCounterType T) const { return Cnt[T]; }
1137 void set(InstCounterType T, unsigned Val) { Cnt[T] = Val; }
1138
1139 Waitcnt() { fill(Range&: Cnt, Value: ~0u); }
1140 // Pre-gfx12 constructor.
1141 Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
1142 : Waitcnt() {
1143 Cnt[LOAD_CNT] = VmCnt;
1144 Cnt[EXP_CNT] = ExpCnt;
1145 Cnt[DS_CNT] = LgkmCnt;
1146 Cnt[STORE_CNT] = VsCnt;
1147 }
1148
1149 // gfx12+ constructor.
1150 Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
1151 unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt,
1152 unsigned AsyncCnt, unsigned VaVdst, unsigned VmVsrc)
1153 : Waitcnt() {
1154 Cnt[LOAD_CNT] = LoadCnt;
1155 Cnt[DS_CNT] = DsCnt;
1156 Cnt[EXP_CNT] = ExpCnt;
1157 Cnt[STORE_CNT] = StoreCnt;
1158 Cnt[SAMPLE_CNT] = SampleCnt;
1159 Cnt[BVH_CNT] = BvhCnt;
1160 Cnt[KM_CNT] = KmCnt;
1161 Cnt[X_CNT] = XCnt;
1162 Cnt[ASYNC_CNT] = AsyncCnt;
1163 Cnt[VA_VDST] = VaVdst;
1164 Cnt[VM_VSRC] = VmVsrc;
1165 }
1166
1167 bool hasWait() const {
1168 return any_of(Range: Cnt, P: [](unsigned Val) { return Val != ~0u; });
1169 }
1170
1171 bool hasWaitExceptStoreCnt() const {
1172 for (InstCounterType T : inst_counter_types()) {
1173 if (T == STORE_CNT)
1174 continue;
1175 if (Cnt[T] != ~0u)
1176 return true;
1177 }
1178 return false;
1179 }
1180
1181 bool hasWaitStoreCnt() const { return Cnt[STORE_CNT] != ~0u; }
1182
1183 bool hasWaitDepctr() const {
1184 return Cnt[VA_VDST] != ~0u || Cnt[VM_VSRC] != ~0u;
1185 }
1186
1187 Waitcnt combined(const Waitcnt &Other) const {
1188 // Does the right thing provided self and Other are either both pre-gfx12
1189 // or both gfx12+.
1190 Waitcnt Wait;
1191 for (InstCounterType T : inst_counter_types())
1192 Wait.Cnt[T] = std::min(a: Cnt[T], b: Other.Cnt[T]);
1193 return Wait;
1194 }
1195
1196 void print(raw_ostream &OS) const {
1197 ListSeparator LS;
1198 for (InstCounterType T : inst_counter_types())
1199 OS << LS << getInstCounterName(T) << ": " << Cnt[T];
1200 if (LS.unused())
1201 OS << "none";
1202 OS << '\n';
1203 }
1204
1205#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1206 LLVM_DUMP_METHOD void dump() const;
1207#endif
1208
1209 friend raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait) {
1210 Wait.print(OS);
1211 return OS;
1212 }
1213};
1214
/// Represents the hardware counter limits for different wait count types.
/// Each field holds the maximum value encodable for the corresponding
/// counter on the target ISA.
struct HardwareLimits {
  unsigned LoadcntMax; // Corresponds to Vmcnt prior to gfx12.
  unsigned ExpcntMax;
  unsigned DscntMax;     // Corresponds to LGKMcnt prior to gfx12.
  unsigned StorecntMax;  // Corresponds to VScnt in gfx10/gfx11.
  unsigned SamplecntMax; // gfx12+ only.
  unsigned BvhcntMax;    // gfx12+ only.
  unsigned KmcntMax;     // gfx12+ only.
  unsigned XcntMax;      // gfx1250.
  unsigned AsyncMax;     // gfx1250.
  unsigned VaVdstMax;    // gfx12+ expert mode only.
  unsigned VmVsrcMax;    // gfx12+ expert mode only.

  // NOTE: the default constructor leaves all fields uninitialized; use the
  // IsaVersion constructor to populate them.
  HardwareLimits() = default;

  /// Initializes hardware limits from ISA version.
  HardwareLimits(const IsaVersion &IV);
};
1234
1235// The following methods are only meaningful on targets that support
1236// S_WAITCNT.
1237
1238/// \returns Vmcnt bit mask for given isa \p Version.
1239unsigned getVmcntBitMask(const IsaVersion &Version);
1240
1241/// \returns Expcnt bit mask for given isa \p Version.
1242unsigned getExpcntBitMask(const IsaVersion &Version);
1243
1244/// \returns Lgkmcnt bit mask for given isa \p Version.
1245unsigned getLgkmcntBitMask(const IsaVersion &Version);
1246
1247/// \returns Waitcnt bit mask for given isa \p Version.
1248unsigned getWaitcntBitMask(const IsaVersion &Version);
1249
1250/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
1251unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
1252
1253/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
1254unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
1255
1256/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
1257unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
1258
1259/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
1260/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
1261/// \p Lgkmcnt respectively. Should not be used on gfx12+, the instruction
1262/// which needs it is deprecated
1263///
1264/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
1265/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9)
1266/// \p Vmcnt = \p Waitcnt[15:14,3:0] (gfx9,10)
1267/// \p Vmcnt = \p Waitcnt[15:10] (gfx11)
1268/// \p Expcnt = \p Waitcnt[6:4] (pre-gfx11)
1269/// \p Expcnt = \p Waitcnt[2:0] (gfx11)
1270/// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10)
1271/// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10)
1272/// \p Lgkmcnt = \p Waitcnt[9:4] (gfx11)
1273///
1274void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt,
1275 unsigned &Expcnt, unsigned &Lgkmcnt);
1276
1277Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
1278
1279/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
1280unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1281 unsigned Vmcnt);
1282
1283/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
1284unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1285 unsigned Expcnt);
1286
1287/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
1288unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1289 unsigned Lgkmcnt);
1290
1291/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
1292/// \p Version. Should not be used on gfx12+, the instruction which needs
1293/// it is deprecated
1294///
1295/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
1296/// Waitcnt[2:0] = \p Expcnt (gfx11+)
1297/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9)
1298/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9,10)
1299/// Waitcnt[6:4] = \p Expcnt (pre-gfx11)
1300/// Waitcnt[9:4] = \p Lgkmcnt (gfx11)
1301/// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10)
1302/// Waitcnt[13:8] = \p Lgkmcnt (gfx10)
1303/// Waitcnt[15:10] = \p Vmcnt (gfx11)
1304/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9,10)
1305///
1306/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
1307/// isa \p Version.
1308///
1309unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
1310 unsigned Expcnt, unsigned Lgkmcnt);
1311
1312unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
1313
1314// The following methods are only meaningful on targets that support
1315// S_WAIT_*CNT, introduced with gfx12.
1316
1317/// \returns Loadcnt bit mask for given isa \p Version.
1318/// Returns 0 for versions that do not support LOADcnt
1319unsigned getLoadcntBitMask(const IsaVersion &Version);
1320
1321/// \returns Samplecnt bit mask for given isa \p Version.
1322/// Returns 0 for versions that do not support SAMPLEcnt
1323unsigned getSamplecntBitMask(const IsaVersion &Version);
1324
1325/// \returns Bvhcnt bit mask for given isa \p Version.
1326/// Returns 0 for versions that do not support BVHcnt
1327unsigned getBvhcntBitMask(const IsaVersion &Version);
1328
1329/// \returns Asynccnt bit mask for given isa \p Version.
1330/// Returns 0 for versions that do not support Asynccnt
1331unsigned getAsynccntBitMask(const IsaVersion &Version);
1332
1333/// \returns Dscnt bit mask for given isa \p Version.
1334/// Returns 0 for versions that do not support DScnt
1335unsigned getDscntBitMask(const IsaVersion &Version);
1336
/// \returns Kmcnt bit mask for given isa \p Version.
1338/// Returns 0 for versions that do not support KMcnt
1339unsigned getKmcntBitMask(const IsaVersion &Version);
1340
1341/// \returns Xcnt bit mask for given isa \p Version.
1342/// Returns 0 for versions that do not support Xcnt.
1343unsigned getXcntBitMask(const IsaVersion &Version);
1344
1345/// \return STOREcnt or VScnt bit mask for given isa \p Version.
1346/// returns 0 for versions that do not support STOREcnt or VScnt.
1347/// STOREcnt and VScnt are the same counter, the name used
1348/// depends on the ISA version.
1349unsigned getStorecntBitMask(const IsaVersion &Version);
1350
1351// The following are only meaningful on targets that support
1352// S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT.
1353
1354/// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given
1355/// isa \p Version.
1356Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt);
1357
1358/// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given
1359/// isa \p Version.
1360Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt);
1361
1362/// \returns \p Loadcnt and \p Dscnt components of \p Decoded encoded as an
1363/// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa
1364/// \p Version.
1365unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
1366
1367/// \returns \p Storecnt and \p Dscnt components of \p Decoded encoded as an
1368/// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa
1369/// \p Version.
1370unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
1371
1372namespace Hwreg {
1373
1374using HwregId = EncodingField<5, 0>;
1375using HwregOffset = EncodingField<10, 6>;
1376
// Hwreg size field. The logical value is a bit width in the range 1..32,
// but the hardware encoding is biased by one (stores width - 1).
struct HwregSize : EncodingField<15, 11, 32> {
  using EncodingField::EncodingField;
  // Encode as width - 1 (so a width of 32 fits in the 5-bit field).
  constexpr uint64_t encode() const { return Value - 1; }
  // Decode back to the logical width.
  static ValueType decode(uint64_t Encoded) { return Encoded + 1; }
};
1382
1383using HwregEncoding = EncodingFields<HwregId, HwregOffset, HwregSize>;
1384
1385} // namespace Hwreg
1386
1387namespace DepCtr {
1388
1389int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI);
1390int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
1391 const MCSubtargetInfo &STI);
1392bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
1393 const MCSubtargetInfo &STI);
1394bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
1395 bool &IsDefault, const MCSubtargetInfo &STI);
1396
1397/// \returns Maximum VaVdst value that can be encoded.
1398unsigned getVaVdstBitMask();
1399
1400/// \returns Maximum VaSdst value that can be encoded.
1401unsigned getVaSdstBitMask();
1402
1403/// \returns Maximum VaSsrc value that can be encoded.
1404unsigned getVaSsrcBitMask();
1405
1406/// \returns Maximum HoldCnt value that can be encoded.
1407unsigned getHoldCntBitMask(const IsaVersion &Version);
1408
1409/// \returns Maximum VmVsrc value that can be encoded.
1410unsigned getVmVsrcBitMask();
1411
1412/// \returns Maximum VaVcc value that can be encoded.
1413unsigned getVaVccBitMask();
1414
1415/// \returns Maximum SaSdst value that can be encoded.
1416unsigned getSaSdstBitMask();
1417
1418/// \returns Decoded VaVdst from given immediate \p Encoded.
1419unsigned decodeFieldVaVdst(unsigned Encoded);
1420
1421/// \returns Decoded VmVsrc from given immediate \p Encoded.
1422unsigned decodeFieldVmVsrc(unsigned Encoded);
1423
1424/// \returns Decoded SaSdst from given immediate \p Encoded.
1425unsigned decodeFieldSaSdst(unsigned Encoded);
1426
1427/// \returns Decoded VaSdst from given immediate \p Encoded.
1428unsigned decodeFieldVaSdst(unsigned Encoded);
1429
1430/// \returns Decoded VaVcc from given immediate \p Encoded.
1431unsigned decodeFieldVaVcc(unsigned Encoded);
1432
/// \returns Decoded VaSsrc from given immediate \p Encoded.
1434unsigned decodeFieldVaSsrc(unsigned Encoded);
1435
1436/// \returns Decoded HoldCnt from given immediate \p Encoded.
1437unsigned decodeFieldHoldCnt(unsigned Encoded, const IsaVersion &Version);
1438
1439/// \returns \p VmVsrc as an encoded Depctr immediate.
1440unsigned encodeFieldVmVsrc(unsigned VmVsrc, const MCSubtargetInfo &STI);
1441
1442/// \returns \p Encoded combined with encoded \p VmVsrc.
1443unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc);
1444
1445/// \returns \p VaVdst as an encoded Depctr immediate.
1446unsigned encodeFieldVaVdst(unsigned VaVdst, const MCSubtargetInfo &STI);
1447
1448/// \returns \p Encoded combined with encoded \p VaVdst.
1449unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst);
1450
1451/// \returns \p SaSdst as an encoded Depctr immediate.
1452unsigned encodeFieldSaSdst(unsigned SaSdst, const MCSubtargetInfo &STI);
1453
1454/// \returns \p Encoded combined with encoded \p SaSdst.
1455unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst);
1456
1457/// \returns \p VaSdst as an encoded Depctr immediate.
1458unsigned encodeFieldVaSdst(unsigned VaSdst, const MCSubtargetInfo &STI);
1459
1460/// \returns \p Encoded combined with encoded \p VaSdst.
1461unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst);
1462
1463/// \returns \p VaVcc as an encoded Depctr immediate.
1464unsigned encodeFieldVaVcc(unsigned VaVcc, const MCSubtargetInfo &STI);
1465
1466/// \returns \p Encoded combined with encoded \p VaVcc.
1467unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc);
1468
1469/// \returns \p HoldCnt as an encoded Depctr immediate.
1470unsigned encodeFieldHoldCnt(unsigned HoldCnt, const MCSubtargetInfo &STI);
1471
1472/// \returns \p Encoded combined with encoded \p HoldCnt.
1473unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt,
1474 const IsaVersion &Version);
1475
1476/// \returns \p VaSsrc as an encoded Depctr immediate.
1477unsigned encodeFieldVaSsrc(unsigned VaSsrc, const MCSubtargetInfo &STI);
1478
1479/// \returns \p Encoded combined with encoded \p VaSsrc.
1480unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc);
1481
1482} // namespace DepCtr
1483
1484namespace Exp {
1485
1486bool getTgtName(unsigned Id, StringRef &Name, int &Index);
1487
1488LLVM_READONLY
1489unsigned getTgtId(const StringRef Name);
1490
1491LLVM_READNONE
1492bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);
1493
1494} // namespace Exp
1495
1496namespace MTBUFFormat {
1497
1498LLVM_READNONE
1499int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);
1500
1501void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);
1502
1503int64_t getDfmt(const StringRef Name);
1504
1505StringRef getDfmtName(unsigned Id);
1506
1507int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);
1508
1509StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);
1510
1511bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);
1512
1513bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);
1514
1515int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI);
1516
1517StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI);
1518
1519bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI);
1520
1521int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
1522 const MCSubtargetInfo &STI);
1523
1524bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);
1525
1526unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);
1527
1528} // namespace MTBUFFormat
1529
1530namespace SendMsg {
1531
1532LLVM_READNONE
1533bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI);
1534
1535LLVM_READNONE
1536bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
1537 bool Strict = true);
1538
1539LLVM_READNONE
1540bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
1541 const MCSubtargetInfo &STI, bool Strict = true);
1542
1543LLVM_READNONE
1544bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI);
1545
1546LLVM_READNONE
1547bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);
1548
1549void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
1550 uint16_t &StreamId, const MCSubtargetInfo &STI);
1551
1552LLVM_READNONE
1553uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId);
1554
1555/// Returns true if the message does not use the m0 operand.
1556bool msgDoesNotUseM0(int64_t MsgId, const MCSubtargetInfo &STI);
1557
1558} // namespace SendMsg
1559
1560unsigned getInitialPSInputAddr(const Function &F);
1561
1562bool getHasColorExport(const Function &F);
1563
1564bool getHasDepthExport(const Function &F);
1565
1566bool hasDynamicVGPR(const Function &F);
1567
1568// Returns the value of the "amdgpu-dynamic-vgpr-block-size" attribute, or 0 if
1569// the attribute is missing or its value is invalid.
1570unsigned getDynamicVGPRBlockSize(const Function &F);
1571
1572LLVM_READNONE
1573constexpr bool isShader(CallingConv::ID CC) {
1574 switch (CC) {
1575 case CallingConv::AMDGPU_VS:
1576 case CallingConv::AMDGPU_LS:
1577 case CallingConv::AMDGPU_HS:
1578 case CallingConv::AMDGPU_ES:
1579 case CallingConv::AMDGPU_GS:
1580 case CallingConv::AMDGPU_PS:
1581 case CallingConv::AMDGPU_CS_Chain:
1582 case CallingConv::AMDGPU_CS_ChainPreserve:
1583 case CallingConv::AMDGPU_CS:
1584 return true;
1585 default:
1586 return false;
1587 }
1588}
1589
1590LLVM_READNONE
1591constexpr bool isGraphics(CallingConv::ID CC) {
1592 return isShader(CC) || CC == CallingConv::AMDGPU_Gfx ||
1593 CC == CallingConv::AMDGPU_Gfx_WholeWave;
1594}
1595
1596LLVM_READNONE
1597constexpr bool isCompute(CallingConv::ID CC) {
1598 return !isGraphics(CC) || CC == CallingConv::AMDGPU_CS;
1599}
1600
1601LLVM_READNONE
1602constexpr bool isEntryFunctionCC(CallingConv::ID CC) {
1603 switch (CC) {
1604 case CallingConv::AMDGPU_KERNEL:
1605 case CallingConv::SPIR_KERNEL:
1606 case CallingConv::AMDGPU_VS:
1607 case CallingConv::AMDGPU_GS:
1608 case CallingConv::AMDGPU_PS:
1609 case CallingConv::AMDGPU_CS:
1610 case CallingConv::AMDGPU_ES:
1611 case CallingConv::AMDGPU_HS:
1612 case CallingConv::AMDGPU_LS:
1613 return true;
1614 default:
1615 return false;
1616 }
1617}
1618
1619LLVM_READNONE
1620constexpr bool isChainCC(CallingConv::ID CC) {
1621 switch (CC) {
1622 case CallingConv::AMDGPU_CS_Chain:
1623 case CallingConv::AMDGPU_CS_ChainPreserve:
1624 return true;
1625 default:
1626 return false;
1627 }
1628}
1629
1630// These functions are considered entrypoints into the current module, i.e. they
1631// are allowed to be called from outside the current module. This is different
1632// from isEntryFunctionCC, which is only true for functions that are entered by
1633// the hardware. Module entry points include all entry functions but also
1634// include functions that can be called from other functions inside or outside
1635// the current module. Module entry functions are allowed to allocate LDS.
1636//
1637// AMDGPU_CS_Chain is intended for externally callable chain functions, so it is
1638// treated as a module entrypoint. AMDGPU_CS_ChainPreserve is used for internal
1639// helper functions (e.g. retry helpers), so it is not a module entrypoint.
1640LLVM_READNONE
1641constexpr bool isModuleEntryFunctionCC(CallingConv::ID CC) {
1642 switch (CC) {
1643 case CallingConv::AMDGPU_Gfx:
1644 case CallingConv::AMDGPU_CS_Chain:
1645 return true;
1646 default:
1647 return isEntryFunctionCC(CC);
1648 }
1649}
1650
1651LLVM_READNONE
1652constexpr inline bool isKernel(CallingConv::ID CC) {
1653 switch (CC) {
1654 case CallingConv::AMDGPU_KERNEL:
1655 case CallingConv::SPIR_KERNEL:
1656 return true;
1657 default:
1658 return false;
1659 }
1660}
1661
1662inline bool isKernel(const Function &F) { return isKernel(CC: F.getCallingConv()); }
1663
1664LLVM_READNONE
1665constexpr bool canGuaranteeTCO(CallingConv::ID CC) {
1666 return CC == CallingConv::Fast;
1667}
1668
1669/// Return true if we might ever do TCO for calls with this calling convention.
1670LLVM_READNONE
1671constexpr bool mayTailCallThisCC(CallingConv::ID CC) {
1672 switch (CC) {
1673 case CallingConv::C:
1674 case CallingConv::AMDGPU_Gfx:
1675 case CallingConv::AMDGPU_Gfx_WholeWave:
1676 return true;
1677 default:
1678 return canGuaranteeTCO(CC);
1679 }
1680}
1681
1682bool hasXNACK(const MCSubtargetInfo &STI);
1683bool hasSRAMECC(const MCSubtargetInfo &STI);
1684bool hasMIMG_R128(const MCSubtargetInfo &STI);
1685bool hasA16(const MCSubtargetInfo &STI);
1686bool hasG16(const MCSubtargetInfo &STI);
1687bool hasPackedD16(const MCSubtargetInfo &STI);
1688bool hasGDS(const MCSubtargetInfo &STI);
1689unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler = false);
1690unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI);
1691
1692bool isSI(const MCSubtargetInfo &STI);
1693bool isCI(const MCSubtargetInfo &STI);
1694bool isVI(const MCSubtargetInfo &STI);
1695bool isGFX9(const MCSubtargetInfo &STI);
1696bool isGFX9_GFX10(const MCSubtargetInfo &STI);
1697bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI);
1698bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI);
1699bool isGFX8Plus(const MCSubtargetInfo &STI);
1700bool isGFX9Plus(const MCSubtargetInfo &STI);
1701bool isNotGFX9Plus(const MCSubtargetInfo &STI);
1702bool isGFX10(const MCSubtargetInfo &STI);
1703bool isGFX10_GFX11(const MCSubtargetInfo &STI);
1704bool isGFX10Plus(const MCSubtargetInfo &STI);
1705bool isNotGFX10Plus(const MCSubtargetInfo &STI);
1706bool isGFX10Before1030(const MCSubtargetInfo &STI);
1707bool isGFX11(const MCSubtargetInfo &STI);
1708bool isGFX11Plus(const MCSubtargetInfo &STI);
1709bool isGFX12(const MCSubtargetInfo &STI);
1710bool isGFX12Plus(const MCSubtargetInfo &STI);
1711bool isGFX1250(const MCSubtargetInfo &STI);
1712bool isGFX1250Plus(const MCSubtargetInfo &STI);
1713bool isGFX13(const MCSubtargetInfo &STI);
1714bool isGFX13Plus(const MCSubtargetInfo &STI);
1715bool supportsWGP(const MCSubtargetInfo &STI);
1716bool isNotGFX12Plus(const MCSubtargetInfo &STI);
1717bool isNotGFX11Plus(const MCSubtargetInfo &STI);
1718bool isGCN3Encoding(const MCSubtargetInfo &STI);
1719bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
1720bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
1721bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
1722bool isGFX10_3_GFX11(const MCSubtargetInfo &STI);
1723bool isGFX90A(const MCSubtargetInfo &STI);
1724bool isGFX940(const MCSubtargetInfo &STI);
1725bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
1726bool hasMAIInsts(const MCSubtargetInfo &STI);
1727bool hasVOPD(const MCSubtargetInfo &STI);
1728bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI);
1729
1730inline bool supportsWave32(const MCSubtargetInfo &STI) {
1731 return AMDGPU::isGFX10Plus(STI) && !AMDGPU::isGFX1250(STI);
1732}
1733
1734int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
1735unsigned hasKernargPreload(const MCSubtargetInfo &STI);
1736bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST);
1737
1738/// Is Reg - scalar register
1739bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI);
1740
1741/// \returns if \p Reg occupies the high 16-bits of a 32-bit register.
1742bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI);
1743
1744/// If \p Reg is a pseudo reg, return the correct hardware register given
1745/// \p STI otherwise return \p Reg.
1746MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI);
1747
1748/// Convert hardware register \p Reg to a pseudo register
1749LLVM_READNONE
1750MCRegister mc2PseudoReg(MCRegister Reg);
1751
1752LLVM_READNONE
1753bool isInlineValue(MCRegister Reg);
1754
1755/// Is this an AMDGPU specific source operand? These include registers,
1756/// inline constants, literals and mandatory literals (KImm).
1757constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo) {
1758 return OpInfo.OperandType >= AMDGPU::OPERAND_SRC_FIRST &&
1759 OpInfo.OperandType <= AMDGPU::OPERAND_SRC_LAST;
1760}
1761
1762inline bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
1763 return isSISrcOperand(OpInfo: Desc.operands()[OpNo]);
1764}
1765
1766/// Is this a KImm operand?
1767bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo);
1768
1769/// Is this floating-point operand?
1770bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
1771
1772/// Does this operand support only inlinable literals?
1773bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
1774
1775/// Get the size in bits of a register from the register class \p RC.
1776unsigned getRegBitWidth(unsigned RCID);
1777
1778/// Get the size in bits of a register from the register class \p RC.
1779unsigned getRegBitWidth(const MCRegisterClass &RC);
1780
1781LLVM_READNONE
// \returns the size in bytes of the immediate encoding for the given AMDGPU
// operand type: 4 for 32-bit forms, 8 for 64-bit forms, 2 for 16-bit forms.
inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
  switch (OpInfo.OperandType) {
  // 32-bit operand types.
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_KIMM32:
  case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
  case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
    return 4;

  // 64-bit operand types.
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
  case AMDGPU::OPERAND_KIMM64:
    return 8;

  // 16-bit operand types (including packed 2x16 vector forms).
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_BF16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2BF16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
  case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
    return 2;

  default:
    llvm_unreachable("unhandled operand type");
  }
}
1825
1826LLVM_READNONE
1827inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
1828 return getOperandSize(OpInfo: Desc.operands()[OpNo]);
1829}
1830
1831/// Is this literal inlinable, and not one of the values intended for floating
1832/// point values.
1833LLVM_READNONE
inline bool isInlinableIntLiteral(int64_t Literal) {
  // Integer inline constants cover the closed interval [-16, 64].
  return -16 <= Literal && Literal <= 64;
}
1837
/// Is this literal inlinable? The 64/32/16-bit variants test \p Literal
/// against the inline-constant set for the corresponding operand width;
/// \p HasInv2Pi presumably gates the 1/(2*pi) inline constant — confirm in
/// the implementation.
LLVM_READNONE
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi);

/// \returns the inline encoding of \p Literal interpreted as a packed
/// 2 x 16-bit value, or std::nullopt when no inline encoding applies.
LLVM_READNONE
std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal);

LLVM_READNONE
std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal);

LLVM_READNONE
std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal);

LLVM_READNONE
std::optional<unsigned> getPKFMACF16InlineEncoding(uint32_t Literal,
                                                   bool IsGFX11Plus);

LLVM_READNONE
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType);

LLVM_READNONE
bool isInlinableLiteralV2I16(uint32_t Literal);

LLVM_READNONE
bool isInlinableLiteralV2BF16(uint32_t Literal);

LLVM_READNONE
bool isInlinableLiteralV2F16(uint32_t Literal);

LLVM_READNONE
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus);

/// \returns true if \p Val can be encoded as a 32-bit literal (\p IsFP64
/// presumably marks a 64-bit FP consuming operand — confirm in the
/// implementation).
LLVM_READNONE
bool isValid32BitLiteral(uint64_t Val, bool IsFP64);

LLVM_READNONE
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit);

/// \returns true if IR argument \p Arg is passed in an SGPR.
bool isArgPassedInSGPR(const Argument *Arg);

/// \returns true if call-site argument \p ArgNo of \p CB is passed in an
/// SGPR.
bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo);

LLVM_READONLY bool isPackedFP32Inst(unsigned Opc);

LLVM_READONLY
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset);

LLVM_READONLY
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset, bool IsBuffer);

/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
/// offsets.
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);

/// \returns The encoding that will be used for \p ByteOffset in the
/// SMRD offset field, or std::nullopt if it won't fit. On GFX9 and GFX10
/// S_LOAD instructions have a signed offset, on other subtargets it is
/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                            int64_t ByteOffset, bool IsBuffer,
                                            bool HasSOffset = false);

/// \return The encoding that can be used for a 32-bit literal offset in an SMRD
/// instruction. This is only useful on CI.
std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                     int64_t ByteOffset);

/// For pre-GFX12 FLAT instructions the offset must be positive;
/// MSB is ignored and forced to zero.
///
/// \return The number of bits available for the signed offset field in flat
/// instructions. Note that some forms of the instruction disallow negative
/// offsets.
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST);

/// \returns true if this offset is small enough to fit in the SMRD
/// offset field. \p ByteOffset should be the offset in bytes and
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
1931
1932LLVM_READNONE
1933inline bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC) {
1934 if (isGFX12(STI: ST))
1935 return DC >= DPP::ROW_SHARE_FIRST && DC <= DPP::ROW_SHARE_LAST;
1936 if (isGFX90A(STI: ST))
1937 return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
1938 return false;
1939}
1940
/// \returns true if an instruction may have a 64-bit VGPR operand.
bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc,
                             const MCSubtargetInfo &ST);

/// \returns true if an instruction is a DP ALU DPP without any 64-bit
/// operands.
bool isDPALU_DPP32BitOpc(unsigned Opc);

/// \returns true if an instruction is a DP ALU DPP.
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII,
                 const MCSubtargetInfo &ST);

/// \returns true if the intrinsic is divergent.
bool isIntrinsicSourceOfDivergence(unsigned IntrID);

/// \returns true if the intrinsic is uniform.
bool isIntrinsicAlwaysUniform(unsigned IntrID);

/// \returns a register class for the physical register \p Reg if it is a VGPR
/// or nullptr otherwise.
const MCRegisterClass *getVGPRPhysRegClass(MCRegister Reg,
                                           const MCRegisterInfo &MRI);

/// \returns the MODE bits which have to be set by the S_SET_VGPR_MSB for the
/// physical register \p Reg.
unsigned getVGPREncodingMSBs(MCRegister Reg, const MCRegisterInfo &MRI);

/// If \p Reg is a low VGPR return a corresponding high VGPR with \p MSBs set.
MCRegister getVGPRWithMSBs(MCRegister Reg, unsigned MSBs,
                           const MCRegisterInfo &MRI);

/// \returns VGPR MSBs encoded in a S_SETREG_IMM32_B32 \p MI if it sets
/// it. If \p HasSetregVGPRMSBFixup is true then the size of the ID_MODE mask
/// is ignored. MachineInstr variant.
std::optional<unsigned> convertSetRegImmToVgprMSBs(const MachineInstr &MI,
                                                   bool HasSetregVGPRMSBFixup);

/// \returns VGPR MSBs encoded in a S_SETREG_IMM32_B32 \p MI if it sets
/// it. If \p HasSetregVGPRMSBFixup is true then the size of the ID_MODE mask
/// is ignored. MCInst variant.
std::optional<unsigned> convertSetRegImmToVgprMSBs(const MCInst &MI,
                                                   bool HasSetregVGPRMSBFixup);

// Returns a table for the opcode with a given \p Desc to map the VGPR MSB
// set by the S_SET_VGPR_MSB to one of 4 sources. In case of VOPD returns 2
// maps, one for X and one for Y component.
std::pair<const AMDGPU::OpName *, const AMDGPU::OpName *>
getVGPRLoweringOperandTables(const MCInstrDesc &Desc);

/// \returns true if a memory instruction supports the scale_offset modifier.
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode);

/// \returns lds block size in terms of dwords.
/// This is used to calculate the lds size encoded for PAL metadata 3.0+ which
/// must be defined in terms of bytes.
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST);
1996
1997class ClusterDimsAttr {
1998public:
1999 enum class Kind { Unknown, NoCluster, VariableDims, FixedDims };
2000
2001 ClusterDimsAttr() = default;
2002
2003 Kind getKind() const { return AttrKind; }
2004
2005 bool isUnknown() const { return getKind() == Kind::Unknown; }
2006
2007 bool isNoCluster() const { return getKind() == Kind::NoCluster; }
2008
2009 bool isFixedDims() const { return getKind() == Kind::FixedDims; }
2010
2011 bool isVariableDims() const { return getKind() == Kind::VariableDims; }
2012
2013 void setUnknown() { *this = ClusterDimsAttr(Kind::Unknown); }
2014
2015 void setNoCluster() { *this = ClusterDimsAttr(Kind::NoCluster); }
2016
2017 void setVariableDims() { *this = ClusterDimsAttr(Kind::VariableDims); }
2018
2019 /// \returns the dims stored. Note that this function can only be called if
2020 /// the kind is \p Fixed.
2021 const std::array<unsigned, 3> &getDims() const;
2022
2023 bool operator==(const ClusterDimsAttr &RHS) const {
2024 return AttrKind == RHS.AttrKind && Dims == RHS.Dims;
2025 }
2026
2027 std::string to_string() const;
2028
2029 static ClusterDimsAttr get(const Function &F);
2030
2031private:
2032 enum Encoding { EncoNoCluster = 0, EncoVariableDims = 1024 };
2033
2034 ClusterDimsAttr(Kind AttrKind) : AttrKind(AttrKind) {}
2035
2036 std::array<unsigned, 3> Dims = {0, 0, 0};
2037
2038 Kind AttrKind = Kind::Unknown;
2039};
2040
2041} // namespace AMDGPU
2042
/// Stream a textual representation of the target-ID setting \p S to \p OS.
raw_ostream &operator<<(raw_ostream &OS,
                        const AMDGPU::IsaInfo::TargetIDSetting S);
2045
2046} // end namespace llvm
2047
2048#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
2049