1//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11
12#include "AMDGPUSubtarget.h"
13#include "SIDefines.h"
14#include "llvm/IR/CallingConv.h"
15#include "llvm/IR/InstrTypes.h"
16#include "llvm/IR/Module.h"
17#include "llvm/Support/Alignment.h"
18#include <array>
19#include <functional>
20#include <utility>
21
22// Pull in OpName enum definition and getNamedOperandIdx() declaration.
23#define GET_INSTRINFO_OPERAND_ENUM
24#include "AMDGPUGenInstrInfo.inc"
25
26struct amd_kernel_code_t;
27
28namespace llvm {
29
30struct Align;
31class Argument;
32class Function;
33class GlobalValue;
34class MachineInstr;
35class MCInstrInfo;
36class MCRegisterClass;
37class MCRegisterInfo;
38class MCSubtargetInfo;
39class MDNode;
40class StringRef;
41class Triple;
42class raw_ostream;
43
44namespace AMDGPU {
45
46struct AMDGPUMCKernelCodeT;
47struct IsaVersion;
48
49/// Generic target versions emitted by this version of LLVM.
50///
51/// These numbers are incremented every time a codegen breaking change occurs
52/// within a generic family.
namespace GenericVersion {
// All generic families are currently at their initial version.
static constexpr unsigned GFX9 = 1;
static constexpr unsigned GFX9_4 = 1;
static constexpr unsigned GFX10_1 = 1;
static constexpr unsigned GFX10_3 = 1;
static constexpr unsigned GFX11 = 1;
static constexpr unsigned GFX12 = 1;
static constexpr unsigned GFX12_5 = 1;
} // namespace GenericVersion
62
// Supported AMDHSA code object versions (see getAMDHSACodeObjectVersion).
enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 };

// Small floating-point format kinds.
enum class FPType { None, FP4, FP8 };
66
67/// \returns True if \p STI is AMDHSA.
68bool isHsaAbi(const MCSubtargetInfo &STI);
69
70/// \returns Code object version from the IR module flag.
71unsigned getAMDHSACodeObjectVersion(const Module &M);
72
73/// \returns Code object version from ELF's e_ident[EI_ABIVERSION].
74unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion);
75
76/// \returns The default HSA code object version. This should only be used when
77/// we lack a more accurate CodeObjectVersion value (e.g. from the IR module
78/// flag or a .amdhsa_code_object_version directive)
79unsigned getDefaultAMDHSACodeObjectVersion();
80
81/// \returns ABIVersion suitable for use in ELF's e_ident[EI_ABIVERSION]. \param
82/// CodeObjectVersion is a value returned by getAMDHSACodeObjectVersion().
83uint8_t getELFABIVersion(const Triple &OS, unsigned CodeObjectVersion);
84
85/// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr
86unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV);
87
88/// \returns The offset of the hostcall pointer argument from implicitarg_ptr
89unsigned getHostcallImplicitArgPosition(unsigned COV);
90
91unsigned getDefaultQueueImplicitArgPosition(unsigned COV);
92unsigned getCompletionActionImplicitArgPosition(unsigned COV);
93
// Buffer format description; rows are looked up by getGcnBufferFormatInfo.
struct GcnBufferFormatInfo {
  unsigned Format;        // Combined format value.
  unsigned BitsPerComp;   // Bits per component.
  unsigned NumComponents; // Number of components.
  unsigned NumFormat;     // Numeric format value.
  unsigned DataFormat;    // Data format value.
};
101
// Per-opcode MAI instruction properties (see getMAIIsDGEMM and
// getMAIIsGFX940XDL below).
struct MAIInstInfo {
  uint32_t Opcode;
  bool is_dgemm;      // True for double-precision GEMM operations.
  bool is_gfx940_xdl; // True for gfx940 XDL operations.
};
107
// MFMA F8F6F4 variant description (see getMFMA_F8F6F4_WithFormatArgs).
struct MFMA_F8F6F4_Info {
  unsigned Opcode;
  unsigned F8F8Opcode; // Corresponding F8F8 opcode.
  uint8_t NumRegsSrcA; // Registers consumed by src A.
  uint8_t NumRegsSrcB; // Registers consumed by src B.
};
114
// Row of the generated isCvtScaleF32_F32F16ToF8F4 searchable table.
struct CvtScaleF32_F32F16ToF8F4_Info {
  unsigned Opcode;
};
118
// Maps a True16 opcode to its D16 hi/lo counterparts (row of the generated
// True16D16 searchable table).
struct True16D16Info {
  unsigned T16Op; // True16 opcode.
  unsigned HiOp;  // D16 hi-half opcode.
  unsigned LoOp;  // D16 lo-half opcode.
};
124
// Per-opcode WMMA instruction properties (see getWMMAIsXDL below).
struct WMMAInstInfo {
  uint32_t Opcode;
  bool is_wmma_xdl; // True for XDL WMMA operations.
};
129
130#define GET_MIMGBaseOpcode_DECL
131#define GET_MIMGDim_DECL
132#define GET_MIMGEncoding_DECL
133#define GET_MIMGLZMapping_DECL
134#define GET_MIMGMIPMapping_DECL
135#define GET_MIMGBiASMapping_DECL
136#define GET_MAIInstInfoTable_DECL
137#define GET_isMFMA_F8F6F4Table_DECL
138#define GET_isCvtScaleF32_F32F16ToF8F4Table_DECL
139#define GET_True16D16Table_DECL
140#define GET_WMMAInstInfoTable_DECL
141#include "AMDGPUGenSearchableTables.inc"
142
143namespace IsaInfo {
144
enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  TRAP_NUM_SGPRS = 16 // SGPRs set aside for trap handling.
};

// State of a single target-id feature (xnack or sramecc).
enum class TargetIDSetting { Unsupported, Any, Off, On };
153
// Tracks the xnack and sramecc target-id settings for a subtarget.
class AMDGPUTargetID {
private:
  const MCSubtargetInfo &STI; // Subtarget the settings are derived from.
  TargetIDSetting XnackSetting;   // Current xnack setting.
  TargetIDSetting SramEccSetting; // Current sramecc setting.

public:
  explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
  ~AMDGPUTargetID() = default;

  /// \return True if the current xnack setting is not "Unsupported".
  bool isXnackSupported() const {
    return XnackSetting != TargetIDSetting::Unsupported;
  }

  /// \returns True if the current xnack setting is "On" or "Any".
  bool isXnackOnOrAny() const {
    return XnackSetting == TargetIDSetting::On ||
           XnackSetting == TargetIDSetting::Any;
  }

  /// \returns True if current xnack setting is "On" or "Off",
  /// false otherwise.
  bool isXnackOnOrOff() const {
    return getXnackSetting() == TargetIDSetting::On ||
           getXnackSetting() == TargetIDSetting::Off;
  }

  /// \returns The current xnack TargetIDSetting, possible options are
  /// "Unsupported", "Any", "Off", and "On".
  TargetIDSetting getXnackSetting() const { return XnackSetting; }

  /// Sets xnack setting to \p NewXnackSetting.
  void setXnackSetting(TargetIDSetting NewXnackSetting) {
    XnackSetting = NewXnackSetting;
  }

  /// \return True if the current sramecc setting is not "Unsupported".
  bool isSramEccSupported() const {
    return SramEccSetting != TargetIDSetting::Unsupported;
  }

  /// \returns True if the current sramecc setting is "On" or "Any".
  bool isSramEccOnOrAny() const {
    return SramEccSetting == TargetIDSetting::On ||
           SramEccSetting == TargetIDSetting::Any;
  }

  /// \returns True if current sramecc setting is "On" or "Off",
  /// false otherwise.
  bool isSramEccOnOrOff() const {
    return getSramEccSetting() == TargetIDSetting::On ||
           getSramEccSetting() == TargetIDSetting::Off;
  }

  /// \returns The current sramecc TargetIDSetting, possible options are
  /// "Unsupported", "Any", "Off", and "On".
  TargetIDSetting getSramEccSetting() const { return SramEccSetting; }

  /// Sets sramecc setting to \p NewSramEccSetting.
  void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
    SramEccSetting = NewSramEccSetting;
  }

  /// Derive both settings from a subtarget features string \p FS.
  void setTargetIDFromFeaturesString(StringRef FS);
  /// Derive both settings from a target-id string \p TargetID.
  void setTargetIDFromTargetIDStream(StringRef TargetID);

  /// Write string representation to \p OS
  void print(raw_ostream &OS) const;

  /// \returns String representation of an object.
  std::string toString() const;
};
227
/// Prints the string representation of \p TargetID to \p OS.
inline raw_ostream &operator<<(raw_ostream &OS,
                               const AMDGPUTargetID &TargetID) {
  TargetID.print(OS);
  return OS;
}
233
234/// \returns Wavefront size for given subtarget \p STI.
235unsigned getWavefrontSize(const MCSubtargetInfo *STI);
236
237/// \returns Local memory size in bytes for given subtarget \p STI.
238unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
239
240/// \returns Maximum addressable local memory size in bytes for given subtarget
241/// \p STI.
242unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI);
243
244/// \returns Number of execution units per compute unit for given subtarget \p
245/// STI.
246unsigned getEUsPerCU(const MCSubtargetInfo *STI);
247
248/// \returns Maximum number of work groups per compute unit for given subtarget
249/// \p STI and limited by given \p FlatWorkGroupSize.
250unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
251 unsigned FlatWorkGroupSize);
252
253/// \returns Minimum number of waves per execution unit for given subtarget \p
254/// STI.
255unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
256
257/// \returns Maximum number of waves per execution unit for given subtarget \p
258/// STI without any kind of limitation.
259unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
260
261/// \returns Number of waves per execution unit required to support the given \p
262/// FlatWorkGroupSize.
263unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
264 unsigned FlatWorkGroupSize);
265
266/// \returns Minimum flat work group size for given subtarget \p STI.
267unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
268
/// \returns Maximum flat work group size
constexpr unsigned getMaxFlatWorkGroupSize() {
  // Some subtargets can encode up to 2048, but that path is neither tested
  // nor supported, so the limit stays at 1024.
  constexpr unsigned MaxSize = 1024;
  return MaxSize;
}
274
275/// \returns Number of waves per work group for given subtarget \p STI and
276/// \p FlatWorkGroupSize.
277unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
278 unsigned FlatWorkGroupSize);
279
280/// \returns SGPR allocation granularity for given subtarget \p STI.
281unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
282
283/// \returns SGPR encoding granularity for given subtarget \p STI.
284unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
285
286/// \returns Total number of SGPRs for given subtarget \p STI.
287unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
288
289/// \returns Addressable number of SGPRs for given subtarget \p STI.
290unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
291
292/// \returns Minimum number of SGPRs that meets the given number of waves per
293/// execution unit requirement for given subtarget \p STI.
294unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
295
296/// \returns Maximum number of SGPRs that meets the given number of waves per
297/// execution unit requirement for given subtarget \p STI.
298unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
299 bool Addressable);
300
301/// \returns Number of extra SGPRs implicitly required by given subtarget \p
302/// STI when the given special registers are used.
303unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
304 bool FlatScrUsed, bool XNACKUsed);
305
306/// \returns Number of extra SGPRs implicitly required by given subtarget \p
307/// STI when the given special registers are used. XNACK is inferred from
308/// \p STI.
309unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
310 bool FlatScrUsed);
311
312/// \returns Number of SGPR blocks needed for given subtarget \p STI when
313/// \p NumSGPRs are used. \p NumSGPRs should already include any special
314/// register counts.
315unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
316
317/// \returns VGPR allocation granularity for given subtarget \p STI.
318///
319/// For subtargets which support it, \p EnableWavefrontSize32 should match
320/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
321unsigned
322getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize,
323 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
324
325/// \returns VGPR encoding granularity for given subtarget \p STI.
326///
327/// For subtargets which support it, \p EnableWavefrontSize32 should match
328/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
329unsigned getVGPREncodingGranule(
330 const MCSubtargetInfo *STI,
331 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
332
333/// For subtargets with a unified VGPR file and mixed ArchVGPR/AGPR usage,
334/// returns the allocation granule for ArchVGPRs.
335unsigned getArchVGPRAllocGranule();
336
337/// \returns Total number of VGPRs for given subtarget \p STI.
338unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
339
340/// \returns Addressable number of architectural VGPRs for a given subtarget \p
341/// STI.
342unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI);
343
344/// \returns Addressable number of VGPRs for given subtarget \p STI.
345unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI,
346 unsigned DynamicVGPRBlockSize);
347
348/// \returns Minimum number of VGPRs that meets given number of waves per
349/// execution unit requirement for given subtarget \p STI.
350unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
351 unsigned DynamicVGPRBlockSize);
352
353/// \returns Maximum number of VGPRs that meets given number of waves per
354/// execution unit requirement for given subtarget \p STI.
355unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
356 unsigned DynamicVGPRBlockSize);
357
358/// \returns Number of waves reachable for a given \p NumVGPRs usage for given
359/// subtarget \p STI.
360unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
361 unsigned NumVGPRs,
362 unsigned DynamicVGPRBlockSize);
363
364/// \returns Number of waves reachable for a given \p NumVGPRs usage, \p Granule
365/// size, \p MaxWaves possible, and \p TotalNumVGPRs available.
366unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
367 unsigned MaxWaves,
368 unsigned TotalNumVGPRs);
369
370/// \returns Occupancy for a given \p SGPRs usage, \p MaxWaves possible, and \p
371/// Gen.
372unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
373 AMDGPUSubtarget::Generation Gen);
374
375/// \returns Number of VGPR blocks needed for given subtarget \p STI when
376/// \p NumVGPRs are used. We actually return the number of blocks -1, since
377/// that's what we encode.
378///
379/// For subtargets which support it, \p EnableWavefrontSize32 should match the
380/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
381unsigned getEncodedNumVGPRBlocks(
382 const MCSubtargetInfo *STI, unsigned NumVGPRs,
383 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
384
385/// \returns Number of VGPR blocks that need to be allocated for the given
386/// subtarget \p STI when \p NumVGPRs are used.
387unsigned getAllocatedNumVGPRBlocks(
388 const MCSubtargetInfo *STI, unsigned NumVGPRs,
389 unsigned DynamicVGPRBlockSize,
390 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
391
392} // end namespace IsaInfo
393
// Represents a field in an encoded value.
template <unsigned HighBit, unsigned LowBit, unsigned D = 0>
struct EncodingField {
  static_assert(HighBit >= LowBit, "Invalid bit range!");

  // Bit position of the field's least significant bit in the encoding.
  static constexpr unsigned Offset = LowBit;
  // Number of bits the field occupies.
  static constexpr unsigned Width = HighBit + 1 - LowBit;

  using ValueType = unsigned;
  // Value assumed when the field is left unspecified.
  static constexpr ValueType Default = D;

  ValueType Value;

  constexpr EncodingField(ValueType Value) : Value(Value) {}

  // The raw (unshifted) contribution of this field to an encoding.
  constexpr uint64_t encode() const { return Value; }

  // Interpret already shifted-and-masked bits as a field value.
  static ValueType decode(uint64_t Encoded) { return Encoded; }
};

// Represents a single bit in an encoded value.
template <unsigned Bit, unsigned D = 0>
using EncodingBit = EncodingField<Bit, Bit, D>;
414
// A helper for encoding and decoding multiple fields.
template <typename... Fields> struct EncodingFields {
  // OR together each field's raw bits shifted to its own offset.
  static constexpr uint64_t encode(Fields... Values) {
    return ((Values.encode() << Values.Offset) | ...);
  }

  // For each field: shift down to its offset, mask to its width, and let
  // the field type decode the raw bits. Results are returned positionally.
  static std::tuple<typename Fields::ValueType...> decode(uint64_t Encoded) {
    return {Fields::decode((Encoded >> Fields::Offset) &
                           maxUIntN(Fields::Width))...};
  }
};
426
427LLVM_READONLY
428inline bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx) {
429 return getNamedOperandIdx(Opcode, Name: NamedIdx) != -1;
430}
431
432LLVM_READONLY
433int32_t getSOPPWithRelaxation(uint32_t Opcode);
434
// Static properties of a MIMG base opcode (one row of the generated
// MIMGBaseOpcode searchable table; see GET_MIMGBaseOpcode_DECL above).
struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode;
  bool Store;
  bool Atomic;
  bool AtomicX2;
  bool Sampler;
  bool Gather4;

  uint8_t NumExtraArgs;
  bool Gradients;
  bool G16;
  bool Coordinates;
  bool LodOrClampOrMip;
  bool HasD16;
  bool MSAA;
  bool BVH;
  bool A16;
  bool NoReturn;
  bool PointSampleAccel;
};
455
456LLVM_READONLY
457const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc);
458
459LLVM_READONLY
460const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
461
// Properties of a MIMG dimension value.
struct MIMGDimInfo {
  MIMGDim Dim;
  uint8_t NumCoords;    // Number of coordinate components.
  uint8_t NumGradients; // Number of gradient components.
  bool MSAA;
  bool DA;
  uint8_t Encoding;      // Encoded dim value (see getMIMGDimInfoByEncoding).
  const char *AsmSuffix; // Assembly suffix (see getMIMGDimInfoByAsmSuffix).
};
471
472LLVM_READONLY
473const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
474
475LLVM_READONLY
476const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
477
478LLVM_READONLY
479const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
480
// Pairs an L-variant MIMG base opcode with its LZ counterpart
// (see getMIMGLZMappingInfo).
struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;
  MIMGBaseOpcode LZ;
};
485
// Pairs a MIP-variant MIMG base opcode with its non-MIP counterpart
// (see getMIMGMIPMappingInfo).
struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;
  MIMGBaseOpcode NONMIP;
};
490
// Pairs a bias-variant MIMG base opcode with its no-bias counterpart
// (see getMIMGBiasMappingInfo).
struct MIMGBiasMappingInfo {
  MIMGBaseOpcode Bias;
  MIMGBaseOpcode NoBias;
};
495
// Pairs an offset-variant MIMG base opcode with its no-offset counterpart
// (see getMIMGOffsetMappingInfo).
struct MIMGOffsetMappingInfo {
  MIMGBaseOpcode Offset;
  MIMGBaseOpcode NoOffset;
};
500
// Pairs a G-variant MIMG base opcode with its G16 counterpart
// (see getMIMGG16MappingInfo).
struct MIMGG16MappingInfo {
  MIMGBaseOpcode G;
  MIMGBaseOpcode G16;
};
505
506LLVM_READONLY
507const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
508
// Pairs the 2-address and 3-address forms of a WMMA opcode.
struct WMMAOpcodeMappingInfo {
  unsigned Opcode2Addr;
  unsigned Opcode3Addr;
};
513
514LLVM_READONLY
515const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
516
517LLVM_READONLY
518const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);
519
520LLVM_READONLY
521const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset);
522
523LLVM_READONLY
524const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);
525
526LLVM_READONLY
527int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
528 unsigned VDataDwords, unsigned VAddrDwords);
529
530LLVM_READONLY
531int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
532
533LLVM_READONLY
534unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
535 const MIMGDimInfo *Dim, bool IsA16,
536 bool IsG16Supported);
537
// Per-opcode MIMG information (see getMIMGInfo).
struct MIMGInfo {
  uint32_t Opcode;
  uint32_t BaseOpcode;   // Associated MIMG base opcode.
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;   // Dwords of vdata.
  uint8_t VAddrDwords;   // Dwords of vaddr.
  uint8_t VAddrOperands; // Number of vaddr operands.
};
546
547LLVM_READONLY
548const MIMGInfo *getMIMGInfo(unsigned Opc);
549
550LLVM_READONLY
551int getMTBUFBaseOpcode(unsigned Opc);
552
553LLVM_READONLY
554int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
555
556LLVM_READONLY
557int getMTBUFElements(unsigned Opc);
558
559LLVM_READONLY
560bool getMTBUFHasVAddr(unsigned Opc);
561
562LLVM_READONLY
563bool getMTBUFHasSrsrc(unsigned Opc);
564
565LLVM_READONLY
566bool getMTBUFHasSoffset(unsigned Opc);
567
568LLVM_READONLY
569int getMUBUFBaseOpcode(unsigned Opc);
570
571LLVM_READONLY
572int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);
573
574LLVM_READONLY
575int getMUBUFElements(unsigned Opc);
576
577LLVM_READONLY
578bool getMUBUFHasVAddr(unsigned Opc);
579
580LLVM_READONLY
581bool getMUBUFHasSrsrc(unsigned Opc);
582
583LLVM_READONLY
584bool getMUBUFHasSoffset(unsigned Opc);
585
586LLVM_READONLY
587bool getMUBUFIsBufferInv(unsigned Opc);
588
589LLVM_READONLY
590bool getMUBUFTfe(unsigned Opc);
591
592LLVM_READONLY
593bool getSMEMIsBuffer(unsigned Opc);
594
595LLVM_READONLY
596bool getVOP1IsSingle(unsigned Opc);
597
598LLVM_READONLY
599bool getVOP2IsSingle(unsigned Opc);
600
601LLVM_READONLY
602bool getVOP3IsSingle(unsigned Opc);
603
604LLVM_READONLY
605bool isVOPC64DPP(unsigned Opc);
606
607LLVM_READONLY
608bool isVOPCAsmOnly(unsigned Opc);
609
610/// Returns true if MAI operation is a double precision GEMM.
611LLVM_READONLY
612bool getMAIIsDGEMM(unsigned Opc);
613
614LLVM_READONLY
615bool getMAIIsGFX940XDL(unsigned Opc);
616
617LLVM_READONLY
618bool getWMMAIsXDL(unsigned Opc);
619
620// Get an equivalent BitOp3 for a binary logical \p Opc.
621// \returns BitOp3 modifier for the logical operation or zero.
622// Used in VOPD3 conversion.
623unsigned getBitOp2(unsigned Opc);
624
// Whether an opcode can be used as the X and/or Y component of a VOPD
// instruction (see getCanBeVOPD).
struct CanBeVOPD {
  bool X;
  bool Y;
};
629
630/// \returns SIEncodingFamily used for VOPD encoding on a \p ST.
631LLVM_READONLY
632unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST);
633
634LLVM_READONLY
635CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3);
636
637LLVM_READNONE
638uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal);
639
640LLVM_READONLY
641const MFMA_F8F6F4_Info *getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ,
642 unsigned BLGP,
643 unsigned F8F8Opcode);
644
645LLVM_READNONE
646uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt);
647
648LLVM_READONLY
649const MFMA_F8F6F4_Info *getWMMA_F8F6F4_WithFormatArgs(unsigned FmtA,
650 unsigned FmtB,
651 unsigned F8F8Opcode);
652
653LLVM_READONLY
654const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
655 uint8_t NumComponents,
656 uint8_t NumFormat,
657 const MCSubtargetInfo &STI);
658LLVM_READONLY
659const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
660 const MCSubtargetInfo &STI);
661
662LLVM_READONLY
663int32_t getMCOpcode(uint32_t Opcode, unsigned Gen);
664
665LLVM_READONLY
666unsigned getVOPDOpcode(unsigned Opc, bool VOPD3);
667
668LLVM_READONLY
669int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily,
670 bool VOPD3);
671
672LLVM_READONLY
673bool isVOPD(unsigned Opc);
674
675LLVM_READNONE
676bool isMAC(unsigned Opc);
677
678LLVM_READNONE
679bool isPermlane16(unsigned Opc);
680
681LLVM_READNONE
682bool isGenericAtomic(unsigned Opc);
683
684LLVM_READNONE
685bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc);
686
687namespace VOPD {
688
// Operand slots of a single VOPD component.
enum Component : unsigned {
  DST = 0,
  SRC0,
  SRC1,
  SRC2,

  DST_NUM = 1,     // Number of dst operands per component.
  MAX_SRC_NUM = 3, // Maximum number of src operands per component.
  MAX_OPR_NUM = DST_NUM + MAX_SRC_NUM
};
699
// LSB mask for VGPR banks per VOPD component operand.
// 4 banks result in a mask 3, setting 2 lower bits.
// Indexed by Component (DST, SRC0, SRC1, SRC2).
constexpr unsigned VOPD_VGPR_BANK_MASKS[] = {1, 3, 3, 1};
constexpr unsigned VOPD3_VGPR_BANK_MASKS[] = {1, 3, 3, 3};

// Selects the X or Y half of a VOPD instruction.
enum ComponentIndex : unsigned { X = 0, Y = 1 };
constexpr unsigned COMPONENTS[] = {ComponentIndex::X, ComponentIndex::Y};
constexpr unsigned COMPONENTS_NUM = 2;
708
709// Properties of VOPD components.
710class ComponentProps {
711private:
712 unsigned SrcOperandsNum = 0;
713 unsigned MandatoryLiteralIdx = ~0u;
714 bool HasSrc2Acc = false;
715 unsigned NumVOPD3Mods = 0;
716 unsigned Opcode = 0;
717 bool IsVOP3 = false;
718
719public:
720 ComponentProps() = default;
721 ComponentProps(const MCInstrDesc &OpDesc, bool VOP3Layout = false);
722
723 // Return the total number of src operands this component has.
724 unsigned getCompSrcOperandsNum() const { return SrcOperandsNum; }
725
726 // Return the number of src operands of this component visible to the parser.
727 unsigned getCompParsedSrcOperandsNum() const {
728 return SrcOperandsNum - HasSrc2Acc;
729 }
730
731 // Return true iif this component has a mandatory literal.
732 bool hasMandatoryLiteral() const { return MandatoryLiteralIdx != ~0u; }
733
734 // If this component has a mandatory literal, return component operand
735 // index of this literal (i.e. either Component::SRC1 or Component::SRC2).
736 unsigned getMandatoryLiteralCompOperandIndex() const {
737 assert(hasMandatoryLiteral());
738 return MandatoryLiteralIdx;
739 }
740
741 // Return true iif this component has operand
742 // with component index CompSrcIdx and this operand may be a register.
743 bool hasRegSrcOperand(unsigned CompSrcIdx) const {
744 assert(CompSrcIdx < Component::MAX_SRC_NUM);
745 return SrcOperandsNum > CompSrcIdx && !hasMandatoryLiteralAt(CompSrcIdx);
746 }
747
748 // Return true iif this component has tied src2.
749 bool hasSrc2Acc() const { return HasSrc2Acc; }
750
751 // Return a number of source modifiers if instruction is used in VOPD3.
752 unsigned getCompVOPD3ModsNum() const { return NumVOPD3Mods; }
753
754 // Return opcode of the component.
755 unsigned getOpcode() const { return Opcode; }
756
757 // Returns if component opcode is in VOP3 encoding.
758 unsigned isVOP3() const { return IsVOP3; }
759
760 // Return index of BitOp3 operand or -1.
761 int getBitOp3OperandIdx() const;
762
763private:
764 bool hasMandatoryLiteralAt(unsigned CompSrcIdx) const {
765 assert(CompSrcIdx < Component::MAX_SRC_NUM);
766 return MandatoryLiteralIdx == Component::DST_NUM + CompSrcIdx;
767 }
768};
769
// Kind of instruction a component layout describes; used to index the
// per-kind operand-index tables in ComponentLayout.
enum ComponentKind : unsigned {
  SINGLE = 0, // A single VOP1 or VOP2 instruction which may be used in VOPD.
  COMPONENT_X, // A VOPD instruction, X component.
  COMPONENT_Y, // A VOPD instruction, Y component.
  MAX = COMPONENT_Y
};
776
777// Interface functions of this class map VOPD component operand indices
778// to indices of operands in MachineInstr/MCInst or parsed operands array.
779//
780// Note that this class operates with 3 kinds of indices:
781// - VOPD component operand indices (Component::DST, Component::SRC0, etc.);
782// - MC operand indices (they refer operands in a MachineInstr/MCInst);
783// - parsed operand indices (they refer operands in parsed operands array).
784//
785// For SINGLE components mapping between these indices is trivial.
786// But things get more complicated for COMPONENT_X and
787// COMPONENT_Y because these components share the same
788// MachineInstr/MCInst and the same parsed operands array.
789// Below is an example of component operand to parsed operand
790// mapping for the following instruction:
791//
792// v_dual_add_f32 v255, v4, v5 :: v_dual_mov_b32 v6, v1
793//
794// PARSED COMPONENT PARSED
795// COMPONENT OPERANDS OPERAND INDEX OPERAND INDEX
796// -------------------------------------------------------------------
797// "v_dual_add_f32" 0
798// v_dual_add_f32 v255 0 (DST) --> 1
799// v4 1 (SRC0) --> 2
800// v5 2 (SRC1) --> 3
801// "::" 4
802// "v_dual_mov_b32" 5
803// v_dual_mov_b32 v6 0 (DST) --> 6
804// v1 1 (SRC0) --> 7
805// -------------------------------------------------------------------
806//
class ComponentLayout {
private:
  // Regular MachineInstr/MCInst operands are ordered as follows:
  //   dst, src0 [, other src operands]
  // VOPD MachineInstr/MCInst operands are ordered as follows:
  //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
  // Each ComponentKind has operand indices defined below.
  static constexpr unsigned MC_DST_IDX[] = {0, 0, 1};

  // VOPD3 instructions may have 2 or 3 source modifiers, src2 modifier is not
  // used if there is tied accumulator. Indexing of this array:
  // MC_SRC_IDX[VOPD3ModsNum][SrcNo]. This returns an index for a SINGLE
  // instruction layout, add 1 for COMPONENT_X or COMPONENT_Y. For the second
  // component add OpX.MCSrcNum + OpX.VOPD3ModsNum.
  // For VOPD1/VOPD2 use column with zero modifiers.
  static constexpr unsigned SINGLE_MC_SRC_IDX[4][3] = {
      {1, 2, 3}, {2, 3, 4}, {2, 4, 5}, {2, 4, 6}};

  // Parsed operands of regular instructions are ordered as follows:
  //   Mnemo dst src0 [vsrc1 ...]
  // Parsed VOPD operands are ordered as follows:
  //   OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
  //   OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
  // Each ComponentKind has operand indices defined below.
  static constexpr unsigned PARSED_DST_IDX[] = {1, 1,
                                                4 /* + OpX.ParsedSrcNum */};
  static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {
      2, 2, 5 /* + OpX.ParsedSrcNum */};

private:
  const ComponentKind Kind;
  // Properties of the X component; default-constructed (all zeros) for
  // SINGLE and COMPONENT_X layouts.
  const ComponentProps PrevComp;
  const unsigned VOPD3ModsNum;
  const int BitOp3Idx; // Index of bitop3 operand or -1

public:
  // Create layout for COMPONENT_X or SINGLE component.
  ComponentLayout(ComponentKind Kind, unsigned VOPD3ModsNum, int BitOp3Idx)
      : Kind(Kind), VOPD3ModsNum(VOPD3ModsNum), BitOp3Idx(BitOp3Idx) {
    assert(Kind == ComponentKind::SINGLE || Kind == ComponentKind::COMPONENT_X);
  }

  // Create layout for COMPONENT_Y which depends on COMPONENT_X layout.
  ComponentLayout(const ComponentProps &OpXProps, unsigned VOPD3ModsNum,
                  int BitOp3Idx)
      : Kind(ComponentKind::COMPONENT_Y), PrevComp(OpXProps),
        VOPD3ModsNum(VOPD3ModsNum), BitOp3Idx(BitOp3Idx) {}

public:
  // Return the index of dst operand in MCInst operands.
  unsigned getIndexOfDstInMCOperands() const { return MC_DST_IDX[Kind]; }

  // Return the index of the specified src operand in MCInst operands.
  unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx, bool VOPD3) const {
    assert(CompSrcIdx < Component::MAX_SRC_NUM);

    // For a SINGLE instruction with a BitOp3 operand, src2 maps to that
    // operand's index directly.
    if (Kind == SINGLE && CompSrcIdx == 2 && BitOp3Idx != -1)
      return BitOp3Idx;

    if (VOPD3) {
      return SINGLE_MC_SRC_IDX[VOPD3ModsNum][CompSrcIdx] + getPrevCompSrcNum() +
             getPrevCompVOPD3ModsNum() + (Kind != SINGLE ? 1 : 0);
    }

    // VOPD1/VOPD2: use the zero-modifiers column.
    return SINGLE_MC_SRC_IDX[0][CompSrcIdx] + getPrevCompSrcNum() +
           (Kind != SINGLE ? 1 : 0);
  }

  // Return the index of dst operand in the parsed operands array.
  unsigned getIndexOfDstInParsedOperands() const {
    return PARSED_DST_IDX[Kind] + getPrevCompParsedSrcNum();
  }

  // Return the index of the specified src operand in the parsed operands array.
  unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const {
    assert(CompSrcIdx < Component::MAX_SRC_NUM);
    return FIRST_PARSED_SRC_IDX[Kind] + getPrevCompParsedSrcNum() + CompSrcIdx;
  }

private:
  // The helpers below are zero for SINGLE/COMPONENT_X (PrevComp is
  // default-constructed in that case).
  unsigned getPrevCompSrcNum() const {
    return PrevComp.getCompSrcOperandsNum();
  }
  unsigned getPrevCompParsedSrcNum() const {
    return PrevComp.getCompParsedSrcOperandsNum();
  }
  unsigned getPrevCompVOPD3ModsNum() const {
    return PrevComp.getCompVOPD3ModsNum();
  }
};
897
// Layout and properties of VOPD components.
class ComponentInfo : public ComponentProps, public ComponentLayout {
public:
  // Create ComponentInfo for COMPONENT_X or SINGLE component.
  // Note: ComponentProps is constructed first, so its accessors may be used
  // to initialize the ComponentLayout base.
  ComponentInfo(const MCInstrDesc &OpDesc,
                ComponentKind Kind = ComponentKind::SINGLE,
                bool VOP3Layout = false)
      : ComponentProps(OpDesc, VOP3Layout),
        ComponentLayout(Kind, getCompVOPD3ModsNum(), getBitOp3OperandIdx()) {}

  // Create ComponentInfo for COMPONENT_Y which depends on COMPONENT_X layout.
  ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps,
                bool VOP3Layout = false)
      : ComponentProps(OpDesc, VOP3Layout),
        ComponentLayout(OpXProps, getCompVOPD3ModsNum(),
                        getBitOp3OperandIdx()) {}

  // Map component operand index to parsed operand index.
  // Return 0 if the specified operand does not exist.
  unsigned getIndexInParsedOperands(unsigned CompOprIdx) const;
};
919
// Properties of VOPD instructions.
class InstInfo {
private:
  // Info for the X (index 0) and Y (index 1) components.
  const ComponentInfo CompInfo[COMPONENTS_NUM];

public:
  // One MCRegister per component operand slot (dst, src0..src2).
  using RegIndices = std::array<MCRegister, Component::MAX_OPR_NUM>;

  InstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
      : CompInfo{OpX, OpY} {}

  InstInfo(const ComponentInfo &OprInfoX, const ComponentInfo &OprInfoY)
      : CompInfo{OprInfoX, OprInfoY} {}

  const ComponentInfo &operator[](size_t ComponentIdx) const {
    assert(ComponentIdx < COMPONENTS_NUM);
    return CompInfo[ComponentIdx];
  }

  // Check VOPD operands constraints.
  // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
  // for the specified component and MC operand. The callback must return 0
  // if the operand is not a register or not a VGPR.
  // If \p SkipSrc is set to true then constraints for source operands are not
  // checked.
  // If \p AllowSameVGPR is set then same VGPRs are allowed for X and Y sources
  // even though it violates requirement to be from different banks.
  // If \p VOPD3 is set to true both dst registers allowed to be either odd
  // or even and instruction may have real src2 as opposed to tied accumulator.
  bool
  hasInvalidOperand(std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
                    const MCRegisterInfo &MRI, bool SkipSrc = false,
                    bool AllowSameVGPR = false, bool VOPD3 = false) const {
    return getInvalidCompOperandIndex(GetRegIdx, MRI, SkipSrc, AllowSameVGPR,
                                      VOPD3)
        .has_value();
  }

  // Check VOPD operands constraints.
  // Return the index of an invalid component operand, if any.
  // If \p SkipSrc is set to true then constraints for source operands are not
  // checked except for being from the same halves of VGPR file on gfx1250.
  // If \p AllowSameVGPR is set then same VGPRs are allowed for X and Y sources
  // even though it violates requirement to be from different banks.
  // If \p VOPD3 is set to true both dst registers allowed to be either odd
  // or even and instruction may have real src2 as opposed to tied accumulator.
  std::optional<unsigned> getInvalidCompOperandIndex(
      std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
      const MCRegisterInfo &MRI, bool SkipSrc = false,
      bool AllowSameVGPR = false, bool VOPD3 = false) const;

private:
  // Collect the VGPR indices of one component's operands via \p GetRegIdx.
  RegIndices
  getRegIndices(unsigned ComponentIdx,
                std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
                bool VOPD3) const;
};
977
978} // namespace VOPD
979
LLVM_READONLY
// \returns the pair of single-instruction opcodes forming the X and Y
// components of \p VOPDOpcode.
std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode);

LLVM_READONLY
// Get properties of 2 single VOP1/VOP2 instructions
// used as components to create a VOPD instruction.
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY);

LLVM_READONLY
// Get properties of VOPD X and Y components.
VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
                               const MCInstrInfo *InstrInfo);

LLVM_READONLY
bool isAsyncStore(unsigned Opc);
LLVM_READONLY
bool isTensorStore(unsigned Opc);
LLVM_READONLY
// NOTE(review): TID is passed by value; a const reference would avoid the
// copy — confirm against the out-of-line definition before changing.
unsigned getTemporalHintType(const MCInstrDesc TID);

LLVM_READONLY
bool isTrue16Inst(unsigned Opc);

LLVM_READONLY
FPType getFPDstSelType(unsigned Opc);

LLVM_READONLY
bool isInvalidSingleUseConsumerInst(unsigned Opc);

LLVM_READONLY
bool isInvalidSingleUseProducerInst(unsigned Opc);

bool isDPMACCInstruction(unsigned Opc);

// Translate between the 2-address and 3-address forms of WMMA opcodes.
LLVM_READONLY
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);

LLVM_READONLY
unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);

void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &Header,
                               const MCSubtargetInfo *STI);

// Address-space classification of a global value.
bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
bool isReadOnlySegment(const GlobalValue *GV);

/// \returns True if constants should be emitted to .text section for given
/// target triple \p TT, false otherwise.
bool shouldEmitConstantsToTextSection(const Triple &TT);

/// Returns a valid charcode or 0 in the first entry if this is a valid physical
/// register name. Followed by the start register number, and the register
/// width. Does not validate the number of registers exists in the class. Unlike
/// parseAsmConstraintPhysReg, this does not expect the name to be wrapped in
/// "{}".
std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef TupleString);

/// Returns a valid charcode or 0 in the first entry if this is a valid physical
/// register constraint. Followed by the start register number, and the register
/// width. Does not validate the number of registers exists in the class.
std::tuple<char, unsigned, unsigned>
parseAsmConstraintPhysReg(StringRef Constraint);
1043
/// \returns Integer value requested using \p F's \p Name attribute.
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if requested value cannot be converted
/// to integer.
int getIntegerAttribute(const Function &F, StringRef Name, int Default);

/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
/// is false).
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if one of the requested values cannot be
/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
/// not present.
std::pair<unsigned, unsigned>
getIntegerPairAttribute(const Function &F, StringRef Name,
                        std::pair<unsigned, unsigned> Default,
                        bool OnlyFirstRequired = false);

/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
/// is false).
///
/// \returns \p std::nullopt if attribute is not present.
///
/// \returns \p std::nullopt and emits error if one of the requested values
/// cannot be converted to integer, or \p OnlyFirstRequired is false and
/// "second" value is not present.
std::optional<std::pair<unsigned, std::optional<unsigned>>>
getIntegerPairAttribute(const Function &F, StringRef Name,
                        bool OnlyFirstRequired = false);

/// \returns Generate a vector of integer values requested using \p F's \p Name
/// attribute.
/// \returns A vector of size \p Size, with all elements set to \p DefaultVal,
/// if any error occurs. The corresponding error will also be emitted.
SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
                                             unsigned Size,
                                             unsigned DefaultVal);
/// Similar to the function above, but returns std::nullopt if any error occurs.
std::optional<SmallVector<unsigned>>
getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size);

/// Checks if \p Val is inside \p MD, a !range-like metadata.
bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val);
1092
// Hardware wait counters. The first NUM_NORMAL_INST_CNTS entries exist on
// all targets; the remaining ones only where noted.
enum InstCounterType {
  LOAD_CNT = 0, // VMcnt prior to gfx12.
  DS_CNT,       // LGKMcnt prior to gfx12.
  EXP_CNT,      //
  STORE_CNT,    // VScnt in gfx10/gfx11.
  NUM_NORMAL_INST_CNTS,
  SAMPLE_CNT = NUM_NORMAL_INST_CNTS, // gfx12+ only.
  BVH_CNT,                           // gfx12+ only.
  KM_CNT,                            // gfx12+ only.
  X_CNT,                             // gfx1250.
  NUM_EXTENDED_INST_CNTS,
  VA_VDST = NUM_EXTENDED_INST_CNTS, // gfx12+ expert mode only.
  VM_VSRC,                          // gfx12+ expert mode only.
  NUM_EXPERT_INST_CNTS,
  NUM_INST_CNTS = NUM_EXPERT_INST_CNTS
};
1109
// Return an iterator over all counters between LOAD_CNT (the first counter)
// and \c MaxCounter (exclusive, default value yields an enumeration over
// all counters).
iota_range<InstCounterType>
inst_counter_types(InstCounterType MaxCounter = NUM_INST_CNTS);
1115
1116} // namespace AMDGPU
1117
// Enable iteration over InstCounterType (used by inst_counter_types above).
template <> struct enum_iteration_traits<AMDGPU::InstCounterType> {
  static constexpr bool is_iterable = true;
};
1121
1122namespace AMDGPU {
1123
1124/// Represents the counter values to wait for in an s_waitcnt instruction.
1125///
1126/// Large values (including the maximum possible integer) can be used to
1127/// represent "don't care" waits.
1128class Waitcnt {
1129 unsigned LoadCnt = ~0u; // Corresponds to Vmcnt prior to gfx12.
1130 unsigned ExpCnt = ~0u;
1131 unsigned DsCnt = ~0u; // Corresponds to LGKMcnt prior to gfx12.
1132 unsigned StoreCnt = ~0u; // Corresponds to VScnt on gfx10/gfx11.
1133 unsigned SampleCnt = ~0u; // gfx12+ only.
1134 unsigned BvhCnt = ~0u; // gfx12+ only.
1135 unsigned KmCnt = ~0u; // gfx12+ only.
1136 unsigned XCnt = ~0u; // gfx1250.
1137 unsigned VaVdst = ~0u; // gfx12+ expert scheduling mode only.
1138 unsigned VmVsrc = ~0u; // gfx12+ expert scheduling mode only.
1139
1140public:
1141 unsigned get(InstCounterType T) const {
1142 switch (T) {
1143 case LOAD_CNT:
1144 return LoadCnt;
1145 case EXP_CNT:
1146 return ExpCnt;
1147 case DS_CNT:
1148 return DsCnt;
1149 case STORE_CNT:
1150 return StoreCnt;
1151 case SAMPLE_CNT:
1152 return SampleCnt;
1153 case BVH_CNT:
1154 return BvhCnt;
1155 case KM_CNT:
1156 return KmCnt;
1157 case X_CNT:
1158 return XCnt;
1159 case VA_VDST:
1160 return VaVdst;
1161 case VM_VSRC:
1162 return VmVsrc;
1163 default:
1164 llvm_unreachable("bad InstCounterType");
1165 }
1166 }
1167 void set(InstCounterType T, unsigned Val) {
1168 switch (T) {
1169 case LOAD_CNT:
1170 LoadCnt = Val;
1171 break;
1172 case EXP_CNT:
1173 ExpCnt = Val;
1174 break;
1175 case DS_CNT:
1176 DsCnt = Val;
1177 break;
1178 case STORE_CNT:
1179 StoreCnt = Val;
1180 break;
1181 case SAMPLE_CNT:
1182 SampleCnt = Val;
1183 break;
1184 case BVH_CNT:
1185 BvhCnt = Val;
1186 break;
1187 case KM_CNT:
1188 KmCnt = Val;
1189 break;
1190 case X_CNT:
1191 XCnt = Val;
1192 break;
1193 case VA_VDST:
1194 VaVdst = Val;
1195 break;
1196 case VM_VSRC:
1197 VmVsrc = Val;
1198 break;
1199 default:
1200 llvm_unreachable("bad InstCounterType");
1201 }
1202 }
1203
1204 Waitcnt() = default;
1205 // Pre-gfx12 constructor.
1206 Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
1207 : LoadCnt(VmCnt), ExpCnt(ExpCnt), DsCnt(LgkmCnt), StoreCnt(VsCnt) {}
1208
1209 // gfx12+ constructor.
1210 Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
1211 unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt,
1212 unsigned VaVdst, unsigned VmVsrc)
1213 : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt),
1214 SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt), XCnt(XCnt),
1215 VaVdst(VaVdst), VmVsrc(VmVsrc) {}
1216
1217 bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); }
1218
1219 bool hasWaitExceptStoreCnt() const {
1220 return LoadCnt != ~0u || ExpCnt != ~0u || DsCnt != ~0u ||
1221 SampleCnt != ~0u || BvhCnt != ~0u || KmCnt != ~0u || XCnt != ~0u ||
1222 VaVdst != ~0u || VmVsrc != ~0u;
1223 }
1224
1225 bool hasWaitStoreCnt() const { return StoreCnt != ~0u; }
1226
1227 bool hasWaitDepctr() const { return VaVdst != ~0u || VmVsrc != ~0u; }
1228
1229 Waitcnt combined(const Waitcnt &Other) const {
1230 // Does the right thing provided self and Other are either both pre-gfx12
1231 // or both gfx12+.
1232 return Waitcnt(
1233 std::min(a: LoadCnt, b: Other.LoadCnt), std::min(a: ExpCnt, b: Other.ExpCnt),
1234 std::min(a: DsCnt, b: Other.DsCnt), std::min(a: StoreCnt, b: Other.StoreCnt),
1235 std::min(a: SampleCnt, b: Other.SampleCnt), std::min(a: BvhCnt, b: Other.BvhCnt),
1236 std::min(a: KmCnt, b: Other.KmCnt), std::min(a: XCnt, b: Other.XCnt),
1237 std::min(a: VaVdst, b: Other.VaVdst), std::min(a: VmVsrc, b: Other.VmVsrc));
1238 }
1239
1240 friend raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait);
1241};
1242
/// Represents the hardware counter limits for different wait count types.
struct HardwareLimits {
  unsigned LoadcntMax; // Corresponds to Vmcnt prior to gfx12.
  unsigned ExpcntMax;
  unsigned DscntMax;    // Corresponds to LGKMcnt prior to gfx12.
  unsigned StorecntMax; // Corresponds to VScnt in gfx10/gfx11.
  unsigned SamplecntMax; // gfx12+ only.
  unsigned BvhcntMax;    // gfx12+ only.
  unsigned KmcntMax;     // gfx12+ only.
  unsigned XcntMax;      // gfx1250.
  unsigned VaVdstMax;    // gfx12+ expert mode only.
  unsigned VmVsrcMax;    // gfx12+ expert mode only.

  // Default constructor leaves all limits uninitialized.
  HardwareLimits() = default;

  /// Initializes hardware limits from ISA version.
  HardwareLimits(const IsaVersion &IV);
};
1261
// The following methods are only meaningful on targets that support
// S_WAITCNT.

/// \returns Vmcnt bit mask for given isa \p Version.
unsigned getVmcntBitMask(const IsaVersion &Version);

/// \returns Expcnt bit mask for given isa \p Version.
unsigned getExpcntBitMask(const IsaVersion &Version);

/// \returns Lgkmcnt bit mask for given isa \p Version.
unsigned getLgkmcntBitMask(const IsaVersion &Version);

/// \returns Waitcnt bit mask for given isa \p Version.
unsigned getWaitcntBitMask(const IsaVersion &Version);

/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively. Should not be used on gfx12+, the instruction
/// which needs it is deprecated
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
///     \p Vmcnt = \p Waitcnt[3:0]        (pre-gfx9)
///     \p Vmcnt = \p Waitcnt[15:14,3:0]  (gfx9,10)
///     \p Vmcnt = \p Waitcnt[15:10]      (gfx11)
///     \p Expcnt = \p Waitcnt[6:4]       (pre-gfx11)
///     \p Expcnt = \p Waitcnt[2:0]       (gfx11)
///     \p Lgkmcnt = \p Waitcnt[11:8]     (pre-gfx10)
///     \p Lgkmcnt = \p Waitcnt[13:8]     (gfx10)
///     \p Lgkmcnt = \p Waitcnt[9:4]      (gfx11)
///
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt,
                   unsigned &Expcnt, unsigned &Lgkmcnt);

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);

/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt);

/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt);

/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt);

/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version. Should not be used on gfx12+, the instruction which needs
/// it is deprecated
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
///     Waitcnt[2:0]   = \p Expcnt       (gfx11+)
///     Waitcnt[3:0]   = \p Vmcnt        (pre-gfx9)
///     Waitcnt[3:0]   = \p Vmcnt[3:0]   (gfx9,10)
///     Waitcnt[6:4]   = \p Expcnt       (pre-gfx11)
///     Waitcnt[9:4]   = \p Lgkmcnt      (gfx11)
///     Waitcnt[11:8]  = \p Lgkmcnt      (pre-gfx10)
///     Waitcnt[13:8]  = \p Lgkmcnt      (gfx10)
///     Waitcnt[15:10] = \p Vmcnt        (gfx11)
///     Waitcnt[15:14] = \p Vmcnt[5:4]   (gfx9,10)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
///
unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
                       unsigned Expcnt, unsigned Lgkmcnt);

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);

// The following methods are only meaningful on targets that support
// S_WAIT_*CNT, introduced with gfx12.

/// \returns Loadcnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support LOADcnt
unsigned getLoadcntBitMask(const IsaVersion &Version);

/// \returns Samplecnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support SAMPLEcnt
unsigned getSamplecntBitMask(const IsaVersion &Version);

/// \returns Bvhcnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support BVHcnt
unsigned getBvhcntBitMask(const IsaVersion &Version);

/// \returns Dscnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support DScnt
unsigned getDscntBitMask(const IsaVersion &Version);

/// \returns Kmcnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support KMcnt
unsigned getKmcntBitMask(const IsaVersion &Version);

/// \returns Xcnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support Xcnt.
unsigned getXcntBitMask(const IsaVersion &Version);

/// \return STOREcnt or VScnt bit mask for given isa \p Version.
/// returns 0 for versions that do not support STOREcnt or VScnt.
/// STOREcnt and VScnt are the same counter, the name used
/// depends on the ISA version.
unsigned getStorecntBitMask(const IsaVersion &Version);

// The following are only meaningful on targets that support
// S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT.

/// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given
/// isa \p Version.
Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt);

/// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given
/// isa \p Version.
Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt);

/// \returns \p Loadcnt and \p Dscnt components of \p Decoded encoded as an
/// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa
/// \p Version.
unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);

/// \returns \p Storecnt and \p Dscnt components of \p Decoded encoded as an
/// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa
/// \p Version.
unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
1394
namespace Hwreg {

// Bit fields of the s_getreg/s_setreg hwreg immediate.
using HwregId = EncodingField<5, 0>;
using HwregOffset = EncodingField<10, 6>;

// The size field is stored biased by one (encoded value is Size - 1);
// encode()/decode() apply the bias.
struct HwregSize : EncodingField<15, 11, 32> {
  using EncodingField::EncodingField;
  constexpr uint64_t encode() const { return Value - 1; }
  static ValueType decode(uint64_t Encoded) { return Encoded + 1; }
};

using HwregEncoding = EncodingFields<HwregId, HwregOffset, HwregSize>;

} // namespace Hwreg
1409
namespace DepCtr {

// Helpers for the s_delay/depctr immediate and its named sub-fields.

// \returns the default (no-wait) depctr encoding for \p STI.
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI);
// Encode named field \p Name with value \p Val; bits consumed are recorded
// in \p UsedOprMask.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
                 const MCSubtargetInfo &STI);
bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
                              const MCSubtargetInfo &STI);
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
                  bool &IsDefault, const MCSubtargetInfo &STI);

/// \returns Maximum VaVdst value that can be encoded.
unsigned getVaVdstBitMask();

/// \returns Maximum VaSdst value that can be encoded.
unsigned getVaSdstBitMask();

/// \returns Maximum VaSsrc value that can be encoded.
unsigned getVaSsrcBitMask();

/// \returns Maximum HoldCnt value that can be encoded.
unsigned getHoldCntBitMask(const IsaVersion &Version);

/// \returns Maximum VmVsrc value that can be encoded.
unsigned getVmVsrcBitMask();

/// \returns Maximum VaVcc value that can be encoded.
unsigned getVaVccBitMask();

/// \returns Maximum SaSdst value that can be encoded.
unsigned getSaSdstBitMask();

/// \returns Decoded VaVdst from given immediate \p Encoded.
unsigned decodeFieldVaVdst(unsigned Encoded);

/// \returns Decoded VmVsrc from given immediate \p Encoded.
unsigned decodeFieldVmVsrc(unsigned Encoded);

/// \returns Decoded SaSdst from given immediate \p Encoded.
unsigned decodeFieldSaSdst(unsigned Encoded);

/// \returns Decoded VaSdst from given immediate \p Encoded.
unsigned decodeFieldVaSdst(unsigned Encoded);

/// \returns Decoded VaVcc from given immediate \p Encoded.
unsigned decodeFieldVaVcc(unsigned Encoded);

/// \returns Decoded VaSsrc from given immediate \p Encoded.
unsigned decodeFieldVaSsrc(unsigned Encoded);

/// \returns Decoded HoldCnt from given immediate \p Encoded.
unsigned decodeFieldHoldCnt(unsigned Encoded, const IsaVersion &Version);

/// \returns \p VmVsrc as an encoded Depctr immediate.
unsigned encodeFieldVmVsrc(unsigned VmVsrc, const MCSubtargetInfo &STI);

/// \returns \p Encoded combined with encoded \p VmVsrc.
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc);

/// \returns \p VaVdst as an encoded Depctr immediate.
unsigned encodeFieldVaVdst(unsigned VaVdst, const MCSubtargetInfo &STI);

/// \returns \p Encoded combined with encoded \p VaVdst.
unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst);

/// \returns \p SaSdst as an encoded Depctr immediate.
unsigned encodeFieldSaSdst(unsigned SaSdst, const MCSubtargetInfo &STI);

/// \returns \p Encoded combined with encoded \p SaSdst.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst);

/// \returns \p VaSdst as an encoded Depctr immediate.
unsigned encodeFieldVaSdst(unsigned VaSdst, const MCSubtargetInfo &STI);

/// \returns \p Encoded combined with encoded \p VaSdst.
unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst);

/// \returns \p VaVcc as an encoded Depctr immediate.
unsigned encodeFieldVaVcc(unsigned VaVcc, const MCSubtargetInfo &STI);

/// \returns \p Encoded combined with encoded \p VaVcc.
unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc);

/// \returns \p HoldCnt as an encoded Depctr immediate.
unsigned encodeFieldHoldCnt(unsigned HoldCnt, const MCSubtargetInfo &STI);

/// \returns \p Encoded combined with encoded \p HoldCnt.
unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt,
                            const IsaVersion &Version);

/// \returns \p VaSsrc as an encoded Depctr immediate.
unsigned encodeFieldVaSsrc(unsigned VaSsrc, const MCSubtargetInfo &STI);

/// \returns \p Encoded combined with encoded \p VaSsrc.
unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc);
1504
1505} // namespace DepCtr
1506
namespace Exp {

// Helpers for export (EXP) target operands.

// Maps target id \p Id to its symbolic \p Name and \p Index.
bool getTgtName(unsigned Id, StringRef &Name, int &Index);

LLVM_READONLY
unsigned getTgtId(const StringRef Name);

LLVM_READNONE
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);

} // namespace Exp
1518
namespace MTBUFFormat {

// Helpers for MTBUF data/numeric format immediates (dfmt/nfmt pre-gfx10,
// unified format on newer targets).

LLVM_READNONE
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);

void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);

int64_t getDfmt(const StringRef Name);

StringRef getDfmtName(unsigned Id);

int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);

StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);

bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);

bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);

int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI);

StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI);

bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI);

int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
                             const MCSubtargetInfo &STI);

bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);

unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);

} // namespace MTBUFFormat
1552
namespace SendMsg {

// Helpers for s_sendmsg message id / operation / stream immediates.

LLVM_READNONE
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI);

LLVM_READNONE
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
                  bool Strict = true);

LLVM_READNONE
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
                      const MCSubtargetInfo &STI, bool Strict = true);

LLVM_READNONE
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI);

LLVM_READNONE
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);

void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
               uint16_t &StreamId, const MCSubtargetInfo &STI);

LLVM_READNONE
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId);

/// Returns true if the message does not use the m0 operand.
bool msgDoesNotUseM0(int64_t MsgId, const MCSubtargetInfo &STI);

} // namespace SendMsg
1582
// Shader-attribute queries on an IR function.
unsigned getInitialPSInputAddr(const Function &F);

bool getHasColorExport(const Function &F);

bool getHasDepthExport(const Function &F);

bool hasDynamicVGPR(const Function &F);

// Returns the value of the "amdgpu-dynamic-vgpr-block-size" attribute, or 0 if
// the attribute is missing or its value is invalid.
unsigned getDynamicVGPRBlockSize(const Function &F);
1594
1595LLVM_READNONE
1596constexpr bool isShader(CallingConv::ID CC) {
1597 switch (CC) {
1598 case CallingConv::AMDGPU_VS:
1599 case CallingConv::AMDGPU_LS:
1600 case CallingConv::AMDGPU_HS:
1601 case CallingConv::AMDGPU_ES:
1602 case CallingConv::AMDGPU_GS:
1603 case CallingConv::AMDGPU_PS:
1604 case CallingConv::AMDGPU_CS_Chain:
1605 case CallingConv::AMDGPU_CS_ChainPreserve:
1606 case CallingConv::AMDGPU_CS:
1607 return true;
1608 default:
1609 return false;
1610 }
1611}
1612
// \returns true for shader calling conventions plus the AMDGPU_Gfx*
// conventions.
LLVM_READNONE
constexpr bool isGraphics(CallingConv::ID CC) {
  return isShader(CC) || CC == CallingConv::AMDGPU_Gfx ||
         CC == CallingConv::AMDGPU_Gfx_WholeWave;
}
1618
// Everything that is not graphics counts as compute; AMDGPU_CS additionally
// counts as both (it is a shader CC but also compute).
LLVM_READNONE
constexpr bool isCompute(CallingConv::ID CC) {
  return !isGraphics(CC) || CC == CallingConv::AMDGPU_CS;
}
1623
1624LLVM_READNONE
1625constexpr bool isEntryFunctionCC(CallingConv::ID CC) {
1626 switch (CC) {
1627 case CallingConv::AMDGPU_KERNEL:
1628 case CallingConv::SPIR_KERNEL:
1629 case CallingConv::AMDGPU_VS:
1630 case CallingConv::AMDGPU_GS:
1631 case CallingConv::AMDGPU_PS:
1632 case CallingConv::AMDGPU_CS:
1633 case CallingConv::AMDGPU_ES:
1634 case CallingConv::AMDGPU_HS:
1635 case CallingConv::AMDGPU_LS:
1636 return true;
1637 default:
1638 return false;
1639 }
1640}
1641
1642LLVM_READNONE
1643constexpr bool isChainCC(CallingConv::ID CC) {
1644 switch (CC) {
1645 case CallingConv::AMDGPU_CS_Chain:
1646 case CallingConv::AMDGPU_CS_ChainPreserve:
1647 return true;
1648 default:
1649 return false;
1650 }
1651}
1652
// These functions are considered entrypoints into the current module, i.e. they
// are allowed to be called from outside the current module. This is different
// from isEntryFunctionCC, which is only true for functions that are entered by
// the hardware. Module entry points include all entry functions but also
// include functions that can be called from other functions inside or outside
// the current module. Module entry functions are allowed to allocate LDS.
//
// AMDGPU_CS_Chain is intended for externally callable chain functions, so it is
// treated as a module entrypoint. AMDGPU_CS_ChainPreserve is used for internal
// helper functions (e.g. retry helpers), so it is not a module entrypoint.
LLVM_READNONE
constexpr bool isModuleEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_Gfx:
  case CallingConv::AMDGPU_CS_Chain:
    return true;
  default:
    // All hardware entry functions are also module entry functions.
    return isEntryFunctionCC(CC);
  }
}
1673
1674LLVM_READNONE
1675constexpr inline bool isKernel(CallingConv::ID CC) {
1676 switch (CC) {
1677 case CallingConv::AMDGPU_KERNEL:
1678 case CallingConv::SPIR_KERNEL:
1679 return true;
1680 default:
1681 return false;
1682 }
1683}
1684
1685inline bool isKernel(const Function &F) { return isKernel(CC: F.getCallingConv()); }
1686
// Tail-call optimization can only be guaranteed for the Fast calling
// convention.
LLVM_READNONE
constexpr bool canGuaranteeTCO(CallingConv::ID CC) {
  return CC == CallingConv::Fast;
}
1691
1692/// Return true if we might ever do TCO for calls with this calling convention.
1693LLVM_READNONE
1694constexpr bool mayTailCallThisCC(CallingConv::ID CC) {
1695 switch (CC) {
1696 case CallingConv::C:
1697 case CallingConv::AMDGPU_Gfx:
1698 case CallingConv::AMDGPU_Gfx_WholeWave:
1699 return true;
1700 default:
1701 return canGuaranteeTCO(CC);
1702 }
1703}
1704
// Subtarget feature queries.
bool hasXNACK(const MCSubtargetInfo &STI);
bool hasSRAMECC(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
bool hasA16(const MCSubtargetInfo &STI);
bool hasG16(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);
bool hasGDS(const MCSubtargetInfo &STI);
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler = false);
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI);

// Generation / encoding predicates.
bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
bool isGFX9_GFX10(const MCSubtargetInfo &STI);
bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI);
bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI);
bool isGFX8Plus(const MCSubtargetInfo &STI);
bool isGFX9Plus(const MCSubtargetInfo &STI);
bool isNotGFX9Plus(const MCSubtargetInfo &STI);
bool isGFX10(const MCSubtargetInfo &STI);
bool isGFX10_GFX11(const MCSubtargetInfo &STI);
bool isGFX10Plus(const MCSubtargetInfo &STI);
bool isNotGFX10Plus(const MCSubtargetInfo &STI);
bool isGFX10Before1030(const MCSubtargetInfo &STI);
bool isGFX11(const MCSubtargetInfo &STI);
bool isGFX11Plus(const MCSubtargetInfo &STI);
bool isGFX12(const MCSubtargetInfo &STI);
bool isGFX12Plus(const MCSubtargetInfo &STI);
bool isGFX1250(const MCSubtargetInfo &STI);
bool isGFX1250Plus(const MCSubtargetInfo &STI);
bool isGFX13(const MCSubtargetInfo &STI);
bool isGFX13Plus(const MCSubtargetInfo &STI);
bool supportsWGP(const MCSubtargetInfo &STI);
bool isNotGFX12Plus(const MCSubtargetInfo &STI);
bool isNotGFX11Plus(const MCSubtargetInfo &STI);
bool isGCN3Encoding(const MCSubtargetInfo &STI);
bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
bool isGFX10_3_GFX11(const MCSubtargetInfo &STI);
bool isGFX90A(const MCSubtargetInfo &STI);
bool isGFX940(const MCSubtargetInfo &STI);
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
bool hasMAIInsts(const MCSubtargetInfo &STI);
bool hasVOPD(const MCSubtargetInfo &STI);
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI);
1752
1753inline bool supportsWave32(const MCSubtargetInfo &STI) {
1754 return AMDGPU::isGFX10Plus(STI) && !AMDGPU::isGFX1250(STI);
1755}
1756
int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
unsigned hasKernargPreload(const MCSubtargetInfo &STI);
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST);

/// Is Reg - scalar register
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI);

/// \returns if \p Reg occupies the high 16-bits of a 32-bit register.
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI);

/// If \p Reg is a pseudo reg, return the correct hardware register given
/// \p STI otherwise return \p Reg.
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI);

/// Convert hardware register \p Reg to a pseudo register
LLVM_READNONE
MCRegister mc2PseudoReg(MCRegister Reg);

// \returns true if \p Reg is one of the inline-value registers.
LLVM_READNONE
bool isInlineValue(MCRegister Reg);
1777
1778/// Is this an AMDGPU specific source operand? These include registers,
1779/// inline constants, literals and mandatory literals (KImm).
1780constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo) {
1781 return OpInfo.OperandType >= AMDGPU::OPERAND_SRC_FIRST &&
1782 OpInfo.OperandType <= AMDGPU::OPERAND_SRC_LAST;
1783}
1784
1785inline bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
1786 return isSISrcOperand(OpInfo: Desc.operands()[OpNo]);
1787}
1788
/// Is operand \p OpNo of \p Desc a KImm (mandatory literal) operand?
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Is operand \p OpNo of \p Desc a floating-point source operand?
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Does operand \p OpNo of \p Desc support only inlinable literals?
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Get the size in bits of a register from the register class ID \p RCID.
unsigned getRegBitWidth(unsigned RCID);

/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC);
1803
LLVM_READNONE
/// \returns the size in bytes of the immediate encoding for the operand type
/// in \p OpInfo; unreachable for operand types not listed here.
inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
  switch (OpInfo.OperandType) {
  // 32-bit (dword-sized) immediates.
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_KIMM32:
  case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
  case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
    return 4;

  // 64-bit immediates.
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
  case AMDGPU::OPERAND_KIMM64:
    return 8;

  // 16-bit immediates, including packed 2x16 forms.
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_BF16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2BF16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
  case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
    return 2;

  default:
    llvm_unreachable("unhandled operand type");
  }
}
1848
1849LLVM_READNONE
1850inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
1851 return getOperandSize(OpInfo: Desc.operands()[OpNo]);
1852}
1853
/// Is this literal inlinable, and not one of the values intended for
/// floating-point use?
1856LLVM_READNONE
inline bool isInlinableIntLiteral(int64_t Literal) {
  // The integer inline-constant range is [-16, 64].
  return -16 <= Literal && Literal <= 64;
}
1860
/// Is this 64-bit literal inlinable?
LLVM_READNONE
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);

/// Is this 32-bit literal inlinable?
LLVM_READNONE
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);

/// Is this bf16 literal inlinable?
LLVM_READNONE
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);

/// Is this fp16 literal inlinable?
LLVM_READNONE
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);

/// Is this i16 literal inlinable?
LLVM_READNONE
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi);

/// \returns the inline encoding of packed v2i16 \p Literal, or std::nullopt
/// if it has none.
LLVM_READNONE
std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal);

/// \returns the inline encoding of packed v2bf16 \p Literal, or std::nullopt
/// if it has none.
LLVM_READNONE
std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal);

/// \returns the inline encoding of packed v2f16 \p Literal, or std::nullopt
/// if it has none.
LLVM_READNONE
std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal);

/// \returns the V_PK_FMAC_F16 inline encoding of \p Literal, or std::nullopt
/// if it has none.
LLVM_READNONE
std::optional<unsigned> getPKFMACF16InlineEncoding(uint32_t Literal,
                                                   bool IsGFX11Plus);

/// Is this packed 2x16 literal inlinable for operand type \p OpType?
LLVM_READNONE
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType);

/// Is this packed v2i16 literal inlinable?
LLVM_READNONE
bool isInlinableLiteralV2I16(uint32_t Literal);

/// Is this packed v2bf16 literal inlinable?
LLVM_READNONE
bool isInlinableLiteralV2BF16(uint32_t Literal);

/// Is this packed v2f16 literal inlinable?
LLVM_READNONE
bool isInlinableLiteralV2F16(uint32_t Literal);

/// Is \p Literal an inline constant for V_PK_FMAC_F16?
LLVM_READNONE
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus);

/// Is \p Val a valid 32-bit literal (for an FP64 operand if \p IsFP64)?
LLVM_READNONE
bool isValid32BitLiteral(uint64_t Val, bool IsFP64);

/// Encode \p Imm as a 32-bit literal for operand type \p Type.
LLVM_READNONE
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit);

/// \returns true if IR argument \p Arg is passed in an SGPR.
bool isArgPassedInSGPR(const Argument *Arg);

/// \returns true if call-site argument \p ArgNo of \p CB is passed in an
/// SGPR.
bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo);
1914
/// \returns true if opcode \p Opc is a packed FP32 instruction.
LLVM_READONLY bool isPackedFP32Inst(unsigned Opc);

/// \returns true if \p EncodedOffset is a legal unsigned SMRD offset
/// encoding for \p ST.
LLVM_READONLY
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset);

/// \returns true if \p EncodedOffset is a legal signed SMRD offset encoding
/// for \p ST (\p IsBuffer selects the S_BUFFER form).
LLVM_READONLY
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset, bool IsBuffer);

/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
/// offsets.
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);

/// \returns The encoding that will be used for \p ByteOffset in the
/// SMRD offset field, or std::nullopt if it won't fit. On GFX9 and GFX10
/// S_LOAD instructions have a signed offset, on other subtargets it is
/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                            int64_t ByteOffset, bool IsBuffer,
                                            bool HasSOffset = false);

/// \return The encoding that can be used for a 32-bit literal offset in an
/// SMRD instruction. This is only useful on CI.
std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                     int64_t ByteOffset);

/// For pre-GFX12 FLAT instructions the offset must be positive;
/// MSB is ignored and forced to zero.
///
/// \return The number of bits available for the signed offset field in flat
/// instructions. Note that some forms of the instruction disallow negative
/// offsets.
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST);

/// \returns true if this offset is small enough to fit in the SMRD
/// offset field. \p ByteOffset should be the offset in bytes and
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
1954
1955LLVM_READNONE
1956inline bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC) {
1957 if (isGFX12(STI: ST))
1958 return DC >= DPP::ROW_SHARE_FIRST && DC <= DPP::ROW_SHARE_LAST;
1959 if (isGFX90A(STI: ST))
1960 return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
1961 return false;
1962}
1963
/// \returns true if an instruction may have a 64-bit VGPR operand.
bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc,
                             const MCSubtargetInfo &ST);

/// \returns true if an instruction is a DP ALU DPP without any 64-bit
/// operands.
bool isDPALU_DPP32BitOpc(unsigned Opc);

/// \returns true if an instruction is a DP ALU DPP.
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII,
                 const MCSubtargetInfo &ST);

/// \returns true if the intrinsic is divergent.
bool isIntrinsicSourceOfDivergence(unsigned IntrID);

/// \returns true if the intrinsic is uniform.
bool isIntrinsicAlwaysUniform(unsigned IntrID);

/// \returns a register class for the physical register \p Reg if it is a VGPR
/// or nullptr otherwise.
const MCRegisterClass *getVGPRPhysRegClass(MCRegister Reg,
                                           const MCRegisterInfo &MRI);

/// \returns the MODE bits which have to be set by the S_SET_VGPR_MSB for the
/// physical register \p Reg.
unsigned getVGPREncodingMSBs(MCRegister Reg, const MCRegisterInfo &MRI);

/// If \p Reg is a low VGPR return a corresponding high VGPR with \p MSBs set.
MCRegister getVGPRWithMSBs(MCRegister Reg, unsigned MSBs,
                           const MCRegisterInfo &MRI);

/// \returns VGPR MSBs encoded in a S_SETREG_IMM32_B32 \p MI if it sets
/// it. If \p HasSetregVGPRMSBFixup is true then size of the ID_MODE mask is
/// ignored.
std::optional<unsigned> convertSetRegImmToVgprMSBs(const MachineInstr &MI,
                                                   bool HasSetregVGPRMSBFixup);

/// MC-layer overload of the above: \returns VGPR MSBs encoded in a
/// S_SETREG_IMM32_B32 \p MI if it sets it. If \p HasSetregVGPRMSBFixup is
/// true then size of the ID_MODE mask is ignored.
std::optional<unsigned> convertSetRegImmToVgprMSBs(const MCInst &MI,
                                                   bool HasSetregVGPRMSBFixup);

/// Returns a table for the opcode with a given \p Desc to map the VGPR MSB
/// set by the S_SET_VGPR_MSB to one of 4 sources. In case of VOPD returns 2
/// maps, one for X and one for Y component.
std::pair<const AMDGPU::OpName *, const AMDGPU::OpName *>
getVGPRLoweringOperandTables(const MCInstrDesc &Desc);

/// \returns true if a memory instruction supports scale_offset modifier.
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode);

/// \returns lds block size in terms of dwords.
/// This is used to calculate the lds size encoded for PAL metadata 3.0+ which
/// must be defined in terms of bytes.
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST);
2019
/// Cluster-dimension information derived from a Function (see \ref get).
/// Holds a discriminating \ref Kind plus, for fixed-dimension clusters, the
/// three dimensions themselves.
class ClusterDimsAttr {
public:
  /// Discriminator: unknown, explicitly no cluster, variable dims, or a
  /// fixed 3-component dimension vector.
  enum class Kind { Unknown, NoCluster, VariableDims, FixedDims };

  ClusterDimsAttr() = default;

  /// \returns the stored kind.
  Kind getKind() const { return AttrKind; }

  bool isUnknown() const { return getKind() == Kind::Unknown; }

  bool isNoCluster() const { return getKind() == Kind::NoCluster; }

  bool isFixedDims() const { return getKind() == Kind::FixedDims; }

  bool isVariableDims() const { return getKind() == Kind::VariableDims; }

  // The setters below reset the whole object (including Dims) rather than
  // just the kind, so stale dimensions never survive a kind change.
  void setUnknown() { *this = ClusterDimsAttr(Kind::Unknown); }

  void setNoCluster() { *this = ClusterDimsAttr(Kind::NoCluster); }

  void setVariableDims() { *this = ClusterDimsAttr(Kind::VariableDims); }

  /// \returns the dims stored. Note that this function can only be called if
  /// the kind is \ref Kind::FixedDims.
  const std::array<unsigned, 3> &getDims() const;

  /// Two attributes are equal when both the kind and the dimensions match.
  bool operator==(const ClusterDimsAttr &RHS) const {
    return AttrKind == RHS.AttrKind && Dims == RHS.Dims;
  }

  /// \returns a human-readable rendering of this attribute.
  std::string to_string() const;

  /// Build the attribute for function \p F (defined out of line).
  static ClusterDimsAttr get(const Function &F);

private:
  /// Sentinel encodings used by the out-of-line implementation.
  enum Encoding { EncoNoCluster = 0, EncoVariableDims = 1024 };

  ClusterDimsAttr(Kind AttrKind) : AttrKind(AttrKind) {}

  // Fixed cluster dimensions; only meaningful when AttrKind == FixedDims.
  std::array<unsigned, 3> Dims = {0, 0, 0};

  Kind AttrKind = Kind::Unknown;
};
2063
2064} // namespace AMDGPU
2065
2066raw_ostream &operator<<(raw_ostream &OS,
2067 const AMDGPU::IsaInfo::TargetIDSetting S);
2068
2069} // end namespace llvm
2070
2071#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
2072