//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H

#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Alignment.h"
#include <array>
#include <functional>
#include <utility>

// Pull in OpName enum definition and getNamedOperandIdx() declaration.
#define GET_INSTRINFO_OPERAND_ENUM
#include "AMDGPUGenInstrInfo.inc"

struct amd_kernel_code_t;

namespace llvm {

struct Align;
class Argument;
class Function;
class GlobalValue;
class MCInstrInfo;
class MCRegisterClass;
class MCRegisterInfo;
class MCSubtargetInfo;
class StringRef;
class Triple;
class raw_ostream;

namespace AMDGPU {

struct AMDGPUMCKernelCodeT;
struct IsaVersion;

/// Generic target versions emitted by this version of LLVM.
///
/// These numbers are incremented every time a codegen breaking change occurs
/// within a generic family.
namespace GenericVersion {
static constexpr unsigned GFX9 = 1;
static constexpr unsigned GFX9_4 = 1;
static constexpr unsigned GFX10_1 = 1;
static constexpr unsigned GFX10_3 = 1;
static constexpr unsigned GFX11 = 1;
static constexpr unsigned GFX12 = 1;
} // namespace GenericVersion

enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 };

enum class FPType { None, FP4, FP8 };

/// \returns True if \p STI is AMDHSA.
bool isHsaAbi(const MCSubtargetInfo &STI);

/// \returns Code object version from the IR module flag.
unsigned getAMDHSACodeObjectVersion(const Module &M);

/// \returns Code object version from ELF's e_ident[EI_ABIVERSION].
unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion);

/// \returns The default HSA code object version. This should only be used when
/// we lack a more accurate CodeObjectVersion value (e.g. from the IR module
/// flag or a .amdhsa_code_object_version directive)
unsigned getDefaultAMDHSACodeObjectVersion();

/// \returns ABIVersion suitable for use in ELF's e_ident[EI_ABIVERSION].
/// \param CodeObjectVersion is a value returned by getAMDHSACodeObjectVersion().
uint8_t getELFABIVersion(const Triple &OS, unsigned CodeObjectVersion);

/// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr
unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV);

/// \returns The offset of the hostcall pointer argument from implicitarg_ptr
unsigned getHostcallImplicitArgPosition(unsigned COV);

unsigned getDefaultQueueImplicitArgPosition(unsigned COV);
unsigned getCompletionActionImplicitArgPosition(unsigned COV);

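// Illustrative sketch (not part of the API): querying implicit-argument
// offsets for a module. The offsets depend on the code object version, so the
// COV is looked up first; `M` is a hypothetical llvm::Module reference.
//
//   unsigned COV = getAMDHSACodeObjectVersion(M);
//   unsigned HostcallOffset = getHostcallImplicitArgPosition(COV);
//   unsigned DefaultQueueOffset = getDefaultQueueImplicitArgPosition(COV);
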
struct GcnBufferFormatInfo {
  unsigned Format;
  unsigned BitsPerComp;
  unsigned NumComponents;
  unsigned NumFormat;
  unsigned DataFormat;
};

struct MAIInstInfo {
  uint16_t Opcode;
  bool is_dgemm;
  bool is_gfx940_xdl;
};

struct MFMA_F8F6F4_Info {
  unsigned Opcode;
  unsigned F8F8Opcode;
  uint8_t NumRegsSrcA;
  uint8_t NumRegsSrcB;
};

struct CvtScaleF32_F32F16ToF8F4_Info {
  unsigned Opcode;
};

struct True16D16Info {
  unsigned T16Op;
  unsigned HiOp;
  unsigned LoOp;
};

#define GET_MIMGBaseOpcode_DECL
#define GET_MIMGDim_DECL
#define GET_MIMGEncoding_DECL
#define GET_MIMGLZMapping_DECL
#define GET_MIMGMIPMapping_DECL
#define GET_MIMGBiASMapping_DECL
#define GET_MAIInstInfoTable_DECL
#define GET_isMFMA_F8F6F4Table_DECL
#define GET_isCvtScaleF32_F32F16ToF8F4Table_DECL
#define GET_True16D16Table_DECL
#include "AMDGPUGenSearchableTables.inc"

namespace IsaInfo {

enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  TRAP_NUM_SGPRS = 16
};

enum class TargetIDSetting { Unsupported, Any, Off, On };

class AMDGPUTargetID {
private:
  const MCSubtargetInfo &STI;
  TargetIDSetting XnackSetting;
  TargetIDSetting SramEccSetting;

public:
  explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
  ~AMDGPUTargetID() = default;

  /// \return True if the current xnack setting is not "Unsupported".
  bool isXnackSupported() const {
    return XnackSetting != TargetIDSetting::Unsupported;
  }

  /// \returns True if the current xnack setting is "On" or "Any".
  bool isXnackOnOrAny() const {
    return XnackSetting == TargetIDSetting::On ||
           XnackSetting == TargetIDSetting::Any;
  }

  /// \returns True if the current xnack setting is "On" or "Off",
  /// false otherwise.
  bool isXnackOnOrOff() const {
    return getXnackSetting() == TargetIDSetting::On ||
           getXnackSetting() == TargetIDSetting::Off;
  }

  /// \returns The current xnack TargetIDSetting, possible options are
  /// "Unsupported", "Any", "Off", and "On".
  TargetIDSetting getXnackSetting() const { return XnackSetting; }

  /// Sets xnack setting to \p NewXnackSetting.
  void setXnackSetting(TargetIDSetting NewXnackSetting) {
    XnackSetting = NewXnackSetting;
  }

  /// \return True if the current sramecc setting is not "Unsupported".
  bool isSramEccSupported() const {
    return SramEccSetting != TargetIDSetting::Unsupported;
  }

  /// \returns True if the current sramecc setting is "On" or "Any".
  bool isSramEccOnOrAny() const {
    return SramEccSetting == TargetIDSetting::On ||
           SramEccSetting == TargetIDSetting::Any;
  }

  /// \returns True if the current sramecc setting is "On" or "Off",
  /// false otherwise.
  bool isSramEccOnOrOff() const {
    return getSramEccSetting() == TargetIDSetting::On ||
           getSramEccSetting() == TargetIDSetting::Off;
  }

  /// \returns The current sramecc TargetIDSetting, possible options are
  /// "Unsupported", "Any", "Off", and "On".
  TargetIDSetting getSramEccSetting() const { return SramEccSetting; }

  /// Sets sramecc setting to \p NewSramEccSetting.
  void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
    SramEccSetting = NewSramEccSetting;
  }

  void setTargetIDFromFeaturesString(StringRef FS);
  void setTargetIDFromTargetIDStream(StringRef TargetID);

  /// \returns String representation of the object.
  std::string toString() const;
};

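// Illustrative sketch (not part of the API): constructing an AMDGPUTargetID
// for a subtarget, refining it from a feature string, and querying the xnack
// setting. `STI` is a hypothetical MCSubtargetInfo reference.
//
//   AMDGPUTargetID TargetID(STI);
//   TargetID.setTargetIDFromFeaturesString("+xnack,-sramecc");
//   bool NeedsXnack = TargetID.isXnackOnOrAny();
//   std::string Name = TargetID.toString();
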
/// \returns Wavefront size for given subtarget \p STI.
unsigned getWavefrontSize(const MCSubtargetInfo *STI);

/// \returns Local memory size in bytes for given subtarget \p STI.
unsigned getLocalMemorySize(const MCSubtargetInfo *STI);

/// \returns Maximum addressable local memory size in bytes for given subtarget
/// \p STI.
unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI);

/// \returns Number of execution units per compute unit for given subtarget \p
/// STI.
unsigned getEUsPerCU(const MCSubtargetInfo *STI);

/// \returns Maximum number of work groups per compute unit for given subtarget
/// \p STI and limited by given \p FlatWorkGroupSize.
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize);

/// \returns Minimum number of waves per execution unit for given subtarget \p
/// STI.
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Maximum number of waves per execution unit for given subtarget \p
/// STI without any kind of limitation.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Number of waves per execution unit required to support the given \p
/// FlatWorkGroupSize.
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize);

/// \returns Minimum flat work group size for given subtarget \p STI.
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Maximum flat work group size for given subtarget \p STI.
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Number of waves per work group for given subtarget \p STI and
/// \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize);

/// \returns SGPR allocation granularity for given subtarget \p STI.
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);

/// \returns SGPR encoding granularity for given subtarget \p STI.
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);

/// \returns Total number of SGPRs for given subtarget \p STI.
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of SGPRs for given subtarget \p STI.
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used. XNACK is inferred from
/// \p STI.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed);

/// \returns Number of SGPR blocks needed for given subtarget \p STI when
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
/// register counts.
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);

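// Illustrative sketch (not part of the API): accounting for the SGPRs of a
// kernel that uses VCC and FLAT_SCRATCH, and converting the total into the
// encoded block count. `STI` and `NumUsedSGPRs` are hypothetical.
//
//   unsigned Extra = getNumExtraSGPRs(STI, /*VCCUsed=*/true,
//                                     /*FlatScrUsed=*/true);
//   unsigned Blocks = getNumSGPRBlocks(STI, NumUsedSGPRs + Extra);
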
/// \returns VGPR allocation granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned
getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize,
                    std::optional<bool> EnableWavefrontSize32 = std::nullopt);

/// \returns VGPR encoding granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPREncodingGranule(
    const MCSubtargetInfo *STI,
    std::optional<bool> EnableWavefrontSize32 = std::nullopt);

/// For subtargets with a unified VGPR file and mixed ArchVGPR/AGPR usage,
/// returns the allocation granule for ArchVGPRs.
unsigned getArchVGPRAllocGranule();

/// \returns Total number of VGPRs for given subtarget \p STI.
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of architectural VGPRs for a given subtarget \p
/// STI.
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of VGPRs for given subtarget \p STI.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI,
                                unsigned DynamicVGPRBlockSize);

/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        unsigned DynamicVGPRBlockSize);

/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        unsigned DynamicVGPRBlockSize);

/// \returns Number of waves reachable for a given \p NumVGPRs usage for given
/// subtarget \p STI.
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
                                      unsigned NumVGPRs,
                                      unsigned DynamicVGPRBlockSize);

/// \returns Number of waves reachable for a given \p NumVGPRs usage, \p Granule
/// size, \p MaxWaves possible, and \p TotalNumVGPRs available.
unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
                                      unsigned MaxWaves,
                                      unsigned TotalNumVGPRs);

/// \returns Occupancy for a given \p SGPRs usage, \p MaxWaves possible, and \p
/// Gen.
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
                                  AMDGPUSubtarget::Generation Gen);

/// \returns Number of VGPR blocks needed for given subtarget \p STI when
/// \p NumVGPRs are used. We actually return the number of blocks -1, since
/// that's what we encode.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match the
/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getEncodedNumVGPRBlocks(
    const MCSubtargetInfo *STI, unsigned NumVGPRs,
    std::optional<bool> EnableWavefrontSize32 = std::nullopt);

/// \returns Number of VGPR blocks that need to be allocated for the given
/// subtarget \p STI when \p NumVGPRs are used.
unsigned getAllocatedNumVGPRBlocks(
    const MCSubtargetInfo *STI, unsigned NumVGPRs,
    unsigned DynamicVGPRBlockSize,
    std::optional<bool> EnableWavefrontSize32 = std::nullopt);

} // end namespace IsaInfo

// Represents a field in an encoded value.
template <unsigned HighBit, unsigned LowBit, unsigned D = 0>
struct EncodingField {
  static_assert(HighBit >= LowBit, "Invalid bit range!");
  static constexpr unsigned Offset = LowBit;
  static constexpr unsigned Width = HighBit - LowBit + 1;

  using ValueType = unsigned;
  static constexpr ValueType Default = D;

  ValueType Value;
  constexpr EncodingField(ValueType Value) : Value(Value) {}

  constexpr uint64_t encode() const { return Value; }
  static ValueType decode(uint64_t Encoded) { return Encoded; }
};

// Represents a single bit in an encoded value.
template <unsigned Bit, unsigned D = 0>
using EncodingBit = EncodingField<Bit, Bit, D>;

// A helper for encoding and decoding multiple fields.
template <typename... Fields> struct EncodingFields {
  static constexpr uint64_t encode(Fields... Values) {
    return ((Values.encode() << Values.Offset) | ...);
  }

  static std::tuple<typename Fields::ValueType...> decode(uint64_t Encoded) {
    return {Fields::decode((Encoded >> Fields::Offset) &
                           maxUIntN(Fields::Width))...};
  }
};

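// Illustrative sketch (not part of the API): a hypothetical two-field layout
// built from the helpers above, mirroring how Hwreg::HwregEncoding is defined
// later in this header. Each field is shifted to its Offset on encode and
// masked back out on decode.
//
//   using ExampleId = EncodingField<3, 0>;   // bits [3:0]
//   using ExampleFlag = EncodingBit<4>;      // bit  [4]
//   using ExampleEncoding = EncodingFields<ExampleId, ExampleFlag>;
//
//   uint64_t Enc = ExampleEncoding::encode(ExampleId(5), ExampleFlag(1)); // 0x15
//   auto [Id, Flag] = ExampleEncoding::decode(Enc);                       // 5, 1
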
LLVM_READONLY
inline bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx) {
  return getNamedOperandIdx(Opcode, NamedIdx) != -1;
}

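// Illustrative sketch (not part of the API): hasNamedOperand() is typically
// used to guard getNamedOperandIdx() lookups when probing an MC opcode for an
// optional operand. `Opc` is a hypothetical opcode value.
//
//   if (hasNamedOperand(Opc, OpName::clamp)) {
//     int ClampIdx = getNamedOperandIdx(Opc, OpName::clamp);
//     // ... inspect or rewrite the clamp operand ...
//   }
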
LLVM_READONLY
int getSOPPWithRelaxation(uint16_t Opcode);

struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode;
  bool Store;
  bool Atomic;
  bool AtomicX2;
  bool Sampler;
  bool Gather4;

  uint8_t NumExtraArgs;
  bool Gradients;
  bool G16;
  bool Coordinates;
  bool LodOrClampOrMip;
  bool HasD16;
  bool MSAA;
  bool BVH;
  bool A16;
  bool NoReturn;
  bool PointSampleAccel;
};

LLVM_READONLY
const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc);

LLVM_READONLY
const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);

struct MIMGDimInfo {
  MIMGDim Dim;
  uint8_t NumCoords;
  uint8_t NumGradients;
  bool MSAA;
  bool DA;
  uint8_t Encoding;
  const char *AsmSuffix;
};

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);

struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;
  MIMGBaseOpcode LZ;
};

struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;
  MIMGBaseOpcode NONMIP;
};

struct MIMGBiasMappingInfo {
  MIMGBaseOpcode Bias;
  MIMGBaseOpcode NoBias;
};

struct MIMGOffsetMappingInfo {
  MIMGBaseOpcode Offset;
  MIMGBaseOpcode NoOffset;
};

struct MIMGG16MappingInfo {
  MIMGBaseOpcode G;
  MIMGBaseOpcode G16;
};

LLVM_READONLY
const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);

struct WMMAOpcodeMappingInfo {
  unsigned Opcode2Addr;
  unsigned Opcode3Addr;
};

LLVM_READONLY
const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);

LLVM_READONLY
const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);

LLVM_READONLY
const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset);

LLVM_READONLY
const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);

LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords);

LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);

LLVM_READONLY
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported);

struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
  uint8_t VAddrOperands;
};

LLVM_READONLY
const MIMGInfo *getMIMGInfo(unsigned Opc);

LLVM_READONLY
int getMTBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMTBUFElements(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSoffset(unsigned Opc);

LLVM_READONLY
int getMUBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMUBUFElements(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSoffset(unsigned Opc);

LLVM_READONLY
bool getMUBUFIsBufferInv(unsigned Opc);

LLVM_READONLY
bool getMUBUFTfe(unsigned Opc);

LLVM_READONLY
bool getSMEMIsBuffer(unsigned Opc);

LLVM_READONLY
bool getVOP1IsSingle(unsigned Opc);

LLVM_READONLY
bool getVOP2IsSingle(unsigned Opc);

LLVM_READONLY
bool getVOP3IsSingle(unsigned Opc);

LLVM_READONLY
bool isVOPC64DPP(unsigned Opc);

LLVM_READONLY
bool isVOPCAsmOnly(unsigned Opc);

/// Returns true if the MAI operation is a double-precision GEMM.
LLVM_READONLY
bool getMAIIsDGEMM(unsigned Opc);

LLVM_READONLY
bool getMAIIsGFX940XDL(unsigned Opc);

struct CanBeVOPD {
  bool X;
  bool Y;
};

/// \returns SIEncodingFamily used for VOPD encoding on subtarget \p ST.
LLVM_READONLY
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST);

LLVM_READONLY
CanBeVOPD getCanBeVOPD(unsigned Opc);

LLVM_READNONE
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal);

LLVM_READONLY
const MFMA_F8F6F4_Info *getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ,
                                                      unsigned BLGP,
                                                      unsigned F8F8Opcode);

LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI);
LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI);

LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);

LLVM_READONLY
unsigned getVOPDOpcode(unsigned Opc);

LLVM_READONLY
int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily);

LLVM_READONLY
bool isVOPD(unsigned Opc);

LLVM_READNONE
bool isMAC(unsigned Opc);

LLVM_READNONE
bool isPermlane16(unsigned Opc);

LLVM_READNONE
bool isGenericAtomic(unsigned Opc);

LLVM_READNONE
bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc);

namespace VOPD {

enum Component : unsigned {
  DST = 0,
  SRC0,
  SRC1,
  SRC2,

  DST_NUM = 1,
  MAX_SRC_NUM = 3,
  MAX_OPR_NUM = DST_NUM + MAX_SRC_NUM
};

// LSB mask for VGPR banks per VOPD component operand.
// 4 banks result in a mask of 3, setting the 2 lower bits.
constexpr unsigned VOPD_VGPR_BANK_MASKS[] = {1, 3, 3, 1};

enum ComponentIndex : unsigned { X = 0, Y = 1 };
constexpr unsigned COMPONENTS[] = {ComponentIndex::X, ComponentIndex::Y};
constexpr unsigned COMPONENTS_NUM = 2;

// Properties of VOPD components.
class ComponentProps {
private:
  unsigned SrcOperandsNum = 0;
  unsigned MandatoryLiteralIdx = ~0u;
  bool HasSrc2Acc = false;

public:
  ComponentProps() = default;
  ComponentProps(const MCInstrDesc &OpDesc);

  // Return the total number of src operands this component has.
  unsigned getCompSrcOperandsNum() const { return SrcOperandsNum; }

  // Return the number of src operands of this component visible to the parser.
  unsigned getCompParsedSrcOperandsNum() const {
    return SrcOperandsNum - HasSrc2Acc;
  }

  // Return true iff this component has a mandatory literal.
  bool hasMandatoryLiteral() const { return MandatoryLiteralIdx != ~0u; }

  // If this component has a mandatory literal, return the component operand
  // index of this literal (i.e. either Component::SRC1 or Component::SRC2).
  unsigned getMandatoryLiteralCompOperandIndex() const {
    assert(hasMandatoryLiteral());
    return MandatoryLiteralIdx;
  }

  // Return true iff this component has an operand with component index
  // CompSrcIdx and this operand may be a register.
  bool hasRegSrcOperand(unsigned CompSrcIdx) const {
    assert(CompSrcIdx < Component::MAX_SRC_NUM);
    return SrcOperandsNum > CompSrcIdx && !hasMandatoryLiteralAt(CompSrcIdx);
  }

  // Return true iff this component has a tied src2.
  bool hasSrc2Acc() const { return HasSrc2Acc; }

private:
  bool hasMandatoryLiteralAt(unsigned CompSrcIdx) const {
    assert(CompSrcIdx < Component::MAX_SRC_NUM);
    return MandatoryLiteralIdx == Component::DST_NUM + CompSrcIdx;
  }
};

enum ComponentKind : unsigned {
  SINGLE = 0,  // A single VOP1 or VOP2 instruction which may be used in VOPD.
  COMPONENT_X, // A VOPD instruction, X component.
  COMPONENT_Y, // A VOPD instruction, Y component.
  MAX = COMPONENT_Y
};

// Interface functions of this class map VOPD component operand indices
// to indices of operands in MachineInstr/MCInst or parsed operands array.
//
// Note that this class operates with 3 kinds of indices:
// - VOPD component operand indices (Component::DST, Component::SRC0, etc.);
// - MC operand indices (they refer to operands in a MachineInstr/MCInst);
// - parsed operand indices (they refer to operands in the parsed operands
//   array).
//
// For SINGLE components mapping between these indices is trivial.
// But things get more complicated for COMPONENT_X and
// COMPONENT_Y because these components share the same
// MachineInstr/MCInst and the same parsed operands array.
// Below is an example of component operand to parsed operand
// mapping for the following instruction:
//
//   v_dual_add_f32 v255, v4, v5 :: v_dual_mov_b32 v6, v1
//
//                          PARSED        COMPONENT        PARSED
// COMPONENT               OPERANDS     OPERAND INDEX   OPERAND INDEX
// -------------------------------------------------------------------
//                     "v_dual_add_f32"                       0
// v_dual_add_f32           v255          0 (DST)    -->      1
//                          v4            1 (SRC0)   -->      2
//                          v5            2 (SRC1)   -->      3
//                          "::"                              4
//                     "v_dual_mov_b32"                       5
// v_dual_mov_b32           v6            0 (DST)    -->      6
//                          v1            1 (SRC0)   -->      7
// -------------------------------------------------------------------
//
class ComponentLayout {
private:
  // Regular MachineInstr/MCInst operands are ordered as follows:
  //   dst, src0 [, other src operands]
  // VOPD MachineInstr/MCInst operands are ordered as follows:
  //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
  // Each ComponentKind has operand indices defined below.
  static constexpr unsigned MC_DST_IDX[] = {0, 0, 1};
  static constexpr unsigned FIRST_MC_SRC_IDX[] = {1, 2, 2 /* + OpX.MCSrcNum */};

  // Parsed operands of regular instructions are ordered as follows:
  //   Mnemo dst src0 [vsrc1 ...]
  // Parsed VOPD operands are ordered as follows:
  //   OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
  //   OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
  // Each ComponentKind has operand indices defined below.
  static constexpr unsigned PARSED_DST_IDX[] = {1, 1,
                                                4 /* + OpX.ParsedSrcNum */};
  static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {
      2, 2, 5 /* + OpX.ParsedSrcNum */};

private:
  const ComponentKind Kind;
  const ComponentProps PrevComp;

public:
  // Create layout for COMPONENT_X or SINGLE component.
  ComponentLayout(ComponentKind Kind) : Kind(Kind) {
    assert(Kind == ComponentKind::SINGLE || Kind == ComponentKind::COMPONENT_X);
  }

  // Create layout for COMPONENT_Y which depends on COMPONENT_X layout.
  ComponentLayout(const ComponentProps &OpXProps)
      : Kind(ComponentKind::COMPONENT_Y), PrevComp(OpXProps) {}

public:
  // Return the index of the dst operand in MCInst operands.
  unsigned getIndexOfDstInMCOperands() const { return MC_DST_IDX[Kind]; }

  // Return the index of the specified src operand in MCInst operands.
  unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx) const {
    assert(CompSrcIdx < Component::MAX_SRC_NUM);
    return FIRST_MC_SRC_IDX[Kind] + getPrevCompSrcNum() + CompSrcIdx;
  }

  // Return the index of the dst operand in the parsed operands array.
  unsigned getIndexOfDstInParsedOperands() const {
    return PARSED_DST_IDX[Kind] + getPrevCompParsedSrcNum();
  }

  // Return the index of the specified src operand in the parsed operands
  // array.
  unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const {
    assert(CompSrcIdx < Component::MAX_SRC_NUM);
    return FIRST_PARSED_SRC_IDX[Kind] + getPrevCompParsedSrcNum() + CompSrcIdx;
  }

private:
  unsigned getPrevCompSrcNum() const {
    return PrevComp.getCompSrcOperandsNum();
  }
  unsigned getPrevCompParsedSrcNum() const {
    return PrevComp.getCompParsedSrcOperandsNum();
  }
};

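// Illustrative sketch (not part of the API): for the v_dual_add_f32 ::
// v_dual_mov_b32 example above, the X component has two parsed src operands,
// so the Y component's dst maps to parsed operand index 4 + 2 = 6. `OpXDesc`
// is a hypothetical MCInstrDesc for the X component.
//
//   ComponentProps OpXProps(OpXDesc);
//   ComponentLayout YLayout(OpXProps);                            // COMPONENT_Y
//   unsigned DstIdx = YLayout.getIndexOfDstInParsedOperands();    // 6
//   unsigned Src0Idx = YLayout.getIndexOfSrcInParsedOperands(0);  // 7
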
// Layout and properties of VOPD components.
class ComponentInfo : public ComponentLayout, public ComponentProps {
public:
  // Create ComponentInfo for COMPONENT_X or SINGLE component.
  ComponentInfo(const MCInstrDesc &OpDesc,
                ComponentKind Kind = ComponentKind::SINGLE)
      : ComponentLayout(Kind), ComponentProps(OpDesc) {}

  // Create ComponentInfo for COMPONENT_Y which depends on COMPONENT_X layout.
  ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps)
      : ComponentLayout(OpXProps), ComponentProps(OpDesc) {}

  // Map component operand index to parsed operand index.
  // Return 0 if the specified operand does not exist.
  unsigned getIndexInParsedOperands(unsigned CompOprIdx) const;
};

// Properties of VOPD instructions.
class InstInfo {
private:
  const ComponentInfo CompInfo[COMPONENTS_NUM];

public:
  using RegIndices = std::array<unsigned, Component::MAX_OPR_NUM>;

  InstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
      : CompInfo{OpX, OpY} {}

  InstInfo(const ComponentInfo &OprInfoX, const ComponentInfo &OprInfoY)
      : CompInfo{OprInfoX, OprInfoY} {}

  const ComponentInfo &operator[](size_t ComponentIdx) const {
    assert(ComponentIdx < COMPONENTS_NUM);
    return CompInfo[ComponentIdx];
  }

  // Check VOPD operands constraints.
  // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
  // for the specified component and MC operand. The callback must return 0
  // if the operand is not a register or not a VGPR.
  // If \p SkipSrc is set to true then constraints for source operands are not
  // checked.
  bool hasInvalidOperand(std::function<unsigned(unsigned, unsigned)> GetRegIdx,
                         bool SkipSrc = false) const {
    return getInvalidCompOperandIndex(GetRegIdx, SkipSrc).has_value();
  }

  // Check VOPD operands constraints.
  // Return the index of an invalid component operand, if any.
  // If \p SkipSrc is set to true then constraints for source operands are not
  // checked.
  std::optional<unsigned> getInvalidCompOperandIndex(
      std::function<unsigned(unsigned, unsigned)> GetRegIdx,
      bool SkipSrc = false) const;

private:
  RegIndices
  getRegIndices(unsigned ComponentIdx,
                std::function<unsigned(unsigned, unsigned)> GetRegIdx) const;
};

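// Illustrative sketch (not part of the API): checking the operand constraints
// of a VOPD pair via getVOPDInstInfo() (declared below, outside this
// namespace). `OpXDesc`, `OpYDesc`, `lookupVGPRIndex()` and
// `reportInvalidVOPDPair()` are hypothetical.
//
//   InstInfo Info = AMDGPU::getVOPDInstInfo(OpXDesc, OpYDesc);
//   auto GetRegIdx = [&](unsigned Component, unsigned MCOperandIdx) -> unsigned {
//     // Return the VGPR index of the operand, or 0 if it is not a VGPR.
//     return lookupVGPRIndex(Component, MCOperandIdx);
//   };
//   if (Info.hasInvalidOperand(GetRegIdx))
//     reportInvalidVOPDPair();
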
} // namespace VOPD

LLVM_READONLY
std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode);

LLVM_READONLY
// Get properties of 2 single VOP1/VOP2 instructions
// used as components to create a VOPD instruction.
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY);

LLVM_READONLY
// Get properties of VOPD X and Y components.
VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
                               const MCInstrInfo *InstrInfo);

LLVM_READONLY
bool isAsyncStore(unsigned Opc);
LLVM_READONLY
bool isTensorStore(unsigned Opc);
LLVM_READONLY
unsigned getTemporalHintType(const MCInstrDesc TID);

LLVM_READONLY
bool isTrue16Inst(unsigned Opc);

LLVM_READONLY
FPType getFPDstSelType(unsigned Opc);

LLVM_READONLY
bool isInvalidSingleUseConsumerInst(unsigned Opc);

LLVM_READONLY
bool isInvalidSingleUseProducerInst(unsigned Opc);

bool isDPMACCInstruction(unsigned Opc);

LLVM_READONLY
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);

LLVM_READONLY
unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);

void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &Header,
                               const MCSubtargetInfo *STI);

bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
bool isReadOnlySegment(const GlobalValue *GV);

/// \returns True if constants should be emitted to .text section for given
/// target triple \p TT, false otherwise.
bool shouldEmitConstantsToTextSection(const Triple &TT);

/// \returns Integer value requested using \p F's \p Name attribute.
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if requested value cannot be converted
/// to integer.
int getIntegerAttribute(const Function &F, StringRef Name, int Default);

/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is required unless \p OnlyFirstRequired
/// is true).
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if one of the requested values cannot be
/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
/// not present.
std::pair<unsigned, unsigned>
getIntegerPairAttribute(const Function &F, StringRef Name,
                        std::pair<unsigned, unsigned> Default,
                        bool OnlyFirstRequired = false);

/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is required unless \p OnlyFirstRequired
/// is true).
///
/// \returns \p std::nullopt if attribute is not present.
///
/// \returns \p std::nullopt and emits error if one of the requested values
/// cannot be converted to integer, or \p OnlyFirstRequired is false and
/// "second" value is not present.
std::optional<std::pair<unsigned, std::optional<unsigned>>>
getIntegerPairAttribute(const Function &F, StringRef Name,
                        bool OnlyFirstRequired = false);

/// \returns A vector of integer values requested using \p F's \p Name
/// attribute.
/// \returns A vector of size \p Size, with all elements set to \p DefaultVal,
/// if any error occurs. The corresponding error will also be emitted.
SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
                                             unsigned Size,
                                             unsigned DefaultVal);
/// Similar to the function above, but returns std::nullopt if any error occurs.
std::optional<SmallVector<unsigned>>
getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size);

/// Represents the counter values to wait for in an s_waitcnt instruction.
///
/// Large values (including the maximum possible integer) can be used to
/// represent "don't care" waits.
struct Waitcnt {
  unsigned LoadCnt = ~0u;   // Corresponds to Vmcnt prior to gfx12.
  unsigned ExpCnt = ~0u;
  unsigned DsCnt = ~0u;     // Corresponds to LGKMcnt prior to gfx12.
  unsigned StoreCnt = ~0u;  // Corresponds to VScnt on gfx10/gfx11.
  unsigned SampleCnt = ~0u; // gfx12+ only.
  unsigned BvhCnt = ~0u;    // gfx12+ only.
  unsigned KmCnt = ~0u;     // gfx12+ only.
  unsigned XCnt = ~0u;      // gfx1250.

  Waitcnt() = default;
  // Pre-gfx12 constructor.
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : LoadCnt(VmCnt), ExpCnt(ExpCnt), DsCnt(LgkmCnt), StoreCnt(VsCnt) {}

  // gfx12+ constructor.
  Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
          unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt)
      : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt),
        SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt), XCnt(XCnt) {}

  bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); }

  bool hasWaitExceptStoreCnt() const {
    return LoadCnt != ~0u || ExpCnt != ~0u || DsCnt != ~0u ||
           SampleCnt != ~0u || BvhCnt != ~0u || KmCnt != ~0u || XCnt != ~0u;
  }

  bool hasWaitStoreCnt() const { return StoreCnt != ~0u; }

  Waitcnt combined(const Waitcnt &Other) const {
    // Does the right thing provided self and Other are either both pre-gfx12
    // or both gfx12+.
    return Waitcnt(
        std::min(LoadCnt, Other.LoadCnt), std::min(ExpCnt, Other.ExpCnt),
        std::min(DsCnt, Other.DsCnt), std::min(StoreCnt, Other.StoreCnt),
        std::min(SampleCnt, Other.SampleCnt), std::min(BvhCnt, Other.BvhCnt),
        std::min(KmCnt, Other.KmCnt), std::min(XCnt, Other.XCnt));
  }
};

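// Illustrative sketch (not part of the API): merging two wait requirements
// with the pre-gfx12 constructor. ~0u means "don't care" for a counter, so
// combined() keeps the stricter (smaller) value per counter.
//
//   Waitcnt A(/*VmCnt=*/2, /*ExpCnt=*/~0u, /*LgkmCnt=*/0,   /*VsCnt=*/~0u);
//   Waitcnt B(/*VmCnt=*/1, /*ExpCnt=*/3,   /*LgkmCnt=*/~0u, /*VsCnt=*/~0u);
//   Waitcnt C = A.combined(B); // LoadCnt=1, ExpCnt=3, DsCnt=0, StoreCnt=~0u
//   bool Any = C.hasWait();    // true
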
// The following methods are only meaningful on targets that support
// S_WAITCNT.

/// \returns Vmcnt bit mask for given isa \p Version.
unsigned getVmcntBitMask(const IsaVersion &Version);

/// \returns Expcnt bit mask for given isa \p Version.
unsigned getExpcntBitMask(const IsaVersion &Version);

/// \returns Lgkmcnt bit mask for given isa \p Version.
unsigned getLgkmcntBitMask(const IsaVersion &Version);

/// \returns Waitcnt bit mask for given isa \p Version.
unsigned getWaitcntBitMask(const IsaVersion &Version);

/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively. Should not be used on gfx12+; the instruction
/// which needs it is deprecated.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
///     \p Vmcnt = \p Waitcnt[3:0]        (pre-gfx9)
///     \p Vmcnt = \p Waitcnt[15:14,3:0]  (gfx9,10)
///     \p Vmcnt = \p Waitcnt[15:10]      (gfx11)
///     \p Expcnt = \p Waitcnt[6:4]       (pre-gfx11)
///     \p Expcnt = \p Waitcnt[2:0]       (gfx11)
///     \p Lgkmcnt = \p Waitcnt[11:8]     (pre-gfx10)
///     \p Lgkmcnt = \p Waitcnt[13:8]     (gfx10)
///     \p Lgkmcnt = \p Waitcnt[9:4]      (gfx11)
///
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt,
                   unsigned &Expcnt, unsigned &Lgkmcnt);

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);

/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt);

/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt);

/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt);

/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version. Should not be used on gfx12+; the instruction which needs
/// it is deprecated.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
///     Waitcnt[2:0]   = \p Expcnt      (gfx11+)
///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9)
///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9,10)
///     Waitcnt[6:4]   = \p Expcnt      (pre-gfx11)
///     Waitcnt[9:4]   = \p Lgkmcnt     (gfx11)
///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10)
///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10)
///     Waitcnt[15:10] = \p Vmcnt       (gfx11)
///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9,10)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
///
unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
                       unsigned Expcnt, unsigned Lgkmcnt);

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);

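// Illustrative sketch (not part of the API): round-tripping a pre-gfx12
// S_WAITCNT immediate through the encode/decode helpers above. `Version` is a
// hypothetical IsaVersion (e.g. obtained from getIsaVersion()).
//
//   unsigned Imm = encodeWaitcnt(Version, /*Vmcnt=*/0, /*Expcnt=*/7,
//                                /*Lgkmcnt=*/0);
//   unsigned Vm, Exp, Lgkm;
//   decodeWaitcnt(Version, Imm, Vm, Exp, Lgkm); // Vm=0, Exp=7, Lgkm=0
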
// The following methods are only meaningful on targets that support
// S_WAIT_*CNT, introduced with gfx12.

/// \returns Loadcnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support LOADcnt.
unsigned getLoadcntBitMask(const IsaVersion &Version);

/// \returns Samplecnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support SAMPLEcnt.
unsigned getSamplecntBitMask(const IsaVersion &Version);

/// \returns Bvhcnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support BVHcnt.
unsigned getBvhcntBitMask(const IsaVersion &Version);

/// \returns Dscnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support DScnt.
unsigned getDscntBitMask(const IsaVersion &Version);

/// \returns Kmcnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support KMcnt.
unsigned getKmcntBitMask(const IsaVersion &Version);

/// \returns Xcnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support Xcnt.
unsigned getXcntBitMask(const IsaVersion &Version);

/// \return STOREcnt or VScnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support STOREcnt or VScnt.
/// STOREcnt and VScnt are the same counter; the name used
/// depends on the ISA version.
unsigned getStorecntBitMask(const IsaVersion &Version);

// The following are only meaningful on targets that support
// S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT.

/// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given
/// isa \p Version.
Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt);

/// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given
/// isa \p Version.
Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt);

/// \returns \p Loadcnt and \p Dscnt components of \p Decoded encoded as an
/// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa
/// \p Version.
unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);

/// \returns \p Storecnt and \p Dscnt components of \p Decoded encoded as an
/// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa
/// \p Version.
unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);

namespace Hwreg {

using HwregId = EncodingField<5, 0>;
using HwregOffset = EncodingField<10, 6>;

struct HwregSize : EncodingField<15, 11, 32> {
  using EncodingField::EncodingField;
  constexpr uint64_t encode() const { return Value - 1; }
  static ValueType decode(uint64_t Encoded) { return Encoded + 1; }
};

using HwregEncoding = EncodingFields<HwregId, HwregOffset, HwregSize>;

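// Illustrative sketch (not part of the API): packing an s_getreg/s_setreg
// style immediate from its id/offset/size fields and unpacking it again. The
// field values are hypothetical; note that HwregSize is stored biased by one.
//
//   uint64_t Imm = HwregEncoding::encode(HwregId(1), HwregOffset(0),
//                                        HwregSize(32));
//   auto [Id, Offset, Size] = HwregEncoding::decode(Imm); // 1, 0, 32
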
} // namespace Hwreg

namespace DepCtr {

int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI);
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
                 const MCSubtargetInfo &STI);
bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
                              const MCSubtargetInfo &STI);
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
                  bool &IsDefault, const MCSubtargetInfo &STI);

/// \returns Decoded VaVdst from given immediate \p Encoded.
unsigned decodeFieldVaVdst(unsigned Encoded);

/// \returns Decoded VmVsrc from given immediate \p Encoded.
unsigned decodeFieldVmVsrc(unsigned Encoded);

/// \returns Decoded SaSdst from given immediate \p Encoded.
unsigned decodeFieldSaSdst(unsigned Encoded);

/// \returns Decoded VaSdst from given immediate \p Encoded.
unsigned decodeFieldVaSdst(unsigned Encoded);

/// \returns Decoded VaVcc from given immediate \p Encoded.
unsigned decodeFieldVaVcc(unsigned Encoded);

/// \returns Decoded VaSsrc from given immediate \p Encoded.
unsigned decodeFieldVaSsrc(unsigned Encoded);

/// \returns Decoded HoldCnt from given immediate \p Encoded.
unsigned decodeFieldHoldCnt(unsigned Encoded);

/// \returns \p VmVsrc as an encoded Depctr immediate.
unsigned encodeFieldVmVsrc(unsigned VmVsrc);

/// \returns \p Encoded combined with encoded \p VmVsrc.
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc);

/// \returns \p VaVdst as an encoded Depctr immediate.
unsigned encodeFieldVaVdst(unsigned VaVdst);

/// \returns \p Encoded combined with encoded \p VaVdst.
unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst);

/// \returns \p SaSdst as an encoded Depctr immediate.
unsigned encodeFieldSaSdst(unsigned SaSdst);

/// \returns \p Encoded combined with encoded \p SaSdst.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst);

/// \returns \p VaSdst as an encoded Depctr immediate.
unsigned encodeFieldVaSdst(unsigned VaSdst);

/// \returns \p Encoded combined with encoded \p VaSdst.
unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst);

/// \returns \p VaVcc as an encoded Depctr immediate.
unsigned encodeFieldVaVcc(unsigned VaVcc);

/// \returns \p Encoded combined with encoded \p VaVcc.
unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc);

/// \returns \p HoldCnt as an encoded Depctr immediate.
unsigned encodeFieldHoldCnt(unsigned HoldCnt);

/// \returns \p Encoded combined with encoded \p HoldCnt.
unsigned encodeFieldHoldCnt(unsigned HoldCnt, unsigned Encoded);

/// \returns \p VaSsrc as an encoded Depctr immediate.
unsigned encodeFieldVaSsrc(unsigned VaSsrc);

/// \returns \p Encoded combined with encoded \p VaSsrc.
unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc);

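// Illustrative sketch (not part of the API): building a depctr immediate
// (e.g. for s_waitcnt_depctr) by starting from the default encoding and
// overriding individual fields. `STI` is a hypothetical MCSubtargetInfo, and
// the field semantics noted in the comments are an assumption.
//
//   unsigned Enc = getDefaultDepCtrEncoding(STI);
//   Enc = encodeFieldVmVsrc(Enc, 0); // assumed: wait for VMEM source reads
//   Enc = encodeFieldVaVdst(Enc, 0); // assumed: wait for VALU vdst writes
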
} // namespace DepCtr

namespace Exp {

bool getTgtName(unsigned Id, StringRef &Name, int &Index);

LLVM_READONLY
unsigned getTgtId(const StringRef Name);

LLVM_READNONE
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);

} // namespace Exp

namespace MTBUFFormat {

LLVM_READNONE
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);

void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);

int64_t getDfmt(const StringRef Name);

StringRef getDfmtName(unsigned Id);

int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);

StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);

bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);

bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);

int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI);

StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI);

bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI);

int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
                             const MCSubtargetInfo &STI);

bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);

unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);

} // namespace MTBUFFormat

namespace SendMsg {

LLVM_READNONE
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI);

LLVM_READNONE
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
                  bool Strict = true);

LLVM_READNONE
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
                      const MCSubtargetInfo &STI, bool Strict = true);

LLVM_READNONE
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI);

LLVM_READNONE
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);

void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
               uint16_t &StreamId, const MCSubtargetInfo &STI);

LLVM_READNONE
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId);

} // namespace SendMsg

unsigned getInitialPSInputAddr(const Function &F);

bool getHasColorExport(const Function &F);

bool getHasDepthExport(const Function &F);

bool hasDynamicVGPR(const Function &F);

// Returns the value of the "amdgpu-dynamic-vgpr-block-size" attribute, or 0 if
// the attribute is missing or its value is invalid.
unsigned getDynamicVGPRBlockSize(const Function &F);

LLVM_READNONE
constexpr bool isShader(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS_Chain:
  case CallingConv::AMDGPU_CS_ChainPreserve:
  case CallingConv::AMDGPU_CS:
    return true;
  default:
    return false;
  }
}

LLVM_READNONE
constexpr bool isGraphics(CallingConv::ID CC) {
  return isShader(CC) || CC == CallingConv::AMDGPU_Gfx;
}

LLVM_READNONE
constexpr bool isCompute(CallingConv::ID CC) {
  return !isGraphics(CC) || CC == CallingConv::AMDGPU_CS;
}

LLVM_READNONE
constexpr bool isEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_LS:
    return true;
  default:
    return false;
  }
}

LLVM_READNONE
constexpr bool isChainCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_CS_Chain:
  case CallingConv::AMDGPU_CS_ChainPreserve:
    return true;
  default:
    return false;
  }
}

// These functions are considered entrypoints into the current module, i.e.
// they are allowed to be called from outside the current module. This is
// different from isEntryFunctionCC, which is only true for functions that are
// entered by the hardware. Module entry points include all entry functions but
// also include functions that can be called from other functions inside or
// outside the current module. Module entry functions are allowed to allocate
// LDS.
LLVM_READNONE
constexpr bool isModuleEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_Gfx:
    return true;
  default:
    return isEntryFunctionCC(CC) || isChainCC(CC);
  }
}

LLVM_READNONE
constexpr inline bool isKernel(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  default:
    return false;
  }
}

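// Illustrative sketch (not part of the API): how the predicates above classify
// a kernel calling convention versus an amdgpu_gfx callable function.
//
//   isKernel(CallingConv::AMDGPU_KERNEL);             // true
//   isShader(CallingConv::AMDGPU_KERNEL);             // false
//   isEntryFunctionCC(CallingConv::AMDGPU_KERNEL);    // true
//   isEntryFunctionCC(CallingConv::AMDGPU_Gfx);       // false
//   isModuleEntryFunctionCC(CallingConv::AMDGPU_Gfx); // true
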
LLVM_READNONE
constexpr bool canGuaranteeTCO(CallingConv::ID CC) {
  return CC == CallingConv::Fast;
}

/// Return true if we might ever do TCO for calls with this calling convention.
LLVM_READNONE
constexpr bool mayTailCallThisCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::C:
  case CallingConv::AMDGPU_Gfx:
    return true;
  default:
    return canGuaranteeTCO(CC);
  }
}

bool hasXNACK(const MCSubtargetInfo &STI);
bool hasSRAMECC(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
bool hasA16(const MCSubtargetInfo &STI);
bool hasG16(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);
bool hasGDS(const MCSubtargetInfo &STI);
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler = false);
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI);

bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
bool isGFX9_GFX10(const MCSubtargetInfo &STI);
bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI);
bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI);
bool isGFX8Plus(const MCSubtargetInfo &STI);
bool isGFX9Plus(const MCSubtargetInfo &STI);
bool isNotGFX9Plus(const MCSubtargetInfo &STI);
bool isGFX10(const MCSubtargetInfo &STI);
bool isGFX10_GFX11(const MCSubtargetInfo &STI);
bool isGFX10Plus(const MCSubtargetInfo &STI);
bool isNotGFX10Plus(const MCSubtargetInfo &STI);
bool isGFX10Before1030(const MCSubtargetInfo &STI);
bool isGFX11(const MCSubtargetInfo &STI);
bool isGFX11Plus(const MCSubtargetInfo &STI);
bool isGFX12(const MCSubtargetInfo &STI);
bool isGFX12Plus(const MCSubtargetInfo &STI);
bool isGFX1250(const MCSubtargetInfo &STI);
bool isNotGFX12Plus(const MCSubtargetInfo &STI);
bool isNotGFX11Plus(const MCSubtargetInfo &STI);
bool isGCN3Encoding(const MCSubtargetInfo &STI);
bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
bool isGFX10_3_GFX11(const MCSubtargetInfo &STI);
bool isGFX90A(const MCSubtargetInfo &STI);
bool isGFX940(const MCSubtargetInfo &STI);
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
bool hasMAIInsts(const MCSubtargetInfo &STI);
bool hasVOPD(const MCSubtargetInfo &STI);
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI);
int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
unsigned hasKernargPreload(const MCSubtargetInfo &STI);
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST);

/// \returns true if \p Reg is a scalar register (SGPR).
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI);

/// \returns true if \p Reg occupies the high 16 bits of a 32-bit register.
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI);

/// If \p Reg is a pseudo reg, return the correct hardware register given
/// \p STI, otherwise return \p Reg.
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI);

/// Convert hardware register \p Reg to a pseudo register.
LLVM_READNONE
MCRegister mc2PseudoReg(MCRegister Reg);

LLVM_READNONE
bool isInlineValue(unsigned Reg);

/// Is this an AMDGPU specific source operand? These include registers,
/// inline constants, literals and mandatory literals (KImm).
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Is this a KImm operand?
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Is this a floating-point operand?
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Does this operand support only inlinable literals?
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Get the size in bits of a register from the register class with ID \p RCID.
unsigned getRegBitWidth(unsigned RCID);

/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC);

/// Get the size of a register operand.
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo);

LLVM_READNONE
inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
  switch (OpInfo.OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_KIMM32:
  case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
  case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
    return 4;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return 8;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_BF16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2BF16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return 2;

  default:
    llvm_unreachable("unhandled operand type");
  }
}

LLVM_READNONE
inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
  return getOperandSize(Desc.operands()[OpNo]);
}

/// Is this literal inlinable, and not one of the values intended for
/// floating-point values.
LLVM_READNONE
inline bool isInlinableIntLiteral(int64_t Literal) {
  return Literal >= -16 && Literal <= 64;
}

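// Illustrative sketch (not part of the API): the inlinable integer range is
// [-16, 64], so:
//
//   isInlinableIntLiteral(64);   // true
//   isInlinableIntLiteral(-16);  // true
//   isInlinableIntLiteral(65);   // false
//   isInlinableIntLiteral(-17);  // false
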
/// Is this literal inlinable
LLVM_READNONE
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi);

LLVM_READNONE
std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal);

LLVM_READNONE
std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal);

LLVM_READNONE
std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal);

LLVM_READNONE
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType);

LLVM_READNONE
bool isInlinableLiteralV2I16(uint32_t Literal);

LLVM_READNONE
bool isInlinableLiteralV2BF16(uint32_t Literal);

LLVM_READNONE
bool isInlinableLiteralV2F16(uint32_t Literal);

LLVM_READNONE
bool isValid32BitLiteral(uint64_t Val, bool IsFP64);

bool isArgPassedInSGPR(const Argument *Arg);

bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo);

LLVM_READONLY
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset);

LLVM_READONLY
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset, bool IsBuffer);

/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
/// offsets.
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);

/// \returns The encoding that will be used for \p ByteOffset in the
/// SMRD offset field, or std::nullopt if it won't fit. On GFX9 and GFX10
/// S_LOAD instructions have a signed offset, on other subtargets it is
/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                            int64_t ByteOffset, bool IsBuffer,
                                            bool HasSOffset = false);

/// \return The encoding that can be used for a 32-bit literal offset in an SMRD
/// instruction. This is only useful on CI.
std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                     int64_t ByteOffset);

/// For pre-GFX12 FLAT instructions the offset must be positive;
/// MSB is ignored and forced to zero.
///
/// \return The number of bits available for the signed offset field in flat
/// instructions. Note that some forms of the instruction disallow negative
/// offsets.
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST);

/// \returns true if this offset is small enough to fit in the SMRD
/// offset field. \p ByteOffset should be the offset in bytes and
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

LLVM_READNONE
inline bool isLegalDPALU_DPPControl(unsigned DC) {
  return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
}

/// \returns true if an instruction may have a 64-bit VGPR operand.
bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc);

/// \returns true if an instruction is a DP ALU DPP.
bool isDPALU_DPP(const MCInstrDesc &OpDesc);

/// \returns true if the intrinsic is divergent.
bool isIntrinsicSourceOfDivergence(unsigned IntrID);

/// \returns true if the intrinsic is uniform.
bool isIntrinsicAlwaysUniform(unsigned IntrID);

/// \returns LDS block size in terms of dwords.
/// This is used to calculate the LDS size encoded for PAL metadata 3.0+, which
/// must be defined in terms of bytes.
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST);

} // end namespace AMDGPU

raw_ostream &operator<<(raw_ostream &OS,
                        const AMDGPU::IsaInfo::TargetIDSetting S);

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H