//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11
12#include "AMDGPUSubtarget.h"
13#include "SIDefines.h"
14#include "llvm/IR/CallingConv.h"
15#include "llvm/IR/InstrTypes.h"
16#include "llvm/IR/Module.h"
17#include "llvm/Support/Alignment.h"
18#include <array>
19#include <functional>
20#include <utility>
21
// Forward declaration in the global namespace; only the name is needed here.
struct amd_kernel_code_t;
23
24namespace llvm {
25
// Forward declarations of types that this header only uses by reference or
// pointer.
struct Align;
class Argument;
class Function;
class GlobalValue;
class MCInstrInfo;
class MCRegisterClass;
class MCRegisterInfo;
class MCSubtargetInfo;
class StringRef;
class Triple;
class raw_ostream;
37
38namespace AMDGPU {
39
// Forward declarations of AMDGPU types that are only named in this header.
struct AMDGPUMCKernelCodeT;
struct IsaVersion;
42
/// Generic target versions emitted by this version of LLVM.
///
/// These numbers are incremented every time a codegen breaking change occurs
/// within a generic family.
namespace GenericVersion {
static constexpr unsigned GFX9 = 1;
static constexpr unsigned GFX10_1 = 1;
static constexpr unsigned GFX10_3 = 1;
static constexpr unsigned GFX11 = 1;
static constexpr unsigned GFX12 = 1;
} // namespace GenericVersion
54
/// Named constants for AMDHSA code object versions; each enumerator's value
/// equals the version number it names.
enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 };
56
57/// \returns True if \p STI is AMDHSA.
58bool isHsaAbi(const MCSubtargetInfo &STI);
59
60/// \returns Code object version from the IR module flag.
61unsigned getAMDHSACodeObjectVersion(const Module &M);
62
63/// \returns Code object version from ELF's e_ident[EI_ABIVERSION].
64unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion);
65
66/// \returns The default HSA code object version. This should only be used when
67/// we lack a more accurate CodeObjectVersion value (e.g. from the IR module
68/// flag or a .amdhsa_code_object_version directive)
69unsigned getDefaultAMDHSACodeObjectVersion();
70
71/// \returns ABIVersion suitable for use in ELF's e_ident[EI_ABIVERSION]. \param
72/// CodeObjectVersion is a value returned by getAMDHSACodeObjectVersion().
73uint8_t getELFABIVersion(const Triple &OS, unsigned CodeObjectVersion);
74
/// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr.
/// NOTE(review): \p COV is presumably the code object version — confirm.
unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV);

/// \returns The offset of the hostcall pointer argument from implicitarg_ptr.
unsigned getHostcallImplicitArgPosition(unsigned COV);

/// NOTE(review): undocumented in the original; by analogy with the functions
/// above this presumably returns the offset of the default queue argument
/// from implicitarg_ptr — confirm against the implementation.
unsigned getDefaultQueueImplicitArgPosition(unsigned COV);

/// NOTE(review): undocumented in the original; presumably the offset of the
/// completion action argument from implicitarg_ptr — confirm.
unsigned getCompletionActionImplicitArgPosition(unsigned COV);
83
/// Describes one buffer format entry, as returned by
/// getGcnBufferFormatInfo().
struct GcnBufferFormatInfo {
  unsigned Format;        // Format identifier.
  unsigned BitsPerComp;   // Bits per component.
  unsigned NumComponents; // Number of components.
  unsigned NumFormat;     // Numeric format identifier.
  unsigned DataFormat;    // Data format identifier.
};
91
/// Per-opcode properties of an MAI instruction.
/// NOTE(review): presumably the row type of the generated MAIInstInfoTable
/// and the data behind getMAIIsDGEMM()/getMAIIsGFX940XDL() — confirm.
struct MAIInstInfo {
  uint16_t Opcode;
  bool is_dgemm;
  bool is_gfx940_xdl;
};
97
98#define GET_MIMGBaseOpcode_DECL
99#define GET_MIMGDim_DECL
100#define GET_MIMGEncoding_DECL
101#define GET_MIMGLZMapping_DECL
102#define GET_MIMGMIPMapping_DECL
103#define GET_MIMGBiASMapping_DECL
104#define GET_MAIInstInfoTable_DECL
105#include "AMDGPUGenSearchableTables.inc"
106
107namespace IsaInfo {
108
/// Fixed SGPR counts used by the ISA info helpers.
enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  TRAP_NUM_SGPRS = 16
};
115
/// State of a single target ID feature (used for xnack and sramecc by
/// AMDGPUTargetID).
enum class TargetIDSetting {
  Unsupported,
  Any,
  Off,
  On
};
122
123class AMDGPUTargetID {
124private:
125 const MCSubtargetInfo &STI;
126 TargetIDSetting XnackSetting;
127 TargetIDSetting SramEccSetting;
128
129public:
130 explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
131 ~AMDGPUTargetID() = default;
132
133 /// \return True if the current xnack setting is not "Unsupported".
134 bool isXnackSupported() const {
135 return XnackSetting != TargetIDSetting::Unsupported;
136 }
137
138 /// \returns True if the current xnack setting is "On" or "Any".
139 bool isXnackOnOrAny() const {
140 return XnackSetting == TargetIDSetting::On ||
141 XnackSetting == TargetIDSetting::Any;
142 }
143
144 /// \returns True if current xnack setting is "On" or "Off",
145 /// false otherwise.
146 bool isXnackOnOrOff() const {
147 return getXnackSetting() == TargetIDSetting::On ||
148 getXnackSetting() == TargetIDSetting::Off;
149 }
150
151 /// \returns The current xnack TargetIDSetting, possible options are
152 /// "Unsupported", "Any", "Off", and "On".
153 TargetIDSetting getXnackSetting() const {
154 return XnackSetting;
155 }
156
157 /// Sets xnack setting to \p NewXnackSetting.
158 void setXnackSetting(TargetIDSetting NewXnackSetting) {
159 XnackSetting = NewXnackSetting;
160 }
161
162 /// \return True if the current sramecc setting is not "Unsupported".
163 bool isSramEccSupported() const {
164 return SramEccSetting != TargetIDSetting::Unsupported;
165 }
166
167 /// \returns True if the current sramecc setting is "On" or "Any".
168 bool isSramEccOnOrAny() const {
169 return SramEccSetting == TargetIDSetting::On ||
170 SramEccSetting == TargetIDSetting::Any;
171 }
172
173 /// \returns True if current sramecc setting is "On" or "Off",
174 /// false otherwise.
175 bool isSramEccOnOrOff() const {
176 return getSramEccSetting() == TargetIDSetting::On ||
177 getSramEccSetting() == TargetIDSetting::Off;
178 }
179
180 /// \returns The current sramecc TargetIDSetting, possible options are
181 /// "Unsupported", "Any", "Off", and "On".
182 TargetIDSetting getSramEccSetting() const {
183 return SramEccSetting;
184 }
185
186 /// Sets sramecc setting to \p NewSramEccSetting.
187 void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
188 SramEccSetting = NewSramEccSetting;
189 }
190
191 void setTargetIDFromFeaturesString(StringRef FS);
192 void setTargetIDFromTargetIDStream(StringRef TargetID);
193
194 /// \returns String representation of an object.
195 std::string toString() const;
196};
197
198/// \returns Wavefront size for given subtarget \p STI.
199unsigned getWavefrontSize(const MCSubtargetInfo *STI);
200
201/// \returns Local memory size in bytes for given subtarget \p STI.
202unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
203
204/// \returns Maximum addressable local memory size in bytes for given subtarget
205/// \p STI.
206unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI);
207
208/// \returns Number of execution units per compute unit for given subtarget \p
209/// STI.
210unsigned getEUsPerCU(const MCSubtargetInfo *STI);
211
212/// \returns Maximum number of work groups per compute unit for given subtarget
213/// \p STI and limited by given \p FlatWorkGroupSize.
214unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
215 unsigned FlatWorkGroupSize);
216
217/// \returns Minimum number of waves per execution unit for given subtarget \p
218/// STI.
219unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
220
221/// \returns Maximum number of waves per execution unit for given subtarget \p
222/// STI without any kind of limitation.
223unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
224
225/// \returns Number of waves per execution unit required to support the given \p
226/// FlatWorkGroupSize.
227unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
228 unsigned FlatWorkGroupSize);
229
230/// \returns Minimum flat work group size for given subtarget \p STI.
231unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
232
233/// \returns Maximum flat work group size for given subtarget \p STI.
234unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
235
236/// \returns Number of waves per work group for given subtarget \p STI and
237/// \p FlatWorkGroupSize.
238unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
239 unsigned FlatWorkGroupSize);
240
241/// \returns SGPR allocation granularity for given subtarget \p STI.
242unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
243
244/// \returns SGPR encoding granularity for given subtarget \p STI.
245unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
246
247/// \returns Total number of SGPRs for given subtarget \p STI.
248unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
249
250/// \returns Addressable number of SGPRs for given subtarget \p STI.
251unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
252
253/// \returns Minimum number of SGPRs that meets the given number of waves per
254/// execution unit requirement for given subtarget \p STI.
255unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
256
257/// \returns Maximum number of SGPRs that meets the given number of waves per
258/// execution unit requirement for given subtarget \p STI.
259unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
260 bool Addressable);
261
262/// \returns Number of extra SGPRs implicitly required by given subtarget \p
263/// STI when the given special registers are used.
264unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
265 bool FlatScrUsed, bool XNACKUsed);
266
267/// \returns Number of extra SGPRs implicitly required by given subtarget \p
268/// STI when the given special registers are used. XNACK is inferred from
269/// \p STI.
270unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
271 bool FlatScrUsed);
272
273/// \returns Number of SGPR blocks needed for given subtarget \p STI when
274/// \p NumSGPRs are used. \p NumSGPRs should already include any special
275/// register counts.
276unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
277
278/// \returns VGPR allocation granularity for given subtarget \p STI.
279///
280/// For subtargets which support it, \p EnableWavefrontSize32 should match
281/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
282unsigned
283getVGPRAllocGranule(const MCSubtargetInfo *STI,
284 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
285
286/// \returns VGPR encoding granularity for given subtarget \p STI.
287///
288/// For subtargets which support it, \p EnableWavefrontSize32 should match
289/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
290unsigned getVGPREncodingGranule(
291 const MCSubtargetInfo *STI,
292 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
293
294/// \returns Total number of VGPRs for given subtarget \p STI.
295unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
296
297/// \returns Addressable number of architectural VGPRs for a given subtarget \p
298/// STI.
299unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI);
300
301/// \returns Addressable number of VGPRs for given subtarget \p STI.
302unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
303
304/// \returns Minimum number of VGPRs that meets given number of waves per
305/// execution unit requirement for given subtarget \p STI.
306unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
307
308/// \returns Maximum number of VGPRs that meets given number of waves per
309/// execution unit requirement for given subtarget \p STI.
310unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
311
312/// \returns Number of waves reachable for a given \p NumVGPRs usage for given
313/// subtarget \p STI.
314unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
315 unsigned NumVGPRs);
316
317/// \returns Number of waves reachable for a given \p NumVGPRs usage, \p Granule
318/// size, \p MaxWaves possible, and \p TotalNumVGPRs available.
319unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
320 unsigned MaxWaves,
321 unsigned TotalNumVGPRs);
322
323/// \returns Occupancy for a given \p SGPRs usage, \p MaxWaves possible, and \p
324/// Gen.
325unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
326 AMDGPUSubtarget::Generation Gen);
327
328/// \returns Number of VGPR blocks needed for given subtarget \p STI when
329/// \p NumVGPRs are used. We actually return the number of blocks -1, since
330/// that's what we encode.
331///
332/// For subtargets which support it, \p EnableWavefrontSize32 should match the
333/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
334unsigned getEncodedNumVGPRBlocks(
335 const MCSubtargetInfo *STI, unsigned NumVGPRs,
336 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
337
338/// \returns Number of VGPR blocks that need to be allocated for the given
339/// subtarget \p STI when \p NumVGPRs are used.
340unsigned getAllocatedNumVGPRBlocks(
341 const MCSubtargetInfo *STI, unsigned NumVGPRs,
342 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
343
344} // end namespace IsaInfo
345
// Represents a field in an encoded value.
//
// HighBit and LowBit are the inclusive bit positions of the field and D is
// its default value. The field stores its value un-shifted: encode() and
// decode() work on the raw field value, and positioning by Offset (plus
// masking by Width on decode) is left to EncodingFields.
template <unsigned HighBit, unsigned LowBit, unsigned D = 0>
struct EncodingField {
  static_assert(HighBit >= LowBit, "Invalid bit range!");
  static constexpr unsigned Offset = LowBit;
  static constexpr unsigned Width = HighBit - LowBit + 1;

  using ValueType = unsigned;
  static constexpr ValueType Default = D;

  ValueType Value;

  // Intentionally implicit so plain integers can be passed where a field is
  // expected (e.g. to EncodingFields::encode).
  constexpr EncodingField(ValueType Value) : Value(Value) {}

  // Returns the un-shifted field value widened to 64 bits.
  constexpr uint64_t encode() const { return Value; }

  // Converts an already shifted and masked field value back to ValueType.
  static constexpr ValueType decode(uint64_t Encoded) {
    return static_cast<ValueType>(Encoded);
  }
};
362
363// Represents a single bit in an encoded value.
364template <unsigned Bit, unsigned D = 0>
365using EncodingBit = EncodingField<Bit, Bit, D>;
366
// A helper for encoding and decoding multiple fields.
//
// Each type in Fields must provide encode(), decode(), Offset, Width and
// ValueType, as EncodingField does.
template <typename... Fields> struct EncodingFields {
  // ORs together every field value shifted to its Offset. The fold is seeded
  // with 0 so that an empty field list is valid and encodes to 0.
  static constexpr uint64_t encode(Fields... Values) {
    return (uint64_t{0} | ... | (Values.encode() << Values.Offset));
  }

  // Extracts every field from Encoded: shift down by the field's Offset, mask
  // to its Width, then let the field convert the raw bits.
  static std::tuple<typename Fields::ValueType...> decode(uint64_t Encoded) {
    return {Fields::decode((Encoded >> Fields::Offset) &
                           maxUIntN(Fields::Width))...};
  }
};
378
379LLVM_READONLY
380int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
381
382LLVM_READONLY
383inline bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx) {
384 return getNamedOperandIdx(Opcode, NamedIdx) != -1;
385}
386
387LLVM_READONLY
388int getSOPPWithRelaxation(uint16_t Opcode);
389
390struct MIMGBaseOpcodeInfo {
391 MIMGBaseOpcode BaseOpcode;
392 bool Store;
393 bool Atomic;
394 bool AtomicX2;
395 bool Sampler;
396 bool Gather4;
397
398 uint8_t NumExtraArgs;
399 bool Gradients;
400 bool G16;
401 bool Coordinates;
402 bool LodOrClampOrMip;
403 bool HasD16;
404 bool MSAA;
405 bool BVH;
406 bool A16;
407 bool NoReturn;
408};
409
410LLVM_READONLY
411const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc);
412
413LLVM_READONLY
414const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
415
416struct MIMGDimInfo {
417 MIMGDim Dim;
418 uint8_t NumCoords;
419 uint8_t NumGradients;
420 bool MSAA;
421 bool DA;
422 uint8_t Encoding;
423 const char *AsmSuffix;
424};
425
426LLVM_READONLY
427const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
428
429LLVM_READONLY
430const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
431
432LLVM_READONLY
433const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
434
435struct MIMGLZMappingInfo {
436 MIMGBaseOpcode L;
437 MIMGBaseOpcode LZ;
438};
439
440struct MIMGMIPMappingInfo {
441 MIMGBaseOpcode MIP;
442 MIMGBaseOpcode NONMIP;
443};
444
445struct MIMGBiasMappingInfo {
446 MIMGBaseOpcode Bias;
447 MIMGBaseOpcode NoBias;
448};
449
450struct MIMGOffsetMappingInfo {
451 MIMGBaseOpcode Offset;
452 MIMGBaseOpcode NoOffset;
453};
454
455struct MIMGG16MappingInfo {
456 MIMGBaseOpcode G;
457 MIMGBaseOpcode G16;
458};
459
460LLVM_READONLY
461const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
462
/// Pairs the 2-address and 3-address opcodes of a WMMA instruction
/// (presumably the data behind mapWMMA2AddrTo3AddrOpcode() and
/// mapWMMA3AddrTo2AddrOpcode() — confirm).
struct WMMAOpcodeMappingInfo {
  unsigned Opcode2Addr;
  unsigned Opcode3Addr;
};
467
468LLVM_READONLY
469const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
470
471LLVM_READONLY
472const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);
473
474LLVM_READONLY
475const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset);
476
477LLVM_READONLY
478const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);
479
480LLVM_READONLY
481int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
482 unsigned VDataDwords, unsigned VAddrDwords);
483
484LLVM_READONLY
485int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
486
487LLVM_READONLY
488unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
489 const MIMGDimInfo *Dim, bool IsA16,
490 bool IsG16Supported);
491
/// Per-opcode MIMG instruction information, as returned by getMIMGInfo().
struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
  uint8_t VAddrOperands;
};
500
501LLVM_READONLY
502const MIMGInfo *getMIMGInfo(unsigned Opc);
503
504LLVM_READONLY
505int getMTBUFBaseOpcode(unsigned Opc);
506
507LLVM_READONLY
508int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
509
510LLVM_READONLY
511int getMTBUFElements(unsigned Opc);
512
513LLVM_READONLY
514bool getMTBUFHasVAddr(unsigned Opc);
515
516LLVM_READONLY
517bool getMTBUFHasSrsrc(unsigned Opc);
518
519LLVM_READONLY
520bool getMTBUFHasSoffset(unsigned Opc);
521
522LLVM_READONLY
523int getMUBUFBaseOpcode(unsigned Opc);
524
525LLVM_READONLY
526int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);
527
528LLVM_READONLY
529int getMUBUFElements(unsigned Opc);
530
531LLVM_READONLY
532bool getMUBUFHasVAddr(unsigned Opc);
533
534LLVM_READONLY
535bool getMUBUFHasSrsrc(unsigned Opc);
536
537LLVM_READONLY
538bool getMUBUFHasSoffset(unsigned Opc);
539
540LLVM_READONLY
541bool getMUBUFIsBufferInv(unsigned Opc);
542
543LLVM_READONLY
544bool getMUBUFTfe(unsigned Opc);
545
546LLVM_READONLY
547bool getSMEMIsBuffer(unsigned Opc);
548
549LLVM_READONLY
550bool getVOP1IsSingle(unsigned Opc);
551
552LLVM_READONLY
553bool getVOP2IsSingle(unsigned Opc);
554
555LLVM_READONLY
556bool getVOP3IsSingle(unsigned Opc);
557
558LLVM_READONLY
559bool isVOPC64DPP(unsigned Opc);
560
561LLVM_READONLY
562bool isVOPCAsmOnly(unsigned Opc);
563
564/// Returns true if MAI operation is a double precision GEMM.
565LLVM_READONLY
566bool getMAIIsDGEMM(unsigned Opc);
567
568LLVM_READONLY
569bool getMAIIsGFX940XDL(unsigned Opc);
570
/// Result of getCanBeVOPD(): one flag per VOPD component (X and Y).
struct CanBeVOPD {
  bool X;
  bool Y;
};
575
576/// \returns SIEncodingFamily used for VOPD encoding on a \p ST.
577LLVM_READONLY
578unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST);
579
580LLVM_READONLY
581CanBeVOPD getCanBeVOPD(unsigned Opc);
582
583LLVM_READONLY
584const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
585 uint8_t NumComponents,
586 uint8_t NumFormat,
587 const MCSubtargetInfo &STI);
588LLVM_READONLY
589const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
590 const MCSubtargetInfo &STI);
591
592LLVM_READONLY
593int getMCOpcode(uint16_t Opcode, unsigned Gen);
594
595LLVM_READONLY
596unsigned getVOPDOpcode(unsigned Opc);
597
598LLVM_READONLY
599int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily);
600
601LLVM_READONLY
602bool isVOPD(unsigned Opc);
603
604LLVM_READNONE
605bool isMAC(unsigned Opc);
606
607LLVM_READNONE
608bool isPermlane16(unsigned Opc);
609
610LLVM_READNONE
611bool isGenericAtomic(unsigned Opc);
612
613LLVM_READNONE
614bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc);
615
616namespace VOPD {
617
// Indices of the operands of a VOPD component, followed by operand counts
// expressed in the same enum.
enum Component : unsigned {
  DST = 0,
  SRC0,
  SRC1,
  SRC2,

  DST_NUM = 1,
  MAX_SRC_NUM = 3,
  MAX_OPR_NUM = DST_NUM + MAX_SRC_NUM
};
628
// LSB mask for VGPR banks per VOPD component operand.
// 4 banks result in a mask 3, setting 2 lower bits.
// NOTE(review): presumably indexed by Component (DST, SRC0, SRC1, SRC2).
constexpr unsigned VOPD_VGPR_BANK_MASKS[] = {1, 3, 3, 1};
632
// Index of a component within a VOPD instruction, the list of all component
// indices, and their count.
enum ComponentIndex : unsigned { X = 0, Y = 1 };
constexpr unsigned COMPONENTS[] = {ComponentIndex::X, ComponentIndex::Y};
constexpr unsigned COMPONENTS_NUM = 2;
636
637// Properties of VOPD components.
638class ComponentProps {
639private:
640 unsigned SrcOperandsNum = 0;
641 unsigned MandatoryLiteralIdx = ~0u;
642 bool HasSrc2Acc = false;
643
644public:
645 ComponentProps() = default;
646 ComponentProps(const MCInstrDesc &OpDesc);
647
648 // Return the total number of src operands this component has.
649 unsigned getCompSrcOperandsNum() const { return SrcOperandsNum; }
650
651 // Return the number of src operands of this component visible to the parser.
652 unsigned getCompParsedSrcOperandsNum() const {
653 return SrcOperandsNum - HasSrc2Acc;
654 }
655
656 // Return true iif this component has a mandatory literal.
657 bool hasMandatoryLiteral() const { return MandatoryLiteralIdx != ~0u; }
658
659 // If this component has a mandatory literal, return component operand
660 // index of this literal (i.e. either Component::SRC1 or Component::SRC2).
661 unsigned getMandatoryLiteralCompOperandIndex() const {
662 assert(hasMandatoryLiteral());
663 return MandatoryLiteralIdx;
664 }
665
666 // Return true iif this component has operand
667 // with component index CompSrcIdx and this operand may be a register.
668 bool hasRegSrcOperand(unsigned CompSrcIdx) const {
669 assert(CompSrcIdx < Component::MAX_SRC_NUM);
670 return SrcOperandsNum > CompSrcIdx && !hasMandatoryLiteralAt(CompSrcIdx);
671 }
672
673 // Return true iif this component has tied src2.
674 bool hasSrc2Acc() const { return HasSrc2Acc; }
675
676private:
677 bool hasMandatoryLiteralAt(unsigned CompSrcIdx) const {
678 assert(CompSrcIdx < Component::MAX_SRC_NUM);
679 return MandatoryLiteralIdx == Component::DST_NUM + CompSrcIdx;
680 }
681};
682
// Kind of instruction a ComponentLayout describes.
enum ComponentKind : unsigned {
  SINGLE = 0,  // A single VOP1 or VOP2 instruction which may be used in VOPD.
  COMPONENT_X, // A VOPD instruction, X component.
  COMPONENT_Y, // A VOPD instruction, Y component.
  MAX = COMPONENT_Y
};
689
690// Interface functions of this class map VOPD component operand indices
691// to indices of operands in MachineInstr/MCInst or parsed operands array.
692//
693// Note that this class operates with 3 kinds of indices:
694// - VOPD component operand indices (Component::DST, Component::SRC0, etc.);
695// - MC operand indices (they refer operands in a MachineInstr/MCInst);
696// - parsed operand indices (they refer operands in parsed operands array).
697//
698// For SINGLE components mapping between these indices is trivial.
699// But things get more complicated for COMPONENT_X and
700// COMPONENT_Y because these components share the same
701// MachineInstr/MCInst and the same parsed operands array.
702// Below is an example of component operand to parsed operand
703// mapping for the following instruction:
704//
705// v_dual_add_f32 v255, v4, v5 :: v_dual_mov_b32 v6, v1
706//
707// PARSED COMPONENT PARSED
708// COMPONENT OPERANDS OPERAND INDEX OPERAND INDEX
709// -------------------------------------------------------------------
710// "v_dual_add_f32" 0
711// v_dual_add_f32 v255 0 (DST) --> 1
712// v4 1 (SRC0) --> 2
713// v5 2 (SRC1) --> 3
714// "::" 4
715// "v_dual_mov_b32" 5
716// v_dual_mov_b32 v6 0 (DST) --> 6
717// v1 1 (SRC0) --> 7
718// -------------------------------------------------------------------
719//
720class ComponentLayout {
721private:
722 // Regular MachineInstr/MCInst operands are ordered as follows:
723 // dst, src0 [, other src operands]
724 // VOPD MachineInstr/MCInst operands are ordered as follows:
725 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
726 // Each ComponentKind has operand indices defined below.
727 static constexpr unsigned MC_DST_IDX[] = {0, 0, 1};
728 static constexpr unsigned FIRST_MC_SRC_IDX[] = {1, 2, 2 /* + OpX.MCSrcNum */};
729
730 // Parsed operands of regular instructions are ordered as follows:
731 // Mnemo dst src0 [vsrc1 ...]
732 // Parsed VOPD operands are ordered as follows:
733 // OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
734 // OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
735 // Each ComponentKind has operand indices defined below.
736 static constexpr unsigned PARSED_DST_IDX[] = {1, 1,
737 4 /* + OpX.ParsedSrcNum */};
738 static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {
739 2, 2, 5 /* + OpX.ParsedSrcNum */};
740
741private:
742 const ComponentKind Kind;
743 const ComponentProps PrevComp;
744
745public:
746 // Create layout for COMPONENT_X or SINGLE component.
747 ComponentLayout(ComponentKind Kind) : Kind(Kind) {
748 assert(Kind == ComponentKind::SINGLE || Kind == ComponentKind::COMPONENT_X);
749 }
750
751 // Create layout for COMPONENT_Y which depends on COMPONENT_X layout.
752 ComponentLayout(const ComponentProps &OpXProps)
753 : Kind(ComponentKind::COMPONENT_Y), PrevComp(OpXProps) {}
754
755public:
756 // Return the index of dst operand in MCInst operands.
757 unsigned getIndexOfDstInMCOperands() const { return MC_DST_IDX[Kind]; }
758
759 // Return the index of the specified src operand in MCInst operands.
760 unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx) const {
761 assert(CompSrcIdx < Component::MAX_SRC_NUM);
762 return FIRST_MC_SRC_IDX[Kind] + getPrevCompSrcNum() + CompSrcIdx;
763 }
764
765 // Return the index of dst operand in the parsed operands array.
766 unsigned getIndexOfDstInParsedOperands() const {
767 return PARSED_DST_IDX[Kind] + getPrevCompParsedSrcNum();
768 }
769
770 // Return the index of the specified src operand in the parsed operands array.
771 unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const {
772 assert(CompSrcIdx < Component::MAX_SRC_NUM);
773 return FIRST_PARSED_SRC_IDX[Kind] + getPrevCompParsedSrcNum() + CompSrcIdx;
774 }
775
776private:
777 unsigned getPrevCompSrcNum() const {
778 return PrevComp.getCompSrcOperandsNum();
779 }
780 unsigned getPrevCompParsedSrcNum() const {
781 return PrevComp.getCompParsedSrcOperandsNum();
782 }
783};
784
785// Layout and properties of VOPD components.
786class ComponentInfo : public ComponentLayout, public ComponentProps {
787public:
788 // Create ComponentInfo for COMPONENT_X or SINGLE component.
789 ComponentInfo(const MCInstrDesc &OpDesc,
790 ComponentKind Kind = ComponentKind::SINGLE)
791 : ComponentLayout(Kind), ComponentProps(OpDesc) {}
792
793 // Create ComponentInfo for COMPONENT_Y which depends on COMPONENT_X layout.
794 ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps)
795 : ComponentLayout(OpXProps), ComponentProps(OpDesc) {}
796
797 // Map component operand index to parsed operand index.
798 // Return 0 if the specified operand does not exist.
799 unsigned getIndexInParsedOperands(unsigned CompOprIdx) const;
800};
801
802// Properties of VOPD instructions.
803class InstInfo {
804private:
805 const ComponentInfo CompInfo[COMPONENTS_NUM];
806
807public:
808 using RegIndices = std::array<unsigned, Component::MAX_OPR_NUM>;
809
810 InstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
811 : CompInfo{OpX, OpY} {}
812
813 InstInfo(const ComponentInfo &OprInfoX, const ComponentInfo &OprInfoY)
814 : CompInfo{OprInfoX, OprInfoY} {}
815
816 const ComponentInfo &operator[](size_t ComponentIdx) const {
817 assert(ComponentIdx < COMPONENTS_NUM);
818 return CompInfo[ComponentIdx];
819 }
820
821 // Check VOPD operands constraints.
822 // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
823 // for the specified component and MC operand. The callback must return 0
824 // if the operand is not a register or not a VGPR.
825 // If \p SkipSrc is set to true then constraints for source operands are not
826 // checked.
827 bool hasInvalidOperand(std::function<unsigned(unsigned, unsigned)> GetRegIdx,
828 bool SkipSrc = false) const {
829 return getInvalidCompOperandIndex(GetRegIdx, SkipSrc).has_value();
830 }
831
832 // Check VOPD operands constraints.
833 // Return the index of an invalid component operand, if any.
834 // If \p SkipSrc is set to true then constraints for source operands are not
835 // checked.
836 std::optional<unsigned> getInvalidCompOperandIndex(
837 std::function<unsigned(unsigned, unsigned)> GetRegIdx,
838 bool SkipSrc = false) const;
839
840private:
841 RegIndices
842 getRegIndices(unsigned ComponentIdx,
843 std::function<unsigned(unsigned, unsigned)> GetRegIdx) const;
844};
845
846} // namespace VOPD
847
848LLVM_READONLY
849std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode);
850
851LLVM_READONLY
852// Get properties of 2 single VOP1/VOP2 instructions
853// used as components to create a VOPD instruction.
854VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY);
855
856LLVM_READONLY
857// Get properties of VOPD X and Y components.
858VOPD::InstInfo
859getVOPDInstInfo(unsigned VOPDOpcode, const MCInstrInfo *InstrInfo);
860
861LLVM_READONLY
862bool isTrue16Inst(unsigned Opc);
863
864LLVM_READONLY
865bool isInvalidSingleUseConsumerInst(unsigned Opc);
866
867LLVM_READONLY
868bool isInvalidSingleUseProducerInst(unsigned Opc);
869
870LLVM_READONLY
871unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);
872
873LLVM_READONLY
874unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);
875
876void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &Header,
877 const MCSubtargetInfo *STI);
878
879bool isGroupSegment(const GlobalValue *GV);
880bool isGlobalSegment(const GlobalValue *GV);
881bool isReadOnlySegment(const GlobalValue *GV);
882
883/// \returns True if constants should be emitted to .text section for given
884/// target triple \p TT, false otherwise.
885bool shouldEmitConstantsToTextSection(const Triple &TT);
886
887/// \returns Integer value requested using \p F's \p Name attribute.
888///
889/// \returns \p Default if attribute is not present.
890///
891/// \returns \p Default and emits error if requested value cannot be converted
892/// to integer.
893int getIntegerAttribute(const Function &F, StringRef Name, int Default);
894
895/// \returns A pair of integer values requested using \p F's \p Name attribute
896/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
897/// is false).
898///
899/// \returns \p Default if attribute is not present.
900///
901/// \returns \p Default and emits error if one of the requested values cannot be
902/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
903/// not present.
904std::pair<unsigned, unsigned>
905getIntegerPairAttribute(const Function &F, StringRef Name,
906 std::pair<unsigned, unsigned> Default,
907 bool OnlyFirstRequired = false);
908
909/// \returns Generate a vector of integer values requested using \p F's \p Name
910/// attribute.
911///
912/// \returns true if exactly Size (>2) number of integers are found in the
913/// attribute.
914///
915/// \returns false if any error occurs.
916SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
917 unsigned Size);
918
/// Represents the counter values to wait for in an s_waitcnt instruction.
///
/// Large values (including the maximum possible integer) can be used to
/// represent "don't care" waits. The has*() queries treat exactly ~0u as
/// "no wait requested" for a counter.
struct Waitcnt {
  unsigned LoadCnt = ~0u;   // Corresponds to Vmcnt prior to gfx12.
  unsigned ExpCnt = ~0u;
  unsigned DsCnt = ~0u;     // Corresponds to LGKMcnt prior to gfx12.
  unsigned StoreCnt = ~0u;  // Corresponds to VScnt on gfx10/gfx11.
  unsigned SampleCnt = ~0u; // gfx12+ only.
  unsigned BvhCnt = ~0u;    // gfx12+ only.
  unsigned KmCnt = ~0u;     // gfx12+ only.

  Waitcnt() = default;

  // Pre-gfx12 constructor. The gfx12+ counters keep their "no wait" defaults.
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : LoadCnt(VmCnt), ExpCnt(ExpCnt), DsCnt(LgkmCnt), StoreCnt(VsCnt) {}

  // gfx12+ constructor.
  Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
          unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt)
      : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt),
        SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt) {}

  /// \returns True if any counter requests a wait.
  bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); }

  /// \returns True if any counter other than StoreCnt requests a wait.
  bool hasWaitExceptStoreCnt() const {
    return LoadCnt != ~0u || ExpCnt != ~0u || DsCnt != ~0u ||
           SampleCnt != ~0u || BvhCnt != ~0u || KmCnt != ~0u;
  }

  /// \returns True if StoreCnt requests a wait.
  bool hasWaitStoreCnt() const { return StoreCnt != ~0u; }

  /// \returns The counter-wise minimum of this and \p Other, i.e. the
  /// strictest wait requested by either.
  Waitcnt combined(const Waitcnt &Other) const {
    // Does the right thing provided self and Other are either both pre-gfx12
    // or both gfx12+.
    return Waitcnt(
        std::min(LoadCnt, Other.LoadCnt), std::min(ExpCnt, Other.ExpCnt),
        std::min(DsCnt, Other.DsCnt), std::min(StoreCnt, Other.StoreCnt),
        std::min(SampleCnt, Other.SampleCnt), std::min(BvhCnt, Other.BvhCnt),
        std::min(KmCnt, Other.KmCnt));
  }
};
963
964// The following methods are only meaningful on targets that support
965// S_WAITCNT.
966
967/// \returns Vmcnt bit mask for given isa \p Version.
968unsigned getVmcntBitMask(const IsaVersion &Version);
969
970/// \returns Expcnt bit mask for given isa \p Version.
971unsigned getExpcntBitMask(const IsaVersion &Version);
972
973/// \returns Lgkmcnt bit mask for given isa \p Version.
974unsigned getLgkmcntBitMask(const IsaVersion &Version);
975
976/// \returns Waitcnt bit mask for given isa \p Version.
977unsigned getWaitcntBitMask(const IsaVersion &Version);
978
979/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
980unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
981
982/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
983unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
984
985/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
986unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
987
988/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
989/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively. Should not be used on gfx12+; the instruction
/// which needs it is deprecated.
992///
993/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
994/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9)
995/// \p Vmcnt = \p Waitcnt[15:14,3:0] (gfx9,10)
996/// \p Vmcnt = \p Waitcnt[15:10] (gfx11)
997/// \p Expcnt = \p Waitcnt[6:4] (pre-gfx11)
998/// \p Expcnt = \p Waitcnt[2:0] (gfx11)
999/// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10)
1000/// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10)
1001/// \p Lgkmcnt = \p Waitcnt[9:4] (gfx11)
1002///
1003void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
1004 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
1005
1006Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
1007
1008/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
1009unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1010 unsigned Vmcnt);
1011
1012/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
1013unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1014 unsigned Expcnt);
1015
1016/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
1017unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1018 unsigned Lgkmcnt);
1019
1020/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version. Should not be used on gfx12+; the instruction which needs
/// it is deprecated.
1023///
1024/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
1025/// Waitcnt[2:0] = \p Expcnt (gfx11+)
1026/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9)
1027/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9,10)
1028/// Waitcnt[6:4] = \p Expcnt (pre-gfx11)
1029/// Waitcnt[9:4] = \p Lgkmcnt (gfx11)
1030/// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10)
1031/// Waitcnt[13:8] = \p Lgkmcnt (gfx10)
1032/// Waitcnt[15:10] = \p Vmcnt (gfx11)
1033/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9,10)
1034///
1035/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
1036/// isa \p Version.
1037///
1038unsigned encodeWaitcnt(const IsaVersion &Version,
1039 unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
1040
1041unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
1042
1043// The following methods are only meaningful on targets that support
1044// S_WAIT_*CNT, introduced with gfx12.
1045
1046/// \returns Loadcnt bit mask for given isa \p Version.
1047/// Returns 0 for versions that do not support LOADcnt
1048unsigned getLoadcntBitMask(const IsaVersion &Version);
1049
1050/// \returns Samplecnt bit mask for given isa \p Version.
1051/// Returns 0 for versions that do not support SAMPLEcnt
1052unsigned getSamplecntBitMask(const IsaVersion &Version);
1053
1054/// \returns Bvhcnt bit mask for given isa \p Version.
1055/// Returns 0 for versions that do not support BVHcnt
1056unsigned getBvhcntBitMask(const IsaVersion &Version);
1057
1058/// \returns Dscnt bit mask for given isa \p Version.
1059/// Returns 0 for versions that do not support DScnt
1060unsigned getDscntBitMask(const IsaVersion &Version);
1061
/// \returns Kmcnt bit mask for given isa \p Version.
1063/// Returns 0 for versions that do not support KMcnt
1064unsigned getKmcntBitMask(const IsaVersion &Version);
1065
/// \returns STOREcnt or VScnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support STOREcnt or VScnt.
1068/// STOREcnt and VScnt are the same counter, the name used
1069/// depends on the ISA version.
1070unsigned getStorecntBitMask(const IsaVersion &Version);
1071
1072// The following are only meaningful on targets that support
1073// S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT.
1074
1075/// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given
1076/// isa \p Version.
1077Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt);
1078
1079/// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given
1080/// isa \p Version.
1081Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt);
1082
1083/// \returns \p Loadcnt and \p Dscnt components of \p Decoded encoded as an
1084/// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa
1085/// \p Version.
1086unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
1087
1088/// \returns \p Storecnt and \p Dscnt components of \p Decoded encoded as an
1089/// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa
1090/// \p Version.
1091unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
1092
1093namespace Hwreg {
1094
1095using HwregId = EncodingField<5, 0>;
1096using HwregOffset = EncodingField<10, 6>;
1097
1098struct HwregSize : EncodingField<15, 11, 32> {
1099 using EncodingField::EncodingField;
1100 constexpr uint64_t encode() const { return Value - 1; }
1101 static ValueType decode(uint64_t Encoded) { return Encoded + 1; }
1102};
1103
1104using HwregEncoding = EncodingFields<HwregId, HwregOffset, HwregSize>;
1105
1106} // namespace Hwreg
1107
1108namespace DepCtr {
1109
1110int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI);
1111int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
1112 const MCSubtargetInfo &STI);
1113bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
1114 const MCSubtargetInfo &STI);
1115bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
1116 bool &IsDefault, const MCSubtargetInfo &STI);
1117
1118/// \returns Decoded VaVdst from given immediate \p Encoded.
1119unsigned decodeFieldVaVdst(unsigned Encoded);
1120
1121/// \returns Decoded VmVsrc from given immediate \p Encoded.
1122unsigned decodeFieldVmVsrc(unsigned Encoded);
1123
1124/// \returns Decoded SaSdst from given immediate \p Encoded.
1125unsigned decodeFieldSaSdst(unsigned Encoded);
1126
1127/// \returns \p VmVsrc as an encoded Depctr immediate.
1128unsigned encodeFieldVmVsrc(unsigned VmVsrc);
1129
1130/// \returns \p Encoded combined with encoded \p VmVsrc.
1131unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc);
1132
1133/// \returns \p VaVdst as an encoded Depctr immediate.
1134unsigned encodeFieldVaVdst(unsigned VaVdst);
1135
1136/// \returns \p Encoded combined with encoded \p VaVdst.
1137unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst);
1138
1139/// \returns \p SaSdst as an encoded Depctr immediate.
1140unsigned encodeFieldSaSdst(unsigned SaSdst);
1141
1142/// \returns \p Encoded combined with encoded \p SaSdst.
1143unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst);
1144
1145} // namespace DepCtr
1146
1147namespace Exp {
1148
1149bool getTgtName(unsigned Id, StringRef &Name, int &Index);
1150
1151LLVM_READONLY
1152unsigned getTgtId(const StringRef Name);
1153
1154LLVM_READNONE
1155bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);
1156
1157} // namespace Exp
1158
1159namespace MTBUFFormat {
1160
1161LLVM_READNONE
1162int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);
1163
1164void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);
1165
1166int64_t getDfmt(const StringRef Name);
1167
1168StringRef getDfmtName(unsigned Id);
1169
1170int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);
1171
1172StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);
1173
1174bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);
1175
1176bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);
1177
1178int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI);
1179
1180StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI);
1181
1182bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI);
1183
1184int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
1185 const MCSubtargetInfo &STI);
1186
1187bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);
1188
1189unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);
1190
1191} // namespace MTBUFFormat
1192
1193namespace SendMsg {
1194
1195LLVM_READNONE
1196bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI);
1197
1198LLVM_READNONE
1199bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
1200 bool Strict = true);
1201
1202LLVM_READNONE
1203bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
1204 const MCSubtargetInfo &STI, bool Strict = true);
1205
1206LLVM_READNONE
1207bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI);
1208
1209LLVM_READNONE
1210bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);
1211
1212void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
1213 uint16_t &StreamId, const MCSubtargetInfo &STI);
1214
1215LLVM_READNONE
1216uint64_t encodeMsg(uint64_t MsgId,
1217 uint64_t OpId,
1218 uint64_t StreamId);
1219
1220} // namespace SendMsg
1221
1222
1223unsigned getInitialPSInputAddr(const Function &F);
1224
1225bool getHasColorExport(const Function &F);
1226
1227bool getHasDepthExport(const Function &F);
1228
1229LLVM_READNONE
1230bool isShader(CallingConv::ID CC);
1231
1232LLVM_READNONE
1233bool isGraphics(CallingConv::ID CC);
1234
1235LLVM_READNONE
1236bool isCompute(CallingConv::ID CC);
1237
1238LLVM_READNONE
1239bool isEntryFunctionCC(CallingConv::ID CC);
1240
1241// These functions are considered entrypoints into the current module, i.e. they
1242// are allowed to be called from outside the current module. This is different
1243// from isEntryFunctionCC, which is only true for functions that are entered by
1244// the hardware. Module entry points include all entry functions but also
1245// include functions that can be called from other functions inside or outside
1246// the current module. Module entry functions are allowed to allocate LDS.
1247LLVM_READNONE
1248bool isModuleEntryFunctionCC(CallingConv::ID CC);
1249
1250LLVM_READNONE
1251bool isChainCC(CallingConv::ID CC);
1252
1253bool isKernelCC(const Function *Func);
1254
1255// FIXME: Remove this when calling conventions cleaned up
1256LLVM_READNONE
1257inline bool isKernel(CallingConv::ID CC) {
1258 switch (CC) {
1259 case CallingConv::AMDGPU_KERNEL:
1260 case CallingConv::SPIR_KERNEL:
1261 return true;
1262 default:
1263 return false;
1264 }
1265}
1266
1267bool hasXNACK(const MCSubtargetInfo &STI);
1268bool hasSRAMECC(const MCSubtargetInfo &STI);
1269bool hasMIMG_R128(const MCSubtargetInfo &STI);
1270bool hasA16(const MCSubtargetInfo &STI);
1271bool hasG16(const MCSubtargetInfo &STI);
1272bool hasPackedD16(const MCSubtargetInfo &STI);
1273bool hasGDS(const MCSubtargetInfo &STI);
1274unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler = false);
1275unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI);
1276
1277bool isSI(const MCSubtargetInfo &STI);
1278bool isCI(const MCSubtargetInfo &STI);
1279bool isVI(const MCSubtargetInfo &STI);
1280bool isGFX9(const MCSubtargetInfo &STI);
1281bool isGFX9_GFX10(const MCSubtargetInfo &STI);
1282bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI);
1283bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI);
1284bool isGFX8Plus(const MCSubtargetInfo &STI);
1285bool isGFX9Plus(const MCSubtargetInfo &STI);
1286bool isNotGFX9Plus(const MCSubtargetInfo &STI);
1287bool isGFX10(const MCSubtargetInfo &STI);
1288bool isGFX10_GFX11(const MCSubtargetInfo &STI);
1289bool isGFX10Plus(const MCSubtargetInfo &STI);
1290bool isNotGFX10Plus(const MCSubtargetInfo &STI);
1291bool isGFX10Before1030(const MCSubtargetInfo &STI);
1292bool isGFX11(const MCSubtargetInfo &STI);
1293bool isGFX11Plus(const MCSubtargetInfo &STI);
1294bool isGFX12(const MCSubtargetInfo &STI);
1295bool isGFX12Plus(const MCSubtargetInfo &STI);
1296bool isNotGFX12Plus(const MCSubtargetInfo &STI);
1297bool isNotGFX11Plus(const MCSubtargetInfo &STI);
1298bool isGCN3Encoding(const MCSubtargetInfo &STI);
1299bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
1300bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
1301bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
1302bool isGFX10_3_GFX11(const MCSubtargetInfo &STI);
1303bool isGFX90A(const MCSubtargetInfo &STI);
1304bool isGFX940(const MCSubtargetInfo &STI);
1305bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
1306bool hasMAIInsts(const MCSubtargetInfo &STI);
1307bool hasVOPD(const MCSubtargetInfo &STI);
1308bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI);
1309int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
1310unsigned hasKernargPreload(const MCSubtargetInfo &STI);
1311bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST);
1312
/// Is \p Reg a scalar register?
1314bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
1315
/// \returns true if \p Reg occupies the high 16-bits of a 32-bit register.
1317/// The bit indicating isHi is the LSB of the encoding.
1318bool isHi(unsigned Reg, const MCRegisterInfo &MRI);
1319
1320/// If \p Reg is a pseudo reg, return the correct hardware register given
1321/// \p STI otherwise return \p Reg.
1322unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
1323
1324/// Convert hardware register \p Reg to a pseudo register
1325LLVM_READNONE
1326unsigned mc2PseudoReg(unsigned Reg);
1327
1328LLVM_READNONE
1329bool isInlineValue(unsigned Reg);
1330
1331/// Is this an AMDGPU specific source operand? These include registers,
1332/// inline constants, literals and mandatory literals (KImm).
1333bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
1334
1335/// Is this a KImm operand?
1336bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo);
1337
/// Is this a floating-point operand?
1339bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
1340
1341/// Does this operand support only inlinable literals?
1342bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
1343
/// Get the size in bits of a register from the register class identified by
/// \p RCID.
1345unsigned getRegBitWidth(unsigned RCID);
1346
1347/// Get the size in bits of a register from the register class \p RC.
1348unsigned getRegBitWidth(const MCRegisterClass &RC);
1349
1350/// Get size of register operand
1351unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
1352 unsigned OpNo);
1353
1354LLVM_READNONE
1355inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
1356 switch (OpInfo.OperandType) {
1357 case AMDGPU::OPERAND_REG_IMM_INT32:
1358 case AMDGPU::OPERAND_REG_IMM_FP32:
1359 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1360 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1361 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1362 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1363 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1364 case AMDGPU::OPERAND_REG_IMM_V2INT32:
1365 case AMDGPU::OPERAND_REG_IMM_V2FP32:
1366 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1367 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1368 case AMDGPU::OPERAND_KIMM32:
1369 case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
1370 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
1371 return 4;
1372
1373 case AMDGPU::OPERAND_REG_IMM_INT64:
1374 case AMDGPU::OPERAND_REG_IMM_FP64:
1375 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1376 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1377 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1378 return 8;
1379
1380 case AMDGPU::OPERAND_REG_IMM_INT16:
1381 case AMDGPU::OPERAND_REG_IMM_BF16:
1382 case AMDGPU::OPERAND_REG_IMM_FP16:
1383 case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
1384 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1385 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1386 case AMDGPU::OPERAND_REG_INLINE_C_BF16:
1387 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1388 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1389 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
1390 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1391 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1392 case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
1393 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1394 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1395 case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
1396 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1397 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1398 case AMDGPU::OPERAND_REG_IMM_V2BF16:
1399 case AMDGPU::OPERAND_REG_IMM_V2FP16:
1400 return 2;
1401
1402 default:
1403 llvm_unreachable("unhandled operand type");
1404 }
1405}
1406
1407LLVM_READNONE
1408inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
1409 return getOperandSize(OpInfo: Desc.operands()[OpNo]);
1410}
1411
1412/// Is this literal inlinable, and not one of the values intended for floating
1413/// point values.
1414LLVM_READNONE
inline bool isInlinableIntLiteral(int64_t Literal) {
  // Accepted values form the closed range [-16, 64].
  constexpr int64_t MinInlinable = -16;
  constexpr int64_t MaxInlinable = 64;
  if (Literal < MinInlinable)
    return false;
  return Literal <= MaxInlinable;
}
1418
1419/// Is this literal inlinable
1420LLVM_READNONE
1421bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
1422
1423LLVM_READNONE
1424bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
1425
1426LLVM_READNONE
1427bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
1428
1429LLVM_READNONE
1430bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);
1431
1432LLVM_READNONE
1433bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
1434
1435LLVM_READNONE
1436bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi);
1437
1438LLVM_READNONE
1439std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal);
1440
1441LLVM_READNONE
1442std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal);
1443
1444LLVM_READNONE
1445std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal);
1446
1447LLVM_READNONE
1448bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType);
1449
1450LLVM_READNONE
1451bool isInlinableLiteralV2I16(uint32_t Literal);
1452
1453LLVM_READNONE
1454bool isInlinableLiteralV2BF16(uint32_t Literal);
1455
1456LLVM_READNONE
1457bool isInlinableLiteralV2F16(uint32_t Literal);
1458
1459LLVM_READNONE
1460bool isValid32BitLiteral(uint64_t Val, bool IsFP64);
1461
1462bool isArgPassedInSGPR(const Argument *Arg);
1463
1464bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo);
1465
1466LLVM_READONLY
1467bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
1468 int64_t EncodedOffset);
1469
1470LLVM_READONLY
1471bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
1472 int64_t EncodedOffset,
1473 bool IsBuffer);
1474
1475/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
1476/// offsets.
1477uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);
1478
1479/// \returns The encoding that will be used for \p ByteOffset in the
1480/// SMRD offset field, or std::nullopt if it won't fit. On GFX9 and GFX10
1481/// S_LOAD instructions have a signed offset, on other subtargets it is
1482/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
1483std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
1484 int64_t ByteOffset, bool IsBuffer,
1485 bool HasSOffset = false);
1486
/// \return The encoding that can be used for a 32-bit literal offset in an SMRD
/// instruction. This is only useful on CI.
1489std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
1490 int64_t ByteOffset);
1491
1492/// For pre-GFX12 FLAT instructions the offset must be positive;
1493/// MSB is ignored and forced to zero.
1494///
1495/// \return The number of bits available for the signed offset field in flat
1496/// instructions. Note that some forms of the instruction disallow negative
1497/// offsets.
1498unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST);
1499
1500/// \returns true if this offset is small enough to fit in the SMRD
1501/// offset field. \p ByteOffset should be the offset in bytes and
1502/// not the encoded offset.
1503bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
1504
1505LLVM_READNONE
1506inline bool isLegalDPALU_DPPControl(unsigned DC) {
1507 return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
1508}
1509
1510/// \returns true if an instruction may have a 64-bit VGPR operand.
1511bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc);
1512
1513/// \returns true if an instruction is a DP ALU DPP.
1514bool isDPALU_DPP(const MCInstrDesc &OpDesc);
1515
1516/// \returns true if the intrinsic is divergent
1517bool isIntrinsicSourceOfDivergence(unsigned IntrID);
1518
1519/// \returns true if the intrinsic is uniform
1520bool isIntrinsicAlwaysUniform(unsigned IntrID);
1521
/// \returns lds block size in terms of dwords.
/// This is used to calculate the lds size encoded for PAL metadata 3.0+ which
/// must be defined in terms of bytes.
1525unsigned getLdsDwGranularity(const MCSubtargetInfo &ST);
1526
1527} // end namespace AMDGPU
1528
1529raw_ostream &operator<<(raw_ostream &OS,
1530 const AMDGPU::IsaInfo::TargetIDSetting S);
1531
1532} // end namespace llvm
1533
1534#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
1535