//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringTable.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Alignment.h"
#include "llvm/TargetParser/AMDGPUTargetParser.h"
#include <array>
#include <cassert>
#include <cstdint>
#include <functional>
#include <optional>
#include <tuple>
#include <utility>
24
25// Pull in OpName enum definition and getNamedOperandIdx() declaration.
26#define GET_INSTRINFO_OPERAND_ENUM
27#include "AMDGPUGenInstrInfo.inc"
28
// Forward declaration only; this header never needs the complete type.
struct amd_kernel_code_t;
30
31namespace llvm {
32
// Forward declarations of LLVM types that the declarations below only use by
// reference or pointer.
struct Align;
class Argument;
class Function;
class GlobalValue;
class MachineInstr;
class MCInstrInfo;
class MCRegisterClass;
class MCRegisterInfo;
class MCSubtargetInfo;
class MDNode;
class StringRef;
class Triple;
class raw_ostream;
46
47namespace AMDGPU {
48
// Forward declarations of AMDGPU types used by reference further down.
struct AMDGPUMCKernelCodeT;
struct IsaVersion;
51
/// Generic target versions emitted by this version of LLVM.
///
/// These numbers are incremented every time a codegen breaking change occurs
/// within a generic family.
namespace GenericVersion {
static constexpr unsigned GFX9 = 1;
static constexpr unsigned GFX9_4 = 1;
static constexpr unsigned GFX10_1 = 1;
static constexpr unsigned GFX10_3 = 1;
static constexpr unsigned GFX11 = 1;
static constexpr unsigned GFX11_7 = 1;
static constexpr unsigned GFX12 = 1;
static constexpr unsigned GFX12_5 = 1;
static constexpr unsigned GFX13 = 1;
} // namespace GenericVersion
67
/// AMDHSA code object versions; each enumerator's value is the version number.
enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 };

/// Floating-point format class, as returned by getFPDstSelType().
enum class FPType { None, FP4, FP8 };
71
72/// \returns True if \p STI is AMDHSA.
73bool isHsaAbi(const MCSubtargetInfo &STI);
74
75/// \returns Code object version from the IR module flag.
76unsigned getAMDHSACodeObjectVersion(const Module &M);
77
78/// \returns Code object version from ELF's e_ident[EI_ABIVERSION].
79unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion);
80
81/// \returns The default HSA code object version. This should only be used when
82/// we lack a more accurate CodeObjectVersion value (e.g. from the IR module
83/// flag or a .amdhsa_code_object_version directive)
84unsigned getDefaultAMDHSACodeObjectVersion();
85
86/// \returns ABIVersion suitable for use in ELF's e_ident[EI_ABIVERSION]. \param
87/// CodeObjectVersion is a value returned by getAMDHSACodeObjectVersion().
88uint8_t getELFABIVersion(const Triple &OS, unsigned CodeObjectVersion);
89
90/// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr
91unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV);
92
93/// \returns The offset of the hostcall pointer argument from implicitarg_ptr
94unsigned getHostcallImplicitArgPosition(unsigned COV);
95
96unsigned getDefaultQueueImplicitArgPosition(unsigned COV);
97unsigned getCompletionActionImplicitArgPosition(unsigned COV);
98
/// One buffer-format record, as returned by getGcnBufferFormatInfo().
/// Plain aggregate; the field order is kept exactly as declared.
struct GcnBufferFormatInfo {
  unsigned Format;
  unsigned BitsPerComp;
  unsigned NumComponents;
  unsigned NumFormat;
  unsigned DataFormat;
};
106
/// Per-opcode flags for an MAI instruction. Plain aggregate; the field order
/// is kept exactly as declared.
struct MAIInstInfo {
  uint32_t Opcode;
  bool is_dgemm;
  bool is_gfx940_xdl;
};
112
/// Record relating an F8F6F4 opcode to its F8F8 opcode together with the
/// register counts of source A and source B. Plain aggregate; the field order
/// is kept exactly as declared.
struct MFMA_F8F6F4_Info {
  unsigned Opcode;
  unsigned F8F8Opcode;
  uint8_t NumRegsSrcA;
  uint8_t NumRegsSrcB;
};
119
/// Single-field record holding the opcode of a CvtScaleF32 F32/F16-to-F8/F4
/// instruction.
struct CvtScaleF32_F32F16ToF8F4_Info {
  unsigned Opcode;
};
123
/// Record grouping three related opcodes: T16Op, HiOp and LoOp. Plain
/// aggregate; the field order is kept exactly as declared.
struct True16D16Info {
  unsigned T16Op;
  unsigned HiOp;
  unsigned LoOp;
};
129
/// Per-opcode flags for a WMMA instruction. Plain aggregate; the field order
/// is kept exactly as declared.
struct WMMAInstInfo {
  uint32_t Opcode;
  bool is_wmma_xdl;
  bool HasMatrixScale;
};
135
136#define GET_MIMGBaseOpcode_DECL
137#define GET_MIMGDim_DECL
138#define GET_MIMGEncoding_DECL
139#define GET_MIMGLZMapping_DECL
140#define GET_MIMGMIPMapping_DECL
141#define GET_MIMGBiASMapping_DECL
142#define GET_MAIInstInfoTable_DECL
143#define GET_isMFMA_F8F6F4Table_DECL
144#define GET_isCvtScaleF32_F32F16ToF8F4Table_DECL
145#define GET_True16D16Table_DECL
146#define GET_WMMAInstInfoTable_DECL
147#include "AMDGPUGenSearchableTables.inc"
148
149using TargetIDSetting = AMDGPU::TargetIDSetting;
150using TargetID = AMDGPU::TargetID;
151
152/// Construct TargetID from MCSubtargetInfo. \p FeatureString is used to
153/// determine explicitly requested xnack/sramecc settings.
154TargetID createAMDGPUTargetID(const MCSubtargetInfo &STI,
155 StringRef FeatureString);
156
157namespace IsaInfo {
158
159enum {
160 // The closed Vulkan driver sets 96, which limits the wave count to 8 but
161 // doesn't spill SGPRs as much as when 80 is set.
162 FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
163 TRAP_NUM_SGPRS = 16
164};
165
166/// \returns Instruction cache line size in bytes for given subtarget \p STI.
167unsigned getInstCacheLineSize(const MCSubtargetInfo &STI);
168
169/// \returns Wavefront size for given subtarget \p STI.
170unsigned getWavefrontSize(const MCSubtargetInfo &STI);
171
172/// \returns Local memory size in bytes for given subtarget \p STI.
173unsigned getLocalMemorySize(const MCSubtargetInfo &STI);
174
175/// \returns Maximum addressable local memory size in bytes for given subtarget
176/// \p STI.
177unsigned getAddressableLocalMemorySize(const MCSubtargetInfo &STI);
178
179/// \returns Number of execution units per compute unit for given subtarget \p
180/// STI.
181unsigned getEUsPerCU(const MCSubtargetInfo &STI);
182
183/// \returns Maximum number of work groups per compute unit for given subtarget
184/// \p STI and limited by given \p FlatWorkGroupSize.
185unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo &STI,
186 unsigned FlatWorkGroupSize);
187
188/// \returns Minimum number of waves per execution unit for given subtarget \p
189/// STI.
190unsigned getMinWavesPerEU(const MCSubtargetInfo &STI);
191
192/// \returns Maximum number of waves per execution unit for given subtarget \p
193/// STI without any kind of limitation.
194unsigned getMaxWavesPerEU(const MCSubtargetInfo &STI);
195
196/// \returns Number of waves per execution unit required to support the given \p
197/// FlatWorkGroupSize.
198unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo &STI,
199 unsigned FlatWorkGroupSize);
200
201/// \returns Minimum flat work group size for given subtarget \p STI.
202unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo &STI);
203
204/// \returns Maximum flat work group size
205constexpr unsigned getMaxFlatWorkGroupSize() {
206 // Some subtargets allow encoding 2048, but this isn't tested or supported.
207 return 1024;
208}
209
210/// \returns Number of waves per work group for given subtarget \p STI and
211/// \p FlatWorkGroupSize.
212unsigned getWavesPerWorkGroup(const MCSubtargetInfo &STI,
213 unsigned FlatWorkGroupSize);
214
215/// \returns SGPR allocation granularity for given subtarget \p STI.
216unsigned getSGPRAllocGranule(const MCSubtargetInfo &STI);
217
218/// \returns SGPR encoding granularity for given subtarget \p STI.
219unsigned getSGPREncodingGranule(const MCSubtargetInfo &STI);
220
221/// \returns Total number of SGPRs for given subtarget \p STI.
222unsigned getTotalNumSGPRs(const MCSubtargetInfo &STI);
223
224/// \returns Addressable number of SGPRs for given subtarget \p STI.
225unsigned getAddressableNumSGPRs(const MCSubtargetInfo &STI);
226
227/// \returns Minimum number of SGPRs that meets the given number of waves per
228/// execution unit requirement for given subtarget \p STI.
229unsigned getMinNumSGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU);
230
231/// \returns Maximum number of SGPRs that meets the given number of waves per
232/// execution unit requirement for given subtarget \p STI.
233unsigned getMaxNumSGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU,
234 bool Addressable);
235
236/// \returns Number of extra SGPRs implicitly required by given subtarget \p
237/// STI when the given special registers are used.
238unsigned getNumExtraSGPRs(const MCSubtargetInfo &STI, bool VCCUsed,
239 bool FlatScrUsed, bool XNACKUsed);
240
241/// \returns Number of extra SGPRs implicitly required by given subtarget \p
242/// STI when the given special registers are used. XNACK is inferred from
243/// \p STI.
244unsigned getNumExtraSGPRs(const MCSubtargetInfo &STI, bool VCCUsed,
245 bool FlatScrUsed);
246
247/// \returns Number of SGPR blocks needed for given subtarget \p STI when
248/// \p NumSGPRs are used. \p NumSGPRs should already include any special
249/// register counts.
250unsigned getNumSGPRBlocks(const MCSubtargetInfo &STI, unsigned NumSGPRs);
251
252/// \returns VGPR allocation granularity for given subtarget \p STI.
253///
254/// For subtargets which support it, \p EnableWavefrontSize32 should match
255/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
256unsigned
257getVGPRAllocGranule(const MCSubtargetInfo &STI, unsigned DynamicVGPRBlockSize,
258 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
259
260/// \returns VGPR encoding granularity for given subtarget \p STI.
261///
262/// For subtargets which support it, \p EnableWavefrontSize32 should match
263/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
264unsigned getVGPREncodingGranule(
265 const MCSubtargetInfo &STI,
266 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
267
268/// For subtargets with a unified VGPR file and mixed ArchVGPR/AGPR usage,
269/// returns the allocation granule for ArchVGPRs.
270unsigned getArchVGPRAllocGranule();
271
272/// \returns Total number of VGPRs for given subtarget \p STI.
273unsigned getTotalNumVGPRs(const MCSubtargetInfo &STI);
274
275/// Maximum number of VGPR blocks that can be allocated in dynamic VGPR mode.
276static constexpr unsigned MaxDynamicVGPRBlocks = 8;
277
278/// \returns Addressable number of architectural VGPRs for a given subtarget \p
279/// STI.
280unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo &STI);
281
282/// \returns Addressable number of VGPRs for given subtarget \p STI.
283unsigned getAddressableNumVGPRs(const MCSubtargetInfo &STI,
284 unsigned DynamicVGPRBlockSize);
285
286/// \returns Minimum number of VGPRs that meets given number of waves per
287/// execution unit requirement for given subtarget \p STI.
288unsigned getMinNumVGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU,
289 unsigned DynamicVGPRBlockSize);
290
291/// \returns Maximum number of VGPRs that meets given number of waves per
292/// execution unit requirement for given subtarget \p STI.
293unsigned getMaxNumVGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU,
294 unsigned DynamicVGPRBlockSize);
295
296/// \returns Number of waves reachable for a given \p NumVGPRs usage for given
297/// subtarget \p STI.
298unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo &STI,
299 unsigned NumVGPRs,
300 unsigned DynamicVGPRBlockSize);
301
302/// \returns Number of waves reachable for a given \p NumVGPRs usage, \p Granule
303/// size, \p MaxWaves possible, and \p TotalNumVGPRs available.
304unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
305 unsigned MaxWaves,
306 unsigned TotalNumVGPRs);
307
308/// \returns Whether allocated SGPRs can reduce occupancy on subtarget \p STI
309/// (true pre-GFX10). One named capability so callers don't test the version.
310bool isSGPROccupancyLimited(const MCSubtargetInfo &STI);
311
312/// \returns SGPR-limited occupancy (waves per EU) for subtarget \p STI: the
313/// inverse of getMaxNumSGPRs(). Unlike getMaxNumSGPRs() the budget is not
314/// clamped to the addressable count, since the allocated count callers pass in
315/// can exceed it.
316unsigned getOccupancyWithNumSGPRs(const MCSubtargetInfo &STI, unsigned SGPRs);
317
318/// \returns SGPR-limited occupancy computed from explicit budget parameters
319/// (\p MaxWaves, \p TotalNumSGPRs, \p Granule, \p TrapReserve). Subtarget-free
320/// core shared by the overload above and the occupancy MCExpr. Callers must
321/// check isSGPROccupancyLimited() first.
322unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
323 unsigned TotalNumSGPRs, unsigned Granule,
324 unsigned TrapReserve);
325
326/// \returns Number of VGPR blocks needed for given subtarget \p STI when
327/// \p NumVGPRs are used. We actually return the number of blocks -1, since
328/// that's what we encode.
329///
330/// For subtargets which support it, \p EnableWavefrontSize32 should match the
331/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
332unsigned getEncodedNumVGPRBlocks(
333 const MCSubtargetInfo &STI, unsigned NumVGPRs,
334 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
335
336/// \returns Number of VGPR blocks that need to be allocated for the given
337/// subtarget \p STI when \p NumVGPRs are used.
338unsigned getAllocatedNumVGPRBlocks(
339 const MCSubtargetInfo &STI, unsigned NumVGPRs,
340 unsigned DynamicVGPRBlockSize,
341 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
342
343} // end namespace IsaInfo
344
// Represents a field in an encoded value.
//
// The field occupies bits [HighBit:LowBit] of the encoded word and D is its
// default value. The object stores the field's unshifted value; shifting to
// Offset and masking to Width are done by EncodingFields.
template <unsigned HighBit, unsigned LowBit, unsigned D = 0>
struct EncodingField {
  static_assert(HighBit >= LowBit, "Invalid bit range!");
  static constexpr unsigned Offset = LowBit;
  static constexpr unsigned Width = HighBit - LowBit + 1;

  using ValueType = unsigned;
  static constexpr ValueType Default = D;

  ValueType Value;
  // Intentionally implicit so that plain integers convert to a field.
  constexpr EncodingField(ValueType Value) : Value(Value) {}

  // Return the unshifted field value widened to the encoding word type.
  constexpr uint64_t encode() const { return Value; }

  // Convert already shifted-and-masked bits back to the field's value type
  // (narrowing from 64 bits to ValueType). constexpr, like encode().
  static constexpr ValueType decode(uint64_t Encoded) { return Encoded; }
};
361
362// Represents a single bit in an encoded value.
363template <unsigned Bit, unsigned D = 0>
364using EncodingBit = EncodingField<Bit, Bit, D>;
365
// A helper for encoding and decoding multiple fields.
//
// Each type in Fields must provide Offset, Width, ValueType, encode() and a
// static decode(), as EncodingField does.
template <typename... Fields> struct EncodingFields {
  // OR together every field value shifted to its Offset. Values are not
  // masked to Width here, so a value wider than its field spills into the
  // neighbouring bits.
  static constexpr uint64_t encode(Fields... Values) {
    return ((Values.encode() << Values.Offset) | ...);
  }

  // Extract every field from Encoded: shift down by Offset, mask to Width
  // bits, then hand the result to the field's own decode().
  static std::tuple<typename Fields::ValueType...> decode(uint64_t Encoded) {
    return {Fields::decode((Encoded >> Fields::Offset) &
                           maxUIntN(Fields::Width))...};
  }
};
377
378LLVM_READONLY
379inline bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx) {
380 return getNamedOperandIdx(Opcode, Name: NamedIdx) != -1;
381}
382
383LLVM_READONLY
384int32_t getSOPPWithRelaxation(uint32_t Opcode);
385
386struct MIMGBaseOpcodeInfo {
387 MIMGBaseOpcode BaseOpcode;
388 bool Store;
389 bool Atomic;
390 bool AtomicX2;
391 bool Sampler;
392 bool Gather4;
393
394 uint8_t NumExtraArgs;
395 bool Gradients;
396 bool G16;
397 bool Coordinates;
398 bool LodOrClampOrMip;
399 bool HasD16;
400 bool MSAA;
401 bool BVH;
402 bool A16;
403 bool NoReturn;
404 bool PointSampleAccel;
405};
406
407LLVM_READONLY
408const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc);
409
410LLVM_READONLY
411const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
412
413struct MIMGDimInfo {
414 MIMGDim Dim;
415 uint8_t NumCoords;
416 uint8_t NumGradients;
417 bool MSAA;
418 bool DA;
419 uint8_t Encoding;
420 StringTable::Offset AsmSuffix;
421};
422
423LLVM_READONLY
424const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
425
426LLVM_READONLY StringRef getMIMGDimInfoStr(StringTable::Offset);
427
428LLVM_READONLY
429const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
430
431LLVM_READONLY
432const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
433
434struct MIMGLZMappingInfo {
435 MIMGBaseOpcode L;
436 MIMGBaseOpcode LZ;
437};
438
439struct MIMGMIPMappingInfo {
440 MIMGBaseOpcode MIP;
441 MIMGBaseOpcode NONMIP;
442};
443
444struct MIMGBiasMappingInfo {
445 MIMGBaseOpcode Bias;
446 MIMGBaseOpcode NoBias;
447};
448
449struct MIMGOffsetMappingInfo {
450 MIMGBaseOpcode Offset;
451 MIMGBaseOpcode NoOffset;
452};
453
454struct MIMGG16MappingInfo {
455 MIMGBaseOpcode G;
456 MIMGBaseOpcode G16;
457};
458
459LLVM_READONLY
460const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
461
/// Pairs the 2-address and 3-address forms of a WMMA opcode. The field order
/// is kept exactly as declared.
struct WMMAOpcodeMappingInfo {
  unsigned Opcode2Addr;
  unsigned Opcode3Addr;
};
466
467LLVM_READONLY
468const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
469
470LLVM_READONLY
471const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);
472
473LLVM_READONLY
474const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset);
475
476LLVM_READONLY
477const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);
478
479LLVM_READONLY
480int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
481 unsigned VDataDwords, unsigned VAddrDwords);
482
483LLVM_READONLY
484int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
485
486LLVM_READONLY
487unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
488 const MIMGDimInfo *Dim, bool IsA16,
489 bool IsG16Supported);
490
/// Properties of one MIMG instruction opcode, as returned by getMIMGInfo().
/// Plain aggregate; the field order is kept exactly as declared.
struct MIMGInfo {
  uint32_t Opcode;
  uint32_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
  uint8_t VAddrOperands;
};
499
500LLVM_READONLY
501const MIMGInfo *getMIMGInfo(unsigned Opc);
502
503LLVM_READONLY
504int getMTBUFBaseOpcode(unsigned Opc);
505
506LLVM_READONLY
507int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
508
509LLVM_READONLY
510int getMTBUFElements(unsigned Opc);
511
512LLVM_READONLY
513bool getMTBUFHasVAddr(unsigned Opc);
514
515LLVM_READONLY
516bool getMTBUFHasSrsrc(unsigned Opc);
517
518LLVM_READONLY
519bool getMTBUFHasSoffset(unsigned Opc);
520
521LLVM_READONLY
522int getMUBUFBaseOpcode(unsigned Opc);
523
524LLVM_READONLY
525int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);
526
527LLVM_READONLY
528int getMUBUFElements(unsigned Opc);
529
530LLVM_READONLY
531bool getMUBUFHasVAddr(unsigned Opc);
532
533LLVM_READONLY
534bool getMUBUFHasSrsrc(unsigned Opc);
535
536LLVM_READONLY
537bool getMUBUFHasSoffset(unsigned Opc);
538
539LLVM_READONLY
540bool getMUBUFIsBufferInv(unsigned Opc);
541
542LLVM_READONLY
543bool getMUBUFTfe(unsigned Opc);
544
545LLVM_READONLY
546bool getSMEMIsBuffer(unsigned Opc);
547
548LLVM_READONLY
549bool getVOP1IsSingle(unsigned Opc);
550
551LLVM_READONLY
552bool getVOP2IsSingle(unsigned Opc);
553
554LLVM_READONLY
555bool getVOP3IsSingle(unsigned Opc);
556
557LLVM_READONLY
558bool isVOPC64DPP(unsigned Opc);
559
560LLVM_READONLY
561bool isVOPCAsmOnly(unsigned Opc);
562
563/// Returns true if MAI operation is a double precision GEMM.
564LLVM_READONLY
565bool getMAIIsDGEMM(unsigned Opc);
566
567LLVM_READONLY
568bool getMAIIsGFX940XDL(unsigned Opc);
569
570LLVM_READONLY
571bool getWMMAIsXDL(unsigned Opc);
572
573LLVM_READONLY
574bool getHasMatrixScale(unsigned Opc);
575
// Get an equivalent BitOp3 for a binary logical \p Opc.
// \returns BitOp3 modifier for the logical operation or zero.
// Used in VOPD3 conversion.
unsigned getBitOp2(unsigned Opc);
580
/// Result of getCanBeVOPD(): one flag each for the VOPD X and Y components.
struct CanBeVOPD {
  bool X;
  bool Y;
};
585
586/// \returns SIEncodingFamily used for VOPD encoding on a \p ST.
587LLVM_READONLY
588unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST);
589
590LLVM_READONLY
591CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3);
592
593LLVM_READNONE
594uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal);
595
596LLVM_READONLY
597const MFMA_F8F6F4_Info *getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ,
598 unsigned BLGP,
599 unsigned F8F8Opcode);
600
601LLVM_READNONE
602uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt);
603
604LLVM_READONLY
605const MFMA_F8F6F4_Info *getWMMA_F8F6F4_WithFormatArgs(unsigned FmtA,
606 unsigned FmtB,
607 unsigned F8F8Opcode);
608
609/// \return true if this combination is listed as valid.
610LLVM_READONLY
611bool isValidWMMAScaleFmtCombination(unsigned AFmt, unsigned AScale,
612 unsigned BFmt, unsigned BScale);
613
614LLVM_READONLY
615const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
616 uint8_t NumComponents,
617 uint8_t NumFormat,
618 const MCSubtargetInfo &STI);
619LLVM_READONLY
620const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
621 const MCSubtargetInfo &STI);
622
623LLVM_READONLY
624int32_t getMCOpcode(uint32_t Opcode, unsigned Gen);
625
626LLVM_READONLY
627unsigned getVOPDOpcode(unsigned Opc, bool VOPD3);
628
629LLVM_READONLY
630int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily,
631 bool VOPD3);
632
633LLVM_READONLY
634bool isVOPD(unsigned Opc);
635
636LLVM_READNONE
637bool isMAC(unsigned Opc);
638
639LLVM_READNONE
640bool isPermlane16(unsigned Opc);
641
642LLVM_READNONE
643bool isGenericAtomic(unsigned Opc);
644
645LLVM_READNONE
646bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc);
647
648namespace VOPD {
649
650enum Component : unsigned {
651 DST = 0,
652 SRC0,
653 SRC1,
654 SRC2,
655
656 DST_NUM = 1,
657 MAX_SRC_NUM = 3,
658 MAX_OPR_NUM = DST_NUM + MAX_SRC_NUM
659};
660
661// LSB mask for VGPR banks per VOPD component operand.
662// 4 banks result in a mask 3, setting 2 lower bits.
663constexpr unsigned VOPD_VGPR_BANK_MASKS[] = {1, 3, 3, 1};
664constexpr unsigned VOPD3_VGPR_BANK_MASKS[] = {1, 3, 3, 3};
665
666enum ComponentIndex : unsigned { X = 0, Y = 1 };
667constexpr unsigned COMPONENTS[] = {ComponentIndex::X, ComponentIndex::Y};
668constexpr unsigned COMPONENTS_NUM = 2;
669
670// Properties of VOPD components.
671class ComponentProps {
672private:
673 unsigned SrcOperandsNum = 0;
674 unsigned MandatoryLiteralIdx = ~0u;
675 bool HasSrc2Acc = false;
676 unsigned NumVOPD3Mods = 0;
677 unsigned Opcode = 0;
678 bool IsVOP3 = false;
679
680public:
681 ComponentProps() = default;
682 ComponentProps(const MCInstrDesc &OpDesc, bool VOP3Layout = false);
683
684 // Return the total number of src operands this component has.
685 unsigned getCompSrcOperandsNum() const { return SrcOperandsNum; }
686
687 // Return the number of src operands of this component visible to the parser.
688 unsigned getCompParsedSrcOperandsNum() const {
689 return SrcOperandsNum - HasSrc2Acc;
690 }
691
692 // Return true iif this component has a mandatory literal.
693 bool hasMandatoryLiteral() const { return MandatoryLiteralIdx != ~0u; }
694
695 // If this component has a mandatory literal, return component operand
696 // index of this literal (i.e. either Component::SRC1 or Component::SRC2).
697 unsigned getMandatoryLiteralCompOperandIndex() const {
698 assert(hasMandatoryLiteral());
699 return MandatoryLiteralIdx;
700 }
701
702 // Return true iif this component has operand
703 // with component index CompSrcIdx and this operand may be a register.
704 bool hasRegSrcOperand(unsigned CompSrcIdx) const {
705 assert(CompSrcIdx < Component::MAX_SRC_NUM);
706 return SrcOperandsNum > CompSrcIdx && !hasMandatoryLiteralAt(CompSrcIdx);
707 }
708
709 // Return true iif this component has tied src2.
710 bool hasSrc2Acc() const { return HasSrc2Acc; }
711
712 // Return a number of source modifiers if instruction is used in VOPD3.
713 unsigned getCompVOPD3ModsNum() const { return NumVOPD3Mods; }
714
715 // Return opcode of the component.
716 unsigned getOpcode() const { return Opcode; }
717
718 // Returns if component opcode is in VOP3 encoding.
719 unsigned isVOP3() const { return IsVOP3; }
720
721 // Return index of BitOp3 operand or -1.
722 int getBitOp3OperandIdx() const;
723
724private:
725 bool hasMandatoryLiteralAt(unsigned CompSrcIdx) const {
726 assert(CompSrcIdx < Component::MAX_SRC_NUM);
727 return MandatoryLiteralIdx == Component::DST_NUM + CompSrcIdx;
728 }
729};
730
731enum ComponentKind : unsigned {
732 SINGLE = 0, // A single VOP1 or VOP2 instruction which may be used in VOPD.
733 COMPONENT_X, // A VOPD instruction, X component.
734 COMPONENT_Y, // A VOPD instruction, Y component.
735 MAX = COMPONENT_Y
736};
737
738// Interface functions of this class map VOPD component operand indices
739// to indices of operands in MachineInstr/MCInst or parsed operands array.
740//
741// Note that this class operates with 3 kinds of indices:
742// - VOPD component operand indices (Component::DST, Component::SRC0, etc.);
743// - MC operand indices (they refer operands in a MachineInstr/MCInst);
744// - parsed operand indices (they refer operands in parsed operands array).
745//
746// For SINGLE components mapping between these indices is trivial.
747// But things get more complicated for COMPONENT_X and
748// COMPONENT_Y because these components share the same
749// MachineInstr/MCInst and the same parsed operands array.
750// Below is an example of component operand to parsed operand
751// mapping for the following instruction:
752//
753// v_dual_add_f32 v255, v4, v5 :: v_dual_mov_b32 v6, v1
754//
755// PARSED COMPONENT PARSED
756// COMPONENT OPERANDS OPERAND INDEX OPERAND INDEX
757// -------------------------------------------------------------------
758// "v_dual_add_f32" 0
759// v_dual_add_f32 v255 0 (DST) --> 1
760// v4 1 (SRC0) --> 2
761// v5 2 (SRC1) --> 3
762// "::" 4
763// "v_dual_mov_b32" 5
764// v_dual_mov_b32 v6 0 (DST) --> 6
765// v1 1 (SRC0) --> 7
766// -------------------------------------------------------------------
767//
768class ComponentLayout {
769private:
770 // Regular MachineInstr/MCInst operands are ordered as follows:
771 // dst, src0 [, other src operands]
772 // VOPD MachineInstr/MCInst operands are ordered as follows:
773 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
774 // Each ComponentKind has operand indices defined below.
775 static constexpr unsigned MC_DST_IDX[] = {0, 0, 1};
776
777 // VOPD3 instructions may have 2 or 3 source modifiers, src2 modifier is not
778 // used if there is tied accumulator. Indexing of this array:
779 // MC_SRC_IDX[VOPD3ModsNum][SrcNo]. This returns an index for a SINGLE
780 // instruction layout, add 1 for COMPONENT_X or COMPONENT_Y. For the second
781 // component add OpX.MCSrcNum + OpX.VOPD3ModsNum.
782 // For VOPD1/VOPD2 use column with zero modifiers.
783 static constexpr unsigned SINGLE_MC_SRC_IDX[4][3] = {
784 {1, 2, 3}, {2, 3, 4}, {2, 4, 5}, {2, 4, 6}};
785
786 // Parsed operands of regular instructions are ordered as follows:
787 // Mnemo dst src0 [vsrc1 ...]
788 // Parsed VOPD operands are ordered as follows:
789 // OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
790 // OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
791 // Each ComponentKind has operand indices defined below.
792 static constexpr unsigned PARSED_DST_IDX[] = {1, 1,
793 4 /* + OpX.ParsedSrcNum */};
794 static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {
795 2, 2, 5 /* + OpX.ParsedSrcNum */};
796
797private:
798 const ComponentKind Kind;
799 const ComponentProps PrevComp;
800 const unsigned VOPD3ModsNum;
801 const int BitOp3Idx; // Index of bitop3 operand or -1
802
803public:
804 // Create layout for COMPONENT_X or SINGLE component.
805 ComponentLayout(ComponentKind Kind, unsigned VOPD3ModsNum, int BitOp3Idx)
806 : Kind(Kind), VOPD3ModsNum(VOPD3ModsNum), BitOp3Idx(BitOp3Idx) {
807 assert(Kind == ComponentKind::SINGLE || Kind == ComponentKind::COMPONENT_X);
808 }
809
810 // Create layout for COMPONENT_Y which depends on COMPONENT_X layout.
811 ComponentLayout(const ComponentProps &OpXProps, unsigned VOPD3ModsNum,
812 int BitOp3Idx)
813 : Kind(ComponentKind::COMPONENT_Y), PrevComp(OpXProps),
814 VOPD3ModsNum(VOPD3ModsNum), BitOp3Idx(BitOp3Idx) {}
815
816public:
817 // Return the index of dst operand in MCInst operands.
818 unsigned getIndexOfDstInMCOperands() const { return MC_DST_IDX[Kind]; }
819
820 // Return the index of the specified src operand in MCInst operands.
821 unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx, bool VOPD3) const {
822 assert(CompSrcIdx < Component::MAX_SRC_NUM);
823
824 if (Kind == SINGLE && CompSrcIdx == 2 && BitOp3Idx != -1)
825 return BitOp3Idx;
826
827 if (VOPD3) {
828 return SINGLE_MC_SRC_IDX[VOPD3ModsNum][CompSrcIdx] + getPrevCompSrcNum() +
829 getPrevCompVOPD3ModsNum() + (Kind != SINGLE ? 1 : 0);
830 }
831
832 return SINGLE_MC_SRC_IDX[0][CompSrcIdx] + getPrevCompSrcNum() +
833 (Kind != SINGLE ? 1 : 0);
834 }
835
836 // Return the index of dst operand in the parsed operands array.
837 unsigned getIndexOfDstInParsedOperands() const {
838 return PARSED_DST_IDX[Kind] + getPrevCompParsedSrcNum();
839 }
840
841 // Return the index of the specified src operand in the parsed operands array.
842 unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const {
843 assert(CompSrcIdx < Component::MAX_SRC_NUM);
844 return FIRST_PARSED_SRC_IDX[Kind] + getPrevCompParsedSrcNum() + CompSrcIdx;
845 }
846
847private:
848 unsigned getPrevCompSrcNum() const {
849 return PrevComp.getCompSrcOperandsNum();
850 }
851 unsigned getPrevCompParsedSrcNum() const {
852 return PrevComp.getCompParsedSrcOperandsNum();
853 }
854 unsigned getPrevCompVOPD3ModsNum() const {
855 return PrevComp.getCompVOPD3ModsNum();
856 }
857};
858
859// Layout and properties of VOPD components.
860class ComponentInfo : public ComponentProps, public ComponentLayout {
861public:
862 // Create ComponentInfo for COMPONENT_X or SINGLE component.
863 ComponentInfo(const MCInstrDesc &OpDesc,
864 ComponentKind Kind = ComponentKind::SINGLE,
865 bool VOP3Layout = false)
866 : ComponentProps(OpDesc, VOP3Layout),
867 ComponentLayout(Kind, getCompVOPD3ModsNum(), getBitOp3OperandIdx()) {}
868
869 // Create ComponentInfo for COMPONENT_Y which depends on COMPONENT_X layout.
870 ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps,
871 bool VOP3Layout = false)
872 : ComponentProps(OpDesc, VOP3Layout),
873 ComponentLayout(OpXProps, getCompVOPD3ModsNum(),
874 getBitOp3OperandIdx()) {}
875
876 // Map component operand index to parsed operand index.
877 // Return 0 if the specified operand does not exist.
878 unsigned getIndexInParsedOperands(unsigned CompOprIdx) const;
879};
880
881// Properties of VOPD instructions.
882class InstInfo {
883private:
884 const ComponentInfo CompInfo[COMPONENTS_NUM];
885
886public:
887 using RegIndices = std::array<MCRegister, Component::MAX_OPR_NUM>;
888
889 InstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
890 : CompInfo{OpX, OpY} {}
891
892 InstInfo(const ComponentInfo &OprInfoX, const ComponentInfo &OprInfoY)
893 : CompInfo{OprInfoX, OprInfoY} {}
894
895 const ComponentInfo &operator[](size_t ComponentIdx) const {
896 assert(ComponentIdx < COMPONENTS_NUM);
897 return CompInfo[ComponentIdx];
898 }
899
900 // Check VOPD operands constraints.
901 // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
902 // for the specified component and MC operand. The callback must return 0
903 // if the operand is not a register or not a VGPR.
904 // If \p SkipSrc is set to true then constraints for source operands are not
905 // checked.
906 // If \p AllowSameVGPR is set then same VGPRs are allowed for X and Y sources
907 // even though it violates requirement to be from different banks.
908 // If \p VOPD3 is set to true both dst registers allowed to be either odd
909 // or even and instruction may have real src2 as opposed to tied accumulator.
910 bool
911 hasInvalidOperand(std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
912 const MCRegisterInfo &MRI, bool SkipSrc = false,
913 bool AllowSameVGPR = false, bool VOPD3 = false) const {
914 return getInvalidCompOperandIndex(GetRegIdx, MRI, SkipSrc, AllowSameVGPR,
915 VOPD3)
916 .has_value();
917 }
918
919 // Check VOPD operands constraints.
920 // Return the index of an invalid component operand, if any.
921 // If \p SkipSrc is set to true then constraints for source operands are not
922 // checked except for being from the same halves of VGPR file on gfx1250.
923 // If \p AllowSameVGPR is set then same VGPRs are allowed for X and Y sources
924 // even though it violates requirement to be from different banks.
925 // If \p VOPD3 is set to true both dst registers allowed to be either odd
926 // or even and instruction may have real src2 as opposed to tied accumulator.
927 std::optional<unsigned> getInvalidCompOperandIndex(
928 std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
929 const MCRegisterInfo &MRI, bool SkipSrc = false,
930 bool AllowSameVGPR = false, bool VOPD3 = false) const;
931
932private:
933 RegIndices
934 getRegIndices(unsigned ComponentIdx,
935 std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
936 bool VOPD3) const;
937};
938
939} // namespace VOPD
940
// Returns a pair of unsigned values for the VOPD opcode \p VOPDOpcode.
// NOTE(review): presumably the opcodes of the X and Y components, in that
// order - confirm against the definition.
LLVM_READONLY
std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode);

LLVM_READONLY
// Get properties of 2 single VOP1/VOP2 instructions
// used as components to create a VOPD instruction.
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY);

LLVM_READONLY
// Get properties of VOPD X and Y components.
VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
                               const MCInstrInfo *InstrInfo);

// Opcode-keyed queries. Only the declarations are visible in this header;
// the classification rules live with the definitions.
LLVM_READONLY
bool isAsyncStore(unsigned Opc);
LLVM_READONLY
bool isTensorStore(unsigned Opc);
// NOTE(review): \p TID is taken by value (a copy of the descriptor) -
// confirm this is intended before changing it, the definition must match.
LLVM_READONLY
unsigned getTemporalHintType(const MCInstrDesc TID);

LLVM_READONLY
bool isTrue16Inst(unsigned Opc);

LLVM_READONLY
FPType getFPDstSelType(unsigned Opc);

bool isDPMACCInstruction(unsigned Opc);

// Opcode mapping between WMMA forms.
// NOTE(review): judging by the names these convert between the 2-address and
// 3-address variants of a WMMA opcode - confirm the not-found return value.
LLVM_READONLY
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);

LLVM_READONLY
unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);
974
/// Initializes \p Header in place for the subtarget \p STI.
/// NOTE(review): "default" values are defined by the implementation - see the
/// definition for which fields are written.
void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &Header,
                               const MCSubtargetInfo &STI);

// Segment predicates for a global value.
// NOTE(review): presumably keyed on the address space of \p GV - confirm
// against the definitions.
bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
bool isReadOnlySegment(const GlobalValue *GV);

/// \returns True if constants should be emitted to .text section for given
/// target triple \p TT, false otherwise.
bool shouldEmitConstantsToTextSection(const Triple &TT);
985
/// Parses \p TupleString as a physical register name.
///
/// \returns a tuple whose first entry is a valid charcode, or 0 if the string
/// is not a valid physical register name, followed by the start register
/// number and the register width. Does not validate that the number of
/// registers exists in the class. Unlike parseAsmConstraintPhysReg, this does
/// not expect the name to be wrapped in "{}".
std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef TupleString);

/// Parses \p Constraint as a physical register constraint.
///
/// \returns a tuple whose first entry is a valid charcode, or 0 if the string
/// is not a valid physical register constraint, followed by the start register
/// number and the register width. Does not validate that the number of
/// registers exists in the class.
std::tuple<char, unsigned, unsigned>
parseAsmConstraintPhysReg(StringRef Constraint);
998
/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is optional only when
/// \p OnlyFirstRequired is true).
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if one of the requested values cannot be
/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
/// not present.
std::pair<unsigned, unsigned>
getIntegerPairAttribute(const Function &F, StringRef Name,
                        std::pair<unsigned, unsigned> Default,
                        bool OnlyFirstRequired = false);

/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is optional only when
/// \p OnlyFirstRequired is true). The second element of the returned pair is
/// itself optional.
///
/// \returns \p std::nullopt if attribute is not present.
///
/// \returns \p std::nullopt and emits error if one of the requested values
/// cannot be converted to integer, or \p OnlyFirstRequired is false and
/// "second" value is not present.
std::optional<std::pair<unsigned, std::optional<unsigned>>>
getIntegerPairAttribute(const Function &F, StringRef Name,
                        bool OnlyFirstRequired = false);
1025
/// Generates a vector of integer values requested using \p F's \p Name
/// attribute.
///
/// \returns The requested values, or a vector of size \p Size with all
/// elements set to \p DefaultVal if any error occurs. The corresponding error
/// will also be emitted.
SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
                                             unsigned Size,
                                             unsigned DefaultVal);
/// Similar to the function above, but returns std::nullopt if any error occurs.
std::optional<SmallVector<unsigned>>
getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size);

/// Checks if \p Val is inside \p MD, a !range-like metadata.
bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val);
1039
// The following methods are only meaningful on targets that support
// S_WAITCNT.

/// \returns Vmcnt bit mask for given isa \p Version.
unsigned getVmcntBitMask(const IsaVersion &Version);

/// \returns Expcnt bit mask for given isa \p Version.
unsigned getExpcntBitMask(const IsaVersion &Version);

/// \returns Lgkmcnt bit mask for given isa \p Version.
unsigned getLgkmcntBitMask(const IsaVersion &Version);

/// \returns Waitcnt bit mask for given isa \p Version.
unsigned getWaitcntBitMask(const IsaVersion &Version);

/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Loadcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Storecnt from given \p Waitcnt for given isa \p Version.
unsigned decodeStorecnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Dscnt from given \p Waitcnt for given isa \p Version.
unsigned decodeDscnt(const IsaVersion &Version, unsigned Waitcnt);

/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively. Should not be used on gfx12+, the instruction
/// which needs it is deprecated.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
///     \p Vmcnt = \p Waitcnt[3:0]        (pre-gfx9)
///     \p Vmcnt = \p Waitcnt[15:14,3:0]  (gfx9,10)
///     \p Vmcnt = \p Waitcnt[15:10]      (gfx11)
///     \p Expcnt = \p Waitcnt[6:4]       (pre-gfx11)
///     \p Expcnt = \p Waitcnt[2:0]       (gfx11)
///     \p Lgkmcnt = \p Waitcnt[11:8]     (pre-gfx10)
///     \p Lgkmcnt = \p Waitcnt[13:8]     (gfx10)
///     \p Lgkmcnt = \p Waitcnt[9:4]      (gfx11)
///
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt,
                   unsigned &Expcnt, unsigned &Lgkmcnt);

/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt);

/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt);

/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt);

/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version. Should not be used on gfx12+, the instruction which needs
/// it is deprecated.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
///     Waitcnt[2:0]   = \p Expcnt      (gfx11)
///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9)
///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9,10)
///     Waitcnt[6:4]   = \p Expcnt      (pre-gfx11)
///     Waitcnt[9:4]   = \p Lgkmcnt     (gfx11)
///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10)
///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10)
///     Waitcnt[15:10] = \p Vmcnt       (gfx11)
///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9,10)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
///
unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
                       unsigned Expcnt, unsigned Lgkmcnt);

/// \returns Waitcnt with encoded \p Loadcnt and \p Dscnt for given isa \p
/// Version.
unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt,
                            unsigned Dscnt);

/// \returns Waitcnt with encoded \p Storecnt and \p Dscnt for given isa \p
/// Version.
unsigned encodeStorecntDscnt(const IsaVersion &Version, unsigned Storecnt,
                             unsigned Dscnt);
1133
// The following methods are only meaningful on targets that support
// S_WAIT_*CNT, introduced with gfx12.

/// \returns Loadcnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support LOADcnt.
unsigned getLoadcntBitMask(const IsaVersion &Version);

/// \returns Samplecnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support SAMPLEcnt.
unsigned getSamplecntBitMask(const IsaVersion &Version);

/// \returns Bvhcnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support BVHcnt.
unsigned getBvhcntBitMask(const IsaVersion &Version);

/// \returns Asynccnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support Asynccnt.
unsigned getAsynccntBitMask(const IsaVersion &Version);

/// \returns Dscnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support DScnt.
unsigned getDscntBitMask(const IsaVersion &Version);

/// \returns Kmcnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support KMcnt.
unsigned getKmcntBitMask(const IsaVersion &Version);

/// \returns Xcnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support Xcnt.
unsigned getXcntBitMask(const IsaVersion &Version);

/// \returns STOREcnt or VScnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support STOREcnt or VScnt.
/// STOREcnt and VScnt are the same counter, the name used
/// depends on the ISA version.
unsigned getStorecntBitMask(const IsaVersion &Version);
1170
1171namespace Hwreg {
1172
1173using HwregId = EncodingField<5, 0>;
1174using HwregOffset = EncodingField<10, 6>;
1175
1176struct HwregSize : EncodingField<15, 11, 32> {
1177 using EncodingField::EncodingField;
1178 constexpr uint64_t encode() const { return Value - 1; }
1179 static ValueType decode(uint64_t Encoded) { return Encoded + 1; }
1180};
1181
1182using HwregEncoding = EncodingFields<HwregId, HwregOffset, HwregSize>;
1183
1184} // namespace Hwreg
1185
namespace DepCtr {

// Whole-immediate helpers for dependency-counter (Depctr) operands. Only the
// declarations are visible here.
// NOTE(review): the meaning of the int return values and of the in/out
// parameters (\p UsedOprMask, \p HasNonDefaultVal, \p Id, \p Name, \p Val,
// \p IsDefault) is defined by the implementation - confirm there.
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI);
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
                 const MCSubtargetInfo &STI);
bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
                              const MCSubtargetInfo &STI);
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
                  bool &IsDefault, const MCSubtargetInfo &STI);

/// \returns Maximum VaVdst value that can be encoded.
unsigned getVaVdstBitMask();

/// \returns Maximum VaSdst value that can be encoded.
unsigned getVaSdstBitMask();

/// \returns Maximum VaSsrc value that can be encoded.
unsigned getVaSsrcBitMask();

/// \returns Maximum HoldCnt value that can be encoded.
unsigned getHoldCntBitMask(const IsaVersion &Version);

/// \returns Maximum VmVsrc value that can be encoded.
unsigned getVmVsrcBitMask();

/// \returns Maximum VaVcc value that can be encoded.
unsigned getVaVccBitMask();

/// \returns Maximum SaSdst value that can be encoded.
unsigned getSaSdstBitMask();

/// \returns Decoded VaVdst from given immediate \p Encoded.
unsigned decodeFieldVaVdst(unsigned Encoded);

/// \returns Decoded VmVsrc from given immediate \p Encoded.
unsigned decodeFieldVmVsrc(unsigned Encoded);

/// \returns Decoded SaSdst from given immediate \p Encoded.
unsigned decodeFieldSaSdst(unsigned Encoded);

/// \returns Decoded VaSdst from given immediate \p Encoded.
unsigned decodeFieldVaSdst(unsigned Encoded);

/// \returns Decoded VaVcc from given immediate \p Encoded.
unsigned decodeFieldVaVcc(unsigned Encoded);

/// \returns Decoded VaSsrc from given immediate \p Encoded.
unsigned decodeFieldVaSsrc(unsigned Encoded);

/// \returns Decoded HoldCnt from given immediate \p Encoded.
unsigned decodeFieldHoldCnt(unsigned Encoded, const IsaVersion &Version);

/// \returns \p VmVsrc as an encoded Depctr immediate.
unsigned encodeFieldVmVsrc(unsigned VmVsrc, const MCSubtargetInfo &STI);

/// \returns \p Encoded combined with encoded \p VmVsrc.
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc);

/// \returns \p VaVdst as an encoded Depctr immediate.
unsigned encodeFieldVaVdst(unsigned VaVdst, const MCSubtargetInfo &STI);

/// \returns \p Encoded combined with encoded \p VaVdst.
unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst);

/// \returns \p SaSdst as an encoded Depctr immediate.
unsigned encodeFieldSaSdst(unsigned SaSdst, const MCSubtargetInfo &STI);

/// \returns \p Encoded combined with encoded \p SaSdst.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst);

/// \returns \p VaSdst as an encoded Depctr immediate.
unsigned encodeFieldVaSdst(unsigned VaSdst, const MCSubtargetInfo &STI);

/// \returns \p Encoded combined with encoded \p VaSdst.
unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst);

/// \returns \p VaVcc as an encoded Depctr immediate.
unsigned encodeFieldVaVcc(unsigned VaVcc, const MCSubtargetInfo &STI);

/// \returns \p Encoded combined with encoded \p VaVcc.
unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc);

/// \returns \p HoldCnt as an encoded Depctr immediate.
unsigned encodeFieldHoldCnt(unsigned HoldCnt, const MCSubtargetInfo &STI);

/// \returns \p Encoded combined with encoded \p HoldCnt.
unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt,
                            const IsaVersion &Version);

/// \returns \p VaSsrc as an encoded Depctr immediate.
unsigned encodeFieldVaSsrc(unsigned VaSsrc, const MCSubtargetInfo &STI);

/// \returns \p Encoded combined with encoded \p VaSsrc.
unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc);

} // namespace DepCtr
1282
namespace Exp {

// Conversion between export target ids and their names, plus a per-subtarget
// support check. Only the declarations are visible here.
// NOTE(review): getTgtName writes \p Name and \p Index and returns a bool -
// presumably false when \p Id has no name; confirm against the definition.
bool getTgtName(unsigned Id, StringRef &Name, int &Index);

// NOTE(review): the value returned for an unknown \p Name is defined by the
// implementation - confirm before relying on it.
LLVM_READONLY
unsigned getTgtId(const StringRef Name);

LLVM_READNONE
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);

} // namespace Exp
1294
namespace MTBUFFormat {

// Helpers for MTBUF format operands: packing and unpacking of the
// Dfmt/Nfmt pair, name <-> id conversion for Dfmt, Nfmt and the unified
// format, and validity checks. Only the declarations are visible here.
// NOTE(review): the int64_t-returning lookups presumably use a sentinel for
// unknown names - confirm the exact value against the definitions.

LLVM_READNONE
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);

// Splits \p Format, writing the results to \p Dfmt and \p Nfmt.
void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);

int64_t getDfmt(const StringRef Name);

StringRef getDfmtName(unsigned Id);

int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);

StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);

bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);

bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);

int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI);

StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI);

bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI);

int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
                             const MCSubtargetInfo &STI);

bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);

unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);

} // namespace MTBUFFormat
1328
namespace SendMsg {

// Validation and encoding helpers for send-message operands, which are made
// of a message id, an operation id and a stream id. Only the declarations
// are visible here.
// NOTE(review): the effect of \p Strict on the validity checks is defined by
// the implementation - confirm there.

LLVM_READNONE
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI);

LLVM_READNONE
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
                  bool Strict = true);

LLVM_READNONE
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
                      const MCSubtargetInfo &STI, bool Strict = true);

LLVM_READNONE
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI);

LLVM_READNONE
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);

// Splits \p Val, writing the parts to \p MsgId, \p OpId and \p StreamId.
void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
               uint16_t &StreamId, const MCSubtargetInfo &STI);

// Combines \p MsgId, \p OpId and \p StreamId into a single value.
LLVM_READNONE
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId);

/// Returns true if the message does not use the m0 operand.
bool msgDoesNotUseM0(int64_t MsgId, const MCSubtargetInfo &STI);

} // namespace SendMsg
1358
// Per-function property getters. Only the declarations are visible here.
// NOTE(review): presumably each reads a function attribute of \p F - confirm
// the attribute names and defaults against the definitions.
unsigned getInitialPSInputAddr(const Function &F);

bool getHasColorExport(const Function &F);

bool getHasDepthExport(const Function &F);

// Returns the value of the "amdgpu-dynamic-vgpr-block-size" attribute, or 0 if
// the attribute is missing or its value is invalid.
unsigned getDynamicVGPRBlockSize(const Function &F);
1368
1369LLVM_READNONE
1370constexpr bool isShader(CallingConv::ID CC) {
1371 switch (CC) {
1372 case CallingConv::AMDGPU_VS:
1373 case CallingConv::AMDGPU_LS:
1374 case CallingConv::AMDGPU_HS:
1375 case CallingConv::AMDGPU_ES:
1376 case CallingConv::AMDGPU_GS:
1377 case CallingConv::AMDGPU_PS:
1378 case CallingConv::AMDGPU_CS_Chain:
1379 case CallingConv::AMDGPU_CS_ChainPreserve:
1380 case CallingConv::AMDGPU_CS:
1381 return true;
1382 default:
1383 return false;
1384 }
1385}
1386
1387LLVM_READNONE
1388constexpr bool isGraphics(CallingConv::ID CC) {
1389 return isShader(CC) || CC == CallingConv::AMDGPU_Gfx ||
1390 CC == CallingConv::AMDGPU_Gfx_WholeWave;
1391}
1392
1393LLVM_READNONE
1394constexpr bool isCompute(CallingConv::ID CC) {
1395 return !isGraphics(CC) || CC == CallingConv::AMDGPU_CS;
1396}
1397
1398LLVM_READNONE
1399constexpr bool isEntryFunctionCC(CallingConv::ID CC) {
1400 switch (CC) {
1401 case CallingConv::AMDGPU_KERNEL:
1402 case CallingConv::SPIR_KERNEL:
1403 case CallingConv::AMDGPU_VS:
1404 case CallingConv::AMDGPU_GS:
1405 case CallingConv::AMDGPU_PS:
1406 case CallingConv::AMDGPU_CS:
1407 case CallingConv::AMDGPU_ES:
1408 case CallingConv::AMDGPU_HS:
1409 case CallingConv::AMDGPU_LS:
1410 return true;
1411 default:
1412 return false;
1413 }
1414}
1415
1416LLVM_READNONE
1417constexpr bool isChainCC(CallingConv::ID CC) {
1418 switch (CC) {
1419 case CallingConv::AMDGPU_CS_Chain:
1420 case CallingConv::AMDGPU_CS_ChainPreserve:
1421 return true;
1422 default:
1423 return false;
1424 }
1425}
1426
1427// These functions are considered entrypoints into the current module, i.e. they
1428// are allowed to be called from outside the current module. This is different
1429// from isEntryFunctionCC, which is only true for functions that are entered by
1430// the hardware. Module entry points include all entry functions but also
1431// include functions that can be called from other functions inside or outside
1432// the current module. Module entry functions are allowed to allocate LDS.
1433//
1434// AMDGPU_CS_Chain is intended for externally callable chain functions, so it is
1435// treated as a module entrypoint. AMDGPU_CS_ChainPreserve is used for internal
1436// helper functions (e.g. retry helpers), so it is not a module entrypoint.
1437LLVM_READNONE
1438constexpr bool isModuleEntryFunctionCC(CallingConv::ID CC) {
1439 switch (CC) {
1440 case CallingConv::AMDGPU_Gfx:
1441 case CallingConv::AMDGPU_CS_Chain:
1442 return true;
1443 default:
1444 return isEntryFunctionCC(CC);
1445 }
1446}
1447
1448LLVM_READNONE
1449constexpr inline bool isKernel(CallingConv::ID CC) {
1450 switch (CC) {
1451 case CallingConv::AMDGPU_KERNEL:
1452 case CallingConv::SPIR_KERNEL:
1453 return true;
1454 default:
1455 return false;
1456 }
1457}
1458
1459inline bool isKernel(const Function &F) { return isKernel(CC: F.getCallingConv()); }
1460
1461LLVM_READNONE
1462constexpr bool canGuaranteeTCO(CallingConv::ID CC) {
1463 return CC == CallingConv::Fast;
1464}
1465
1466/// Return true if we might ever do TCO for calls with this calling convention.
1467LLVM_READNONE
1468constexpr bool mayTailCallThisCC(CallingConv::ID CC) {
1469 switch (CC) {
1470 case CallingConv::C:
1471 case CallingConv::AMDGPU_Gfx:
1472 case CallingConv::AMDGPU_Gfx_WholeWave:
1473 return true;
1474 default:
1475 return canGuaranteeTCO(CC);
1476 }
1477}
1478
// Subtarget feature queries. Each takes the MC subtarget info to inspect;
// only the declarations are visible in this header.
bool hasXNACK(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
bool hasA16(const MCSubtargetInfo &STI);
bool hasG16(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);
bool hasGDS(const MCSubtargetInfo &STI);
// NOTE(review): the unit of the returned size and the effect of
// \p HasSampler are defined by the implementation - confirm there.
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler = false);
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI);

// Subtarget generation predicates. The names encode the generation or range
// of generations being tested; "Plus" and "Not...Plus" variants exist side by
// side.
// NOTE(review): whether each "Not" variant is the exact negation of its
// counterpart is not visible here - confirm against the definitions.
bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
bool isGFX9_GFX10(const MCSubtargetInfo &STI);
bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI);
bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI);
bool isGFX8Plus(const MCSubtargetInfo &STI);
bool isGFX9Plus(const MCSubtargetInfo &STI);
bool isNotGFX9Plus(const MCSubtargetInfo &STI);
bool isGFX10(const MCSubtargetInfo &STI);
bool isGFX10_GFX11(const MCSubtargetInfo &STI);
bool isGFX10Plus(const MCSubtargetInfo &STI);
bool isNotGFX10Plus(const MCSubtargetInfo &STI);
bool isGFX10Before1030(const MCSubtargetInfo &STI);
bool isGFX11(const MCSubtargetInfo &STI);
bool isGFX11Plus(const MCSubtargetInfo &STI);
bool isGFX12(const MCSubtargetInfo &STI);
bool isGFX12Plus(const MCSubtargetInfo &STI);
bool isGFX1250(const MCSubtargetInfo &STI);
bool isGFX1250Plus(const MCSubtargetInfo &STI);
bool isGFX13(const MCSubtargetInfo &STI);
bool isGFX13Plus(const MCSubtargetInfo &STI);
bool supportsWGP(const MCSubtargetInfo &STI);
bool isNotGFX12Plus(const MCSubtargetInfo &STI);
bool isNotGFX11Plus(const MCSubtargetInfo &STI);
bool isGCN3Encoding(const MCSubtargetInfo &STI);
bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
bool isGFX10_3_GFX11(const MCSubtargetInfo &STI);
bool isGFX90A(const MCSubtargetInfo &STI);
bool isGFX940(const MCSubtargetInfo &STI);
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
bool hasMAIInsts(const MCSubtargetInfo &STI);
bool hasVOPD(const MCSubtargetInfo &STI);
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI);
1525
1526inline bool supportsWave32(const MCSubtargetInfo &STI) {
1527 return AMDGPU::isGFX10Plus(STI) && !AMDGPU::isGFX1250(STI);
1528}
1529
// NOTE(review): presumably combines the AGPR and VGPR counts into one total,
// with the rule depending on \p has90AInsts - confirm against the definition.
int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
// NOTE(review): declared to return unsigned despite the predicate-style name -
// confirm whether callers rely on a count rather than a flag.
unsigned hasKernargPreload(const MCSubtargetInfo &STI);
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST);

/// \returns true if \p Reg is a scalar register (SGPR).
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI);

/// \returns true if \p Reg occupies the high 16-bits of a 32-bit register.
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI);

/// If \p Reg is a pseudo reg, return the correct hardware register given
/// \p STI otherwise return \p Reg.
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI);

/// Convert hardware register \p Reg to a pseudo register.
LLVM_READNONE
MCRegister mc2PseudoReg(MCRegister Reg);

// NOTE(review): presumably true for registers that stand for inline constant
// values - confirm against the definition.
LLVM_READNONE
bool isInlineValue(MCRegister Reg);
1550
1551/// Is this an AMDGPU specific source operand? These include registers,
1552/// inline constants, literals and mandatory literals (KImm).
1553constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo) {
1554 return OpInfo.OperandType >= AMDGPU::OPERAND_SRC_FIRST &&
1555 OpInfo.OperandType <= AMDGPU::OPERAND_SRC_LAST;
1556}
1557
1558inline bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
1559 return isSISrcOperand(OpInfo: Desc.operands()[OpNo]);
1560}
1561
/// Is operand \p OpNo of \p Desc a KImm operand?
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Is operand \p OpNo of \p Desc a floating-point operand?
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Does operand \p OpNo of \p Desc support only inlinable literals?
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Get the size in bits of a register from the register class with ID
/// \p RCID.
unsigned getRegBitWidth(unsigned RCID);

/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC);
1576
1577LLVM_READNONE
1578inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
1579 switch (OpInfo.OperandType) {
1580 case AMDGPU::OPERAND_REG_IMM_INT32:
1581 case AMDGPU::OPERAND_REG_IMM_FP32:
1582 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1583 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1584 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1585 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1586 case AMDGPU::OPERAND_REG_IMM_V2INT32:
1587 case AMDGPU::OPERAND_REG_IMM_V2FP32:
1588 case AMDGPU::OPERAND_KIMM32:
1589 case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
1590 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
1591 return 4;
1592
1593 case AMDGPU::OPERAND_REG_IMM_INT64:
1594 case AMDGPU::OPERAND_REG_IMM_FP64:
1595 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1596 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1597 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1598 case AMDGPU::OPERAND_REG_IMM_V2FP64:
1599 case AMDGPU::OPERAND_REG_IMM_V2INT64:
1600 case AMDGPU::OPERAND_KIMM64:
1601 return 8;
1602
1603 case AMDGPU::OPERAND_REG_IMM_INT16:
1604 case AMDGPU::OPERAND_REG_IMM_BF16:
1605 case AMDGPU::OPERAND_REG_IMM_FP16:
1606 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1607 case AMDGPU::OPERAND_REG_INLINE_C_BF16:
1608 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1609 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1610 case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
1611 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1612 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1613 case AMDGPU::OPERAND_REG_IMM_V2BF16:
1614 case AMDGPU::OPERAND_REG_IMM_V2FP16:
1615 case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
1616 case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
1617 return 2;
1618
1619 default:
1620 llvm_unreachable("unhandled operand type");
1621 }
1622}
1623
1624LLVM_READNONE
1625inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
1626 return getOperandSize(OpInfo: Desc.operands()[OpNo]);
1627}
1628
/// Is this literal inlinable, and not one of the values intended for floating
/// point values. The accepted range is [-16, 64], both ends included.
LLVM_READNONE
inline bool isInlinableIntLiteral(int64_t Literal) {
  if (Literal < -16)
    return false;
  return Literal <= 64;
}
1635
// Inline-constant queries for literals of the various operand widths and
// element types. Only the declarations are visible here.
// NOTE(review): \p HasInv2Pi presumably states whether the subtarget offers
// the 1/(2*pi) inline constant - confirm against the definitions.

/// Is this 64-bit literal inlinable?
LLVM_READNONE
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi);

// Packed 2 x 16-bit literals. The getInlineEncoding* functions return an
// optional value.
// NOTE(review): presumably std::nullopt means "not inlinable" - confirm
// against the definitions.
LLVM_READNONE
std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal);

LLVM_READNONE
std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal);

LLVM_READNONE
std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal);

LLVM_READNONE
std::optional<unsigned> getPKFMACF16InlineEncoding(uint32_t Literal,
                                                   bool IsGFX11Plus);

LLVM_READNONE
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType);

LLVM_READNONE
bool isInlinableLiteralV2I16(uint32_t Literal);

LLVM_READNONE
bool isInlinableLiteralV2BF16(uint32_t Literal);

LLVM_READNONE
bool isInlinableLiteralV2F16(uint32_t Literal);

LLVM_READNONE
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus);

// 32-bit literal helpers.
// NOTE(review): the roles of \p IsFP64 and \p IsLit are defined by the
// implementation - confirm there.
LLVM_READNONE
bool isValid32BitLiteral(uint64_t Val, bool IsFP64);

LLVM_READNONE
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit);
1685
// Two ways to ask whether an argument is passed in SGPRs: by formal argument,
// or by call site and argument number.
bool isArgPassedInSGPR(const Argument *Arg);

bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo);

// Opcode classification for packed instructions. Only the declarations are
// visible here.
LLVM_READONLY bool isPackedFP32Inst(unsigned Opc);

LLVM_READONLY bool isPacked64BitInst(unsigned Opc);

LLVM_READONLY bool isPackedFP32or64BitInst(unsigned Opc);

// Legality checks for an already encoded SMRD offset on subtarget \p ST.
LLVM_READONLY
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset);

LLVM_READONLY
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset, bool IsBuffer);

/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
/// offsets.
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);

/// \returns The encoding that will be used for \p ByteOffset in the
/// SMRD offset field, or std::nullopt if it won't fit. On GFX9 and GFX10
/// S_LOAD instructions have a signed offset, on other subtargets it is
/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                            int64_t ByteOffset, bool IsBuffer,
                                            bool HasSOffset = false);

/// \returns The encoding that can be used for a 32-bit literal offset in an
/// SMRD instruction. This is only useful on CI.
std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                     int64_t ByteOffset);

/// For pre-GFX12 FLAT instructions the offset must be positive;
/// MSB is ignored and forced to zero.
///
/// \returns The number of bits available for the signed offset field in flat
/// instructions. Note that some forms of the instruction disallow negative
/// offsets.
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST);
1728
1729LLVM_READNONE
1730inline bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC) {
1731 if (isGFX12(STI: ST))
1732 return DC >= DPP::ROW_SHARE_FIRST && DC <= DPP::ROW_SHARE_LAST;
1733 if (isGFX90A(STI: ST))
1734 return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
1735 return false;
1736}
1737
1738/// \returns true if an instruction may have a 64-bit VGPR operand.
1739bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc,
1740 const MCSubtargetInfo &ST);
1741
1742/// \returns true if an instruction is a DP ALU DPP without any 64-bit operands.
1743bool isDPALU_DPP32BitOpc(unsigned Opc);
1744
1745/// \returns true if an instruction is a DP ALU DPP.
1746bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII,
1747 const MCSubtargetInfo &ST);
1748
1749/// \returns true if the intrinsic is divergent
1750bool isIntrinsicSourceOfDivergence(unsigned IntrID);
1751
1752/// \returns true if the intrinsic is uniform
1753bool isIntrinsicAlwaysUniform(unsigned IntrID);
1754
1755/// \returns a register class for the physical register \p Reg if it is a VGPR
1756/// or nullptr otherwise.
1757const MCRegisterClass *getVGPRPhysRegClass(MCRegister Reg,
1758 const MCRegisterInfo &MRI);
1759
1760/// \returns the MODE bits which have to be set by the S_SET_VGPR_MSB for the
1761/// physical register \p Reg.
1762unsigned getVGPREncodingMSBs(MCRegister Reg, const MCRegisterInfo &MRI);
1763
1764/// If \p Reg is a low VGPR return a corresponding high VGPR with \p MSBs set.
1765MCRegister getVGPRWithMSBs(MCRegister Reg, unsigned MSBs,
1766 const MCRegisterInfo &MRI);
1767
1768/// \returns VGPR MSBs encoded in a S_SETREG_IMM32_B32 \p MI if it sets
1769/// it. If \p HasSetregVGPRMSBFixup is true then size of the ID_MODE mask is
1770/// ignored.
1771std::optional<unsigned> convertSetRegImmToVgprMSBs(const MachineInstr &MI,
1772 bool HasSetregVGPRMSBFixup);
1773
1774/// \returns VGPR MSBs encoded in a S_SETREG_IMM32_B32 \p MI if it sets
1775/// it. If \p HasSetregVGPRMSBFixup is true then size of the ID_MODE mask is
1776/// ignored.
1777std::optional<unsigned> convertSetRegImmToVgprMSBs(const MCInst &MI,
1778 bool HasSetregVGPRMSBFixup);
1779
/// \returns a table for the opcode with a given \p Desc to map the VGPR MSB
/// set by the S_SET_VGPR_MSB to one of 4 sources. In case of VOPD returns 2
/// maps, one for X and one for Y component.
1783std::pair<const AMDGPU::OpName *, const AMDGPU::OpName *>
1784getVGPRLoweringOperandTables(const MCInstrDesc &Desc);
1785
1786/// \returns true if a memory instruction supports scale_offset modifier.
1787bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode);
1788
/// \returns the LDS block size in terms of dwords.
/// This is used to calculate the LDS size encoded for PAL metadata 3.0+ which
/// must be defined in terms of bytes.
1792unsigned getLdsDwGranularity(const MCSubtargetInfo &ST);
1793
1794class ClusterDimsAttr {
1795public:
1796 enum class Kind { Unknown, NoCluster, VariableDims, FixedDims };
1797
1798 ClusterDimsAttr() = default;
1799
1800 Kind getKind() const { return AttrKind; }
1801
1802 bool isUnknown() const { return getKind() == Kind::Unknown; }
1803
1804 bool isNoCluster() const { return getKind() == Kind::NoCluster; }
1805
1806 bool isFixedDims() const { return getKind() == Kind::FixedDims; }
1807
1808 bool isVariableDims() const { return getKind() == Kind::VariableDims; }
1809
1810 void setUnknown() { *this = ClusterDimsAttr(Kind::Unknown); }
1811
1812 void setNoCluster() { *this = ClusterDimsAttr(Kind::NoCluster); }
1813
1814 void setVariableDims() { *this = ClusterDimsAttr(Kind::VariableDims); }
1815
1816 /// \returns the dims stored. Note that this function can only be called if
1817 /// the kind is \p Fixed.
1818 const std::array<unsigned, 3> &getDims() const;
1819
1820 bool operator==(const ClusterDimsAttr &RHS) const {
1821 return AttrKind == RHS.AttrKind && Dims == RHS.Dims;
1822 }
1823
1824 std::string to_string() const;
1825
1826 static ClusterDimsAttr get(const Function &F);
1827
1828private:
1829 enum Encoding { EncoNoCluster = 0, EncoVariableDims = 1024 };
1830
1831 ClusterDimsAttr(Kind AttrKind) : AttrKind(AttrKind) {}
1832
1833 std::array<unsigned, 3> Dims = {0, 0, 0};
1834
1835 Kind AttrKind = Kind::Unknown;
1836};
1837
1838} // namespace AMDGPU
1839
1840raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::TargetIDSetting S);
1841
1842} // end namespace llvm
1843
1844#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
1845