1//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11
12#include "AMDGPUSubtarget.h"
13#include "SIDefines.h"
14#include "llvm/IR/CallingConv.h"
15#include "llvm/IR/InstrTypes.h"
16#include "llvm/IR/Module.h"
17#include "llvm/Support/Alignment.h"
18#include <array>
19#include <functional>
20#include <utility>
21
22// Pull in OpName enum definition and getNamedOperandIdx() declaration.
23#define GET_INSTRINFO_OPERAND_ENUM
24#include "AMDGPUGenInstrInfo.inc"
25
26struct amd_kernel_code_t;
27
28namespace llvm {
29
30struct Align;
31class Argument;
32class Function;
33class GlobalValue;
34class MachineInstr;
35class MCInstrInfo;
36class MCRegisterClass;
37class MCRegisterInfo;
38class MCSubtargetInfo;
39class MDNode;
40class StringRef;
41class Triple;
42class raw_ostream;
43
44namespace AMDGPU {
45
46struct AMDGPUMCKernelCodeT;
47struct IsaVersion;
48
49/// Generic target versions emitted by this version of LLVM.
50///
51/// These numbers are incremented every time a codegen breaking change occurs
52/// within a generic family.
namespace GenericVersion {
// All generic families are currently at their initial version.
static constexpr unsigned GFX9 = 1;
static constexpr unsigned GFX9_4 = 1;
static constexpr unsigned GFX10_1 = 1;
static constexpr unsigned GFX10_3 = 1;
static constexpr unsigned GFX11 = 1;
static constexpr unsigned GFX12 = 1;
static constexpr unsigned GFX12_5 = 1;
} // namespace GenericVersion
62
// Supported AMDHSA code object versions (see getAMDHSACodeObjectVersion).
enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 };

// Small floating-point format kinds.
enum class FPType { None, FP4, FP8 };
66
67/// \returns True if \p STI is AMDHSA.
68bool isHsaAbi(const MCSubtargetInfo &STI);
69
70/// \returns Code object version from the IR module flag.
71unsigned getAMDHSACodeObjectVersion(const Module &M);
72
73/// \returns Code object version from ELF's e_ident[EI_ABIVERSION].
74unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion);
75
76/// \returns The default HSA code object version. This should only be used when
77/// we lack a more accurate CodeObjectVersion value (e.g. from the IR module
78/// flag or a .amdhsa_code_object_version directive)
79unsigned getDefaultAMDHSACodeObjectVersion();
80
81/// \returns ABIVersion suitable for use in ELF's e_ident[EI_ABIVERSION]. \param
82/// CodeObjectVersion is a value returned by getAMDHSACodeObjectVersion().
83uint8_t getELFABIVersion(const Triple &OS, unsigned CodeObjectVersion);
84
85/// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr
86unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV);
87
88/// \returns The offset of the hostcall pointer argument from implicitarg_ptr
89unsigned getHostcallImplicitArgPosition(unsigned COV);
90
91unsigned getDefaultQueueImplicitArgPosition(unsigned COV);
92unsigned getCompletionActionImplicitArgPosition(unsigned COV);
93
// Buffer format description; rows are looked up by getGcnBufferFormatInfo.
struct GcnBufferFormatInfo {
  unsigned Format;        // Combined format value.
  unsigned BitsPerComp;   // Bits per component.
  unsigned NumComponents; // Number of components.
  unsigned NumFormat;     // Numeric format value.
  unsigned DataFormat;    // Data format value.
};
101
// Per-opcode MAI instruction properties (see getMAIIsDGEMM and
// getMAIIsGFX940XDL below).
struct MAIInstInfo {
  uint32_t Opcode;
  bool is_dgemm;      // True for double-precision GEMM operations.
  bool is_gfx940_xdl; // True for gfx940 XDL operations.
};
107
// MFMA F8F6F4 variant description (see getMFMA_F8F6F4_WithFormatArgs).
struct MFMA_F8F6F4_Info {
  unsigned Opcode;
  unsigned F8F8Opcode; // Corresponding F8F8 opcode.
  uint8_t NumRegsSrcA; // Registers consumed by src A.
  uint8_t NumRegsSrcB; // Registers consumed by src B.
};
114
// Row of the generated isCvtScaleF32_F32F16ToF8F4 searchable table.
struct CvtScaleF32_F32F16ToF8F4_Info {
  unsigned Opcode;
};
118
// Maps a True16 opcode to its D16 hi/lo counterparts (row of the generated
// True16D16 searchable table).
struct True16D16Info {
  unsigned T16Op; // True16 opcode.
  unsigned HiOp;  // D16 hi-half opcode.
  unsigned LoOp;  // D16 lo-half opcode.
};
124
// Per-opcode WMMA instruction properties (see getWMMAIsXDL below).
struct WMMAInstInfo {
  uint32_t Opcode;
  bool is_wmma_xdl; // True for XDL WMMA operations.
};
129
130#define GET_MIMGBaseOpcode_DECL
131#define GET_MIMGDim_DECL
132#define GET_MIMGEncoding_DECL
133#define GET_MIMGLZMapping_DECL
134#define GET_MIMGMIPMapping_DECL
135#define GET_MIMGBiASMapping_DECL
136#define GET_MAIInstInfoTable_DECL
137#define GET_isMFMA_F8F6F4Table_DECL
138#define GET_isCvtScaleF32_F32F16ToF8F4Table_DECL
139#define GET_True16D16Table_DECL
140#define GET_WMMAInstInfoTable_DECL
141#include "AMDGPUGenSearchableTables.inc"
142
143namespace IsaInfo {
144
enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  TRAP_NUM_SGPRS = 16 // SGPRs set aside for trap handling.
};

// State of a single target-id feature (xnack or sramecc).
enum class TargetIDSetting { Unsupported, Any, Off, On };
153
// Tracks the xnack and sramecc target-id settings for a subtarget.
class AMDGPUTargetID {
private:
  const MCSubtargetInfo &STI; // Subtarget the settings are derived from.
  TargetIDSetting XnackSetting;   // Current xnack setting.
  TargetIDSetting SramEccSetting; // Current sramecc setting.

public:
  explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
  ~AMDGPUTargetID() = default;

  /// \return True if the current xnack setting is not "Unsupported".
  bool isXnackSupported() const {
    return XnackSetting != TargetIDSetting::Unsupported;
  }

  /// \returns True if the current xnack setting is "On" or "Any".
  bool isXnackOnOrAny() const {
    return XnackSetting == TargetIDSetting::On ||
           XnackSetting == TargetIDSetting::Any;
  }

  /// \returns True if current xnack setting is "On" or "Off",
  /// false otherwise.
  bool isXnackOnOrOff() const {
    return getXnackSetting() == TargetIDSetting::On ||
           getXnackSetting() == TargetIDSetting::Off;
  }

  /// \returns The current xnack TargetIDSetting, possible options are
  /// "Unsupported", "Any", "Off", and "On".
  TargetIDSetting getXnackSetting() const { return XnackSetting; }

  /// Sets xnack setting to \p NewXnackSetting.
  void setXnackSetting(TargetIDSetting NewXnackSetting) {
    XnackSetting = NewXnackSetting;
  }

  /// \return True if the current sramecc setting is not "Unsupported".
  bool isSramEccSupported() const {
    return SramEccSetting != TargetIDSetting::Unsupported;
  }

  /// \returns True if the current sramecc setting is "On" or "Any".
  bool isSramEccOnOrAny() const {
    return SramEccSetting == TargetIDSetting::On ||
           SramEccSetting == TargetIDSetting::Any;
  }

  /// \returns True if current sramecc setting is "On" or "Off",
  /// false otherwise.
  bool isSramEccOnOrOff() const {
    return getSramEccSetting() == TargetIDSetting::On ||
           getSramEccSetting() == TargetIDSetting::Off;
  }

  /// \returns The current sramecc TargetIDSetting, possible options are
  /// "Unsupported", "Any", "Off", and "On".
  TargetIDSetting getSramEccSetting() const { return SramEccSetting; }

  /// Sets sramecc setting to \p NewSramEccSetting.
  void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
    SramEccSetting = NewSramEccSetting;
  }

  /// Derive both settings from a subtarget features string \p FS.
  void setTargetIDFromFeaturesString(StringRef FS);
  /// Derive both settings from a target-id string \p TargetID.
  void setTargetIDFromTargetIDStream(StringRef TargetID);

  /// Write string representation to \p OS
  void print(raw_ostream &OS) const;

  /// \returns String representation of an object.
  std::string toString() const;
};
227
/// Prints the string representation of \p TargetID to \p OS.
inline raw_ostream &operator<<(raw_ostream &OS,
                               const AMDGPUTargetID &TargetID) {
  TargetID.print(OS);
  return OS;
}
233
234/// \returns Wavefront size for given subtarget \p STI.
235unsigned getWavefrontSize(const MCSubtargetInfo *STI);
236
237/// \returns Local memory size in bytes for given subtarget \p STI.
238unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
239
240/// \returns Maximum addressable local memory size in bytes for given subtarget
241/// \p STI.
242unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI);
243
244/// \returns Number of execution units per compute unit for given subtarget \p
245/// STI.
246unsigned getEUsPerCU(const MCSubtargetInfo *STI);
247
248/// \returns Maximum number of work groups per compute unit for given subtarget
249/// \p STI and limited by given \p FlatWorkGroupSize.
250unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
251 unsigned FlatWorkGroupSize);
252
253/// \returns Minimum number of waves per execution unit for given subtarget \p
254/// STI.
255unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
256
257/// \returns Maximum number of waves per execution unit for given subtarget \p
258/// STI without any kind of limitation.
259unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
260
261/// \returns Number of waves per execution unit required to support the given \p
262/// FlatWorkGroupSize.
263unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
264 unsigned FlatWorkGroupSize);
265
266/// \returns Minimum flat work group size for given subtarget \p STI.
267unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
268
/// \returns Maximum flat work group size
constexpr unsigned getMaxFlatWorkGroupSize() {
  // Some subtargets can encode up to 2048, but that path is neither tested
  // nor supported, so the limit stays at 1024.
  constexpr unsigned MaxSize = 1024;
  return MaxSize;
}
274
275/// \returns Number of waves per work group for given subtarget \p STI and
276/// \p FlatWorkGroupSize.
277unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
278 unsigned FlatWorkGroupSize);
279
280/// \returns SGPR allocation granularity for given subtarget \p STI.
281unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
282
283/// \returns SGPR encoding granularity for given subtarget \p STI.
284unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
285
286/// \returns Total number of SGPRs for given subtarget \p STI.
287unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
288
289/// \returns Addressable number of SGPRs for given subtarget \p STI.
290unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
291
292/// \returns Minimum number of SGPRs that meets the given number of waves per
293/// execution unit requirement for given subtarget \p STI.
294unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
295
296/// \returns Maximum number of SGPRs that meets the given number of waves per
297/// execution unit requirement for given subtarget \p STI.
298unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
299 bool Addressable);
300
301/// \returns Number of extra SGPRs implicitly required by given subtarget \p
302/// STI when the given special registers are used.
303unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
304 bool FlatScrUsed, bool XNACKUsed);
305
306/// \returns Number of extra SGPRs implicitly required by given subtarget \p
307/// STI when the given special registers are used. XNACK is inferred from
308/// \p STI.
309unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
310 bool FlatScrUsed);
311
312/// \returns Number of SGPR blocks needed for given subtarget \p STI when
313/// \p NumSGPRs are used. \p NumSGPRs should already include any special
314/// register counts.
315unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
316
317/// \returns VGPR allocation granularity for given subtarget \p STI.
318///
319/// For subtargets which support it, \p EnableWavefrontSize32 should match
320/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
321unsigned
322getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize,
323 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
324
325/// \returns VGPR encoding granularity for given subtarget \p STI.
326///
327/// For subtargets which support it, \p EnableWavefrontSize32 should match
328/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
329unsigned getVGPREncodingGranule(
330 const MCSubtargetInfo *STI,
331 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
332
333/// For subtargets with a unified VGPR file and mixed ArchVGPR/AGPR usage,
334/// returns the allocation granule for ArchVGPRs.
335unsigned getArchVGPRAllocGranule();
336
337/// \returns Total number of VGPRs for given subtarget \p STI.
338unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
339
340/// \returns Addressable number of architectural VGPRs for a given subtarget \p
341/// STI.
342unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI);
343
344/// \returns Addressable number of VGPRs for given subtarget \p STI.
345unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI,
346 unsigned DynamicVGPRBlockSize);
347
348/// \returns Minimum number of VGPRs that meets given number of waves per
349/// execution unit requirement for given subtarget \p STI.
350unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
351 unsigned DynamicVGPRBlockSize);
352
353/// \returns Maximum number of VGPRs that meets given number of waves per
354/// execution unit requirement for given subtarget \p STI.
355unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
356 unsigned DynamicVGPRBlockSize);
357
358/// \returns Number of waves reachable for a given \p NumVGPRs usage for given
359/// subtarget \p STI.
360unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
361 unsigned NumVGPRs,
362 unsigned DynamicVGPRBlockSize);
363
364/// \returns Number of waves reachable for a given \p NumVGPRs usage, \p Granule
365/// size, \p MaxWaves possible, and \p TotalNumVGPRs available.
366unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
367 unsigned MaxWaves,
368 unsigned TotalNumVGPRs);
369
370/// \returns Occupancy for a given \p SGPRs usage, \p MaxWaves possible, and \p
371/// Gen.
372unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
373 AMDGPUSubtarget::Generation Gen);
374
375/// \returns Number of VGPR blocks needed for given subtarget \p STI when
376/// \p NumVGPRs are used. We actually return the number of blocks -1, since
377/// that's what we encode.
378///
379/// For subtargets which support it, \p EnableWavefrontSize32 should match the
380/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
381unsigned getEncodedNumVGPRBlocks(
382 const MCSubtargetInfo *STI, unsigned NumVGPRs,
383 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
384
385/// \returns Number of VGPR blocks that need to be allocated for the given
386/// subtarget \p STI when \p NumVGPRs are used.
387unsigned getAllocatedNumVGPRBlocks(
388 const MCSubtargetInfo *STI, unsigned NumVGPRs,
389 unsigned DynamicVGPRBlockSize,
390 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
391
392} // end namespace IsaInfo
393
// Represents a field in an encoded value.
template <unsigned HighBit, unsigned LowBit, unsigned D = 0>
struct EncodingField {
  static_assert(HighBit >= LowBit, "Invalid bit range!");

  // Bit position of the field's least significant bit in the encoding.
  static constexpr unsigned Offset = LowBit;
  // Number of bits the field occupies.
  static constexpr unsigned Width = HighBit + 1 - LowBit;

  using ValueType = unsigned;
  // Value assumed when the field is left unspecified.
  static constexpr ValueType Default = D;

  ValueType Value;

  constexpr EncodingField(ValueType Value) : Value(Value) {}

  // The raw (unshifted) contribution of this field to an encoding.
  constexpr uint64_t encode() const { return Value; }

  // Interpret already shifted-and-masked bits as a field value.
  static ValueType decode(uint64_t Encoded) { return Encoded; }
};

// Represents a single bit in an encoded value.
template <unsigned Bit, unsigned D = 0>
using EncodingBit = EncodingField<Bit, Bit, D>;
414
// A helper for encoding and decoding multiple fields.
template <typename... Fields> struct EncodingFields {
  // OR together each field's raw bits shifted to its own offset.
  static constexpr uint64_t encode(Fields... Values) {
    return ((Values.encode() << Values.Offset) | ...);
  }

  // For each field: shift down to its offset, mask to its width, and let
  // the field type decode the raw bits. Results are returned positionally.
  static std::tuple<typename Fields::ValueType...> decode(uint64_t Encoded) {
    return {Fields::decode((Encoded >> Fields::Offset) &
                           maxUIntN(Fields::Width))...};
  }
};
426
427LLVM_READONLY
428inline bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx) {
429 return getNamedOperandIdx(Opcode, Name: NamedIdx) != -1;
430}
431
432LLVM_READONLY
433int32_t getSOPPWithRelaxation(uint32_t Opcode);
434
// Static properties of a MIMG base opcode (one row of the generated
// MIMGBaseOpcode searchable table; see GET_MIMGBaseOpcode_DECL above).
struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode;
  bool Store;
  bool Atomic;
  bool AtomicX2;
  bool Sampler;
  bool Gather4;

  uint8_t NumExtraArgs;
  bool Gradients;
  bool G16;
  bool Coordinates;
  bool LodOrClampOrMip;
  bool HasD16;
  bool MSAA;
  bool BVH;
  bool A16;
  bool NoReturn;
  bool PointSampleAccel;
};
455
456LLVM_READONLY
457const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc);
458
459LLVM_READONLY
460const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
461
// Properties of a MIMG dimension value.
struct MIMGDimInfo {
  MIMGDim Dim;
  uint8_t NumCoords;    // Number of coordinate components.
  uint8_t NumGradients; // Number of gradient components.
  bool MSAA;
  bool DA;
  uint8_t Encoding;      // Encoded dim value (see getMIMGDimInfoByEncoding).
  const char *AsmSuffix; // Assembly suffix (see getMIMGDimInfoByAsmSuffix).
};
471
472LLVM_READONLY
473const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
474
475LLVM_READONLY
476const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
477
478LLVM_READONLY
479const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
480
// Pairs an L-variant MIMG base opcode with its LZ counterpart
// (see getMIMGLZMappingInfo).
struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;
  MIMGBaseOpcode LZ;
};
485
// Pairs a MIP-variant MIMG base opcode with its non-MIP counterpart
// (see getMIMGMIPMappingInfo).
struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;
  MIMGBaseOpcode NONMIP;
};
490
// Pairs a bias-variant MIMG base opcode with its no-bias counterpart
// (see getMIMGBiasMappingInfo).
struct MIMGBiasMappingInfo {
  MIMGBaseOpcode Bias;
  MIMGBaseOpcode NoBias;
};
495
// Pairs an offset-variant MIMG base opcode with its no-offset counterpart
// (see getMIMGOffsetMappingInfo).
struct MIMGOffsetMappingInfo {
  MIMGBaseOpcode Offset;
  MIMGBaseOpcode NoOffset;
};
500
// Pairs a G-variant MIMG base opcode with its G16 counterpart
// (see getMIMGG16MappingInfo).
struct MIMGG16MappingInfo {
  MIMGBaseOpcode G;
  MIMGBaseOpcode G16;
};
505
506LLVM_READONLY
507const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
508
// Pairs the 2-address and 3-address forms of a WMMA opcode.
struct WMMAOpcodeMappingInfo {
  unsigned Opcode2Addr;
  unsigned Opcode3Addr;
};
513
514LLVM_READONLY
515const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
516
517LLVM_READONLY
518const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);
519
520LLVM_READONLY
521const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset);
522
523LLVM_READONLY
524const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);
525
526LLVM_READONLY
527int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
528 unsigned VDataDwords, unsigned VAddrDwords);
529
530LLVM_READONLY
531int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
532
533LLVM_READONLY
534unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
535 const MIMGDimInfo *Dim, bool IsA16,
536 bool IsG16Supported);
537
// Per-opcode MIMG information (see getMIMGInfo).
struct MIMGInfo {
  uint32_t Opcode;
  uint32_t BaseOpcode;   // Associated MIMG base opcode.
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;   // Dwords of vdata.
  uint8_t VAddrDwords;   // Dwords of vaddr.
  uint8_t VAddrOperands; // Number of vaddr operands.
};
546
547LLVM_READONLY
548const MIMGInfo *getMIMGInfo(unsigned Opc);
549
550LLVM_READONLY
551int getMTBUFBaseOpcode(unsigned Opc);
552
553LLVM_READONLY
554int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
555
556LLVM_READONLY
557int getMTBUFElements(unsigned Opc);
558
559LLVM_READONLY
560bool getMTBUFHasVAddr(unsigned Opc);
561
562LLVM_READONLY
563bool getMTBUFHasSrsrc(unsigned Opc);
564
565LLVM_READONLY
566bool getMTBUFHasSoffset(unsigned Opc);
567
568LLVM_READONLY
569int getMUBUFBaseOpcode(unsigned Opc);
570
571LLVM_READONLY
572int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);
573
574LLVM_READONLY
575int getMUBUFElements(unsigned Opc);
576
577LLVM_READONLY
578bool getMUBUFHasVAddr(unsigned Opc);
579
580LLVM_READONLY
581bool getMUBUFHasSrsrc(unsigned Opc);
582
583LLVM_READONLY
584bool getMUBUFHasSoffset(unsigned Opc);
585
586LLVM_READONLY
587bool getMUBUFIsBufferInv(unsigned Opc);
588
589LLVM_READONLY
590bool getMUBUFTfe(unsigned Opc);
591
592LLVM_READONLY
593bool getSMEMIsBuffer(unsigned Opc);
594
595LLVM_READONLY
596bool getVOP1IsSingle(unsigned Opc);
597
598LLVM_READONLY
599bool getVOP2IsSingle(unsigned Opc);
600
601LLVM_READONLY
602bool getVOP3IsSingle(unsigned Opc);
603
604LLVM_READONLY
605bool isVOPC64DPP(unsigned Opc);
606
607LLVM_READONLY
608bool isVOPCAsmOnly(unsigned Opc);
609
610/// Returns true if MAI operation is a double precision GEMM.
611LLVM_READONLY
612bool getMAIIsDGEMM(unsigned Opc);
613
614LLVM_READONLY
615bool getMAIIsGFX940XDL(unsigned Opc);
616
617LLVM_READONLY
618bool getWMMAIsXDL(unsigned Opc);
619
620// Get an equivalent BitOp3 for a binary logical \p Opc.
621// \returns BitOp3 modifier for the logical operation or zero.
622// Used in VOPD3 conversion.
623unsigned getBitOp2(unsigned Opc);
624
// Whether an opcode can be used as the X and/or Y component of a VOPD
// instruction (see getCanBeVOPD).
struct CanBeVOPD {
  bool X;
  bool Y;
};
629
630/// \returns SIEncodingFamily used for VOPD encoding on a \p ST.
631LLVM_READONLY
632unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST);
633
634LLVM_READONLY
635CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3);
636
637LLVM_READNONE
638uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal);
639
640LLVM_READONLY
641const MFMA_F8F6F4_Info *getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ,
642 unsigned BLGP,
643 unsigned F8F8Opcode);
644
645LLVM_READNONE
646uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt);
647
648LLVM_READONLY
649const MFMA_F8F6F4_Info *getWMMA_F8F6F4_WithFormatArgs(unsigned FmtA,
650 unsigned FmtB,
651 unsigned F8F8Opcode);
652
653LLVM_READONLY
654const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
655 uint8_t NumComponents,
656 uint8_t NumFormat,
657 const MCSubtargetInfo &STI);
658LLVM_READONLY
659const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
660 const MCSubtargetInfo &STI);
661
662LLVM_READONLY
663int32_t getMCOpcode(uint32_t Opcode, unsigned Gen);
664
665LLVM_READONLY
666unsigned getVOPDOpcode(unsigned Opc, bool VOPD3);
667
668LLVM_READONLY
669int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily,
670 bool VOPD3);
671
672LLVM_READONLY
673bool isVOPD(unsigned Opc);
674
675LLVM_READNONE
676bool isMAC(unsigned Opc);
677
678LLVM_READNONE
679bool isPermlane16(unsigned Opc);
680
681LLVM_READNONE
682bool isGenericAtomic(unsigned Opc);
683
684LLVM_READNONE
685bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc);
686
687namespace VOPD {
688
// Operand slots of a single VOPD component.
enum Component : unsigned {
  DST = 0,
  SRC0,
  SRC1,
  SRC2,

  DST_NUM = 1,     // Number of dst operands per component.
  MAX_SRC_NUM = 3, // Maximum number of src operands per component.
  MAX_OPR_NUM = DST_NUM + MAX_SRC_NUM
};
699
// LSB mask for VGPR banks per VOPD component operand.
// 4 banks result in a mask 3, setting 2 lower bits.
// Indexed by Component (DST, SRC0, SRC1, SRC2).
constexpr unsigned VOPD_VGPR_BANK_MASKS[] = {1, 3, 3, 1};
constexpr unsigned VOPD3_VGPR_BANK_MASKS[] = {1, 3, 3, 3};

// Selects the X or Y half of a VOPD instruction.
enum ComponentIndex : unsigned { X = 0, Y = 1 };
constexpr unsigned COMPONENTS[] = {ComponentIndex::X, ComponentIndex::Y};
constexpr unsigned COMPONENTS_NUM = 2;
708
709// Properties of VOPD components.
710class ComponentProps {
711private:
712 unsigned SrcOperandsNum = 0;
713 unsigned MandatoryLiteralIdx = ~0u;
714 bool HasSrc2Acc = false;
715 unsigned NumVOPD3Mods = 0;
716 unsigned Opcode = 0;
717 bool IsVOP3 = false;
718
719public:
720 ComponentProps() = default;
721 ComponentProps(const MCInstrDesc &OpDesc, bool VOP3Layout = false);
722
723 // Return the total number of src operands this component has.
724 unsigned getCompSrcOperandsNum() const { return SrcOperandsNum; }
725
726 // Return the number of src operands of this component visible to the parser.
727 unsigned getCompParsedSrcOperandsNum() const {
728 return SrcOperandsNum - HasSrc2Acc;
729 }
730
731 // Return true iif this component has a mandatory literal.
732 bool hasMandatoryLiteral() const { return MandatoryLiteralIdx != ~0u; }
733
734 // If this component has a mandatory literal, return component operand
735 // index of this literal (i.e. either Component::SRC1 or Component::SRC2).
736 unsigned getMandatoryLiteralCompOperandIndex() const {
737 assert(hasMandatoryLiteral());
738 return MandatoryLiteralIdx;
739 }
740
741 // Return true iif this component has operand
742 // with component index CompSrcIdx and this operand may be a register.
743 bool hasRegSrcOperand(unsigned CompSrcIdx) const {
744 assert(CompSrcIdx < Component::MAX_SRC_NUM);
745 return SrcOperandsNum > CompSrcIdx && !hasMandatoryLiteralAt(CompSrcIdx);
746 }
747
748 // Return true iif this component has tied src2.
749 bool hasSrc2Acc() const { return HasSrc2Acc; }
750
751 // Return a number of source modifiers if instruction is used in VOPD3.
752 unsigned getCompVOPD3ModsNum() const { return NumVOPD3Mods; }
753
754 // Return opcode of the component.
755 unsigned getOpcode() const { return Opcode; }
756
757 // Returns if component opcode is in VOP3 encoding.
758 unsigned isVOP3() const { return IsVOP3; }
759
760 // Return index of BitOp3 operand or -1.
761 int getBitOp3OperandIdx() const;
762
763private:
764 bool hasMandatoryLiteralAt(unsigned CompSrcIdx) const {
765 assert(CompSrcIdx < Component::MAX_SRC_NUM);
766 return MandatoryLiteralIdx == Component::DST_NUM + CompSrcIdx;
767 }
768};
769
// Kind of instruction a component layout describes; used to index the
// per-kind operand-index tables in ComponentLayout.
enum ComponentKind : unsigned {
  SINGLE = 0, // A single VOP1 or VOP2 instruction which may be used in VOPD.
  COMPONENT_X, // A VOPD instruction, X component.
  COMPONENT_Y, // A VOPD instruction, Y component.
  MAX = COMPONENT_Y
};
776
777// Interface functions of this class map VOPD component operand indices
778// to indices of operands in MachineInstr/MCInst or parsed operands array.
779//
780// Note that this class operates with 3 kinds of indices:
781// - VOPD component operand indices (Component::DST, Component::SRC0, etc.);
782// - MC operand indices (they refer operands in a MachineInstr/MCInst);
783// - parsed operand indices (they refer operands in parsed operands array).
784//
785// For SINGLE components mapping between these indices is trivial.
786// But things get more complicated for COMPONENT_X and
787// COMPONENT_Y because these components share the same
788// MachineInstr/MCInst and the same parsed operands array.
789// Below is an example of component operand to parsed operand
790// mapping for the following instruction:
791//
792// v_dual_add_f32 v255, v4, v5 :: v_dual_mov_b32 v6, v1
793//
794// PARSED COMPONENT PARSED
795// COMPONENT OPERANDS OPERAND INDEX OPERAND INDEX
796// -------------------------------------------------------------------
797// "v_dual_add_f32" 0
798// v_dual_add_f32 v255 0 (DST) --> 1
799// v4 1 (SRC0) --> 2
800// v5 2 (SRC1) --> 3
801// "::" 4
802// "v_dual_mov_b32" 5
803// v_dual_mov_b32 v6 0 (DST) --> 6
804// v1 1 (SRC0) --> 7
805// -------------------------------------------------------------------
806//
class ComponentLayout {
private:
  // Regular MachineInstr/MCInst operands are ordered as follows:
  //   dst, src0 [, other src operands]
  // VOPD MachineInstr/MCInst operands are ordered as follows:
  //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
  // Each ComponentKind has operand indices defined below.
  static constexpr unsigned MC_DST_IDX[] = {0, 0, 1};

  // VOPD3 instructions may have 2 or 3 source modifiers, src2 modifier is not
  // used if there is tied accumulator. Indexing of this array:
  // MC_SRC_IDX[VOPD3ModsNum][SrcNo]. This returns an index for a SINGLE
  // instruction layout, add 1 for COMPONENT_X or COMPONENT_Y. For the second
  // component add OpX.MCSrcNum + OpX.VOPD3ModsNum.
  // For VOPD1/VOPD2 use column with zero modifiers.
  static constexpr unsigned SINGLE_MC_SRC_IDX[4][3] = {
      {1, 2, 3}, {2, 3, 4}, {2, 4, 5}, {2, 4, 6}};

  // Parsed operands of regular instructions are ordered as follows:
  //   Mnemo dst src0 [vsrc1 ...]
  // Parsed VOPD operands are ordered as follows:
  //   OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
  //   OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
  // Each ComponentKind has operand indices defined below.
  static constexpr unsigned PARSED_DST_IDX[] = {1, 1,
                                                4 /* + OpX.ParsedSrcNum */};
  static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {
      2, 2, 5 /* + OpX.ParsedSrcNum */};

private:
  const ComponentKind Kind;
  // Properties of the X component; default-constructed (all zeros) for
  // SINGLE and COMPONENT_X layouts.
  const ComponentProps PrevComp;
  const unsigned VOPD3ModsNum;
  const int BitOp3Idx; // Index of bitop3 operand or -1

public:
  // Create layout for COMPONENT_X or SINGLE component.
  ComponentLayout(ComponentKind Kind, unsigned VOPD3ModsNum, int BitOp3Idx)
      : Kind(Kind), VOPD3ModsNum(VOPD3ModsNum), BitOp3Idx(BitOp3Idx) {
    assert(Kind == ComponentKind::SINGLE || Kind == ComponentKind::COMPONENT_X);
  }

  // Create layout for COMPONENT_Y which depends on COMPONENT_X layout.
  ComponentLayout(const ComponentProps &OpXProps, unsigned VOPD3ModsNum,
                  int BitOp3Idx)
      : Kind(ComponentKind::COMPONENT_Y), PrevComp(OpXProps),
        VOPD3ModsNum(VOPD3ModsNum), BitOp3Idx(BitOp3Idx) {}

public:
  // Return the index of dst operand in MCInst operands.
  unsigned getIndexOfDstInMCOperands() const { return MC_DST_IDX[Kind]; }

  // Return the index of the specified src operand in MCInst operands.
  unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx, bool VOPD3) const {
    assert(CompSrcIdx < Component::MAX_SRC_NUM);

    // For a SINGLE instruction with a BitOp3 operand, src2 maps to that
    // operand's index directly.
    if (Kind == SINGLE && CompSrcIdx == 2 && BitOp3Idx != -1)
      return BitOp3Idx;

    if (VOPD3) {
      return SINGLE_MC_SRC_IDX[VOPD3ModsNum][CompSrcIdx] + getPrevCompSrcNum() +
             getPrevCompVOPD3ModsNum() + (Kind != SINGLE ? 1 : 0);
    }

    // VOPD1/VOPD2: use the zero-modifiers column.
    return SINGLE_MC_SRC_IDX[0][CompSrcIdx] + getPrevCompSrcNum() +
           (Kind != SINGLE ? 1 : 0);
  }

  // Return the index of dst operand in the parsed operands array.
  unsigned getIndexOfDstInParsedOperands() const {
    return PARSED_DST_IDX[Kind] + getPrevCompParsedSrcNum();
  }

  // Return the index of the specified src operand in the parsed operands array.
  unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const {
    assert(CompSrcIdx < Component::MAX_SRC_NUM);
    return FIRST_PARSED_SRC_IDX[Kind] + getPrevCompParsedSrcNum() + CompSrcIdx;
  }

private:
  // The helpers below are zero for SINGLE/COMPONENT_X (PrevComp is
  // default-constructed in that case).
  unsigned getPrevCompSrcNum() const {
    return PrevComp.getCompSrcOperandsNum();
  }
  unsigned getPrevCompParsedSrcNum() const {
    return PrevComp.getCompParsedSrcOperandsNum();
  }
  unsigned getPrevCompVOPD3ModsNum() const {
    return PrevComp.getCompVOPD3ModsNum();
  }
};
897
// Layout and properties of VOPD components.
class ComponentInfo : public ComponentProps, public ComponentLayout {
public:
  // Create ComponentInfo for COMPONENT_X or SINGLE component.
  // Note: ComponentProps is constructed first, so its accessors may be used
  // to initialize the ComponentLayout base.
  ComponentInfo(const MCInstrDesc &OpDesc,
                ComponentKind Kind = ComponentKind::SINGLE,
                bool VOP3Layout = false)
      : ComponentProps(OpDesc, VOP3Layout),
        ComponentLayout(Kind, getCompVOPD3ModsNum(), getBitOp3OperandIdx()) {}

  // Create ComponentInfo for COMPONENT_Y which depends on COMPONENT_X layout.
  ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps,
                bool VOP3Layout = false)
      : ComponentProps(OpDesc, VOP3Layout),
        ComponentLayout(OpXProps, getCompVOPD3ModsNum(),
                        getBitOp3OperandIdx()) {}

  // Map component operand index to parsed operand index.
  // Return 0 if the specified operand does not exist.
  unsigned getIndexInParsedOperands(unsigned CompOprIdx) const;
};
919
// Properties of VOPD instructions.
class InstInfo {
private:
  // Info for the X (index 0) and Y (index 1) components.
  const ComponentInfo CompInfo[COMPONENTS_NUM];

public:
  // One MCRegister per component operand slot (dst, src0..src2).
  using RegIndices = std::array<MCRegister, Component::MAX_OPR_NUM>;

  InstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
      : CompInfo{OpX, OpY} {}

  InstInfo(const ComponentInfo &OprInfoX, const ComponentInfo &OprInfoY)
      : CompInfo{OprInfoX, OprInfoY} {}

  const ComponentInfo &operator[](size_t ComponentIdx) const {
    assert(ComponentIdx < COMPONENTS_NUM);
    return CompInfo[ComponentIdx];
  }

  // Check VOPD operands constraints.
  // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
  // for the specified component and MC operand. The callback must return 0
  // if the operand is not a register or not a VGPR.
  // If \p SkipSrc is set to true then constraints for source operands are not
  // checked.
  // If \p AllowSameVGPR is set then same VGPRs are allowed for X and Y sources
  // even though it violates requirement to be from different banks.
  // If \p VOPD3 is set to true both dst registers allowed to be either odd
  // or even and instruction may have real src2 as opposed to tied accumulator.
  bool
  hasInvalidOperand(std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
                    const MCRegisterInfo &MRI, bool SkipSrc = false,
                    bool AllowSameVGPR = false, bool VOPD3 = false) const {
    return getInvalidCompOperandIndex(GetRegIdx, MRI, SkipSrc, AllowSameVGPR,
                                      VOPD3)
        .has_value();
  }

  // Check VOPD operands constraints.
  // Return the index of an invalid component operand, if any.
  // If \p SkipSrc is set to true then constraints for source operands are not
  // checked except for being from the same halves of VGPR file on gfx1250.
  // If \p AllowSameVGPR is set then same VGPRs are allowed for X and Y sources
  // even though it violates requirement to be from different banks.
  // If \p VOPD3 is set to true both dst registers allowed to be either odd
  // or even and instruction may have real src2 as opposed to tied accumulator.
  std::optional<unsigned> getInvalidCompOperandIndex(
      std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
      const MCRegisterInfo &MRI, bool SkipSrc = false,
      bool AllowSameVGPR = false, bool VOPD3 = false) const;

private:
  // Collect the VGPR indices of one component's operands via \p GetRegIdx.
  RegIndices
  getRegIndices(unsigned ComponentIdx,
                std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
                bool VOPD3) const;
};
977
978} // namespace VOPD
979
LLVM_READONLY
// \returns the pair of single-instruction opcodes forming the X and Y
// components of \p VOPDOpcode.
std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode);

LLVM_READONLY
// Get properties of 2 single VOP1/VOP2 instructions
// used as components to create a VOPD instruction.
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY);

LLVM_READONLY
// Get properties of VOPD X and Y components.
VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
                               const MCInstrInfo *InstrInfo);

LLVM_READONLY
bool isAsyncStore(unsigned Opc);
LLVM_READONLY
bool isTensorStore(unsigned Opc);
LLVM_READONLY
// NOTE(review): TID is passed by value; a const reference would avoid the
// copy — confirm against the out-of-line definition before changing.
unsigned getTemporalHintType(const MCInstrDesc TID);

LLVM_READONLY
bool isTrue16Inst(unsigned Opc);

LLVM_READONLY
FPType getFPDstSelType(unsigned Opc);

LLVM_READONLY
bool isInvalidSingleUseConsumerInst(unsigned Opc);

LLVM_READONLY
bool isInvalidSingleUseProducerInst(unsigned Opc);

bool isDPMACCInstruction(unsigned Opc);

// Translate between the 2-address and 3-address forms of WMMA opcodes.
LLVM_READONLY
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);

LLVM_READONLY
unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);

void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &Header,
                               const MCSubtargetInfo *STI);

// Address-space classification of a global value.
bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
bool isReadOnlySegment(const GlobalValue *GV);

/// \returns True if constants should be emitted to .text section for given
/// target triple \p TT, false otherwise.
bool shouldEmitConstantsToTextSection(const Triple &TT);

/// Returns a valid charcode or 0 in the first entry if this is a valid physical
/// register name. Followed by the start register number, and the register
/// width. Does not validate the number of registers exists in the class. Unlike
/// parseAsmConstraintPhysReg, this does not expect the name to be wrapped in
/// "{}".
std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef TupleString);

/// Returns a valid charcode or 0 in the first entry if this is a valid physical
/// register constraint. Followed by the start register number, and the register
/// width. Does not validate the number of registers exists in the class.
std::tuple<char, unsigned, unsigned>
parseAsmConstraintPhysReg(StringRef Constraint);
1043
/// \returns Integer value requested using \p F's \p Name attribute.
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if requested value cannot be converted
/// to integer.
int getIntegerAttribute(const Function &F, StringRef Name, int Default);

/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
/// is false).
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if one of the requested values cannot be
/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
/// not present.
std::pair<unsigned, unsigned>
getIntegerPairAttribute(const Function &F, StringRef Name,
                        std::pair<unsigned, unsigned> Default,
                        bool OnlyFirstRequired = false);

/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
/// is false).
///
/// \returns \p std::nullopt if attribute is not present.
///
/// \returns \p std::nullopt and emits error if one of the requested values
/// cannot be converted to integer, or \p OnlyFirstRequired is false and
/// "second" value is not present.
std::optional<std::pair<unsigned, std::optional<unsigned>>>
getIntegerPairAttribute(const Function &F, StringRef Name,
                        bool OnlyFirstRequired = false);

/// \returns Generate a vector of integer values requested using \p F's \p Name
/// attribute.
/// \returns A vector of size \p Size, with all elements set to \p DefaultVal,
/// if any error occurs. The corresponding error will also be emitted.
SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
                                             unsigned Size,
                                             unsigned DefaultVal);
/// Similar to the function above, but returns std::nullopt if any error occurs.
std::optional<SmallVector<unsigned>>
getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size);

/// Checks if \p Val is inside \p MD, a !range-like metadata.
bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val);
1092
// Hardware wait counters. The first NUM_NORMAL_INST_CNTS entries exist on
// all targets; the remaining ones only where noted.
enum InstCounterType {
  LOAD_CNT = 0, // VMcnt prior to gfx12.
  DS_CNT,       // LGKMcnt prior to gfx12.
  EXP_CNT,      //
  STORE_CNT,    // VScnt in gfx10/gfx11.
  NUM_NORMAL_INST_CNTS,
  SAMPLE_CNT = NUM_NORMAL_INST_CNTS, // gfx12+ only.
  BVH_CNT,                           // gfx12+ only.
  KM_CNT,                            // gfx12+ only.
  X_CNT,                             // gfx1250.
  NUM_EXTENDED_INST_CNTS,
  VA_VDST = NUM_EXTENDED_INST_CNTS, // gfx12+ expert mode only.
  VM_VSRC,                          // gfx12+ expert mode only.
  NUM_EXPERT_INST_CNTS,
  NUM_INST_CNTS = NUM_EXPERT_INST_CNTS
};
1109
// Return an iterator over all counters between LOAD_CNT (the first counter)
// and \c MaxCounter (exclusive, default value yields an enumeration over
// all counters).
iota_range<InstCounterType>
inst_counter_types(InstCounterType MaxCounter = NUM_INST_CNTS);
1115
1116} // namespace AMDGPU
1117
// Enable iteration over InstCounterType (used by inst_counter_types above).
template <> struct enum_iteration_traits<AMDGPU::InstCounterType> {
  static constexpr bool is_iterable = true;
};
1121
1122namespace AMDGPU {
1123
1124/// Represents the counter values to wait for in an s_waitcnt instruction.
1125///
1126/// Large values (including the maximum possible integer) can be used to
1127/// represent "don't care" waits.
1128class Waitcnt {
1129 unsigned LoadCnt = ~0u; // Corresponds to Vmcnt prior to gfx12.
1130 unsigned ExpCnt = ~0u;
1131 unsigned DsCnt = ~0u; // Corresponds to LGKMcnt prior to gfx12.
1132 unsigned StoreCnt = ~0u; // Corresponds to VScnt on gfx10/gfx11.
1133 unsigned SampleCnt = ~0u; // gfx12+ only.
1134 unsigned BvhCnt = ~0u; // gfx12+ only.
1135 unsigned KmCnt = ~0u; // gfx12+ only.
1136 unsigned XCnt = ~0u; // gfx1250.
1137 unsigned VaVdst = ~0u; // gfx12+ expert scheduling mode only.
1138 unsigned VmVsrc = ~0u; // gfx12+ expert scheduling mode only.
1139
1140public:
1141 unsigned get(InstCounterType T) const {
1142 switch (T) {
1143 case LOAD_CNT:
1144 return LoadCnt;
1145 case EXP_CNT:
1146 return ExpCnt;
1147 case DS_CNT:
1148 return DsCnt;
1149 case STORE_CNT:
1150 return StoreCnt;
1151 case SAMPLE_CNT:
1152 return SampleCnt;
1153 case BVH_CNT:
1154 return BvhCnt;
1155 case KM_CNT:
1156 return KmCnt;
1157 case X_CNT:
1158 return XCnt;
1159 case VA_VDST:
1160 return VaVdst;
1161 case VM_VSRC:
1162 return VmVsrc;
1163 default:
1164 llvm_unreachable("bad InstCounterType");
1165 }
1166 }
1167 void set(InstCounterType T, unsigned Val) {
1168 switch (T) {
1169 case LOAD_CNT:
1170 LoadCnt = Val;
1171 break;
1172 case EXP_CNT:
1173 ExpCnt = Val;
1174 break;
1175 case DS_CNT:
1176 DsCnt = Val;
1177 break;
1178 case STORE_CNT:
1179 StoreCnt = Val;
1180 break;
1181 case SAMPLE_CNT:
1182 SampleCnt = Val;
1183 break;
1184 case BVH_CNT:
1185 BvhCnt = Val;
1186 break;
1187 case KM_CNT:
1188 KmCnt = Val;
1189 break;
1190 case X_CNT:
1191 XCnt = Val;
1192 break;
1193 case VA_VDST:
1194 VaVdst = Val;
1195 break;
1196 case VM_VSRC:
1197 VmVsrc = Val;
1198 break;
1199 default:
1200 llvm_unreachable("bad InstCounterType");
1201 }
1202 }
1203
1204 Waitcnt() = default;
1205 // Pre-gfx12 constructor.
1206 Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
1207 : LoadCnt(VmCnt), ExpCnt(ExpCnt), DsCnt(LgkmCnt), StoreCnt(VsCnt) {}
1208
1209 // gfx12+ constructor.
1210 Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
1211 unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt,
1212 unsigned VaVdst, unsigned VmVsrc)
1213 : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt),
1214 SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt), XCnt(XCnt),
1215 VaVdst(VaVdst), VmVsrc(VmVsrc) {}
1216
1217 bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); }
1218
1219 bool hasWaitExceptStoreCnt() const {
1220 return LoadCnt != ~0u || ExpCnt != ~0u || DsCnt != ~0u ||
1221 SampleCnt != ~0u || BvhCnt != ~0u || KmCnt != ~0u || XCnt != ~0u ||
1222 VaVdst != ~0u || VmVsrc != ~0u;
1223 }
1224
1225 bool hasWaitStoreCnt() const { return StoreCnt != ~0u; }
1226
1227 bool hasWaitDepctr() const { return VaVdst != ~0u || VmVsrc != ~0u; }
1228
1229 Waitcnt combined(const Waitcnt &Other) const {
1230 // Does the right thing provided self and Other are either both pre-gfx12
1231 // or both gfx12+.
1232 return Waitcnt(
1233 std::min(a: LoadCnt, b: Other.LoadCnt), std::min(a: ExpCnt, b: Other.ExpCnt),
1234 std::min(a: DsCnt, b: Other.DsCnt), std::min(a: StoreCnt, b: Other.StoreCnt),
1235 std::min(a: SampleCnt, b: Other.SampleCnt), std::min(a: BvhCnt, b: Other.BvhCnt),
1236 std::min(a: KmCnt, b: Other.KmCnt), std::min(a: XCnt, b: Other.XCnt),
1237 std::min(a: VaVdst, b: Other.VaVdst), std::min(a: VmVsrc, b: Other.VmVsrc));
1238 }
1239
1240 friend raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait);
1241};
1242
/// Represents the hardware counter limits for different wait count types.
struct HardwareLimits {
  unsigned LoadcntMax; // Corresponds to Vmcnt prior to gfx12.
  unsigned ExpcntMax;
  unsigned DscntMax;    // Corresponds to LGKMcnt prior to gfx12.
  unsigned StorecntMax; // Corresponds to VScnt in gfx10/gfx11.
  unsigned SamplecntMax; // gfx12+ only.
  unsigned BvhcntMax;    // gfx12+ only.
  unsigned KmcntMax;     // gfx12+ only.
  unsigned XcntMax;      // gfx1250.
  unsigned VaVdstMax;    // gfx12+ expert mode only.
  unsigned VmVsrcMax;    // gfx12+ expert mode only.

  // Default constructor leaves all limits uninitialized.
  HardwareLimits() = default;

  /// Initializes hardware limits from ISA version.
  HardwareLimits(const IsaVersion &IV);
};
1261
// The following methods are only meaningful on targets that support
// S_WAITCNT.

/// \returns Vmcnt bit mask for given isa \p Version.
unsigned getVmcntBitMask(const IsaVersion &Version);

/// \returns Expcnt bit mask for given isa \p Version.
unsigned getExpcntBitMask(const IsaVersion &Version);

/// \returns Lgkmcnt bit mask for given isa \p Version.
unsigned getLgkmcntBitMask(const IsaVersion &Version);

/// \returns Waitcnt bit mask for given isa \p Version.
unsigned getWaitcntBitMask(const IsaVersion &Version);

/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively. Should not be used on gfx12+, the instruction
/// which needs it is deprecated
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
///     \p Vmcnt = \p Waitcnt[3:0]        (pre-gfx9)
///     \p Vmcnt = \p Waitcnt[15:14,3:0]  (gfx9,10)
///     \p Vmcnt = \p Waitcnt[15:10]      (gfx11)
///     \p Expcnt = \p Waitcnt[6:4]       (pre-gfx11)
///     \p Expcnt = \p Waitcnt[2:0]       (gfx11)
///     \p Lgkmcnt = \p Waitcnt[11:8]     (pre-gfx10)
///     \p Lgkmcnt = \p Waitcnt[13:8]     (gfx10)
///     \p Lgkmcnt = \p Waitcnt[9:4]      (gfx11)
///
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt,
                   unsigned &Expcnt, unsigned &Lgkmcnt);

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);

/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt);

/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt);

/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt);

/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version. Should not be used on gfx12+, the instruction which needs
/// it is deprecated
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
///     Waitcnt[2:0]   = \p Expcnt       (gfx11+)
///     Waitcnt[3:0]   = \p Vmcnt        (pre-gfx9)
///     Waitcnt[3:0]   = \p Vmcnt[3:0]   (gfx9,10)
///     Waitcnt[6:4]   = \p Expcnt       (pre-gfx11)
///     Waitcnt[9:4]   = \p Lgkmcnt      (gfx11)
///     Waitcnt[11:8]  = \p Lgkmcnt      (pre-gfx10)
///     Waitcnt[13:8]  = \p Lgkmcnt      (gfx10)
///     Waitcnt[15:10] = \p Vmcnt        (gfx11)
///     Waitcnt[15:14] = \p Vmcnt[5:4]   (gfx9,10)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
///
unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
                       unsigned Expcnt, unsigned Lgkmcnt);

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);

// The following methods are only meaningful on targets that support
// S_WAIT_*CNT, introduced with gfx12.

/// \returns Loadcnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support LOADcnt
unsigned getLoadcntBitMask(const IsaVersion &Version);

/// \returns Samplecnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support SAMPLEcnt
unsigned getSamplecntBitMask(const IsaVersion &Version);

/// \returns Bvhcnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support BVHcnt
unsigned getBvhcntBitMask(const IsaVersion &Version);

/// \returns Dscnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support DScnt
unsigned getDscntBitMask(const IsaVersion &Version);

/// \returns Kmcnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support KMcnt
unsigned getKmcntBitMask(const IsaVersion &Version);

/// \returns Xcnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support Xcnt.
unsigned getXcntBitMask(const IsaVersion &Version);

/// \return STOREcnt or VScnt bit mask for given isa \p Version.
/// returns 0 for versions that do not support STOREcnt or VScnt.
/// STOREcnt and VScnt are the same counter, the name used
/// depends on the ISA version.
unsigned getStorecntBitMask(const IsaVersion &Version);

// The following are only meaningful on targets that support
// S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT.

/// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given
/// isa \p Version.
Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt);

/// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given
/// isa \p Version.
Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt);

/// \returns \p Loadcnt and \p Dscnt components of \p Decoded encoded as an
/// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa
/// \p Version.
unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);

/// \returns \p Storecnt and \p Dscnt components of \p Decoded encoded as an
/// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa
/// \p Version.
unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
1394
namespace Hwreg {

// Bit fields of the s_getreg/s_setreg hwreg immediate.
using HwregId = EncodingField<5, 0>;
using HwregOffset = EncodingField<10, 6>;

// The size field is stored biased by one (encoded value is Size - 1);
// encode()/decode() apply the bias.
struct HwregSize : EncodingField<15, 11, 32> {
  using EncodingField::EncodingField;
  constexpr uint64_t encode() const { return Value - 1; }
  static ValueType decode(uint64_t Encoded) { return Encoded + 1; }
};

using HwregEncoding = EncodingFields<HwregId, HwregOffset, HwregSize>;

} // namespace Hwreg
1409
namespace DepCtr {

// Helpers for the s_delay/depctr immediate and its named sub-fields.

// \returns the default (no-wait) depctr encoding for \p STI.
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI);
// Encode named field \p Name with value \p Val; bits consumed are recorded
// in \p UsedOprMask.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
                 const MCSubtargetInfo &STI);
bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
                              const MCSubtargetInfo &STI);
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
                  bool &IsDefault, const MCSubtargetInfo &STI);

/// \returns Maximum VaVdst value that can be encoded.
unsigned getVaVdstBitMask();

/// \returns Maximum VaSdst value that can be encoded.
unsigned getVaSdstBitMask();

/// \returns Maximum VaSsrc value that can be encoded.
unsigned getVaSsrcBitMask();

/// \returns Maximum HoldCnt value that can be encoded.
unsigned getHoldCntBitMask(const IsaVersion &Version);

/// \returns Maximum VmVsrc value that can be encoded.
unsigned getVmVsrcBitMask();

/// \returns Maximum VaVcc value that can be encoded.
unsigned getVaVccBitMask();

/// \returns Maximum SaSdst value that can be encoded.
unsigned getSaSdstBitMask();

/// \returns Decoded VaVdst from given immediate \p Encoded.
unsigned decodeFieldVaVdst(unsigned Encoded);

/// \returns Decoded VmVsrc from given immediate \p Encoded.
unsigned decodeFieldVmVsrc(unsigned Encoded);

/// \returns Decoded SaSdst from given immediate \p Encoded.
unsigned decodeFieldSaSdst(unsigned Encoded);

/// \returns Decoded VaSdst from given immediate \p Encoded.
unsigned decodeFieldVaSdst(unsigned Encoded);

/// \returns Decoded VaVcc from given immediate \p Encoded.
unsigned decodeFieldVaVcc(unsigned Encoded);

/// \returns Decoded VaSsrc from given immediate \p Encoded.
unsigned decodeFieldVaSsrc(unsigned Encoded);

/// \returns Decoded HoldCnt from given immediate \p Encoded.
unsigned decodeFieldHoldCnt(unsigned Encoded, const IsaVersion &Version);

/// \returns \p VmVsrc as an encoded Depctr immediate.
unsigned encodeFieldVmVsrc(unsigned VmVsrc, const MCSubtargetInfo &STI);

/// \returns \p Encoded combined with encoded \p VmVsrc.
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc);

/// \returns \p VaVdst as an encoded Depctr immediate.
unsigned encodeFieldVaVdst(unsigned VaVdst, const MCSubtargetInfo &STI);

/// \returns \p Encoded combined with encoded \p VaVdst.
unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst);

/// \returns \p SaSdst as an encoded Depctr immediate.
unsigned encodeFieldSaSdst(unsigned SaSdst, const MCSubtargetInfo &STI);

/// \returns \p Encoded combined with encoded \p SaSdst.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst);

/// \returns \p VaSdst as an encoded Depctr immediate.
unsigned encodeFieldVaSdst(unsigned VaSdst, const MCSubtargetInfo &STI);

/// \returns \p Encoded combined with encoded \p VaSdst.
unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst);

/// \returns \p VaVcc as an encoded Depctr immediate.
unsigned encodeFieldVaVcc(unsigned VaVcc, const MCSubtargetInfo &STI);

/// \returns \p Encoded combined with encoded \p VaVcc.
unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc);

/// \returns \p HoldCnt as an encoded Depctr immediate.
unsigned encodeFieldHoldCnt(unsigned HoldCnt, const MCSubtargetInfo &STI);

/// \returns \p Encoded combined with encoded \p HoldCnt.
unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt,
                            const IsaVersion &Version);

/// \returns \p VaSsrc as an encoded Depctr immediate.
unsigned encodeFieldVaSsrc(unsigned VaSsrc, const MCSubtargetInfo &STI);

/// \returns \p Encoded combined with encoded \p VaSsrc.
unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc);
1504
1505} // namespace DepCtr
1506
namespace Exp {

// Helpers for export (EXP) target operands.

// Maps target id \p Id to its symbolic \p Name and \p Index.
bool getTgtName(unsigned Id, StringRef &Name, int &Index);

LLVM_READONLY
unsigned getTgtId(const StringRef Name);

LLVM_READNONE
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);

} // namespace Exp
1518
namespace MTBUFFormat {

// Helpers for MTBUF data/numeric format immediates (dfmt/nfmt pre-gfx10,
// unified format on newer targets).

LLVM_READNONE
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);

void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);

int64_t getDfmt(const StringRef Name);

StringRef getDfmtName(unsigned Id);

int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);

StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);

bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);

bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);

int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI);

StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI);

bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI);

int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
                             const MCSubtargetInfo &STI);

bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);

unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);

} // namespace MTBUFFormat
1552
namespace SendMsg {

// Helpers for s_sendmsg message id / operation / stream immediates.

LLVM_READNONE
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI);

LLVM_READNONE
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
                  bool Strict = true);

LLVM_READNONE
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
                      const MCSubtargetInfo &STI, bool Strict = true);

LLVM_READNONE
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI);

LLVM_READNONE
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);

void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
               uint16_t &StreamId, const MCSubtargetInfo &STI);

LLVM_READNONE
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId);

/// Returns true if the message does not use the m0 operand.
bool msgDoesNotUseM0(int64_t MsgId, const MCSubtargetInfo &STI);

} // namespace SendMsg
1582
// Shader-attribute queries on an IR function.
unsigned getInitialPSInputAddr(const Function &F);

bool getHasColorExport(const Function &F);

bool getHasDepthExport(const Function &F);

bool hasDynamicVGPR(const Function &F);

// Returns the value of the "amdgpu-dynamic-vgpr-block-size" attribute, or 0 if
// the attribute is missing or its value is invalid.
unsigned getDynamicVGPRBlockSize(const Function &F);
1594
1595LLVM_READNONE
1596constexpr bool isShader(CallingConv::ID CC) {
1597 switch (CC) {
1598 case CallingConv::AMDGPU_VS:
1599 case CallingConv::AMDGPU_LS:
1600 case CallingConv::AMDGPU_HS:
1601 case CallingConv::AMDGPU_ES:
1602 case CallingConv::AMDGPU_GS:
1603 case CallingConv::AMDGPU_PS:
1604 case CallingConv::AMDGPU_CS_Chain:
1605 case CallingConv::AMDGPU_CS_ChainPreserve:
1606 case CallingConv::AMDGPU_CS:
1607 return true;
1608 default:
1609 return false;
1610 }
1611}
1612
// \returns true for shader calling conventions plus the AMDGPU_Gfx*
// conventions.
LLVM_READNONE
constexpr bool isGraphics(CallingConv::ID CC) {
  return isShader(CC) || CC == CallingConv::AMDGPU_Gfx ||
         CC == CallingConv::AMDGPU_Gfx_WholeWave;
}
1618
// Everything that is not graphics counts as compute; AMDGPU_CS additionally
// counts as both (it is a shader CC but also compute).
LLVM_READNONE
constexpr bool isCompute(CallingConv::ID CC) {
  return !isGraphics(CC) || CC == CallingConv::AMDGPU_CS;
}
1623
1624LLVM_READNONE
1625constexpr bool isEntryFunctionCC(CallingConv::ID CC) {
1626 switch (CC) {
1627 case CallingConv::AMDGPU_KERNEL:
1628 case CallingConv::SPIR_KERNEL:
1629 case CallingConv::AMDGPU_VS:
1630 case CallingConv::AMDGPU_GS:
1631 case CallingConv::AMDGPU_PS:
1632 case CallingConv::AMDGPU_CS:
1633 case CallingConv::AMDGPU_ES:
1634 case CallingConv::AMDGPU_HS:
1635 case CallingConv::AMDGPU_LS:
1636 return true;
1637 default:
1638 return false;
1639 }
1640}
1641
1642LLVM_READNONE
1643constexpr bool isChainCC(CallingConv::ID CC) {
1644 switch (CC) {
1645 case CallingConv::AMDGPU_CS_Chain:
1646 case CallingConv::AMDGPU_CS_ChainPreserve:
1647 return true;
1648 default:
1649 return false;
1650 }
1651}
1652
// These functions are considered entrypoints into the current module, i.e. they
// are allowed to be called from outside the current module. This is different
// from isEntryFunctionCC, which is only true for functions that are entered by
// the hardware. Module entry points include all entry functions but also
// include functions that can be called from other functions inside or outside
// the current module. Module entry functions are allowed to allocate LDS.
//
// AMDGPU_CS_Chain is intended for externally callable chain functions, so it is
// treated as a module entrypoint. AMDGPU_CS_ChainPreserve is used for internal
// helper functions (e.g. retry helpers), so it is not a module entrypoint.
LLVM_READNONE
constexpr bool isModuleEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_Gfx:
  case CallingConv::AMDGPU_CS_Chain:
    return true;
  default:
    // All hardware entry functions are also module entry functions.
    return isEntryFunctionCC(CC);
  }
}
1673
1674LLVM_READNONE
1675constexpr inline bool isKernel(CallingConv::ID CC) {
1676 switch (CC) {
1677 case CallingConv::AMDGPU_KERNEL:
1678 case CallingConv::SPIR_KERNEL:
1679 return true;
1680 default:
1681 return false;
1682 }
1683}
1684
1685inline bool isKernel(const Function &F) { return isKernel(CC: F.getCallingConv()); }
1686
// Tail-call optimization can only be guaranteed for the Fast calling
// convention.
LLVM_READNONE
constexpr bool canGuaranteeTCO(CallingConv::ID CC) {
  return CC == CallingConv::Fast;
}
1691
1692/// Return true if we might ever do TCO for calls with this calling convention.
1693LLVM_READNONE
1694constexpr bool mayTailCallThisCC(CallingConv::ID CC) {
1695 switch (CC) {
1696 case CallingConv::C:
1697 case CallingConv::AMDGPU_Gfx:
1698 case CallingConv::AMDGPU_Gfx_WholeWave:
1699 return true;
1700 default:
1701 return canGuaranteeTCO(CC);
1702 }
1703}
1704
// Subtarget feature queries.
bool hasXNACK(const MCSubtargetInfo &STI);
bool hasSRAMECC(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
bool hasA16(const MCSubtargetInfo &STI);
bool hasG16(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);
bool hasGDS(const MCSubtargetInfo &STI);
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler = false);
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI);

// Generation / encoding predicates.
bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
bool isGFX9_GFX10(const MCSubtargetInfo &STI);
bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI);
bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI);
bool isGFX8Plus(const MCSubtargetInfo &STI);
bool isGFX9Plus(const MCSubtargetInfo &STI);
bool isNotGFX9Plus(const MCSubtargetInfo &STI);
bool isGFX10(const MCSubtargetInfo &STI);
bool isGFX10_GFX11(const MCSubtargetInfo &STI);
bool isGFX10Plus(const MCSubtargetInfo &STI);
bool isNotGFX10Plus(const MCSubtargetInfo &STI);
bool isGFX10Before1030(const MCSubtargetInfo &STI);
bool isGFX11(const MCSubtargetInfo &STI);
bool isGFX11Plus(const MCSubtargetInfo &STI);
bool isGFX12(const MCSubtargetInfo &STI);
bool isGFX12Plus(const MCSubtargetInfo &STI);
bool isGFX1250(const MCSubtargetInfo &STI);
bool isGFX1250Plus(const MCSubtargetInfo &STI);
bool isGFX13(const MCSubtargetInfo &STI);
bool isGFX13Plus(const MCSubtargetInfo &STI);
bool supportsWGP(const MCSubtargetInfo &STI);
bool isNotGFX12Plus(const MCSubtargetInfo &STI);
bool isNotGFX11Plus(const MCSubtargetInfo &STI);
bool isGCN3Encoding(const MCSubtargetInfo &STI);
bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
bool isGFX10_3_GFX11(const MCSubtargetInfo &STI);
bool isGFX90A(const MCSubtargetInfo &STI);
bool isGFX940(const MCSubtargetInfo &STI);
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
bool hasMAIInsts(const MCSubtargetInfo &STI);
bool hasVOPD(const MCSubtargetInfo &STI);
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI);
1752
1753inline bool supportsWave32(const MCSubtargetInfo &STI) {
1754 return AMDGPU::isGFX10Plus(STI) && !AMDGPU::isGFX1250(STI);
1755}
1756
int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
unsigned hasKernargPreload(const MCSubtargetInfo &STI);
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST);

/// Is Reg - scalar register
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI);

/// \returns if \p Reg occupies the high 16-bits of a 32-bit register.
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI);

/// If \p Reg is a pseudo reg, return the correct hardware register given
/// \p STI otherwise return \p Reg.
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI);

/// Convert hardware register \p Reg to a pseudo register
LLVM_READNONE
MCRegister mc2PseudoReg(MCRegister Reg);

// \returns true if \p Reg is one of the inline-value registers.
LLVM_READNONE
bool isInlineValue(MCRegister Reg);
1777
1778/// Is this an AMDGPU specific source operand? These include registers,
1779/// inline constants, literals and mandatory literals (KImm).
1780constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo) {
1781 return OpInfo.OperandType >= AMDGPU::OPERAND_SRC_FIRST &&
1782 OpInfo.OperandType <= AMDGPU::OPERAND_SRC_LAST;
1783}
1784
1785inline bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
1786 return isSISrcOperand(OpInfo: Desc.operands()[OpNo]);
1787}
1788
/// Is operand \p OpNo of \p Desc a KImm (mandatory literal) operand?
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Is operand \p OpNo of \p Desc a floating-point source operand?
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Does operand \p OpNo of \p Desc support only inlinable literals?
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Get the size in bits of a register from the register class ID \p RCID.
unsigned getRegBitWidth(unsigned RCID);

/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC);
1803
LLVM_READNONE
/// \returns the size in bytes of the immediate encoding for the operand type
/// in \p OpInfo; unreachable for operand types not listed here.
inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
  switch (OpInfo.OperandType) {
  // 32-bit (dword-sized) immediates.
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_KIMM32:
  case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
  case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
    return 4;

  // 64-bit immediates.
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
  case AMDGPU::OPERAND_KIMM64:
    return 8;

  // 16-bit immediates, including packed 2x16 forms.
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_BF16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2BF16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
  case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
    return 2;

  default:
    llvm_unreachable("unhandled operand type");
  }
}
1848
1849LLVM_READNONE
1850inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
1851 return getOperandSize(OpInfo: Desc.operands()[OpNo]);
1852}
1853
/// Is this literal inlinable, and not one of the values intended for
/// floating-point use?
1856LLVM_READNONE
inline bool isInlinableIntLiteral(int64_t Literal) {
  // The integer inline-constant range is [-16, 64].
  return -16 <= Literal && Literal <= 64;
}
1860
/// Is this 64-bit literal inlinable?
LLVM_READNONE
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);

/// Is this 32-bit literal inlinable?
LLVM_READNONE
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);

/// Is this bf16 literal inlinable?
LLVM_READNONE
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);

/// Is this fp16 literal inlinable?
LLVM_READNONE
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);

/// Is this i16 literal inlinable?
LLVM_READNONE
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi);

/// \returns the inline encoding of packed v2i16 \p Literal, or std::nullopt
/// if it has none.
LLVM_READNONE
std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal);

/// \returns the inline encoding of packed v2bf16 \p Literal, or std::nullopt
/// if it has none.
LLVM_READNONE
std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal);

/// \returns the inline encoding of packed v2f16 \p Literal, or std::nullopt
/// if it has none.
LLVM_READNONE
std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal);

/// \returns the V_PK_FMAC_F16 inline encoding of \p Literal, or std::nullopt
/// if it has none.
LLVM_READNONE
std::optional<unsigned> getPKFMACF16InlineEncoding(uint32_t Literal,
                                                   bool IsGFX11Plus);

/// Is this packed 2x16 literal inlinable for operand type \p OpType?
LLVM_READNONE
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType);

/// Is this packed v2i16 literal inlinable?
LLVM_READNONE
bool isInlinableLiteralV2I16(uint32_t Literal);

/// Is this packed v2bf16 literal inlinable?
LLVM_READNONE
bool isInlinableLiteralV2BF16(uint32_t Literal);

/// Is this packed v2f16 literal inlinable?
LLVM_READNONE
bool isInlinableLiteralV2F16(uint32_t Literal);

/// Is \p Literal an inline constant for V_PK_FMAC_F16?
LLVM_READNONE
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus);

/// Is \p Val a valid 32-bit literal (for an FP64 operand if \p IsFP64)?
LLVM_READNONE
bool isValid32BitLiteral(uint64_t Val, bool IsFP64);

/// Encode \p Imm as a 32-bit literal for operand type \p Type.
LLVM_READNONE
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit);

/// \returns true if IR argument \p Arg is passed in an SGPR.
bool isArgPassedInSGPR(const Argument *Arg);

/// \returns true if call-site argument \p ArgNo of \p CB is passed in an
/// SGPR.
bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo);
1914
/// \returns true if opcode \p Opc is a packed FP32 instruction.
LLVM_READONLY bool isPackedFP32Inst(unsigned Opc);

/// \returns true if \p EncodedOffset is a legal unsigned SMRD offset
/// encoding for \p ST.
LLVM_READONLY
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset);

/// \returns true if \p EncodedOffset is a legal signed SMRD offset encoding
/// for \p ST (\p IsBuffer selects the S_BUFFER form).
LLVM_READONLY
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset, bool IsBuffer);

/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
/// offsets.
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);

/// \returns The encoding that will be used for \p ByteOffset in the
/// SMRD offset field, or std::nullopt if it won't fit. On GFX9 and GFX10
/// S_LOAD instructions have a signed offset, on other subtargets it is
/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                            int64_t ByteOffset, bool IsBuffer,
                                            bool HasSOffset = false);

/// \return The encoding that can be used for a 32-bit literal offset in an
/// SMRD instruction. This is only useful on CI.
std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                     int64_t ByteOffset);

/// For pre-GFX12 FLAT instructions the offset must be positive;
/// MSB is ignored and forced to zero.
///
/// \return The number of bits available for the signed offset field in flat
/// instructions. Note that some forms of the instruction disallow negative
/// offsets.
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST);

/// \returns true if this offset is small enough to fit in the SMRD
/// offset field. \p ByteOffset should be the offset in bytes and
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
1954
1955LLVM_READNONE
1956inline bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC) {
1957 if (isGFX12(STI: ST))
1958 return DC >= DPP::ROW_SHARE_FIRST && DC <= DPP::ROW_SHARE_LAST;
1959 if (isGFX90A(STI: ST))
1960 return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
1961 return false;
1962}
1963
/// \returns true if an instruction may have a 64-bit VGPR operand.
bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc,
                             const MCSubtargetInfo &ST);

/// \returns true if an instruction is a DP ALU DPP without any 64-bit
/// operands.
bool isDPALU_DPP32BitOpc(unsigned Opc);

/// \returns true if an instruction is a DP ALU DPP.
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII,
                 const MCSubtargetInfo &ST);

/// \returns true if the intrinsic is divergent.
bool isIntrinsicSourceOfDivergence(unsigned IntrID);

/// \returns true if the intrinsic is uniform.
bool isIntrinsicAlwaysUniform(unsigned IntrID);

/// \returns a register class for the physical register \p Reg if it is a VGPR
/// or nullptr otherwise.
const MCRegisterClass *getVGPRPhysRegClass(MCRegister Reg,
                                           const MCRegisterInfo &MRI);

/// \returns the MODE bits which have to be set by the S_SET_VGPR_MSB for the
/// physical register \p Reg.
unsigned getVGPREncodingMSBs(MCRegister Reg, const MCRegisterInfo &MRI);

/// If \p Reg is a low VGPR return a corresponding high VGPR with \p MSBs set.
MCRegister getVGPRWithMSBs(MCRegister Reg, unsigned MSBs,
                           const MCRegisterInfo &MRI);

/// \returns VGPR MSBs encoded in a S_SETREG_IMM32_B32 \p MI if it sets
/// it. If \p HasSetregVGPRMSBFixup is true then size of the ID_MODE mask is
/// ignored.
std::optional<unsigned> convertSetRegImmToVgprMSBs(const MachineInstr &MI,
                                                   bool HasSetregVGPRMSBFixup);

/// MC-layer overload of the above: \returns VGPR MSBs encoded in a
/// S_SETREG_IMM32_B32 \p MI if it sets it. If \p HasSetregVGPRMSBFixup is
/// true then size of the ID_MODE mask is ignored.
std::optional<unsigned> convertSetRegImmToVgprMSBs(const MCInst &MI,
                                                   bool HasSetregVGPRMSBFixup);

/// Returns a table for the opcode with a given \p Desc to map the VGPR MSB
/// set by the S_SET_VGPR_MSB to one of 4 sources. In case of VOPD returns 2
/// maps, one for X and one for Y component.
std::pair<const AMDGPU::OpName *, const AMDGPU::OpName *>
getVGPRLoweringOperandTables(const MCInstrDesc &Desc);

/// \returns true if a memory instruction supports scale_offset modifier.
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode);

/// \returns lds block size in terms of dwords.
/// This is used to calculate the lds size encoded for PAL metadata 3.0+ which
/// must be defined in terms of bytes.
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST);
2019
/// Cluster-dimension information derived from a Function (see \ref get).
/// Holds a discriminating \ref Kind plus, for fixed-dimension clusters, the
/// three dimensions themselves.
class ClusterDimsAttr {
public:
  /// Discriminator: unknown, explicitly no cluster, variable dims, or a
  /// fixed 3-component dimension vector.
  enum class Kind { Unknown, NoCluster, VariableDims, FixedDims };

  ClusterDimsAttr() = default;

  /// \returns the stored kind.
  Kind getKind() const { return AttrKind; }

  bool isUnknown() const { return getKind() == Kind::Unknown; }

  bool isNoCluster() const { return getKind() == Kind::NoCluster; }

  bool isFixedDims() const { return getKind() == Kind::FixedDims; }

  bool isVariableDims() const { return getKind() == Kind::VariableDims; }

  // The setters below reset the whole object (including Dims) rather than
  // just the kind, so stale dimensions never survive a kind change.
  void setUnknown() { *this = ClusterDimsAttr(Kind::Unknown); }

  void setNoCluster() { *this = ClusterDimsAttr(Kind::NoCluster); }

  void setVariableDims() { *this = ClusterDimsAttr(Kind::VariableDims); }

  /// \returns the dims stored. Note that this function can only be called if
  /// the kind is \ref Kind::FixedDims.
  const std::array<unsigned, 3> &getDims() const;

  /// Two attributes are equal when both the kind and the dimensions match.
  bool operator==(const ClusterDimsAttr &RHS) const {
    return AttrKind == RHS.AttrKind && Dims == RHS.Dims;
  }

  /// \returns a human-readable rendering of this attribute.
  std::string to_string() const;

  /// Build the attribute for function \p F (defined out of line).
  static ClusterDimsAttr get(const Function &F);

private:
  /// Sentinel encodings used by the out-of-line implementation.
  enum Encoding { EncoNoCluster = 0, EncoVariableDims = 1024 };

  ClusterDimsAttr(Kind AttrKind) : AttrKind(AttrKind) {}

  // Fixed cluster dimensions; only meaningful when AttrKind == FixedDims.
  std::array<unsigned, 3> Dims = {0, 0, 0};

  Kind AttrKind = Kind::Unknown;
};
2063
2064} // namespace AMDGPU
2065
2066raw_ostream &operator<<(raw_ostream &OS,
2067 const AMDGPU::IsaInfo::TargetIDSetting S);
2068
2069} // end namespace llvm
2070
2071#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
2072