// AMDGPUBaseInfo.h source code [llvm_projects/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h]

1	//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10	#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11
12	#include "AMDGPUSubtarget.h"
13	#include "SIDefines.h"
14	#include "llvm/IR/CallingConv.h"
15	#include "llvm/IR/InstrTypes.h"
16	#include "llvm/IR/Module.h"
17	#include "llvm/Support/Alignment.h"
18	#include <array>
19	#include <functional>
20	#include <utility>
21
22	// Pull in OpName enum definition and getNamedOperandIdx() declaration.
23	#define GET_INSTRINFO_OPERAND_ENUM
24	#include "AMDGPUGenInstrInfo.inc"
25
26	struct amd_kernel_code_t;
27
28	namespace llvm {
29
30	struct Align;
31	class Argument;
32	class Function;
33	class GlobalValue;
34	class MCInstrInfo;
35	class MCRegisterClass;
36	class MCRegisterInfo;
37	class MCSubtargetInfo;
38	class StringRef;
39	class Triple;
40	class raw_ostream;
41
42	namespace AMDGPU {
43
44	struct AMDGPUMCKernelCodeT;
45	struct IsaVersion;
46
/// Generic target versions emitted by this version of LLVM.
///
/// These numbers are incremented every time a codegen breaking change occurs
/// within a generic family.
namespace GenericVersion {
static constexpr unsigned GFX9 = 1;
static constexpr unsigned GFX9_4 = 1;
static constexpr unsigned GFX10_1 = 1;
static constexpr unsigned GFX10_3 = 1;
static constexpr unsigned GFX11 = 1;
static constexpr unsigned GFX12 = 1;
} // namespace GenericVersion
59
/// AMDHSA code object versions; each enumerator's value equals the version
/// number in its name.
enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 };
61
/// Floating-point type tag; this is the result type of getFPDstSelType().
enum class FPType { None, FP4, FP8 };
63
64	/// \returns True if \p STI is AMDHSA.
65	bool isHsaAbi(const MCSubtargetInfo &STI);
66
67	/// \returns Code object version from the IR module flag.
68	unsigned getAMDHSACodeObjectVersion(const Module &M);
69
70	/// \returns Code object version from ELF's e_ident[EI_ABIVERSION].
71	unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion);
72
73	/// \returns The default HSA code object version. This should only be used when
74	/// we lack a more accurate CodeObjectVersion value (e.g. from the IR module
75	/// flag or a .amdhsa_code_object_version directive)
76	unsigned getDefaultAMDHSACodeObjectVersion();
77
78	/// \returns ABIVersion suitable for use in ELF's e_ident[EI_ABIVERSION]. \param
79	/// CodeObjectVersion is a value returned by getAMDHSACodeObjectVersion().
80	uint8_t getELFABIVersion(const Triple &OS, unsigned CodeObjectVersion);
81
82	/// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr
83	unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV);
84
85	/// \returns The offset of the hostcall pointer argument from implicitarg_ptr
86	unsigned getHostcallImplicitArgPosition(unsigned COV);
87
88	unsigned getDefaultQueueImplicitArgPosition(unsigned COV);
89	unsigned getCompletionActionImplicitArgPosition(unsigned COV);
90
/// One buffer-format record, as returned by getGcnBufferFormatInfo().
struct GcnBufferFormatInfo {
  unsigned Format;
  unsigned BitsPerComp;
  unsigned NumComponents;
  unsigned NumFormat;
  unsigned DataFormat;
};
98
/// Per-opcode MAI flags; see getMAIIsDGEMM() and getMAIIsGFX940XDL().
struct MAIInstInfo {
  uint16_t Opcode;
  bool is_dgemm;
  bool is_gfx940_xdl;
};
104
/// One MFMA F8F6F4 record, as returned by getMFMA_F8F6F4_WithFormatArgs().
struct MFMA_F8F6F4_Info {
  unsigned Opcode;
  unsigned F8F8Opcode;
  uint8_t NumRegsSrcA;
  uint8_t NumRegsSrcB;
};
111
/// Row type holding a single opcode; declared ahead of the searchable-table
/// include that follows.
struct CvtScaleF32_F32F16ToF8F4_Info {
  unsigned Opcode;
};
115
/// Groups three related opcodes: a T16 opcode with its Hi and Lo opcodes.
struct True16D16Info {
  unsigned T16Op;
  unsigned HiOp;
  unsigned LoOp;
};
121
122	#define GET_MIMGBaseOpcode_DECL
123	#define GET_MIMGDim_DECL
124	#define GET_MIMGEncoding_DECL
125	#define GET_MIMGLZMapping_DECL
126	#define GET_MIMGMIPMapping_DECL
127	#define GET_MIMGBiASMapping_DECL
128	#define GET_MAIInstInfoTable_DECL
129	#define GET_isMFMA_F8F6F4Table_DECL
130	#define GET_isCvtScaleF32_F32F16ToF8F4Table_DECL
131	#define GET_True16D16Table_DECL
132	#include "AMDGPUGenSearchableTables.inc"
133
134	namespace IsaInfo {
135
enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  TRAP_NUM_SGPRS = 16
};
142
/// State of a target-ID feature (used for xnack and sramecc by
/// AMDGPUTargetID).
enum class TargetIDSetting { Unsupported, Any, Off, On };
144
145	class AMDGPUTargetID {
146	private:
147	const MCSubtargetInfo &STI;
148	TargetIDSetting XnackSetting;
149	TargetIDSetting SramEccSetting;
150
151	public:
152	explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
153	~AMDGPUTargetID() = default;
154
155	/// \return True if the current xnack setting is not "Unsupported".
156	bool isXnackSupported() const {
157	return XnackSetting != TargetIDSetting::Unsupported;
158	}
159
160	/// \returns True if the current xnack setting is "On" or "Any".
161	bool isXnackOnOrAny() const {
162	return XnackSetting == TargetIDSetting::On \|\|
163	XnackSetting == TargetIDSetting::Any;
164	}
165
166	/// \returns True if current xnack setting is "On" or "Off",
167	/// false otherwise.
168	bool isXnackOnOrOff() const {
169	return getXnackSetting() == TargetIDSetting::On \|\|
170	getXnackSetting() == TargetIDSetting::Off;
171	}
172
173	/// \returns The current xnack TargetIDSetting, possible options are
174	/// "Unsupported", "Any", "Off", and "On".
175	TargetIDSetting getXnackSetting() const { return XnackSetting; }
176
177	/// Sets xnack setting to \p NewXnackSetting.
178	void setXnackSetting(TargetIDSetting NewXnackSetting) {
179	XnackSetting = NewXnackSetting;
180	}
181
182	/// \return True if the current sramecc setting is not "Unsupported".
183	bool isSramEccSupported() const {
184	return SramEccSetting != TargetIDSetting::Unsupported;
185	}
186
187	/// \returns True if the current sramecc setting is "On" or "Any".
188	bool isSramEccOnOrAny() const {
189	return SramEccSetting == TargetIDSetting::On \|\|
190	SramEccSetting == TargetIDSetting::Any;
191	}
192
193	/// \returns True if current sramecc setting is "On" or "Off",
194	/// false otherwise.
195	bool isSramEccOnOrOff() const {
196	return getSramEccSetting() == TargetIDSetting::On \|\|
197	getSramEccSetting() == TargetIDSetting::Off;
198	}
199
200	/// \returns The current sramecc TargetIDSetting, possible options are
201	/// "Unsupported", "Any", "Off", and "On".
202	TargetIDSetting getSramEccSetting() const { return SramEccSetting; }
203
204	/// Sets sramecc setting to \p NewSramEccSetting.
205	void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
206	SramEccSetting = NewSramEccSetting;
207	}
208
209	void setTargetIDFromFeaturesString(StringRef FS);
210	void setTargetIDFromTargetIDStream(StringRef TargetID);
211
212	/// \returns String representation of an object.
213	std::string toString() const;
214	};
215
216	/// \returns Wavefront size for given subtarget \p STI.
217	unsigned getWavefrontSize(const MCSubtargetInfo *STI);
218
219	/// \returns Local memory size in bytes for given subtarget \p STI.
220	unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
221
222	/// \returns Maximum addressable local memory size in bytes for given subtarget
223	/// \p STI.
224	unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI);
225
226	/// \returns Number of execution units per compute unit for given subtarget \p
227	/// STI.
228	unsigned getEUsPerCU(const MCSubtargetInfo *STI);
229
230	/// \returns Maximum number of work groups per compute unit for given subtarget
231	/// \p STI and limited by given \p FlatWorkGroupSize.
232	unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
233	unsigned FlatWorkGroupSize);
234
235	/// \returns Minimum number of waves per execution unit for given subtarget \p
236	/// STI.
237	unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
238
239	/// \returns Maximum number of waves per execution unit for given subtarget \p
240	/// STI without any kind of limitation.
241	unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
242
243	/// \returns Number of waves per execution unit required to support the given \p
244	/// FlatWorkGroupSize.
245	unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
246	unsigned FlatWorkGroupSize);
247
248	/// \returns Minimum flat work group size for given subtarget \p STI.
249	unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
250
251	/// \returns Maximum flat work group size for given subtarget \p STI.
252	unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
253
254	/// \returns Number of waves per work group for given subtarget \p STI and
255	/// \p FlatWorkGroupSize.
256	unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
257	unsigned FlatWorkGroupSize);
258
259	/// \returns SGPR allocation granularity for given subtarget \p STI.
260	unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
261
262	/// \returns SGPR encoding granularity for given subtarget \p STI.
263	unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
264
265	/// \returns Total number of SGPRs for given subtarget \p STI.
266	unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
267
268	/// \returns Addressable number of SGPRs for given subtarget \p STI.
269	unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
270
271	/// \returns Minimum number of SGPRs that meets the given number of waves per
272	/// execution unit requirement for given subtarget \p STI.
273	unsigned getMinNumSGPRs(const MCSubtargetInfo STI, unsigned* WavesPerEU);
274
275	/// \returns Maximum number of SGPRs that meets the given number of waves per
276	/// execution unit requirement for given subtarget \p STI.
277	unsigned getMaxNumSGPRs(const MCSubtargetInfo STI, unsigned* WavesPerEU,
278	bool Addressable);
279
280	/// \returns Number of extra SGPRs implicitly required by given subtarget \p
281	/// STI when the given special registers are used.
282	unsigned getNumExtraSGPRs(const MCSubtargetInfo STI, bool* VCCUsed,
283	bool FlatScrUsed, bool XNACKUsed);
284
285	/// \returns Number of extra SGPRs implicitly required by given subtarget \p
286	/// STI when the given special registers are used. XNACK is inferred from
287	/// \p STI.
288	unsigned getNumExtraSGPRs(const MCSubtargetInfo STI, bool* VCCUsed,
289	bool FlatScrUsed);
290
291	/// \returns Number of SGPR blocks needed for given subtarget \p STI when
292	/// \p NumSGPRs are used. \p NumSGPRs should already include any special
293	/// register counts.
294	unsigned getNumSGPRBlocks(const MCSubtargetInfo STI, unsigned* NumSGPRs);
295
296	/// \returns VGPR allocation granularity for given subtarget \p STI.
297	///
298	/// For subtargets which support it, \p EnableWavefrontSize32 should match
299	/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
300	unsigned
301	getVGPRAllocGranule(const MCSubtargetInfo STI, unsigned* DynamicVGPRBlockSize,
302	std::optional<bool> EnableWavefrontSize32 = std::nullopt);
303
304	/// \returns VGPR encoding granularity for given subtarget \p STI.
305	///
306	/// For subtargets which support it, \p EnableWavefrontSize32 should match
307	/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
308	unsigned getVGPREncodingGranule(
309	const MCSubtargetInfo *STI,
310	std::optional<bool> EnableWavefrontSize32 = std::nullopt);
311
312	/// For subtargets with a unified VGPR file and mixed ArchVGPR/AGPR usage,
313	/// returns the allocation granule for ArchVGPRs.
314	unsigned getArchVGPRAllocGranule();
315
316	/// \returns Total number of VGPRs for given subtarget \p STI.
317	unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
318
319	/// \returns Addressable number of architectural VGPRs for a given subtarget \p
320	/// STI.
321	unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI);
322
323	/// \returns Addressable number of VGPRs for given subtarget \p STI.
324	unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI,
325	unsigned DynamicVGPRBlockSize);
326
327	/// \returns Minimum number of VGPRs that meets given number of waves per
328	/// execution unit requirement for given subtarget \p STI.
329	unsigned getMinNumVGPRs(const MCSubtargetInfo STI, unsigned* WavesPerEU,
330	unsigned DynamicVGPRBlockSize);
331
332	/// \returns Maximum number of VGPRs that meets given number of waves per
333	/// execution unit requirement for given subtarget \p STI.
334	unsigned getMaxNumVGPRs(const MCSubtargetInfo STI, unsigned* WavesPerEU,
335	unsigned DynamicVGPRBlockSize);
336
337	/// \returns Number of waves reachable for a given \p NumVGPRs usage for given
338	/// subtarget \p STI.
339	unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
340	unsigned NumVGPRs,
341	unsigned DynamicVGPRBlockSize);
342
343	/// \returns Number of waves reachable for a given \p NumVGPRs usage, \p Granule
344	/// size, \p MaxWaves possible, and \p TotalNumVGPRs available.
345	unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
346	unsigned MaxWaves,
347	unsigned TotalNumVGPRs);
348
349	/// \returns Occupancy for a given \p SGPRs usage, \p MaxWaves possible, and \p
350	/// Gen.
351	unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
352	AMDGPUSubtarget::Generation Gen);
353
354	/// \returns Number of VGPR blocks needed for given subtarget \p STI when
355	/// \p NumVGPRs are used. We actually return the number of blocks -1, since
356	/// that's what we encode.
357	///
358	/// For subtargets which support it, \p EnableWavefrontSize32 should match the
359	/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
360	unsigned getEncodedNumVGPRBlocks(
361	const MCSubtargetInfo STI, unsigned* NumVGPRs,
362	std::optional<bool> EnableWavefrontSize32 = std::nullopt);
363
364	/// \returns Number of VGPR blocks that need to be allocated for the given
365	/// subtarget \p STI when \p NumVGPRs are used.
366	unsigned getAllocatedNumVGPRBlocks(
367	const MCSubtargetInfo STI, unsigned* NumVGPRs,
368	unsigned DynamicVGPRBlockSize,
369	std::optional<bool> EnableWavefrontSize32 = std::nullopt);
370
371	} // end namespace IsaInfo
372
// Represents a field in an encoded value.
//
// HighBit and LowBit are the inclusive bit positions of the field and D is
// its default value. encode()/decode() pass the raw value through unchanged;
// shifting by Offset and masking to Width is done by EncodingFields.
template <unsigned HighBit, unsigned LowBit, unsigned D = 0>
struct EncodingField {
  static_assert(HighBit >= LowBit, "Invalid bit range!");
  static constexpr unsigned Offset = LowBit;
  static constexpr unsigned Width = HighBit - LowBit + 1;

  using ValueType = unsigned;
  static constexpr ValueType Default = D;

  ValueType Value;
  // Intentionally implicit so a field can be built from a plain integer.
  constexpr EncodingField(ValueType Value) : Value(Value) {}

  constexpr uint64_t encode() const { return Value; }
  // The narrowing from uint64_t to ValueType is spelled out explicitly.
  static constexpr ValueType decode(uint64_t Encoded) {
    return static_cast<ValueType>(Encoded);
  }
};
389
390	// Represents a single bit in an encoded value.
391	template <unsigned Bit, unsigned D = `0`>
392	using EncodingBit = EncodingField<Bit, Bit, D>;
393
// A helper for encoding and decoding multiple fields.
template <typename... Fields> struct EncodingFields {
  // ORs together every field's encoded value shifted to that field's Offset.
  static constexpr uint64_t encode(Fields... Values) {
    return ((Values.encode() << Values.Offset) | ...);
  }

  // Extracts each field by shifting down by its Offset and masking to its
  // Width; results come back in the order of the Fields pack.
  static std::tuple<typename Fields::ValueType...> decode(uint64_t Encoded) {
    return {Fields::decode((Encoded >> Fields::Offset) &
                           maxUIntN(Fields::Width))...};
  }
};
405
406	LLVM_READONLY
407	inline bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx) {
408	return getNamedOperandIdx(Opcode, Name: NamedIdx) != -`1`;
409	}
410
411	LLVM_READONLY
412	int getSOPPWithRelaxation(uint16_t Opcode);
413
414	struct MIMGBaseOpcodeInfo {
415	MIMGBaseOpcode BaseOpcode;
416	bool Store;
417	bool Atomic;
418	bool AtomicX2;
419	bool Sampler;
420	bool Gather4;
421
422	uint8_t NumExtraArgs;
423	bool Gradients;
424	bool G16;
425	bool Coordinates;
426	bool LodOrClampOrMip;
427	bool HasD16;
428	bool MSAA;
429	bool BVH;
430	bool A16;
431	bool NoReturn;
432	bool PointSampleAccel;
433	};
434
435	LLVM_READONLY
436	const MIMGBaseOpcodeInfo getMIMGBaseOpcode(unsigned* Opc);
437
438	LLVM_READONLY
439	const MIMGBaseOpcodeInfo getMIMGBaseOpcodeInfo(unsigned* BaseOpcode);
440
441	struct MIMGDimInfo {
442	MIMGDim Dim;
443	uint8_t NumCoords;
444	uint8_t NumGradients;
445	bool MSAA;
446	bool DA;
447	uint8_t Encoding;
448	const char *AsmSuffix;
449	};
450
451	LLVM_READONLY
452	const MIMGDimInfo getMIMGDimInfo(unsigned* DimEnum);
453
454	LLVM_READONLY
455	const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
456
457	LLVM_READONLY
458	const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
459
460	struct MIMGLZMappingInfo {
461	MIMGBaseOpcode L;
462	MIMGBaseOpcode LZ;
463	};
464
465	struct MIMGMIPMappingInfo {
466	MIMGBaseOpcode MIP;
467	MIMGBaseOpcode NONMIP;
468	};
469
470	struct MIMGBiasMappingInfo {
471	MIMGBaseOpcode Bias;
472	MIMGBaseOpcode NoBias;
473	};
474
475	struct MIMGOffsetMappingInfo {
476	MIMGBaseOpcode Offset;
477	MIMGBaseOpcode NoOffset;
478	};
479
480	struct MIMGG16MappingInfo {
481	MIMGBaseOpcode G;
482	MIMGBaseOpcode G16;
483	};
484
485	LLVM_READONLY
486	const MIMGLZMappingInfo getMIMGLZMappingInfo(unsigned* L);
487
/// Pairs the 2-address and 3-address opcodes of a WMMA instruction (see
/// mapWMMA2AddrTo3AddrOpcode() and mapWMMA3AddrTo2AddrOpcode()).
struct WMMAOpcodeMappingInfo {
  unsigned Opcode2Addr;
  unsigned Opcode3Addr;
};
492
493	LLVM_READONLY
494	const MIMGMIPMappingInfo getMIMGMIPMappingInfo(unsigned* MIP);
495
496	LLVM_READONLY
497	const MIMGBiasMappingInfo getMIMGBiasMappingInfo(unsigned* Bias);
498
499	LLVM_READONLY
500	const MIMGOffsetMappingInfo getMIMGOffsetMappingInfo(unsigned* Offset);
501
502	LLVM_READONLY
503	const MIMGG16MappingInfo getMIMGG16MappingInfo(unsigned* G);
504
505	LLVM_READONLY
506	int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
507	unsigned VDataDwords, unsigned VAddrDwords);
508
509	LLVM_READONLY
510	int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
511
512	LLVM_READONLY
513	unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
514	const MIMGDimInfo Dim, bool* IsA16,
515	bool IsG16Supported);
516
/// One MIMG opcode record, as returned by getMIMGInfo().
struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
  uint8_t VAddrOperands;
};
525
526	LLVM_READONLY
527	const MIMGInfo getMIMGInfo(unsigned* Opc);
528
529	LLVM_READONLY
530	int getMTBUFBaseOpcode(unsigned Opc);
531
532	LLVM_READONLY
533	int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
534
535	LLVM_READONLY
536	int getMTBUFElements(unsigned Opc);
537
538	LLVM_READONLY
539	bool getMTBUFHasVAddr(unsigned Opc);
540
541	LLVM_READONLY
542	bool getMTBUFHasSrsrc(unsigned Opc);
543
544	LLVM_READONLY
545	bool getMTBUFHasSoffset(unsigned Opc);
546
547	LLVM_READONLY
548	int getMUBUFBaseOpcode(unsigned Opc);
549
550	LLVM_READONLY
551	int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);
552
553	LLVM_READONLY
554	int getMUBUFElements(unsigned Opc);
555
556	LLVM_READONLY
557	bool getMUBUFHasVAddr(unsigned Opc);
558
559	LLVM_READONLY
560	bool getMUBUFHasSrsrc(unsigned Opc);
561
562	LLVM_READONLY
563	bool getMUBUFHasSoffset(unsigned Opc);
564
565	LLVM_READONLY
566	bool getMUBUFIsBufferInv(unsigned Opc);
567
568	LLVM_READONLY
569	bool getMUBUFTfe(unsigned Opc);
570
571	LLVM_READONLY
572	bool getSMEMIsBuffer(unsigned Opc);
573
574	LLVM_READONLY
575	bool getVOP1IsSingle(unsigned Opc);
576
577	LLVM_READONLY
578	bool getVOP2IsSingle(unsigned Opc);
579
580	LLVM_READONLY
581	bool getVOP3IsSingle(unsigned Opc);
582
583	LLVM_READONLY
584	bool isVOPC64DPP(unsigned Opc);
585
586	LLVM_READONLY
587	bool isVOPCAsmOnly(unsigned Opc);
588
589	/// Returns true if MAI operation is a double precision GEMM.
590	LLVM_READONLY
591	bool getMAIIsDGEMM(unsigned Opc);
592
593	LLVM_READONLY
594	bool getMAIIsGFX940XDL(unsigned Opc);
595
/// Result of getCanBeVOPD(): one flag for the X component and one for the Y
/// component.
struct CanBeVOPD {
  bool X;
  bool Y;
};
600
601	/// \returns SIEncodingFamily used for VOPD encoding on a \p ST.
602	LLVM_READONLY
603	unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST);
604
605	LLVM_READONLY
606	CanBeVOPD getCanBeVOPD(unsigned Opc);
607
608	LLVM_READNONE
609	uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal);
610
611	LLVM_READONLY
612	const MFMA_F8F6F4_Info getMFMA_F8F6F4_WithFormatArgs(unsigned* CBSZ,
613	unsigned BLGP,
614	unsigned F8F8Opcode);
615
616	LLVM_READONLY
617	const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
618	uint8_t NumComponents,
619	uint8_t NumFormat,
620	const MCSubtargetInfo &STI);
621	LLVM_READONLY
622	const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
623	const MCSubtargetInfo &STI);
624
625	LLVM_READONLY
626	int getMCOpcode(uint16_t Opcode, unsigned Gen);
627
628	LLVM_READONLY
629	unsigned getVOPDOpcode(unsigned Opc);
630
631	LLVM_READONLY
632	int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily);
633
634	LLVM_READONLY
635	bool isVOPD(unsigned Opc);
636
637	LLVM_READNONE
638	bool isMAC(unsigned Opc);
639
640	LLVM_READNONE
641	bool isPermlane16(unsigned Opc);
642
643	LLVM_READNONE
644	bool isGenericAtomic(unsigned Opc);
645
646	LLVM_READNONE
647	bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc);
648
649	namespace VOPD {
650
// VOPD component operand indices (DST, SRC0..SRC2) followed by operand-count
// constants.
enum Component : unsigned {
  DST = 0,
  SRC0,
  SRC1,
  SRC2,

  DST_NUM = 1,
  MAX_SRC_NUM = 3,
  MAX_OPR_NUM = DST_NUM + MAX_SRC_NUM
};
661
// LSB mask for VGPR banks per VOPD component operand.
// 4 banks result in a mask 3, setting 2 lower bits.
constexpr unsigned VOPD_VGPR_BANK_MASKS[] = {1, 3, 3, 1};

// Indices of the two VOPD components, plus an array for iterating over them.
enum ComponentIndex : unsigned { X = 0, Y = 1 };
constexpr unsigned COMPONENTS[] = {ComponentIndex::X, ComponentIndex::Y};
constexpr unsigned COMPONENTS_NUM = 2;
static_assert(sizeof(COMPONENTS) / sizeof(COMPONENTS[0]) == COMPONENTS_NUM,
              "COMPONENTS_NUM must match the COMPONENTS array");
669
670	// Properties of VOPD components.
671	class ComponentProps {
672	private:
673	unsigned SrcOperandsNum = `0`;
674	unsigned MandatoryLiteralIdx = ~`0u`;
675	bool HasSrc2Acc = false;
676
677	public:
678	ComponentProps() = default;
679	ComponentProps(const MCInstrDesc &OpDesc);
680
681	// Return the total number of src operands this component has.
682	unsigned getCompSrcOperandsNum() const { return SrcOperandsNum; }
683
684	// Return the number of src operands of this component visible to the parser.
685	unsigned getCompParsedSrcOperandsNum() const {
686	return SrcOperandsNum - HasSrc2Acc;
687	}
688
689	// Return true iif this component has a mandatory literal.
690	bool hasMandatoryLiteral() const { return MandatoryLiteralIdx != ~`0u`; }
691
692	// If this component has a mandatory literal, return component operand
693	// index of this literal (i.e. either Component::SRC1 or Component::SRC2).
694	unsigned getMandatoryLiteralCompOperandIndex() const {
695	assert(hasMandatoryLiteral());
696	return MandatoryLiteralIdx;
697	}
698
699	// Return true iif this component has operand
700	// with component index CompSrcIdx and this operand may be a register.
701	bool hasRegSrcOperand(unsigned CompSrcIdx) const {
702	assert(CompSrcIdx < Component::MAX_SRC_NUM);
703	return SrcOperandsNum > CompSrcIdx && !hasMandatoryLiteralAt(CompSrcIdx);
704	}
705
706	// Return true iif this component has tied src2.
707	bool hasSrc2Acc() const { return HasSrc2Acc; }
708
709	private:
710	bool hasMandatoryLiteralAt(unsigned CompSrcIdx) const {
711	assert(CompSrcIdx < Component::MAX_SRC_NUM);
712	return MandatoryLiteralIdx == Component::DST_NUM + CompSrcIdx;
713	}
714	};
715
enum ComponentKind : unsigned {
  SINGLE = 0,  // A single VOP1 or VOP2 instruction which may be used in VOPD.
  COMPONENT_X, // A VOPD instruction, X component.
  COMPONENT_Y, // A VOPD instruction, Y component.
  MAX = COMPONENT_Y
};
722
723	// Interface functions of this class map VOPD component operand indices
724	// to indices of operands in MachineInstr/MCInst or parsed operands array.
725	//
726	// Note that this class operates with 3 kinds of indices:
727	// - VOPD component operand indices (Component::DST, Component::SRC0, etc.);
728	// - MC operand indices (they refer operands in a MachineInstr/MCInst);
729	// - parsed operand indices (they refer operands in parsed operands array).
730	//
731	// For SINGLE components mapping between these indices is trivial.
732	// But things get more complicated for COMPONENT_X and
733	// COMPONENT_Y because these components share the same
734	// MachineInstr/MCInst and the same parsed operands array.
735	// Below is an example of component operand to parsed operand
736	// mapping for the following instruction:
737	//
738	// v_dual_add_f32 v255, v4, v5 :: v_dual_mov_b32 v6, v1
739	//
740	// PARSED COMPONENT PARSED
741	// COMPONENT OPERANDS OPERAND INDEX OPERAND INDEX
742	// -------------------------------------------------------------------
743	// "v_dual_add_f32" 0
744	// v_dual_add_f32 v255 0 (DST) --> 1
745	// v4 1 (SRC0) --> 2
746	// v5 2 (SRC1) --> 3
747	// "::" 4
748	// "v_dual_mov_b32" 5
749	// v_dual_mov_b32 v6 0 (DST) --> 6
750	// v1 1 (SRC0) --> 7
751	// -------------------------------------------------------------------
752	//
753	class ComponentLayout {
754	private:
755	// Regular MachineInstr/MCInst operands are ordered as follows:
756	// dst, src0 [, other src operands]
757	// VOPD MachineInstr/MCInst operands are ordered as follows:
758	// dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
759	// Each ComponentKind has operand indices defined below.
760	static constexpr unsigned MC_DST_IDX[] = {`0`, `0`, `1`};
761	static constexpr unsigned FIRST_MC_SRC_IDX[] = {`1`, `2`, `2` / + OpX.MCSrcNum /};
762
763	// Parsed operands of regular instructions are ordered as follows:
764	// Mnemo dst src0 [vsrc1 ...]
765	// Parsed VOPD operands are ordered as follows:
766	// OpXMnemo dstX src0X [vsrc1X\|imm vsrc1X\|vsrc1X imm] '::'
767	// OpYMnemo dstY src0Y [vsrc1Y\|imm vsrc1Y\|vsrc1Y imm]
768	// Each ComponentKind has operand indices defined below.
769	static constexpr unsigned PARSED_DST_IDX[] = {`1`, `1`,
770	`4` / + OpX.ParsedSrcNum /};
771	static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {
772	`2`, `2`, `5` / + OpX.ParsedSrcNum /};
773
774	private:
775	const ComponentKind Kind;
776	const ComponentProps PrevComp;
777
778	public:
779	// Create layout for COMPONENT_X or SINGLE component.
780	ComponentLayout(ComponentKind Kind) : Kind(Kind) {
781	assert(Kind == ComponentKind::SINGLE \|\| Kind == ComponentKind::COMPONENT_X);
782	}
783
784	// Create layout for COMPONENT_Y which depends on COMPONENT_X layout.
785	ComponentLayout(const ComponentProps &OpXProps)
786	: Kind(ComponentKind::COMPONENT_Y), PrevComp (OpXProps) {}
787
788	public:
789	// Return the index of dst operand in MCInst operands.
790	unsigned getIndexOfDstInMCOperands() const { return MC_DST_IDX[Kind]; }
791
792	// Return the index of the specified src operand in MCInst operands.
793	unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx) const {
794	assert(CompSrcIdx < Component::MAX_SRC_NUM);
795	return FIRST_MC_SRC_IDX[Kind] + getPrevCompSrcNum() + CompSrcIdx;
796	}
797
798	// Return the index of dst operand in the parsed operands array.
799	unsigned getIndexOfDstInParsedOperands() const {
800	return PARSED_DST_IDX[Kind] + getPrevCompParsedSrcNum();
801	}
802
803	// Return the index of the specified src operand in the parsed operands array.
804	unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const {
805	assert(CompSrcIdx < Component::MAX_SRC_NUM);
806	return FIRST_PARSED_SRC_IDX[Kind] + getPrevCompParsedSrcNum() + CompSrcIdx;
807	}
808
809	private:
810	unsigned getPrevCompSrcNum() const {
811	return PrevComp.getCompSrcOperandsNum();
812	}
813	unsigned getPrevCompParsedSrcNum() const {
814	return PrevComp.getCompParsedSrcOperandsNum();
815	}
816	};
817
818	// Layout and properties of VOPD components.
819	class ComponentInfo : public ComponentLayout, public ComponentProps {
820	public:
821	// Create ComponentInfo for COMPONENT_X or SINGLE component.
822	ComponentInfo(const MCInstrDesc &OpDesc,
823	ComponentKind Kind = ComponentKind::SINGLE)
824	: ComponentLayout (Kind), ComponentProps (OpDesc) {}
825
826	// Create ComponentInfo for COMPONENT_Y which depends on COMPONENT_X layout.
827	ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps)
828	: ComponentLayout (OpXProps), ComponentProps (OpDesc) {}
829
830	// Map component operand index to parsed operand index.
831	// Return 0 if the specified operand does not exist.
832	unsigned getIndexInParsedOperands(unsigned CompOprIdx) const;
833	};
834
835	// Properties of VOPD instructions.
836	class InstInfo {
837	private:
838	const ComponentInfo CompInfo[COMPONENTS_NUM];
839
840	public:
841	using RegIndices = std::array<unsigned, Component::MAX_OPR_NUM>;
842
843	InstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
844	: CompInfo{OpX, OpY} {}
845
846	InstInfo(const ComponentInfo &OprInfoX, const ComponentInfo &OprInfoY)
847	: CompInfo{OprInfoX, OprInfoY} {}
848
849	const ComponentInfo &operator[](size_t ComponentIdx) const {
850	assert(ComponentIdx < COMPONENTS_NUM);
851	return CompInfo[ComponentIdx];
852	}
853
854	// Check VOPD operands constraints.
855	// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
856	// for the specified component and MC operand. The callback must return 0
857	// if the operand is not a register or not a VGPR.
858	// If \p SkipSrc is set to true then constraints for source operands are not
859	// checked.
860	bool hasInvalidOperand(std::function<unsigned(unsigned, unsigned)> GetRegIdx,
861	bool SkipSrc = false) const {
862	return getInvalidCompOperandIndex(GetRegIdx, SkipSrc).has_value();
863	}
864
865	// Check VOPD operands constraints.
866	// Return the index of an invalid component operand, if any.
867	// If \p SkipSrc is set to true then constraints for source operands are not
868	// checked.
869	std::optional<unsigned> getInvalidCompOperandIndex(
870	std::function<unsigned(unsigned, unsigned)> GetRegIdx,
871	bool SkipSrc = false) const;
872
873	private:
874	RegIndices
875	getRegIndices(unsigned ComponentIdx,
876	std::function<unsigned(unsigned, unsigned)> GetRegIdx) const;
877	};
878
879	} // namespace VOPD
880
881	LLVM_READONLY
882	std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode);
883
884	LLVM_READONLY
885	// Get properties of 2 single VOP1/VOP2 instructions
886	// used as components to create a VOPD instruction.
887	VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY);
888
889	LLVM_READONLY
890	// Get properties of VOPD X and Y components.
891	VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
892	const MCInstrInfo *InstrInfo);
893
894	LLVM_READONLY
895	bool isAsyncStore(unsigned Opc);
896	LLVM_READONLY
897	bool isTensorStore(unsigned Opc);
898	LLVM_READONLY
899	unsigned getTemporalHintType(const MCInstrDesc TID);
900
901	LLVM_READONLY
902	bool isTrue16Inst(unsigned Opc);
903
904	LLVM_READONLY
905	FPType getFPDstSelType(unsigned Opc);
906
907	LLVM_READONLY
908	bool isInvalidSingleUseConsumerInst(unsigned Opc);
909
910	LLVM_READONLY
911	bool isInvalidSingleUseProducerInst(unsigned Opc);
912
913	bool isDPMACCInstruction(unsigned Opc);
914
915	LLVM_READONLY
916	unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);
917
918	LLVM_READONLY
919	unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);
920
921	void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &Header,
922	const MCSubtargetInfo *STI);
923
924	bool isGroupSegment(const GlobalValue *GV);
925	bool isGlobalSegment(const GlobalValue *GV);
926	bool isReadOnlySegment(const GlobalValue *GV);
927
928	/// \returns True if constants should be emitted to .text section for given
929	/// target triple \p TT, false otherwise.
930	bool shouldEmitConstantsToTextSection(const Triple &TT);
931
932	/// \returns Integer value requested using \p F's \p Name attribute.
933	///
934	/// \returns \p Default if attribute is not present.
935	///
936	/// \returns \p Default and emits error if requested value cannot be converted
937	/// to integer.
938	int getIntegerAttribute(const Function &F, StringRef Name, int Default);
939
940	/// \returns A pair of integer values requested using \p F's \p Name attribute
941	/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
942	/// is false).
943	///
944	/// \returns \p Default if attribute is not present.
945	///
946	/// \returns \p Default and emits error if one of the requested values cannot be
947	/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
948	/// not present.
949	std::pair<unsigned, unsigned>
950	getIntegerPairAttribute(const Function &F, StringRef Name,
951	std::pair<unsigned, unsigned> Default,
952	bool OnlyFirstRequired = false);
953
954	/// \returns A pair of integer values requested using \p F's \p Name attribute
955	/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
956	/// is false).
957	///
958	/// \returns \p std::nullopt if attribute is not present.
959	///
960	/// \returns \p std::nullopt and emits error if one of the requested values
961	/// cannot be converted to integer, or \p OnlyFirstRequired is false and
962	/// "second" value is not present.
963	std::optional<std::pair<unsigned, std::optional<unsigned>>>
964	getIntegerPairAttribute(const Function &F, StringRef Name,
965	bool OnlyFirstRequired = false);
966
/// \returns A vector of integer values requested using \p F's \p Name
968	/// attribute.
969	/// \returns A vector of size \p Size, with all elements set to \p DefaultVal,
970	/// if any error occurs. The corresponding error will also be emitted.
971	SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
972	unsigned Size,
973	unsigned DefaultVal);
974	/// Similar to the function above, but returns std::nullopt if any error occurs.
975	std::optional<SmallVector<unsigned>>
976	getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size);
977
/// Represents the counter values to wait for in an s_waitcnt instruction.
///
/// Large values (including the maximum possible integer) can be used to
/// represent "don't care" waits. Every counter defaults to ~0u, and the
/// hasWait*() queries below treat exactly that value as "no wait requested".
struct Waitcnt {
  unsigned LoadCnt = ~0u;   // Corresponds to Vmcnt prior to gfx12.
  unsigned ExpCnt = ~0u;
  unsigned DsCnt = ~0u;     // Corresponds to LGKMcnt prior to gfx12.
  unsigned StoreCnt = ~0u;  // Corresponds to VScnt on gfx10/gfx11.
  unsigned SampleCnt = ~0u; // gfx12+ only.
  unsigned BvhCnt = ~0u;    // gfx12+ only.
  unsigned KmCnt = ~0u;     // gfx12+ only.
  unsigned XCnt = ~0u;      // gfx1250.

  Waitcnt() = default;

  // Pre-gfx12 constructor. The counters that only exist on gfx12+ keep their
  // ~0u default.
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : LoadCnt(VmCnt), ExpCnt(ExpCnt), DsCnt(LgkmCnt), StoreCnt(VsCnt) {}

  // gfx12+ constructor.
  Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
          unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt)
      : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt),
        SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt), XCnt(XCnt) {}

  /// \returns true if any counter differs from the ~0u default.
  bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); }

  /// \returns true if any counter other than StoreCnt differs from ~0u.
  bool hasWaitExceptStoreCnt() const {
    return LoadCnt != ~0u || ExpCnt != ~0u || DsCnt != ~0u ||
           SampleCnt != ~0u || BvhCnt != ~0u || KmCnt != ~0u || XCnt != ~0u;
  }

  /// \returns true if StoreCnt differs from ~0u.
  bool hasWaitStoreCnt() const { return StoreCnt != ~0u; }

  /// \returns A Waitcnt holding, for every counter, the smaller of this
  /// object's value and \p Other's value.
  Waitcnt combined(const Waitcnt &Other) const {
    // Does the right thing provided self and Other are either both pre-gfx12
    // or both gfx12+.
    return Waitcnt(
        std::min(LoadCnt, Other.LoadCnt), std::min(ExpCnt, Other.ExpCnt),
        std::min(DsCnt, Other.DsCnt), std::min(StoreCnt, Other.StoreCnt),
        std::min(SampleCnt, Other.SampleCnt), std::min(BvhCnt, Other.BvhCnt),
        std::min(KmCnt, Other.KmCnt), std::min(XCnt, Other.XCnt));
  }
};
1022
1023	// The following methods are only meaningful on targets that support
1024	// S_WAITCNT.
1025
1026	/// \returns Vmcnt bit mask for given isa \p Version.
1027	unsigned getVmcntBitMask(const IsaVersion &Version);
1028
1029	/// \returns Expcnt bit mask for given isa \p Version.
1030	unsigned getExpcntBitMask(const IsaVersion &Version);
1031
1032	/// \returns Lgkmcnt bit mask for given isa \p Version.
1033	unsigned getLgkmcntBitMask(const IsaVersion &Version);
1034
1035	/// \returns Waitcnt bit mask for given isa \p Version.
1036	unsigned getWaitcntBitMask(const IsaVersion &Version);
1037
1038	/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
1039	unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
1040
1041	/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
1042	unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
1043
1044	/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
1045	unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
1046
1047	/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
1048	/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
1049	/// \p Lgkmcnt respectively. Should not be used on gfx12+, the instruction
1050	/// which needs it is deprecated
1051	///
1052	/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
1053	/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9)
1054	/// \p Vmcnt = \p Waitcnt[15:14,3:0] (gfx9,10)
1055	/// \p Vmcnt = \p Waitcnt[15:10] (gfx11)
1056	/// \p Expcnt = \p Waitcnt[6:4] (pre-gfx11)
1057	/// \p Expcnt = \p Waitcnt[2:0] (gfx11)
1058	/// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10)
1059	/// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10)
1060	/// \p Lgkmcnt = \p Waitcnt[9:4] (gfx11)
1061	///
1062	void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt,
1063	unsigned &Expcnt, unsigned &Lgkmcnt);
1064
1065	Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
1066
1067	/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
1068	unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1069	unsigned Vmcnt);
1070
1071	/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
1072	unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1073	unsigned Expcnt);
1074
1075	/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
1076	unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1077	unsigned Lgkmcnt);
1078
1079	/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
1080	/// \p Version. Should not be used on gfx12+, the instruction which needs
1081	/// it is deprecated
1082	///
1083	/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
1084	/// Waitcnt[2:0] = \p Expcnt (gfx11+)
1085	/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9)
1086	/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9,10)
1087	/// Waitcnt[6:4] = \p Expcnt (pre-gfx11)
1088	/// Waitcnt[9:4] = \p Lgkmcnt (gfx11)
1089	/// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10)
1090	/// Waitcnt[13:8] = \p Lgkmcnt (gfx10)
1091	/// Waitcnt[15:10] = \p Vmcnt (gfx11)
1092	/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9,10)
1093	///
1094	/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
1095	/// isa \p Version.
1096	///
1097	unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
1098	unsigned Expcnt, unsigned Lgkmcnt);
1099
1100	unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
1101
1102	// The following methods are only meaningful on targets that support
// S_WAIT_*CNT, introduced with gfx12.
1104
1105	/// \returns Loadcnt bit mask for given isa \p Version.
1106	/// Returns 0 for versions that do not support LOADcnt
1107	unsigned getLoadcntBitMask(const IsaVersion &Version);
1108
1109	/// \returns Samplecnt bit mask for given isa \p Version.
1110	/// Returns 0 for versions that do not support SAMPLEcnt
1111	unsigned getSamplecntBitMask(const IsaVersion &Version);
1112
1113	/// \returns Bvhcnt bit mask for given isa \p Version.
1114	/// Returns 0 for versions that do not support BVHcnt
1115	unsigned getBvhcntBitMask(const IsaVersion &Version);
1116
1117	/// \returns Dscnt bit mask for given isa \p Version.
1118	/// Returns 0 for versions that do not support DScnt
1119	unsigned getDscntBitMask(const IsaVersion &Version);
1120
/// \returns Kmcnt bit mask for given isa \p Version.
1122	/// Returns 0 for versions that do not support KMcnt
1123	unsigned getKmcntBitMask(const IsaVersion &Version);
1124
1125	/// \returns Xcnt bit mask for given isa \p Version.
1126	/// Returns 0 for versions that do not support Xcnt.
1127	unsigned getXcntBitMask(const IsaVersion &Version);
1128
/// \returns STOREcnt or VScnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support STOREcnt or VScnt.
1131	/// STOREcnt and VScnt are the same counter, the name used
1132	/// depends on the ISA version.
1133	unsigned getStorecntBitMask(const IsaVersion &Version);
1134
1135	// The following are only meaningful on targets that support
1136	// S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT.
1137
1138	/// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given
1139	/// isa \p Version.
1140	Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt);
1141
1142	/// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given
1143	/// isa \p Version.
1144	Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt);
1145
1146	/// \returns \p Loadcnt and \p Dscnt components of \p Decoded encoded as an
1147	/// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa
1148	/// \p Version.
1149	unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
1150
1151	/// \returns \p Storecnt and \p Dscnt components of \p Decoded encoded as an
1152	/// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa
1153	/// \p Version.
1154	unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
1155
1156	namespace Hwreg {
1157
1158	using HwregId = EncodingField<`5`, `0`>;
1159	using HwregOffset = EncodingField<`10`, `6`>;
1160
1161	struct HwregSize : EncodingField<`15`, `11`, `32`> {
1162	using EncodingField::EncodingField;
1163	constexpr uint64_t encode() const { return Value - `1`; }
1164	static ValueType decode(uint64_t Encoded) { return Encoded + `1`; }
1165	};
1166
1167	using HwregEncoding = EncodingFields<HwregId, HwregOffset, HwregSize>;
1168
1169	} // namespace Hwreg
1170
// Helpers for the depctr immediate and its individual fields. Only the
// declarations are visible in this header; the notes on the first four
// functions are inferred from their names and signatures and should be
// confirmed against the definitions.
namespace DepCtr {

/// \returns The default depctr encoding for \p STI (presumably the value with
/// every field at its default -- confirm against the definition).
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI);

/// Encodes the field called \p Name with value \p Val for \p STI.
/// \p UsedOprMask is passed by reference and so may be updated by the call.
/// NOTE(review): the meaning of the int result (including any error codes) is
/// not visible from this declaration.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
                 const MCSubtargetInfo &STI);

/// Queries whether \p Code can be expressed symbolically for \p STI.
/// \p HasNonDefaultVal is an output parameter.
bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
                              const MCSubtargetInfo &STI);

/// Decodes one field of \p Code for \p STI. \p Id, \p Name, \p Val and
/// \p IsDefault are passed by reference; \p Id is presumably an in/out cursor
/// over the fields -- confirm against the definition.
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
                  bool &IsDefault, const MCSubtargetInfo &STI);

/// \returns Decoded VaVdst from given immediate \p Encoded.
unsigned decodeFieldVaVdst(unsigned Encoded);

/// \returns Decoded VmVsrc from given immediate \p Encoded.
unsigned decodeFieldVmVsrc(unsigned Encoded);

/// \returns Decoded SaSdst from given immediate \p Encoded.
unsigned decodeFieldSaSdst(unsigned Encoded);

/// \returns Decoded VaSdst from given immediate \p Encoded.
unsigned decodeFieldVaSdst(unsigned Encoded);

/// \returns Decoded VaVcc from given immediate \p Encoded.
unsigned decodeFieldVaVcc(unsigned Encoded);

/// \returns Decoded VaSsrc from given immediate \p Encoded.
unsigned decodeFieldVaSsrc(unsigned Encoded);

/// \returns Decoded HoldCnt from given immediate \p Encoded.
unsigned decodeFieldHoldCnt(unsigned Encoded);

/// \returns \p VmVsrc as an encoded Depctr immediate.
unsigned encodeFieldVmVsrc(unsigned VmVsrc);

/// \returns \p Encoded combined with encoded \p VmVsrc.
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc);

/// \returns \p VaVdst as an encoded Depctr immediate.
unsigned encodeFieldVaVdst(unsigned VaVdst);

/// \returns \p Encoded combined with encoded \p VaVdst.
unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst);

/// \returns \p SaSdst as an encoded Depctr immediate.
unsigned encodeFieldSaSdst(unsigned SaSdst);

/// \returns \p Encoded combined with encoded \p SaSdst.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst);

/// \returns \p VaSdst as an encoded Depctr immediate.
unsigned encodeFieldVaSdst(unsigned VaSdst);

/// \returns \p Encoded combined with encoded \p VaSdst.
unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst);

/// \returns \p VaVcc as an encoded Depctr immediate.
unsigned encodeFieldVaVcc(unsigned VaVcc);

/// \returns \p Encoded combined with encoded \p VaVcc.
unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc);

/// \returns \p HoldCnt as an encoded Depctr immediate.
unsigned encodeFieldHoldCnt(unsigned HoldCnt);

// NOTE(review): the parameter names below are declared as (HoldCnt, Encoded),
// whereas every other two-argument encodeField* declaration in this namespace
// uses (Encoded, <field>). Confirm the order against the definition before
// relying on these names at a call site.
/// \returns \p Encoded combined with encoded \p HoldCnt.
unsigned encodeFieldHoldCnt(unsigned HoldCnt, unsigned Encoded);

/// \returns \p VaSsrc as an encoded Depctr immediate.
unsigned encodeFieldVaSsrc(unsigned VaSsrc);

/// \returns \p Encoded combined with encoded \p VaSsrc.
unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc);

} // namespace DepCtr
1245
// Export-target helpers. Only declarations are visible here; the notes below
// are inferred from names and signatures and should be confirmed against the
// definitions.
namespace Exp {

/// Looks up the target \p Id. \p Name and \p Index are output parameters;
/// the bool result presumably reports whether \p Id was recognised.
bool getTgtName(unsigned Id, StringRef &Name, int &Index);

/// \returns The target id corresponding to \p Name.
/// NOTE(review): the value returned for an unknown name is not visible here.
LLVM_READONLY
unsigned getTgtId(const StringRef Name);

/// \returns Whether target \p Id is supported for \p STI.
LLVM_READNONE
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);

} // namespace Exp
1257
// Helpers for MTBUF format operands: the split dfmt/nfmt pair and the unified
// format, judging by the declared names. Only declarations are visible here;
// NOTE(review): confirm exact semantics (in particular the int64_t results
// used for lookups by name) against the definitions.
namespace MTBUFFormat {

// Combines \p Dfmt and \p Nfmt into a single format value.
LLVM_READNONE
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);

// Splits \p Format; \p Dfmt and \p Nfmt are output parameters.
void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);

// Name <-> id lookups for the data format (dfmt).
int64_t getDfmt(const StringRef Name);

StringRef getDfmtName(unsigned Id);

// Name <-> id lookups for the numeric format (nfmt); these depend on \p STI.
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);

StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);

// Validity checks for split-format values on \p STI.
bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);

bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);

// Name <-> id lookups and validity check for the unified format.
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI);

StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI);

bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI);

// Converts a (\p Dfmt, \p Nfmt) pair to a unified format value for \p STI.
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
                             const MCSubtargetInfo &STI);

// Subtarget-dependent check and default for the encoded format operand.
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);

unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);

} // namespace MTBUFFormat
1291
// Helpers for the sendmsg immediate, which the declarations below describe
// in terms of a message id, an operation id and a stream id. Only
// declarations are visible here; NOTE(review): confirm the exact meaning of
// \p Strict and the field layout against the definitions.
namespace SendMsg {

// Validity checks for each component. \p Strict defaults to true.
LLVM_READNONE
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI);

LLVM_READNONE
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
                  bool Strict = true);

LLVM_READNONE
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
                      const MCSubtargetInfo &STI, bool Strict = true);

// Queries about which components a given message uses.
LLVM_READNONE
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI);

LLVM_READNONE
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);

// Splits \p Val; \p MsgId, \p OpId and \p StreamId are output parameters.
void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
               uint16_t &StreamId, const MCSubtargetInfo &STI);

// Combines the three components into one immediate.
LLVM_READNONE
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId);

} // namespace SendMsg
1318
1319	unsigned getInitialPSInputAddr(const Function &F);
1320
1321	bool getHasColorExport(const Function &F);
1322
1323	bool getHasDepthExport(const Function &F);
1324
1325	bool hasDynamicVGPR(const Function &F);
1326
1327	// Returns the value of the "amdgpu-dynamic-vgpr-block-size" attribute, or 0 if
1328	// the attribute is missing or its value is invalid.
1329	unsigned getDynamicVGPRBlockSize(const Function &F);
1330
1331	LLVM_READNONE
1332	constexpr bool isShader(CallingConv::ID CC) {
1333	switch (CC) {
1334	case CallingConv::AMDGPU_VS:
1335	case CallingConv::AMDGPU_LS:
1336	case CallingConv::AMDGPU_HS:
1337	case CallingConv::AMDGPU_ES:
1338	case CallingConv::AMDGPU_GS:
1339	case CallingConv::AMDGPU_PS:
1340	case CallingConv::AMDGPU_CS_Chain:
1341	case CallingConv::AMDGPU_CS_ChainPreserve:
1342	case CallingConv::AMDGPU_CS:
1343	return true;
1344	default:
1345	return false;
1346	}
1347	}
1348
1349	LLVM_READNONE
1350	constexpr bool isGraphics(CallingConv::ID CC) {
1351	return isShader(CC) \|\| CC == CallingConv::AMDGPU_Gfx;
1352	}
1353
1354	LLVM_READNONE
1355	constexpr bool isCompute(CallingConv::ID CC) {
1356	return !isGraphics(CC) \|\| CC == CallingConv::AMDGPU_CS;
1357	}
1358
1359	LLVM_READNONE
1360	constexpr bool isEntryFunctionCC(CallingConv::ID CC) {
1361	switch (CC) {
1362	case CallingConv::AMDGPU_KERNEL:
1363	case CallingConv::SPIR_KERNEL:
1364	case CallingConv::AMDGPU_VS:
1365	case CallingConv::AMDGPU_GS:
1366	case CallingConv::AMDGPU_PS:
1367	case CallingConv::AMDGPU_CS:
1368	case CallingConv::AMDGPU_ES:
1369	case CallingConv::AMDGPU_HS:
1370	case CallingConv::AMDGPU_LS:
1371	return true;
1372	default:
1373	return false;
1374	}
1375	}
1376
1377	LLVM_READNONE
1378	constexpr bool isChainCC(CallingConv::ID CC) {
1379	switch (CC) {
1380	case CallingConv::AMDGPU_CS_Chain:
1381	case CallingConv::AMDGPU_CS_ChainPreserve:
1382	return true;
1383	default:
1384	return false;
1385	}
1386	}
1387
1388	// These functions are considered entrypoints into the current module, i.e. they
1389	// are allowed to be called from outside the current module. This is different
1390	// from isEntryFunctionCC, which is only true for functions that are entered by
1391	// the hardware. Module entry points include all entry functions but also
1392	// include functions that can be called from other functions inside or outside
1393	// the current module. Module entry functions are allowed to allocate LDS.
1394	LLVM_READNONE
1395	constexpr bool isModuleEntryFunctionCC(CallingConv::ID CC) {
1396	switch (CC) {
1397	case CallingConv::AMDGPU_Gfx:
1398	return true;
1399	default:
1400	return isEntryFunctionCC(CC) \|\| isChainCC(CC);
1401	}
1402	}
1403
1404	LLVM_READNONE
1405	constexpr inline bool isKernel(CallingConv::ID CC) {
1406	switch (CC) {
1407	case CallingConv::AMDGPU_KERNEL:
1408	case CallingConv::SPIR_KERNEL:
1409	return true;
1410	default:
1411	return false;
1412	}
1413	}
1414
1415	LLVM_READNONE
1416	constexpr bool canGuaranteeTCO(CallingConv::ID CC) {
1417	return CC == CallingConv::Fast;
1418	}
1419
1420	/// Return true if we might ever do TCO for calls with this calling convention.
1421	LLVM_READNONE
1422	constexpr bool mayTailCallThisCC(CallingConv::ID CC) {
1423	switch (CC) {
1424	case CallingConv::C:
1425	case CallingConv::AMDGPU_Gfx:
1426	return true;
1427	default:
1428	return canGuaranteeTCO(CC);
1429	}
1430	}
1431
1432	bool hasXNACK(const MCSubtargetInfo &STI);
1433	bool hasSRAMECC(const MCSubtargetInfo &STI);
1434	bool hasMIMG_R128(const MCSubtargetInfo &STI);
1435	bool hasA16(const MCSubtargetInfo &STI);
1436	bool hasG16(const MCSubtargetInfo &STI);
1437	bool hasPackedD16(const MCSubtargetInfo &STI);
1438	bool hasGDS(const MCSubtargetInfo &STI);
1439	unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler = false);
1440	unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI);
1441
1442	bool isSI(const MCSubtargetInfo &STI);
1443	bool isCI(const MCSubtargetInfo &STI);
1444	bool isVI(const MCSubtargetInfo &STI);
1445	bool isGFX9(const MCSubtargetInfo &STI);
1446	bool isGFX9_GFX10(const MCSubtargetInfo &STI);
1447	bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI);
1448	bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI);
1449	bool isGFX8Plus(const MCSubtargetInfo &STI);
1450	bool isGFX9Plus(const MCSubtargetInfo &STI);
1451	bool isNotGFX9Plus(const MCSubtargetInfo &STI);
1452	bool isGFX10(const MCSubtargetInfo &STI);
1453	bool isGFX10_GFX11(const MCSubtargetInfo &STI);
1454	bool isGFX10Plus(const MCSubtargetInfo &STI);
1455	bool isNotGFX10Plus(const MCSubtargetInfo &STI);
1456	bool isGFX10Before1030(const MCSubtargetInfo &STI);
1457	bool isGFX11(const MCSubtargetInfo &STI);
1458	bool isGFX11Plus(const MCSubtargetInfo &STI);
1459	bool isGFX12(const MCSubtargetInfo &STI);
1460	bool isGFX12Plus(const MCSubtargetInfo &STI);
1461	bool isGFX1250(const MCSubtargetInfo &STI);
1462	bool isNotGFX12Plus(const MCSubtargetInfo &STI);
1463	bool isNotGFX11Plus(const MCSubtargetInfo &STI);
1464	bool isGCN3Encoding(const MCSubtargetInfo &STI);
1465	bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
1466	bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
1467	bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
1468	bool isGFX10_3_GFX11(const MCSubtargetInfo &STI);
1469	bool isGFX90A(const MCSubtargetInfo &STI);
1470	bool isGFX940(const MCSubtargetInfo &STI);
1471	bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
1472	bool hasMAIInsts(const MCSubtargetInfo &STI);
1473	bool hasVOPD(const MCSubtargetInfo &STI);
1474	bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI);
1475	int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
1476	unsigned hasKernargPreload(const MCSubtargetInfo &STI);
1477	bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST);
1478
/// \returns true if \p Reg is a scalar register.
1480	bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI);
1481
/// \returns true if \p Reg occupies the high 16-bits of a 32-bit register.
1483	bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI);
1484
1485	/// If \p Reg is a pseudo reg, return the correct hardware register given
1486	/// \p STI otherwise return \p Reg.
1487	MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI);
1488
1489	/// Convert hardware register \p Reg to a pseudo register
1490	LLVM_READNONE
1491	MCRegister mc2PseudoReg(MCRegister Reg);
1492
1493	LLVM_READNONE
1494	bool isInlineValue(unsigned Reg);
1495
1496	/// Is this an AMDGPU specific source operand? These include registers,
1497	/// inline constants, literals and mandatory literals (KImm).
1498	bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
1499
1500	/// Is this a KImm operand?
1501	bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo);
1502
1503	/// Is this floating-point operand?
1504	bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
1505
1506	/// Does this operand support only inlinable literals?
1507	bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
1508
/// Get the size in bits of a register from the register class with ID
/// \p RCID.
1510	unsigned getRegBitWidth(unsigned RCID);
1511
1512	/// Get the size in bits of a register from the register class \p RC.
1513	unsigned getRegBitWidth(const MCRegisterClass &RC);
1514
1515	/// Get size of register operand
1516	unsigned getRegOperandSize(const MCRegisterInfo MRI, const* MCInstrDesc &Desc,
1517	unsigned OpNo);
1518
1519	LLVM_READNONE
1520	inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
1521	switch (OpInfo.OperandType) {
1522	case AMDGPU::OPERAND_REG_IMM_INT32:
1523	case AMDGPU::OPERAND_REG_IMM_FP32:
1524	case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1525	case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1526	case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1527	case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1528	case AMDGPU::OPERAND_REG_IMM_V2INT32:
1529	case AMDGPU::OPERAND_REG_IMM_V2FP32:
1530	case AMDGPU::OPERAND_KIMM32:
1531	case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
1532	case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
1533	return `4`;
1534
1535	case AMDGPU::OPERAND_REG_IMM_INT64:
1536	case AMDGPU::OPERAND_REG_IMM_FP64:
1537	case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1538	case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1539	case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1540	return `8`;
1541
1542	case AMDGPU::OPERAND_REG_IMM_INT16:
1543	case AMDGPU::OPERAND_REG_IMM_BF16:
1544	case AMDGPU::OPERAND_REG_IMM_FP16:
1545	case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1546	case AMDGPU::OPERAND_REG_INLINE_C_BF16:
1547	case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1548	case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1549	case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
1550	case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1551	case AMDGPU::OPERAND_REG_IMM_V2INT16:
1552	case AMDGPU::OPERAND_REG_IMM_V2BF16:
1553	case AMDGPU::OPERAND_REG_IMM_V2FP16:
1554	return `2`;
1555
1556	default:
1557	llvm_unreachable("unhandled operand type");
1558	}
1559	}
1560
1561	LLVM_READNONE
1562	inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
1563	return getOperandSize(OpInfo: Desc.operands()[OpNo]);
1564	}
1565
/// Is this literal inlinable, and not one of the values intended for floating
/// point values.
///
/// \returns true if \p Literal lies in the closed range [-16, 64].
LLVM_READNONE
inline bool isInlinableIntLiteral(int64_t Literal) {
  return Literal >= -16 && Literal <= 64;
}
1572
1573	/// Is this literal inlinable
1574	LLVM_READNONE
1575	bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
1576
1577	LLVM_READNONE
1578	bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
1579
1580	LLVM_READNONE
1581	bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
1582
1583	LLVM_READNONE
1584	bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);
1585
1586	LLVM_READNONE
1587	bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
1588
1589	LLVM_READNONE
1590	bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi);
1591
1592	LLVM_READNONE
1593	std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal);
1594
1595	LLVM_READNONE
1596	std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal);
1597
1598	LLVM_READNONE
1599	std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal);
1600
1601	LLVM_READNONE
1602	bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType);
1603
1604	LLVM_READNONE
1605	bool isInlinableLiteralV2I16(uint32_t Literal);
1606
1607	LLVM_READNONE
1608	bool isInlinableLiteralV2BF16(uint32_t Literal);
1609
1610	LLVM_READNONE
1611	bool isInlinableLiteralV2F16(uint32_t Literal);
1612
1613	LLVM_READNONE
1614	bool isValid32BitLiteral(uint64_t Val, bool IsFP64);
1615
1616	bool isArgPassedInSGPR(const Argument *Arg);
1617
1618	bool isArgPassedInSGPR(const CallBase CB, unsigned* ArgNo);
1619
1620	LLVM_READONLY
1621	bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
1622	int64_t EncodedOffset);
1623
1624	LLVM_READONLY
1625	bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
1626	int64_t EncodedOffset, bool IsBuffer);
1627
1628	/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
1629	/// offsets.
1630	uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);
1631
1632	/// \returns The encoding that will be used for \p ByteOffset in the
1633	/// SMRD offset field, or std::nullopt if it won't fit. On GFX9 and GFX10
1634	/// S_LOAD instructions have a signed offset, on other subtargets it is
1635	/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
1636	std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
1637	int64_t ByteOffset, bool IsBuffer,
1638	bool HasSOffset = false);
1639
1640	/// \return The encoding that can be used for a 32-bit literal offset in an SMRD
/// instruction. This is only useful on CI.
1642	std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
1643	int64_t ByteOffset);
1644
1645	/// For pre-GFX12 FLAT instructions the offset must be positive;
1646	/// MSB is ignored and forced to zero.
1647	///
1648	/// \return The number of bits available for the signed offset field in flat
1649	/// instructions. Note that some forms of the instruction disallow negative
1650	/// offsets.
1651	unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST);
1652
1653	/// \returns true if this offset is small enough to fit in the SMRD
1654	/// offset field. \p ByteOffset should be the offset in bytes and
1655	/// not the encoded offset.
1656	bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
1657
1658	LLVM_READNONE
1659	inline bool isLegalDPALU_DPPControl(unsigned DC) {
1660	return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
1661	}
1662
1663	/// \returns true if an instruction may have a 64-bit VGPR operand.
1664	bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc);
1665
1666	/// \returns true if an instruction is a DP ALU DPP.
1667	bool isDPALU_DPP(const MCInstrDesc &OpDesc);
1668
1669	/// \returns true if the intrinsic is divergent
1670	bool isIntrinsicSourceOfDivergence(unsigned IntrID);
1671
1672	/// \returns true if the intrinsic is uniform
1673	bool isIntrinsicAlwaysUniform(unsigned IntrID);
1674
/// \returns lds block size in terms of dwords.
1676	/// This is used to calculate the lds size encoded for PAL metadata 3.0+ which
1677	/// must be defined in terms of bytes.
1678	unsigned getLdsDwGranularity(const MCSubtargetInfo &ST);
1679
1680	} // end namespace AMDGPU
1681
1682	raw_ostream &operator<<(raw_ostream &OS,
1683	const AMDGPU::IsaInfo::TargetIDSetting S);
1684
1685	} // end namespace llvm
1686
1687	#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
1688

Browse the source code of llvm_projects/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h