AMDGPUBaseInfo.h source code [llvm_projects/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h]

1	//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10	#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11
12	#include "AMDGPUSubtarget.h"
13	#include "SIDefines.h"
14	#include "llvm/IR/CallingConv.h"
15	#include "llvm/IR/InstrTypes.h"
16	#include "llvm/IR/Module.h"
17	#include "llvm/Support/Alignment.h"
18	#include <array>
19	#include <functional>
20	#include <utility>
21
22	struct amd_kernel_code_t;
23
24	namespace llvm {
25
26	struct Align;
27	class Argument;
28	class Function;
29	class GlobalValue;
30	class MCInstrInfo;
31	class MCRegisterClass;
32	class MCRegisterInfo;
33	class MCSubtargetInfo;
34	class StringRef;
35	class Triple;
36	class raw_ostream;
37
38	namespace AMDGPU {
39
40	struct AMDGPUMCKernelCodeT;
41	struct IsaVersion;
42
43	/// Generic target versions emitted by this version of LLVM.
44	///
45	/// These numbers are incremented every time a codegen breaking change occurs
46	/// within a generic family.
namespace GenericVersion {
// One version counter per generic target family. All families are currently
// at version 1.
static constexpr unsigned GFX9 = 1;
static constexpr unsigned GFX10_1 = 1;
static constexpr unsigned GFX10_3 = 1;
static constexpr unsigned GFX11 = 1;
static constexpr unsigned GFX12 = 1;
} // namespace GenericVersion
54
/// AMDHSA code object versions; each enumerator's value equals the version
/// number it names.
enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 };
56
57	/// \returns True if \p STI is AMDHSA.
58	bool isHsaAbi(const MCSubtargetInfo &STI);
59
60	/// \returns Code object version from the IR module flag.
61	unsigned getAMDHSACodeObjectVersion(const Module &M);
62
63	/// \returns Code object version from ELF's e_ident[EI_ABIVERSION].
64	unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion);
65
66	/// \returns The default HSA code object version. This should only be used when
67	/// we lack a more accurate CodeObjectVersion value (e.g. from the IR module
68	/// flag or a .amdhsa_code_object_version directive)
69	unsigned getDefaultAMDHSACodeObjectVersion();
70
71	/// \returns ABIVersion suitable for use in ELF's e_ident[EI_ABIVERSION]. \param
72	/// CodeObjectVersion is a value returned by getAMDHSACodeObjectVersion().
73	uint8_t getELFABIVersion(const Triple &OS, unsigned CodeObjectVersion);
74
75	/// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr
76	unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV);
77
78	/// \returns The offset of the hostcall pointer argument from implicitarg_ptr
79	unsigned getHostcallImplicitArgPosition(unsigned COV);
80
81	unsigned getDefaultQueueImplicitArgPosition(unsigned COV);
82	unsigned getCompletionActionImplicitArgPosition(unsigned COV);
83
/// Description of one buffer format. Entries of this type are handed out by
/// the getGcnBufferFormatInfo() overloads, which look a format up either by
/// (BitsPerComp, NumComponents, NumFormat) or by Format.
struct GcnBufferFormatInfo {
  unsigned Format;        // Lookup key of the Format-based overload.
  unsigned BitsPerComp;   // Bits per component.
  unsigned NumComponents; // Number of components.
  unsigned NumFormat;
  unsigned DataFormat;
};
91
/// Per-opcode properties of an MAI instruction.
/// NOTE(review): presumably the row type of the table requested via
/// GET_MAIInstInfoTable_DECL and queried by getMAIIsDGEMM() /
/// getMAIIsGFX940XDL() -- confirm against the generated tables.
struct MAIInstInfo {
  uint16_t Opcode;
  bool is_dgemm;
  bool is_gfx940_xdl;
};
97
98	#define GET_MIMGBaseOpcode_DECL
99	#define GET_MIMGDim_DECL
100	#define GET_MIMGEncoding_DECL
101	#define GET_MIMGLZMapping_DECL
102	#define GET_MIMGMIPMapping_DECL
103	#define GET_MIMGBiASMapping_DECL
104	#define GET_MAIInstInfoTable_DECL
105	#include "AMDGPUGenSearchableTables.inc"
106
107	namespace IsaInfo {
108
/// Fixed SGPR counts used by the ISA info helpers.
enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  TRAP_NUM_SGPRS = 16
};
115
/// State of a single target-ID feature. AMDGPUTargetID keeps one of these for
/// xnack and one for sramecc.
enum class TargetIDSetting {
  Unsupported,
  Any,
  Off,
  On
};
122
123	class AMDGPUTargetID {
124	private:
125	const MCSubtargetInfo &STI;
126	TargetIDSetting XnackSetting;
127	TargetIDSetting SramEccSetting;
128
129	public:
130	explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
131	~AMDGPUTargetID() = default;
132
133	/// \return True if the current xnack setting is not "Unsupported".
134	bool isXnackSupported() const {
135	return XnackSetting != TargetIDSetting::Unsupported;
136	}
137
138	/// \returns True if the current xnack setting is "On" or "Any".
139	bool isXnackOnOrAny() const {
140	return XnackSetting == TargetIDSetting::On \|\|
141	XnackSetting == TargetIDSetting::Any;
142	}
143
144	/// \returns True if current xnack setting is "On" or "Off",
145	/// false otherwise.
146	bool isXnackOnOrOff() const {
147	return getXnackSetting() == TargetIDSetting::On \|\|
148	getXnackSetting() == TargetIDSetting::Off;
149	}
150
151	/// \returns The current xnack TargetIDSetting, possible options are
152	/// "Unsupported", "Any", "Off", and "On".
153	TargetIDSetting getXnackSetting() const {
154	return XnackSetting;
155	}
156
157	/// Sets xnack setting to \p NewXnackSetting.
158	void setXnackSetting(TargetIDSetting NewXnackSetting) {
159	XnackSetting = NewXnackSetting;
160	}
161
162	/// \return True if the current sramecc setting is not "Unsupported".
163	bool isSramEccSupported() const {
164	return SramEccSetting != TargetIDSetting::Unsupported;
165	}
166
167	/// \returns True if the current sramecc setting is "On" or "Any".
168	bool isSramEccOnOrAny() const {
169	return SramEccSetting == TargetIDSetting::On \|\|
170	SramEccSetting == TargetIDSetting::Any;
171	}
172
173	/// \returns True if current sramecc setting is "On" or "Off",
174	/// false otherwise.
175	bool isSramEccOnOrOff() const {
176	return getSramEccSetting() == TargetIDSetting::On \|\|
177	getSramEccSetting() == TargetIDSetting::Off;
178	}
179
180	/// \returns The current sramecc TargetIDSetting, possible options are
181	/// "Unsupported", "Any", "Off", and "On".
182	TargetIDSetting getSramEccSetting() const {
183	return SramEccSetting;
184	}
185
186	/// Sets sramecc setting to \p NewSramEccSetting.
187	void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
188	SramEccSetting = NewSramEccSetting;
189	}
190
191	void setTargetIDFromFeaturesString(StringRef FS);
192	void setTargetIDFromTargetIDStream(StringRef TargetID);
193
194	/// \returns String representation of an object.
195	std::string toString() const;
196	};
197
198	/// \returns Wavefront size for given subtarget \p STI.
199	unsigned getWavefrontSize(const MCSubtargetInfo *STI);
200
201	/// \returns Local memory size in bytes for given subtarget \p STI.
202	unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
203
204	/// \returns Maximum addressable local memory size in bytes for given subtarget
205	/// \p STI.
206	unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI);
207
208	/// \returns Number of execution units per compute unit for given subtarget \p
209	/// STI.
210	unsigned getEUsPerCU(const MCSubtargetInfo *STI);
211
212	/// \returns Maximum number of work groups per compute unit for given subtarget
213	/// \p STI and limited by given \p FlatWorkGroupSize.
214	unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
215	unsigned FlatWorkGroupSize);
216
217	/// \returns Minimum number of waves per execution unit for given subtarget \p
218	/// STI.
219	unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
220
221	/// \returns Maximum number of waves per execution unit for given subtarget \p
222	/// STI without any kind of limitation.
223	unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
224
225	/// \returns Number of waves per execution unit required to support the given \p
226	/// FlatWorkGroupSize.
227	unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
228	unsigned FlatWorkGroupSize);
229
230	/// \returns Minimum flat work group size for given subtarget \p STI.
231	unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
232
233	/// \returns Maximum flat work group size for given subtarget \p STI.
234	unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
235
236	/// \returns Number of waves per work group for given subtarget \p STI and
237	/// \p FlatWorkGroupSize.
238	unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
239	unsigned FlatWorkGroupSize);
240
241	/// \returns SGPR allocation granularity for given subtarget \p STI.
242	unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
243
244	/// \returns SGPR encoding granularity for given subtarget \p STI.
245	unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
246
247	/// \returns Total number of SGPRs for given subtarget \p STI.
248	unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
249
250	/// \returns Addressable number of SGPRs for given subtarget \p STI.
251	unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
252
253	/// \returns Minimum number of SGPRs that meets the given number of waves per
254	/// execution unit requirement for given subtarget \p STI.
255	unsigned getMinNumSGPRs(const MCSubtargetInfo STI, unsigned* WavesPerEU);
256
257	/// \returns Maximum number of SGPRs that meets the given number of waves per
258	/// execution unit requirement for given subtarget \p STI.
259	unsigned getMaxNumSGPRs(const MCSubtargetInfo STI, unsigned* WavesPerEU,
260	bool Addressable);
261
262	/// \returns Number of extra SGPRs implicitly required by given subtarget \p
263	/// STI when the given special registers are used.
264	unsigned getNumExtraSGPRs(const MCSubtargetInfo STI, bool* VCCUsed,
265	bool FlatScrUsed, bool XNACKUsed);
266
267	/// \returns Number of extra SGPRs implicitly required by given subtarget \p
268	/// STI when the given special registers are used. XNACK is inferred from
269	/// \p STI.
270	unsigned getNumExtraSGPRs(const MCSubtargetInfo STI, bool* VCCUsed,
271	bool FlatScrUsed);
272
273	/// \returns Number of SGPR blocks needed for given subtarget \p STI when
274	/// \p NumSGPRs are used. \p NumSGPRs should already include any special
275	/// register counts.
276	unsigned getNumSGPRBlocks(const MCSubtargetInfo STI, unsigned* NumSGPRs);
277
278	/// \returns VGPR allocation granularity for given subtarget \p STI.
279	///
280	/// For subtargets which support it, \p EnableWavefrontSize32 should match
281	/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
282	unsigned
283	getVGPRAllocGranule(const MCSubtargetInfo *STI,
284	std::optional<bool> EnableWavefrontSize32 = std::nullopt);
285
286	/// \returns VGPR encoding granularity for given subtarget \p STI.
287	///
288	/// For subtargets which support it, \p EnableWavefrontSize32 should match
289	/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
290	unsigned getVGPREncodingGranule(
291	const MCSubtargetInfo *STI,
292	std::optional<bool> EnableWavefrontSize32 = std::nullopt);
293
294	/// \returns Total number of VGPRs for given subtarget \p STI.
295	unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
296
297	/// \returns Addressable number of architectural VGPRs for a given subtarget \p
298	/// STI.
299	unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI);
300
301	/// \returns Addressable number of VGPRs for given subtarget \p STI.
302	unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
303
304	/// \returns Minimum number of VGPRs that meets given number of waves per
305	/// execution unit requirement for given subtarget \p STI.
306	unsigned getMinNumVGPRs(const MCSubtargetInfo STI, unsigned* WavesPerEU);
307
308	/// \returns Maximum number of VGPRs that meets given number of waves per
309	/// execution unit requirement for given subtarget \p STI.
310	unsigned getMaxNumVGPRs(const MCSubtargetInfo STI, unsigned* WavesPerEU);
311
312	/// \returns Number of waves reachable for a given \p NumVGPRs usage for given
313	/// subtarget \p STI.
314	unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
315	unsigned NumVGPRs);
316
317	/// \returns Number of waves reachable for a given \p NumVGPRs usage, \p Granule
318	/// size, \p MaxWaves possible, and \p TotalNumVGPRs available.
319	unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
320	unsigned MaxWaves,
321	unsigned TotalNumVGPRs);
322
323	/// \returns Occupancy for a given \p SGPRs usage, \p MaxWaves possible, and \p
324	/// Gen.
325	unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
326	AMDGPUSubtarget::Generation Gen);
327
328	/// \returns Number of VGPR blocks needed for given subtarget \p STI when
329	/// \p NumVGPRs are used. We actually return the number of blocks -1, since
330	/// that's what we encode.
331	///
332	/// For subtargets which support it, \p EnableWavefrontSize32 should match the
333	/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
334	unsigned getEncodedNumVGPRBlocks(
335	const MCSubtargetInfo STI, unsigned* NumVGPRs,
336	std::optional<bool> EnableWavefrontSize32 = std::nullopt);
337
338	/// \returns Number of VGPR blocks that need to be allocated for the given
339	/// subtarget \p STI when \p NumVGPRs are used.
340	unsigned getAllocatedNumVGPRBlocks(
341	const MCSubtargetInfo STI, unsigned* NumVGPRs,
342	std::optional<bool> EnableWavefrontSize32 = std::nullopt);
343
344	} // end namespace IsaInfo
345
// Represents a field in an encoded value.
//
// HighBit/LowBit are the inclusive bit positions of the field and D is its
// default value. The object stores the raw (unshifted) field value: encode()
// and decode() pass the value through unchanged, so shifting by Offset and
// masking to Width is the job of the user (see EncodingFields).
template <unsigned HighBit, unsigned LowBit, unsigned D = 0>
struct EncodingField {
  static_assert(HighBit >= LowBit, "Invalid bit range!");
  static constexpr unsigned Offset = LowBit;
  static constexpr unsigned Width = HighBit - LowBit + 1;

  using ValueType = unsigned;
  static constexpr ValueType Default = D;

  ValueType Value;
  // Intentionally implicit so a plain integer can be passed where a field is
  // expected.
  constexpr EncodingField(ValueType Value) : Value(Value) {}

  // Returns the stored value widened to 64 bits (not shifted).
  constexpr uint64_t encode() const { return Value; }
  // Converts already-extracted field bits back to ValueType.
  static ValueType decode(uint64_t Encoded) { return Encoded; }
};
362
363	// Represents a single bit in an encoded value.
364	template <unsigned Bit, unsigned D = `0`>
365	using EncodingBit = EncodingField<Bit, Bit, D>;
366
367	// A helper for encoding and decoding multiple fields.
368	template <typename... Fields> struct EncodingFields {
369	static constexpr uint64_t encode(Fields... Values) {
370	return ((Values.encode() << Values.Offset) \| ...);
371	}
372
373	static std::tuple<typename Fields::ValueType...> decode(uint64_t Encoded) {
374	return {Fields::decode((Encoded >> Fields::Offset) &
375	maxUIntN(Fields::Width))...};
376	}
377	};
378
379	LLVM_READONLY
380	int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
381
382	LLVM_READONLY
383	inline bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx) {
384	return getNamedOperandIdx(Opcode, NamedIdx) != -`1`;
385	}
386
387	LLVM_READONLY
388	int getSOPPWithRelaxation(uint16_t Opcode);
389
/// Properties of one MIMG base opcode, as handed out by getMIMGBaseOpcode()
/// and getMIMGBaseOpcodeInfo().
/// NOTE(review): presumably the row type of the table requested via
/// GET_MIMGBaseOpcode_DECL, in which case member order must match the
/// generated table -- confirm before reordering.
struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode;
  bool Store;
  bool Atomic;
  bool AtomicX2;
  bool Sampler;
  bool Gather4;

  uint8_t NumExtraArgs;
  bool Gradients;
  bool G16;
  bool Coordinates;
  bool LodOrClampOrMip;
  bool HasD16;
  bool MSAA;
  bool BVH;
  bool A16;
  bool NoReturn;
};
409
410	LLVM_READONLY
411	const MIMGBaseOpcodeInfo getMIMGBaseOpcode(unsigned* Opc);
412
413	LLVM_READONLY
414	const MIMGBaseOpcodeInfo getMIMGBaseOpcodeInfo(unsigned* BaseOpcode);
415
/// Properties of one MIMG dimension, as handed out by getMIMGDimInfo(),
/// getMIMGDimInfoByEncoding() and getMIMGDimInfoByAsmSuffix().
struct MIMGDimInfo {
  MIMGDim Dim;
  uint8_t NumCoords;
  uint8_t NumGradients;
  bool MSAA;
  bool DA;
  uint8_t Encoding;      // presumably the key of the by-encoding lookup
  const char *AsmSuffix; // presumably the key of the by-asm-suffix lookup
};
425
426	LLVM_READONLY
427	const MIMGDimInfo getMIMGDimInfo(unsigned* DimEnum);
428
429	LLVM_READONLY
430	const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
431
432	LLVM_READONLY
433	const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
434
/// Pairs an "L" MIMG base opcode with its "LZ" counterpart; handed out by
/// getMIMGLZMappingInfo().
struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;
  MIMGBaseOpcode LZ;
};
439
/// Pairs a "MIP" MIMG base opcode with its "NONMIP" counterpart; handed out
/// by getMIMGMIPMappingInfo().
struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;
  MIMGBaseOpcode NONMIP;
};
444
/// Pairs a "Bias" MIMG base opcode with its "NoBias" counterpart; handed out
/// by getMIMGBiasMappingInfo().
struct MIMGBiasMappingInfo {
  MIMGBaseOpcode Bias;
  MIMGBaseOpcode NoBias;
};
449
/// Pairs an "Offset" MIMG base opcode with its "NoOffset" counterpart; handed
/// out by getMIMGOffsetMappingInfo().
struct MIMGOffsetMappingInfo {
  MIMGBaseOpcode Offset;
  MIMGBaseOpcode NoOffset;
};
454
/// Pairs a "G" MIMG base opcode with its "G16" counterpart; handed out by
/// getMIMGG16MappingInfo().
struct MIMGG16MappingInfo {
  MIMGBaseOpcode G;
  MIMGBaseOpcode G16;
};
459
460	LLVM_READONLY
461	const MIMGLZMappingInfo getMIMGLZMappingInfo(unsigned* L);
462
/// Pairs the 2-address and 3-address opcodes of a WMMA instruction.
/// NOTE(review): presumably backs mapWMMA2AddrTo3AddrOpcode() and
/// mapWMMA3AddrTo2AddrOpcode() -- confirm in the implementation file.
struct WMMAOpcodeMappingInfo {
  unsigned Opcode2Addr;
  unsigned Opcode3Addr;
};
467
468	LLVM_READONLY
469	const MIMGMIPMappingInfo getMIMGMIPMappingInfo(unsigned* MIP);
470
471	LLVM_READONLY
472	const MIMGBiasMappingInfo getMIMGBiasMappingInfo(unsigned* Bias);
473
474	LLVM_READONLY
475	const MIMGOffsetMappingInfo getMIMGOffsetMappingInfo(unsigned* Offset);
476
477	LLVM_READONLY
478	const MIMGG16MappingInfo getMIMGG16MappingInfo(unsigned* G);
479
480	LLVM_READONLY
481	int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
482	unsigned VDataDwords, unsigned VAddrDwords);
483
484	LLVM_READONLY
485	int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
486
487	LLVM_READONLY
488	unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
489	const MIMGDimInfo Dim, bool* IsA16,
490	bool IsG16Supported);
491
/// Properties of one concrete MIMG opcode, as handed out by getMIMGInfo().
/// The BaseOpcode, MIMGEncoding, VDataDwords and VAddrDwords members mirror
/// the parameters of getMIMGOpcode().
struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
  uint8_t VAddrOperands;
};
500
501	LLVM_READONLY
502	const MIMGInfo getMIMGInfo(unsigned* Opc);
503
504	LLVM_READONLY
505	int getMTBUFBaseOpcode(unsigned Opc);
506
507	LLVM_READONLY
508	int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
509
510	LLVM_READONLY
511	int getMTBUFElements(unsigned Opc);
512
513	LLVM_READONLY
514	bool getMTBUFHasVAddr(unsigned Opc);
515
516	LLVM_READONLY
517	bool getMTBUFHasSrsrc(unsigned Opc);
518
519	LLVM_READONLY
520	bool getMTBUFHasSoffset(unsigned Opc);
521
522	LLVM_READONLY
523	int getMUBUFBaseOpcode(unsigned Opc);
524
525	LLVM_READONLY
526	int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);
527
528	LLVM_READONLY
529	int getMUBUFElements(unsigned Opc);
530
531	LLVM_READONLY
532	bool getMUBUFHasVAddr(unsigned Opc);
533
534	LLVM_READONLY
535	bool getMUBUFHasSrsrc(unsigned Opc);
536
537	LLVM_READONLY
538	bool getMUBUFHasSoffset(unsigned Opc);
539
540	LLVM_READONLY
541	bool getMUBUFIsBufferInv(unsigned Opc);
542
543	LLVM_READONLY
544	bool getMUBUFTfe(unsigned Opc);
545
546	LLVM_READONLY
547	bool getSMEMIsBuffer(unsigned Opc);
548
549	LLVM_READONLY
550	bool getVOP1IsSingle(unsigned Opc);
551
552	LLVM_READONLY
553	bool getVOP2IsSingle(unsigned Opc);
554
555	LLVM_READONLY
556	bool getVOP3IsSingle(unsigned Opc);
557
558	LLVM_READONLY
559	bool isVOPC64DPP(unsigned Opc);
560
561	LLVM_READONLY
562	bool isVOPCAsmOnly(unsigned Opc);
563
564	/// Returns true if MAI operation is a double precision GEMM.
565	LLVM_READONLY
566	bool getMAIIsDGEMM(unsigned Opc);
567
568	LLVM_READONLY
569	bool getMAIIsGFX940XDL(unsigned Opc);
570
/// Result of getCanBeVOPD(): one flag per VOPD component.
/// NOTE(review): presumably X/Y say whether the opcode may be used as the X
/// resp. Y component of a VOPD instruction -- confirm in the implementation.
struct CanBeVOPD {
  bool X;
  bool Y;
};
575
576	/// \returns SIEncodingFamily used for VOPD encoding on a \p ST.
577	LLVM_READONLY
578	unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST);
579
580	LLVM_READONLY
581	CanBeVOPD getCanBeVOPD(unsigned Opc);
582
583	LLVM_READONLY
584	const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
585	uint8_t NumComponents,
586	uint8_t NumFormat,
587	const MCSubtargetInfo &STI);
588	LLVM_READONLY
589	const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
590	const MCSubtargetInfo &STI);
591
592	LLVM_READONLY
593	int getMCOpcode(uint16_t Opcode, unsigned Gen);
594
595	LLVM_READONLY
596	unsigned getVOPDOpcode(unsigned Opc);
597
598	LLVM_READONLY
599	int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily);
600
601	LLVM_READONLY
602	bool isVOPD(unsigned Opc);
603
604	LLVM_READNONE
605	bool isMAC(unsigned Opc);
606
607	LLVM_READNONE
608	bool isPermlane16(unsigned Opc);
609
610	LLVM_READNONE
611	bool isGenericAtomic(unsigned Opc);
612
613	LLVM_READNONE
614	bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc);
615
616	namespace VOPD {
617
// Operand indices within a single VOPD component, followed by the operand
// counts derived from them.
enum Component : unsigned {
  DST = 0,
  SRC0,
  SRC1,
  SRC2,

  DST_NUM = 1,
  MAX_SRC_NUM = 3,
  MAX_OPR_NUM = DST_NUM + MAX_SRC_NUM
};
628
// LSB mask for VGPR banks per VOPD component operand.
// 4 banks result in a mask 3, setting 2 lower bits.
constexpr unsigned VOPD_VGPR_BANK_MASKS[] = {1, 3, 3, 1};
632
// Indices of the two components of a VOPD instruction, plus a list and a
// count suitable for iterating over them.
enum ComponentIndex : unsigned { X = 0, Y = 1 };
constexpr unsigned COMPONENTS[] = {ComponentIndex::X, ComponentIndex::Y};
constexpr unsigned COMPONENTS_NUM = 2;
636
637	// Properties of VOPD components.
638	class ComponentProps {
639	private:
640	unsigned SrcOperandsNum = `0`;
641	unsigned MandatoryLiteralIdx = ~`0u`;
642	bool HasSrc2Acc = false;
643
644	public:
645	ComponentProps() = default;
646	ComponentProps(const MCInstrDesc &OpDesc);
647
648	// Return the total number of src operands this component has.
649	unsigned getCompSrcOperandsNum() const { return SrcOperandsNum; }
650
651	// Return the number of src operands of this component visible to the parser.
652	unsigned getCompParsedSrcOperandsNum() const {
653	return SrcOperandsNum - HasSrc2Acc;
654	}
655
656	// Return true iif this component has a mandatory literal.
657	bool hasMandatoryLiteral() const { return MandatoryLiteralIdx != ~`0u`; }
658
659	// If this component has a mandatory literal, return component operand
660	// index of this literal (i.e. either Component::SRC1 or Component::SRC2).
661	unsigned getMandatoryLiteralCompOperandIndex() const {
662	assert(hasMandatoryLiteral());
663	return MandatoryLiteralIdx;
664	}
665
666	// Return true iif this component has operand
667	// with component index CompSrcIdx and this operand may be a register.
668	bool hasRegSrcOperand(unsigned CompSrcIdx) const {
669	assert(CompSrcIdx < Component::MAX_SRC_NUM);
670	return SrcOperandsNum > CompSrcIdx && !hasMandatoryLiteralAt(CompSrcIdx);
671	}
672
673	// Return true iif this component has tied src2.
674	bool hasSrc2Acc() const { return HasSrc2Acc; }
675
676	private:
677	bool hasMandatoryLiteralAt(unsigned CompSrcIdx) const {
678	assert(CompSrcIdx < Component::MAX_SRC_NUM);
679	return MandatoryLiteralIdx == Component::DST_NUM + CompSrcIdx;
680	}
681	};
682
// Kind of a VOPD component. The values are used as indices into the
// per-kind tables of ComponentLayout.
enum ComponentKind : unsigned {
  SINGLE = 0,  // A single VOP1 or VOP2 instruction which may be used in VOPD.
  COMPONENT_X, // A VOPD instruction, X component.
  COMPONENT_Y, // A VOPD instruction, Y component.
  MAX = COMPONENT_Y
};
689
690	// Interface functions of this class map VOPD component operand indices
691	// to indices of operands in MachineInstr/MCInst or parsed operands array.
692	//
693	// Note that this class operates with 3 kinds of indices:
694	// - VOPD component operand indices (Component::DST, Component::SRC0, etc.);
695	// - MC operand indices (they refer operands in a MachineInstr/MCInst);
696	// - parsed operand indices (they refer operands in parsed operands array).
697	//
698	// For SINGLE components mapping between these indices is trivial.
699	// But things get more complicated for COMPONENT_X and
700	// COMPONENT_Y because these components share the same
701	// MachineInstr/MCInst and the same parsed operands array.
702	// Below is an example of component operand to parsed operand
703	// mapping for the following instruction:
704	//
705	// v_dual_add_f32 v255, v4, v5 :: v_dual_mov_b32 v6, v1
706	//
707	// PARSED COMPONENT PARSED
708	// COMPONENT OPERANDS OPERAND INDEX OPERAND INDEX
709	// -------------------------------------------------------------------
710	// "v_dual_add_f32" 0
711	// v_dual_add_f32 v255 0 (DST) --> 1
712	// v4 1 (SRC0) --> 2
713	// v5 2 (SRC1) --> 3
714	// "::" 4
715	// "v_dual_mov_b32" 5
716	// v_dual_mov_b32 v6 0 (DST) --> 6
717	// v1 1 (SRC0) --> 7
718	// -------------------------------------------------------------------
719	//
720	class ComponentLayout {
721	private:
722	// Regular MachineInstr/MCInst operands are ordered as follows:
723	// dst, src0 [, other src operands]
724	// VOPD MachineInstr/MCInst operands are ordered as follows:
725	// dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
726	// Each ComponentKind has operand indices defined below.
727	static constexpr unsigned MC_DST_IDX[] = {`0`, `0`, `1`};
728	static constexpr unsigned FIRST_MC_SRC_IDX[] = {`1`, `2`, `2` / + OpX.MCSrcNum /};
729
730	// Parsed operands of regular instructions are ordered as follows:
731	// Mnemo dst src0 [vsrc1 ...]
732	// Parsed VOPD operands are ordered as follows:
733	// OpXMnemo dstX src0X [vsrc1X\|imm vsrc1X\|vsrc1X imm] '::'
734	// OpYMnemo dstY src0Y [vsrc1Y\|imm vsrc1Y\|vsrc1Y imm]
735	// Each ComponentKind has operand indices defined below.
736	static constexpr unsigned PARSED_DST_IDX[] = {`1`, `1`,
737	`4` / + OpX.ParsedSrcNum /};
738	static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {
739	`2`, `2`, `5` / + OpX.ParsedSrcNum /};
740
741	private:
742	const ComponentKind Kind;
743	const ComponentProps PrevComp;
744
745	public:
746	// Create layout for COMPONENT_X or SINGLE component.
747	ComponentLayout(ComponentKind Kind) : Kind(Kind) {
748	assert(Kind == ComponentKind::SINGLE \|\| Kind == ComponentKind::COMPONENT_X);
749	}
750
751	// Create layout for COMPONENT_Y which depends on COMPONENT_X layout.
752	ComponentLayout(const ComponentProps &OpXProps)
753	: Kind(ComponentKind::COMPONENT_Y), PrevComp (OpXProps) {}
754
755	public:
756	// Return the index of dst operand in MCInst operands.
757	unsigned getIndexOfDstInMCOperands() const { return MC_DST_IDX[Kind]; }
758
759	// Return the index of the specified src operand in MCInst operands.
760	unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx) const {
761	assert(CompSrcIdx < Component::MAX_SRC_NUM);
762	return FIRST_MC_SRC_IDX[Kind] + getPrevCompSrcNum() + CompSrcIdx;
763	}
764
765	// Return the index of dst operand in the parsed operands array.
766	unsigned getIndexOfDstInParsedOperands() const {
767	return PARSED_DST_IDX[Kind] + getPrevCompParsedSrcNum();
768	}
769
770	// Return the index of the specified src operand in the parsed operands array.
771	unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const {
772	assert(CompSrcIdx < Component::MAX_SRC_NUM);
773	return FIRST_PARSED_SRC_IDX[Kind] + getPrevCompParsedSrcNum() + CompSrcIdx;
774	}
775
776	private:
777	unsigned getPrevCompSrcNum() const {
778	return PrevComp.getCompSrcOperandsNum();
779	}
780	unsigned getPrevCompParsedSrcNum() const {
781	return PrevComp.getCompParsedSrcOperandsNum();
782	}
783	};
784
// Layout and properties of VOPD components.
// Combines the operand-index mapping of ComponentLayout with the operand
// properties of ComponentProps for one component.
class ComponentInfo : public ComponentLayout, public ComponentProps {
public:
  // Create ComponentInfo for COMPONENT_X or SINGLE component.
  // Kind defaults to SINGLE; the properties are built from OpDesc.
  ComponentInfo(const MCInstrDesc &OpDesc,
                ComponentKind Kind = ComponentKind::SINGLE)
      : ComponentLayout (Kind), ComponentProps (OpDesc) {}

  // Create ComponentInfo for COMPONENT_Y which depends on COMPONENT_X layout.
  // OpXProps are the properties of the X component; OpDesc describes Y.
  ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps)
      : ComponentLayout (OpXProps), ComponentProps (OpDesc) {}

  // Map component operand index to parsed operand index.
  // Return 0 if the specified operand does not exist.
  unsigned getIndexInParsedOperands(unsigned CompOprIdx) const;
};
801
// Properties of VOPD instructions.
// Holds one ComponentInfo per component, indexable with operator[].
class InstInfo {
private:
  const ComponentInfo CompInfo[COMPONENTS_NUM];

public:
  // One register index per component operand (dst plus up to MAX_SRC_NUM
  // sources).
  using RegIndices = std::array<unsigned, Component::MAX_OPR_NUM>;

  // Build from two instruction descriptions. Each ComponentInfo is
  // constructed implicitly from its MCInstrDesc, so both get the default
  // kind, SINGLE.
  InstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
      : CompInfo{OpX, OpY} {}

  // Build from already constructed component infos (copied).
  InstInfo(const ComponentInfo &OprInfoX, const ComponentInfo &OprInfoY)
      : CompInfo{OprInfoX, OprInfoY} {}

  // Access the info of component ComponentIdx, which must be less than
  // COMPONENTS_NUM.
  const ComponentInfo &operator[](size_t ComponentIdx) const {
    assert(ComponentIdx < COMPONENTS_NUM);
    return CompInfo[ComponentIdx];
  }

  // Check VOPD operands constraints.
  // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
  // for the specified component and MC operand. The callback must return 0
  // if the operand is not a register or not a VGPR.
  // If \p SkipSrc is set to true then constraints for source operands are not
  // checked.
  bool hasInvalidOperand(std::function<unsigned(unsigned, unsigned)> GetRegIdx,
                         bool SkipSrc = false) const {
    return getInvalidCompOperandIndex(GetRegIdx, SkipSrc).has_value();
  }

  // Check VOPD operands constraints.
  // Return the index of an invalid component operand, if any.
  // If \p SkipSrc is set to true then constraints for source operands are not
  // checked.
  std::optional<unsigned> getInvalidCompOperandIndex(
      std::function<unsigned(unsigned, unsigned)> GetRegIdx,
      bool SkipSrc = false) const;

private:
  // Collect the register indices of component ComponentIdx using GetRegIdx.
  RegIndices
  getRegIndices(unsigned ComponentIdx,
                std::function<unsigned(unsigned, unsigned)> GetRegIdx) const;
};
845
846	} // namespace VOPD
847
848	LLVM_READONLY
849	std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode);
850
851	LLVM_READONLY
852	// Get properties of 2 single VOP1/VOP2 instructions
853	// used as components to create a VOPD instruction.
854	VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY);
855
856	LLVM_READONLY
857	// Get properties of VOPD X and Y components.
858	VOPD::InstInfo
859	getVOPDInstInfo(unsigned VOPDOpcode, const MCInstrInfo *InstrInfo);
860
861	LLVM_READONLY
862	bool isTrue16Inst(unsigned Opc);
863
864	LLVM_READONLY
865	bool isInvalidSingleUseConsumerInst(unsigned Opc);
866
867	LLVM_READONLY
868	bool isInvalidSingleUseProducerInst(unsigned Opc);
869
870	LLVM_READONLY
871	unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);
872
873	LLVM_READONLY
874	unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);
875
876	void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &Header,
877	const MCSubtargetInfo *STI);
878
879	bool isGroupSegment(const GlobalValue *GV);
880	bool isGlobalSegment(const GlobalValue *GV);
881	bool isReadOnlySegment(const GlobalValue *GV);
882
883	/// \returns True if constants should be emitted to .text section for given
884	/// target triple \p TT, false otherwise.
885	bool shouldEmitConstantsToTextSection(const Triple &TT);
886
887	/// \returns Integer value requested using \p F's \p Name attribute.
888	///
889	/// \returns \p Default if attribute is not present.
890	///
891	/// \returns \p Default and emits error if requested value cannot be converted
892	/// to integer.
893	int getIntegerAttribute(const Function &F, StringRef Name, int Default);
894
895	/// \returns A pair of integer values requested using \p F's \p Name attribute
896	/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
897	/// is false).
898	///
899	/// \returns \p Default if attribute is not present.
900	///
901	/// \returns \p Default and emits error if one of the requested values cannot be
902	/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
903	/// not present.
904	std::pair<unsigned, unsigned>
905	getIntegerPairAttribute(const Function &F, StringRef Name,
906	std::pair<unsigned, unsigned> Default,
907	bool OnlyFirstRequired = false);
908
909	/// \returns Generate a vector of integer values requested using \p F's \p Name
910	/// attribute.
911	///
912	/// \returns true if exactly Size (>2) number of integers are found in the
913	/// attribute.
914	///
915	/// \returns false if any error occurs.
916	SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
917	unsigned Size);
918
/// Represents the counter values to wait for in an s_waitcnt instruction.
///
/// Large values (including the maximum possible integer) can be used to
/// represent "don't care" waits. Every counter defaults to ~0u, which the
/// hasWait*() predicates treat as "no wait requested".
struct Waitcnt {
  unsigned LoadCnt = ~0u;   // Corresponds to Vmcnt prior to gfx12.
  unsigned ExpCnt = ~0u;
  unsigned DsCnt = ~0u;     // Corresponds to LGKMcnt prior to gfx12.
  unsigned StoreCnt = ~0u;  // Corresponds to VScnt on gfx10/gfx11.
  unsigned SampleCnt = ~0u; // gfx12+ only.
  unsigned BvhCnt = ~0u;    // gfx12+ only.
  unsigned KmCnt = ~0u;     // gfx12+ only.

  Waitcnt() = default;
  // Pre-gfx12 constructor. The gfx12-only counters stay at "don't care".
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : LoadCnt(VmCnt), ExpCnt(ExpCnt), DsCnt(LgkmCnt), StoreCnt(VsCnt),
        SampleCnt(~0u), BvhCnt(~0u), KmCnt(~0u) {}

  // gfx12+ constructor.
  Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
          unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt)
      : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt),
        SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt) {}

  /// \returns True if any counter differs from ~0u.
  bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); }

  /// \returns True if any counter other than StoreCnt differs from ~0u.
  bool hasWaitExceptStoreCnt() const {
    return LoadCnt != ~0u || ExpCnt != ~0u || DsCnt != ~0u ||
           SampleCnt != ~0u || BvhCnt != ~0u || KmCnt != ~0u;
  }

  /// \returns True if StoreCnt differs from ~0u.
  bool hasWaitStoreCnt() const { return StoreCnt != ~0u; }

  /// \returns A Waitcnt holding, per counter, the smaller of this object's
  /// value and \p Other's value.
  Waitcnt combined(const Waitcnt &Other) const {
    // Does the right thing provided self and Other are either both pre-gfx12
    // or both gfx12+.
    return Waitcnt(
        std::min(LoadCnt, Other.LoadCnt), std::min(ExpCnt, Other.ExpCnt),
        std::min(DsCnt, Other.DsCnt), std::min(StoreCnt, Other.StoreCnt),
        std::min(SampleCnt, Other.SampleCnt), std::min(BvhCnt, Other.BvhCnt),
        std::min(KmCnt, Other.KmCnt));
  }
};
963
964	// The following methods are only meaningful on targets that support
965	// S_WAITCNT.
966
967	/// \returns Vmcnt bit mask for given isa \p Version.
968	unsigned getVmcntBitMask(const IsaVersion &Version);
969
970	/// \returns Expcnt bit mask for given isa \p Version.
971	unsigned getExpcntBitMask(const IsaVersion &Version);
972
973	/// \returns Lgkmcnt bit mask for given isa \p Version.
974	unsigned getLgkmcntBitMask(const IsaVersion &Version);
975
976	/// \returns Waitcnt bit mask for given isa \p Version.
977	unsigned getWaitcntBitMask(const IsaVersion &Version);
978
979	/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
980	unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
981
982	/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
983	unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
984
985	/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
986	unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
987
988	/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
989	/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively. Should not be used on gfx12+, the instruction
/// which needs it is deprecated.
992	///
993	/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
994	/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9)
995	/// \p Vmcnt = \p Waitcnt[15:14,3:0] (gfx9,10)
996	/// \p Vmcnt = \p Waitcnt[15:10] (gfx11)
997	/// \p Expcnt = \p Waitcnt[6:4] (pre-gfx11)
998	/// \p Expcnt = \p Waitcnt[2:0] (gfx11)
999	/// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10)
1000	/// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10)
1001	/// \p Lgkmcnt = \p Waitcnt[9:4] (gfx11)
1002	///
1003	void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
1004	unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
1005
1006	Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
1007
1008	/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
1009	unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1010	unsigned Vmcnt);
1011
1012	/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
1013	unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1014	unsigned Expcnt);
1015
1016	/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
1017	unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1018	unsigned Lgkmcnt);
1019
1020	/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version. Should not be used on gfx12+, the instruction which needs
/// it is deprecated.
1023	///
1024	/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
1025	/// Waitcnt[2:0] = \p Expcnt (gfx11+)
1026	/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9)
1027	/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9,10)
1028	/// Waitcnt[6:4] = \p Expcnt (pre-gfx11)
1029	/// Waitcnt[9:4] = \p Lgkmcnt (gfx11)
1030	/// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10)
1031	/// Waitcnt[13:8] = \p Lgkmcnt (gfx10)
1032	/// Waitcnt[15:10] = \p Vmcnt (gfx11)
1033	/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9,10)
1034	///
1035	/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
1036	/// isa \p Version.
1037	///
1038	unsigned encodeWaitcnt(const IsaVersion &Version,
1039	unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
1040
1041	unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
1042
// The following methods are only meaningful on targets that support
// S_WAIT_*CNT, introduced with gfx12.
1045
1046	/// \returns Loadcnt bit mask for given isa \p Version.
1047	/// Returns 0 for versions that do not support LOADcnt
1048	unsigned getLoadcntBitMask(const IsaVersion &Version);
1049
1050	/// \returns Samplecnt bit mask for given isa \p Version.
1051	/// Returns 0 for versions that do not support SAMPLEcnt
1052	unsigned getSamplecntBitMask(const IsaVersion &Version);
1053
1054	/// \returns Bvhcnt bit mask for given isa \p Version.
1055	/// Returns 0 for versions that do not support BVHcnt
1056	unsigned getBvhcntBitMask(const IsaVersion &Version);
1057
1058	/// \returns Dscnt bit mask for given isa \p Version.
1059	/// Returns 0 for versions that do not support DScnt
1060	unsigned getDscntBitMask(const IsaVersion &Version);
1061
/// \returns Kmcnt bit mask for given isa \p Version.
1063	/// Returns 0 for versions that do not support KMcnt
1064	unsigned getKmcntBitMask(const IsaVersion &Version);
1065
1066	/// \return STOREcnt or VScnt bit mask for given isa \p Version.
1067	/// returns 0 for versions that do not support STOREcnt or VScnt.
1068	/// STOREcnt and VScnt are the same counter, the name used
1069	/// depends on the ISA version.
1070	unsigned getStorecntBitMask(const IsaVersion &Version);
1071
1072	// The following are only meaningful on targets that support
1073	// S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT.
1074
1075	/// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given
1076	/// isa \p Version.
1077	Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt);
1078
1079	/// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given
1080	/// isa \p Version.
1081	Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt);
1082
1083	/// \returns \p Loadcnt and \p Dscnt components of \p Decoded encoded as an
1084	/// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa
1085	/// \p Version.
1086	unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
1087
1088	/// \returns \p Storecnt and \p Dscnt components of \p Decoded encoded as an
1089	/// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa
1090	/// \p Version.
1091	unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
1092
1093	namespace Hwreg {
1094
1095	using HwregId = EncodingField<`5`, `0`>;
1096	using HwregOffset = EncodingField<`10`, `6`>;
1097
1098	struct HwregSize : EncodingField<`15`, `11`, `32`> {
1099	using EncodingField::EncodingField;
1100	constexpr uint64_t encode() const { return Value - `1`; }
1101	static ValueType decode(uint64_t Encoded) { return Encoded + `1`; }
1102	};
1103
1104	using HwregEncoding = EncodingFields<HwregId, HwregOffset, HwregSize>;
1105
1106	} // namespace Hwreg
1107
1108	namespace DepCtr {
1109
1110	int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI);
1111	int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
1112	const MCSubtargetInfo &STI);
1113	bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
1114	const MCSubtargetInfo &STI);
1115	bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
1116	bool &IsDefault, const MCSubtargetInfo &STI);
1117
1118	/// \returns Decoded VaVdst from given immediate \p Encoded.
1119	unsigned decodeFieldVaVdst(unsigned Encoded);
1120
1121	/// \returns Decoded VmVsrc from given immediate \p Encoded.
1122	unsigned decodeFieldVmVsrc(unsigned Encoded);
1123
1124	/// \returns Decoded SaSdst from given immediate \p Encoded.
1125	unsigned decodeFieldSaSdst(unsigned Encoded);
1126
1127	/// \returns \p VmVsrc as an encoded Depctr immediate.
1128	unsigned encodeFieldVmVsrc(unsigned VmVsrc);
1129
1130	/// \returns \p Encoded combined with encoded \p VmVsrc.
1131	unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc);
1132
1133	/// \returns \p VaVdst as an encoded Depctr immediate.
1134	unsigned encodeFieldVaVdst(unsigned VaVdst);
1135
1136	/// \returns \p Encoded combined with encoded \p VaVdst.
1137	unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst);
1138
1139	/// \returns \p SaSdst as an encoded Depctr immediate.
1140	unsigned encodeFieldSaSdst(unsigned SaSdst);
1141
1142	/// \returns \p Encoded combined with encoded \p SaSdst.
1143	unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst);
1144
1145	} // namespace DepCtr
1146
1147	namespace Exp {
1148
1149	bool getTgtName(unsigned Id, StringRef &Name, int &Index);
1150
1151	LLVM_READONLY
1152	unsigned getTgtId(const StringRef Name);
1153
1154	LLVM_READNONE
1155	bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);
1156
1157	} // namespace Exp
1158
1159	namespace MTBUFFormat {
1160
1161	LLVM_READNONE
1162	int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);
1163
1164	void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);
1165
1166	int64_t getDfmt(const StringRef Name);
1167
1168	StringRef getDfmtName(unsigned Id);
1169
1170	int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);
1171
1172	StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);
1173
1174	bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);
1175
1176	bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);
1177
1178	int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI);
1179
1180	StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI);
1181
1182	bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI);
1183
1184	int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
1185	const MCSubtargetInfo &STI);
1186
1187	bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);
1188
1189	unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);
1190
1191	} // namespace MTBUFFormat
1192
1193	namespace SendMsg {
1194
1195	LLVM_READNONE
1196	bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI);
1197
1198	LLVM_READNONE
1199	bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
1200	bool Strict = true);
1201
1202	LLVM_READNONE
1203	bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
1204	const MCSubtargetInfo &STI, bool Strict = true);
1205
1206	LLVM_READNONE
1207	bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI);
1208
1209	LLVM_READNONE
1210	bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);
1211
1212	void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
1213	uint16_t &StreamId, const MCSubtargetInfo &STI);
1214
1215	LLVM_READNONE
1216	uint64_t encodeMsg(uint64_t MsgId,
1217	uint64_t OpId,
1218	uint64_t StreamId);
1219
1220	} // namespace SendMsg
1221
1222
1223	unsigned getInitialPSInputAddr(const Function &F);
1224
1225	bool getHasColorExport(const Function &F);
1226
1227	bool getHasDepthExport(const Function &F);
1228
1229	LLVM_READNONE
1230	bool isShader(CallingConv::ID CC);
1231
1232	LLVM_READNONE
1233	bool isGraphics(CallingConv::ID CC);
1234
1235	LLVM_READNONE
1236	bool isCompute(CallingConv::ID CC);
1237
1238	LLVM_READNONE
1239	bool isEntryFunctionCC(CallingConv::ID CC);
1240
1241	// These functions are considered entrypoints into the current module, i.e. they
1242	// are allowed to be called from outside the current module. This is different
1243	// from isEntryFunctionCC, which is only true for functions that are entered by
1244	// the hardware. Module entry points include all entry functions but also
1245	// include functions that can be called from other functions inside or outside
1246	// the current module. Module entry functions are allowed to allocate LDS.
1247	LLVM_READNONE
1248	bool isModuleEntryFunctionCC(CallingConv::ID CC);
1249
1250	LLVM_READNONE
1251	bool isChainCC(CallingConv::ID CC);
1252
1253	bool isKernelCC(const Function *Func);
1254
1255	// FIXME: Remove this when calling conventions cleaned up
1256	LLVM_READNONE
1257	inline bool isKernel(CallingConv::ID CC) {
1258	switch (CC) {
1259	case CallingConv::AMDGPU_KERNEL:
1260	case CallingConv::SPIR_KERNEL:
1261	return true;
1262	default:
1263	return false;
1264	}
1265	}
1266
1267	bool hasXNACK(const MCSubtargetInfo &STI);
1268	bool hasSRAMECC(const MCSubtargetInfo &STI);
1269	bool hasMIMG_R128(const MCSubtargetInfo &STI);
1270	bool hasA16(const MCSubtargetInfo &STI);
1271	bool hasG16(const MCSubtargetInfo &STI);
1272	bool hasPackedD16(const MCSubtargetInfo &STI);
1273	bool hasGDS(const MCSubtargetInfo &STI);
1274	unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler = false);
1275	unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI);
1276
1277	bool isSI(const MCSubtargetInfo &STI);
1278	bool isCI(const MCSubtargetInfo &STI);
1279	bool isVI(const MCSubtargetInfo &STI);
1280	bool isGFX9(const MCSubtargetInfo &STI);
1281	bool isGFX9_GFX10(const MCSubtargetInfo &STI);
1282	bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI);
1283	bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI);
1284	bool isGFX8Plus(const MCSubtargetInfo &STI);
1285	bool isGFX9Plus(const MCSubtargetInfo &STI);
1286	bool isNotGFX9Plus(const MCSubtargetInfo &STI);
1287	bool isGFX10(const MCSubtargetInfo &STI);
1288	bool isGFX10_GFX11(const MCSubtargetInfo &STI);
1289	bool isGFX10Plus(const MCSubtargetInfo &STI);
1290	bool isNotGFX10Plus(const MCSubtargetInfo &STI);
1291	bool isGFX10Before1030(const MCSubtargetInfo &STI);
1292	bool isGFX11(const MCSubtargetInfo &STI);
1293	bool isGFX11Plus(const MCSubtargetInfo &STI);
1294	bool isGFX12(const MCSubtargetInfo &STI);
1295	bool isGFX12Plus(const MCSubtargetInfo &STI);
1296	bool isNotGFX12Plus(const MCSubtargetInfo &STI);
1297	bool isNotGFX11Plus(const MCSubtargetInfo &STI);
1298	bool isGCN3Encoding(const MCSubtargetInfo &STI);
1299	bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
1300	bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
1301	bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
1302	bool isGFX10_3_GFX11(const MCSubtargetInfo &STI);
1303	bool isGFX90A(const MCSubtargetInfo &STI);
1304	bool isGFX940(const MCSubtargetInfo &STI);
1305	bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
1306	bool hasMAIInsts(const MCSubtargetInfo &STI);
1307	bool hasVOPD(const MCSubtargetInfo &STI);
1308	bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI);
1309	int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
1310	unsigned hasKernargPreload(const MCSubtargetInfo &STI);
1311	bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST);
1312
/// \returns true if \p Reg is a scalar register.
1314	bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
1315
/// \returns true if \p Reg occupies the high 16-bits of a 32-bit register.
1317	/// The bit indicating isHi is the LSB of the encoding.
1318	bool isHi(unsigned Reg, const MCRegisterInfo &MRI);
1319
1320	/// If \p Reg is a pseudo reg, return the correct hardware register given
1321	/// \p STI otherwise return \p Reg.
1322	unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
1323
1324	/// Convert hardware register \p Reg to a pseudo register
1325	LLVM_READNONE
1326	unsigned mc2PseudoReg(unsigned Reg);
1327
1328	LLVM_READNONE
1329	bool isInlineValue(unsigned Reg);
1330
1331	/// Is this an AMDGPU specific source operand? These include registers,
1332	/// inline constants, literals and mandatory literals (KImm).
1333	bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
1334
1335	/// Is this a KImm operand?
1336	bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo);
1337
/// Is this a floating-point operand?
1339	bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
1340
1341	/// Does this operand support only inlinable literals?
1342	bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
1343
1344	/// Get the size in bits of a register from the register class \p RC.
/// Get the size in bits of a register from the register class with ID
/// \p RCID.
1346
1347	/// Get the size in bits of a register from the register class \p RC.
1348	unsigned getRegBitWidth(const MCRegisterClass &RC);
1349
1350	/// Get size of register operand
1351	unsigned getRegOperandSize(const MCRegisterInfo MRI, const* MCInstrDesc &Desc,
1352	unsigned OpNo);
1353
1354	LLVM_READNONE
1355	inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
1356	switch (OpInfo.OperandType) {
1357	case AMDGPU::OPERAND_REG_IMM_INT32:
1358	case AMDGPU::OPERAND_REG_IMM_FP32:
1359	case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1360	case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1361	case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1362	case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1363	case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1364	case AMDGPU::OPERAND_REG_IMM_V2INT32:
1365	case AMDGPU::OPERAND_REG_IMM_V2FP32:
1366	case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1367	case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1368	case AMDGPU::OPERAND_KIMM32:
1369	case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
1370	case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
1371	return `4`;
1372
1373	case AMDGPU::OPERAND_REG_IMM_INT64:
1374	case AMDGPU::OPERAND_REG_IMM_FP64:
1375	case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1376	case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1377	case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1378	return `8`;
1379
1380	case AMDGPU::OPERAND_REG_IMM_INT16:
1381	case AMDGPU::OPERAND_REG_IMM_BF16:
1382	case AMDGPU::OPERAND_REG_IMM_FP16:
1383	case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
1384	case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1385	case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1386	case AMDGPU::OPERAND_REG_INLINE_C_BF16:
1387	case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1388	case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1389	case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
1390	case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1391	case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1392	case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
1393	case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1394	case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1395	case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
1396	case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1397	case AMDGPU::OPERAND_REG_IMM_V2INT16:
1398	case AMDGPU::OPERAND_REG_IMM_V2BF16:
1399	case AMDGPU::OPERAND_REG_IMM_V2FP16:
1400	return `2`;
1401
1402	default:
1403	llvm_unreachable("unhandled operand type");
1404	}
1405	}
1406
1407	LLVM_READNONE
1408	inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
1409	return getOperandSize(OpInfo: Desc.operands()[OpNo]);
1410	}
1411
1412	/// Is this literal inlinable, and not one of the values intended for floating
1413	/// point values.
1414	LLVM_READNONE
1415	inline bool isInlinableIntLiteral(int64_t Literal) {
1416	return Literal >= -`16` && Literal <= `64`;
1417	}
1418
1419	/// Is this literal inlinable
1420	LLVM_READNONE
1421	bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
1422
1423	LLVM_READNONE
1424	bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
1425
1426	LLVM_READNONE
1427	bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
1428
1429	LLVM_READNONE
1430	bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);
1431
1432	LLVM_READNONE
1433	bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
1434
1435	LLVM_READNONE
1436	bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi);
1437
1438	LLVM_READNONE
1439	std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal);
1440
1441	LLVM_READNONE
1442	std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal);
1443
1444	LLVM_READNONE
1445	std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal);
1446
1447	LLVM_READNONE
1448	bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType);
1449
1450	LLVM_READNONE
1451	bool isInlinableLiteralV2I16(uint32_t Literal);
1452
1453	LLVM_READNONE
1454	bool isInlinableLiteralV2BF16(uint32_t Literal);
1455
1456	LLVM_READNONE
1457	bool isInlinableLiteralV2F16(uint32_t Literal);
1458
1459	LLVM_READNONE
1460	bool isValid32BitLiteral(uint64_t Val, bool IsFP64);
1461
1462	bool isArgPassedInSGPR(const Argument *Arg);
1463
1464	bool isArgPassedInSGPR(const CallBase CB, unsigned* ArgNo);
1465
1466	LLVM_READONLY
1467	bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
1468	int64_t EncodedOffset);
1469
1470	LLVM_READONLY
1471	bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
1472	int64_t EncodedOffset,
1473	bool IsBuffer);
1474
1475	/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
1476	/// offsets.
1477	uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);
1478
1479	/// \returns The encoding that will be used for \p ByteOffset in the
1480	/// SMRD offset field, or std::nullopt if it won't fit. On GFX9 and GFX10
1481	/// S_LOAD instructions have a signed offset, on other subtargets it is
1482	/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
1483	std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
1484	int64_t ByteOffset, bool IsBuffer,
1485	bool HasSOffset = false);
1486
/// \return The encoding that can be used for a 32-bit literal offset in an SMRD
/// instruction. This is only useful on CI.
1489	std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
1490	int64_t ByteOffset);
1491
1492	/// For pre-GFX12 FLAT instructions the offset must be positive;
1493	/// MSB is ignored and forced to zero.
1494	///
1495	/// \return The number of bits available for the signed offset field in flat
1496	/// instructions. Note that some forms of the instruction disallow negative
1497	/// offsets.
1498	unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST);
1499
1500	/// \returns true if this offset is small enough to fit in the SMRD
1501	/// offset field. \p ByteOffset should be the offset in bytes and
1502	/// not the encoded offset.
1503	bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
1504
1505	LLVM_READNONE
1506	inline bool isLegalDPALU_DPPControl(unsigned DC) {
1507	return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
1508	}
1509
1510	/// \returns true if an instruction may have a 64-bit VGPR operand.
1511	bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc);
1512
1513	/// \returns true if an instruction is a DP ALU DPP.
1514	bool isDPALU_DPP(const MCInstrDesc &OpDesc);
1515
1516	/// \returns true if the intrinsic is divergent
1517	bool isIntrinsicSourceOfDivergence(unsigned IntrID);
1518
1519	/// \returns true if the intrinsic is uniform
1520	bool isIntrinsicAlwaysUniform(unsigned IntrID);
1521
/// \returns lds block size in terms of dwords.
1523	/// This is used to calculate the lds size encoded for PAL metadata 3.0+ which
1524	/// must be defined in terms of bytes.
1525	unsigned getLdsDwGranularity(const MCSubtargetInfo &ST);
1526
1527	} // end namespace AMDGPU
1528
1529	raw_ostream &operator<<(raw_ostream &OS,
1530	const AMDGPU::IsaInfo::TargetIDSetting S);
1531
1532	} // end namespace llvm
1533
1534	#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
1535

Browse the source code of llvm_projects/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h