AMDGPUSubtarget.h source code [llvm_projects/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h]

//=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU -------*- C++ -*-===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//==-----------------------------------------------------------------------===//
8	//
9	/// \file
10	/// Base class for AMDGPU specific classes of TargetSubtarget.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
15	#define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16
17	#include "llvm/IR/CallingConv.h"
18	#include "llvm/Support/Alignment.h"
19	#include "llvm/TargetParser/Triple.h"
20
21	namespace llvm {
22
// Forward declarations of types this header refers to without needing their
// full definitions. AMDGPUDwarfFlavour is an opaque enum declaration with a
// fixed `unsigned` underlying type, so it can be used by value.
enum AMDGPUDwarfFlavour : unsigned;
class Function;
class Instruction;
class MachineFunction;
class TargetMachine;
28
29	class AMDGPUSubtarget {
30	public:
31	enum Generation {
32	INVALID = `0`,
33	R600 = `1`,
34	R700 = `2`,
35	EVERGREEN = `3`,
36	NORTHERN_ISLANDS = `4`,
37	SOUTHERN_ISLANDS = `5`,
38	SEA_ISLANDS = `6`,
39	VOLCANIC_ISLANDS = `7`,
40	GFX9 = `8`,
41	GFX10 = `9`,
42	GFX11 = `10`,
43	GFX12 = `11`,
44	};
45
46	private:
47	Triple TargetTriple;
48
49	protected:
50	bool GCN3Encoding = false;
51	bool Has16BitInsts = false;
52	bool HasTrue16BitInsts = false;
53	bool EnableRealTrue16Insts = false;
54	bool HasMadMixInsts = false;
55	bool HasMadMacF32Insts = false;
56	bool HasDsSrc2Insts = false;
57	bool HasSDWA = false;
58	bool HasVOP3PInsts = false;
59	bool HasMulI24 = true;
60	bool HasMulU24 = true;
61	bool HasSMulHi = false;
62	bool HasInv2PiInlineImm = false;
63	bool HasFminFmaxLegacy = true;
64	bool EnablePromoteAlloca = false;
65	bool HasTrigReducedRange = false;
66	bool FastFMAF32 = false;
67	unsigned EUsPerCU = `4`;
68	unsigned MaxWavesPerEU = `10`;
69	unsigned LocalMemorySize = `0`;
70	unsigned AddressableLocalMemorySize = `0`;
71	char WavefrontSizeLog2 = `0`;
72
73	public:
74	AMDGPUSubtarget(Triple TT);
75
76	static const AMDGPUSubtarget &get(const MachineFunction &MF);
77	static const AMDGPUSubtarget &get(const TargetMachine &TM,
78	const Function &F);
79
80	/// \returns Default range flat work group size for a calling convention.
81	std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
82
83	/// \returns Subtarget's default pair of minimum/maximum flat work group sizes
84	/// for function \p F, or minimum/maximum flat work group sizes explicitly
85	/// requested using "amdgpu-flat-work-group-size" attribute attached to
86	/// function \p F.
87	///
88	/// \returns Subtarget's default values if explicitly requested values cannot
89	/// be converted to integer, or violate subtarget's specifications.
90	std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
91
92	/// \returns Subtarget's default pair of minimum/maximum number of waves per
93	/// execution unit for function \p F, or minimum/maximum number of waves per
94	/// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
95	/// attached to function \p F.
96	///
97	/// \returns Subtarget's default values if explicitly requested values cannot
98	/// be converted to integer, violate subtarget's specifications, or are not
99	/// compatible with minimum/maximum number of waves limited by flat work group
100	/// size, register usage, and/or lds usage.
101	std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const {
102	// Default/requested minimum/maximum flat work group sizes.
103	std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F);
104	return getWavesPerEU(F, FlatWorkGroupSizes);
105	}
106
107	/// Overload which uses the specified values for the flat work group sizes,
108	/// rather than querying the function itself. \p FlatWorkGroupSizes Should
109	/// correspond to the function's value for getFlatWorkGroupSizes.
110	std::pair<unsigned, unsigned>
111	getWavesPerEU(const Function &F,
112	std::pair<unsigned, unsigned> FlatWorkGroupSizes) const;
113	std::pair<unsigned, unsigned> getEffectiveWavesPerEU(
114	std::pair<unsigned, unsigned> WavesPerEU,
115	std::pair<unsigned, unsigned> FlatWorkGroupSizes) const;
116
117	/// Return the amount of LDS that can be used that will not restrict the
118	/// occupancy lower than WaveCount.
119	unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
120	const Function &) const;
121
122	/// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if
123	/// the given LDS memory size is the only constraint.
124	unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
125
126	unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
127
128	bool isAmdHsaOS() const {
129	return TargetTriple.getOS() == Triple::AMDHSA;
130	}
131
132	bool isAmdPalOS() const {
133	return TargetTriple.getOS() == Triple::AMDPAL;
134	}
135
136	bool isMesa3DOS() const {
137	return TargetTriple.getOS() == Triple::Mesa3D;
138	}
139
140	bool isMesaKernel(const Function &F) const;
141
142	bool isAmdHsaOrMesa(const Function &F) const {
143	return isAmdHsaOS() \|\| isMesaKernel(F);
144	}
145
146	bool isGCN() const {
147	return TargetTriple.getArch() == Triple::amdgcn;
148	}
149
150	bool isGCN3Encoding() const {
151	return GCN3Encoding;
152	}
153
154	bool has16BitInsts() const {
155	return Has16BitInsts;
156	}
157
158	/// Return true if the subtarget supports True16 instructions.
159	bool hasTrue16BitInsts() const { return HasTrue16BitInsts; }
160
161	/// Return true if real (non-fake) variants of True16 instructions using
162	/// 16-bit registers should be code-generated. Fake True16 instructions are
163	/// identical to non-fake ones except that they take 32-bit registers as
164	/// operands and always use their low halves.
165	// TODO: Remove and use hasTrue16BitInsts() instead once True16 is fully
166	// supported and the support for fake True16 instructions is removed.
167	bool useRealTrue16Insts() const;
168
169	bool hasMadMixInsts() const {
170	return HasMadMixInsts;
171	}
172
173	bool hasMadMacF32Insts() const {
174	return HasMadMacF32Insts \|\| !isGCN();
175	}
176
177	bool hasDsSrc2Insts() const {
178	return HasDsSrc2Insts;
179	}
180
181	bool hasSDWA() const {
182	return HasSDWA;
183	}
184
185	bool hasVOP3PInsts() const {
186	return HasVOP3PInsts;
187	}
188
189	bool hasMulI24() const {
190	return HasMulI24;
191	}
192
193	bool hasMulU24() const {
194	return HasMulU24;
195	}
196
197	bool hasSMulHi() const {
198	return HasSMulHi;
199	}
200
201	bool hasInv2PiInlineImm() const {
202	return HasInv2PiInlineImm;
203	}
204
205	bool hasFminFmaxLegacy() const {
206	return HasFminFmaxLegacy;
207	}
208
209	bool hasTrigReducedRange() const {
210	return HasTrigReducedRange;
211	}
212
213	bool hasFastFMAF32() const {
214	return FastFMAF32;
215	}
216
217	bool isPromoteAllocaEnabled() const {
218	return EnablePromoteAlloca;
219	}
220
221	unsigned getWavefrontSize() const {
222	return `1` << WavefrontSizeLog2;
223	}
224
225	unsigned getWavefrontSizeLog2() const {
226	return WavefrontSizeLog2;
227	}
228
229	unsigned getLocalMemorySize() const {
230	return LocalMemorySize;
231	}
232
233	unsigned getAddressableLocalMemorySize() const {
234	return AddressableLocalMemorySize;
235	}
236
237	/// Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the
238	/// "CU" is the unit onto which workgroups are mapped. This takes WGP mode vs.
239	/// CU mode into account.
240	unsigned getEUsPerCU() const { return EUsPerCU; }
241
242	Align getAlignmentForImplicitArgPtr() const {
243	return isAmdHsaOS() ? Align (`8`) : Align (`4`);
244	}
245
246	/// Returns the offset in bytes from the start of the input buffer
247	/// of the first explicit kernel argument.
248	unsigned getExplicitKernelArgOffset() const {
249	switch (TargetTriple.getOS()) {
250	case Triple::AMDHSA:
251	case Triple::AMDPAL:
252	case Triple::Mesa3D:
253	return `0`;
254	case Triple::UnknownOS:
255	default:
256	// For legacy reasons unknown/other is treated as a different version of
257	// mesa.
258	return `36`;
259	}
260
261	llvm_unreachable("invalid triple OS");
262	}
263
264	/// \returns Maximum number of work groups per compute unit supported by the
265	/// subtarget and limited by given \p FlatWorkGroupSize.
266	virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = `0`;
267
268	/// \returns Minimum flat work group size supported by the subtarget.
269	virtual unsigned getMinFlatWorkGroupSize() const = `0`;
270
271	/// \returns Maximum flat work group size supported by the subtarget.
272	virtual unsigned getMaxFlatWorkGroupSize() const = `0`;
273
274	/// \returns Number of waves per execution unit required to support the given
275	/// \p FlatWorkGroupSize.
276	virtual unsigned
277	getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const = `0`;
278
279	/// \returns Minimum number of waves per execution unit supported by the
280	/// subtarget.
281	virtual unsigned getMinWavesPerEU() const = `0`;
282
283	/// \returns Maximum number of waves per execution unit supported by the
284	/// subtarget without any kind of limitation.
285	unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; }
286
287	/// Return the maximum workitem ID value in the function, for the given (0, 1,
288	/// 2) dimension.
289	unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const;
290
291	/// Return the number of work groups for the function.
292	SmallVector<unsigned> getMaxNumWorkGroups(const Function &F) const;
293
294	/// Return true if only a single workitem can be active in a wave.
295	bool isSingleLaneExecution(const Function &Kernel) const;
296
297	/// Creates value range metadata on an workitemid. intrinsic call or load.*
298	bool makeLIDRangeMetadata(Instruction I) const*;
299
300	/// \returns Number of bytes of arguments that are passed to a shader or
301	/// kernel in addition to the explicit ones declared for the function.
302	unsigned getImplicitArgNumBytes(const Function &F) const;
303	uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const;
304	unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const;
305
306	/// \returns Corresponding DWARF register number mapping flavour for the
307	/// \p WavefrontSize.
308	AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const;
309
310	virtual ~AMDGPUSubtarget() = default;
311	};
312
313	} // end namespace llvm
314
315	#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
316

Browse the source code of llvm_projects/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h