AMDGPU.cpp source code [llvm_projects/clang/lib/Basic/Targets/AMDGPU.cpp]

1	//===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements AMDGPU TargetInfo objects.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "AMDGPU.h"
14	#include "clang/Basic/Builtins.h"
15	#include "clang/Basic/Diagnostic.h"
16	#include "clang/Basic/LangOptions.h"
17	#include "clang/Basic/MacroBuilder.h"
18	#include "clang/Basic/TargetBuiltins.h"
19	#include "llvm/ADT/SmallString.h"
20	using namespace clang;
21	using namespace clang::targets;
22
23	namespace clang {
24	namespace targets {
25
26	// If you edit the description strings, make sure you update
27	// getPointerWidthV().
28
29	const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
30	llvm::AMDGPUAS::FLAT_ADDRESS, // Default
31	llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global
32	llvm::AMDGPUAS::LOCAL_ADDRESS, // opencl_local
33	llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant
34	llvm::AMDGPUAS::PRIVATE_ADDRESS, // opencl_private
35	llvm::AMDGPUAS::FLAT_ADDRESS, // opencl_generic
36	llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_device
37	llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_host
38	llvm::AMDGPUAS::GLOBAL_ADDRESS, // cuda_device
39	llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant
40	llvm::AMDGPUAS::LOCAL_ADDRESS, // cuda_shared
41	llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global
42	llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global_device
43	llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global_host
44	llvm::AMDGPUAS::LOCAL_ADDRESS, // sycl_local
45	llvm::AMDGPUAS::PRIVATE_ADDRESS, // sycl_private
46	llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_sptr
47	llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr
48	llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64
49	llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared
50	llvm::AMDGPUAS::CONSTANT_ADDRESS, // hlsl_constant
51	// FIXME(pr/122103): hlsl_private -> PRIVATE is wrong, but at least this
52	// will break loudly.
53	llvm::AMDGPUAS::PRIVATE_ADDRESS, // hlsl_private
54	llvm::AMDGPUAS::GLOBAL_ADDRESS, // hlsl_device
55	llvm::AMDGPUAS::PRIVATE_ADDRESS, // hlsl_input
56	llvm::AMDGPUAS::PRIVATE_ADDRESS, // hlsl_output
57	llvm::AMDGPUAS::GLOBAL_ADDRESS, // hlsl_push_constant
58	};
59
60	const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
61	llvm::AMDGPUAS::PRIVATE_ADDRESS, // Default
62	llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global
63	llvm::AMDGPUAS::LOCAL_ADDRESS, // opencl_local
64	llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant
65	llvm::AMDGPUAS::PRIVATE_ADDRESS, // opencl_private
66	llvm::AMDGPUAS::FLAT_ADDRESS, // opencl_generic
67	llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_device
68	llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_host
69	llvm::AMDGPUAS::GLOBAL_ADDRESS, // cuda_device
70	llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant
71	llvm::AMDGPUAS::LOCAL_ADDRESS, // cuda_shared
72	// SYCL address space values for this map are dummy
73	llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global
74	llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global_device
75	llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global_host
76	llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_local
77	llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_private
78	llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_sptr
79	llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr
80	llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64
81	llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared
82	llvm::AMDGPUAS::CONSTANT_ADDRESS, // hlsl_constant
83	llvm::AMDGPUAS::PRIVATE_ADDRESS, // hlsl_private
84	llvm::AMDGPUAS::GLOBAL_ADDRESS, // hlsl_device
85	llvm::AMDGPUAS::PRIVATE_ADDRESS, // hlsl_input
86	llvm::AMDGPUAS::PRIVATE_ADDRESS, // hlsl_output
87	llvm::AMDGPUAS::GLOBAL_ADDRESS, // hlsl_push_constant
88	};
89	} // namespace targets
90	} // namespace clang
91
92	static constexpr int NumBuiltins =
93	clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin;
94
95	#define GET_BUILTIN_STR_TABLE
96	#include "clang/Basic/BuiltinsAMDGPU.inc"
97	#undef GET_BUILTIN_STR_TABLE
98
99	static constexpr Builtin::Info BuiltinInfos[] = {
100	#define GET_BUILTIN_INFOS
101	#include "clang/Basic/BuiltinsAMDGPU.inc"
102	#undef GET_BUILTIN_INFOS
103	};
104	static_assert(std::size(BuiltinInfos) == NumBuiltins);
105
106	const char *const AMDGPUTargetInfo::GCCRegNames[] = {
107	"v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
108	"v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
109	"v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
110	"v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
111	"v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
112	"v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
113	"v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
114	"v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
115	"v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
116	"v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
117	"v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
118	"v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
119	"v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
120	"v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
121	"v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
122	"v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
123	"v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
124	"v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
125	"v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
126	"v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
127	"v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
128	"v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
129	"v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
130	"v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
131	"v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
132	"v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
133	"v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
134	"v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
135	"v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
136	"s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
137	"s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
138	"s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
139	"s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
140	"s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
141	"s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
142	"s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
143	"s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
144	"s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
145	"s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
146	"s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
147	"s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
148	"s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
149	"s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
150	"m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
151	"flat_scratch_lo", "flat_scratch_hi",
152	"a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
153	"a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
154	"a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
155	"a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
156	"a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
157	"a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
158	"a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
159	"a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
160	"a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
161	"a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
162	"a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
163	"a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
164	"a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
165	"a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
166	"a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
167	"a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
168	"a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
169	"a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
170	"a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
171	"a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
172	"a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
173	"a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
174	"a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
175	"a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
176	"a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
177	"a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
178	"a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
179	"a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
180	"a252", "a253", "a254", "a255"
181	};
182
183	ArrayRef<const char > AMDGPUTargetInfo::getGCCRegNames() const* {
184	return llvm::ArrayRef(GCCRegNames);
185	}
186
187	bool AMDGPUTargetInfo::initFeatureMap(
188	llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
189	const std::vector<std::string> &FeatureVec) const {
190
191	using namespace llvm::AMDGPU;
192
193	if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec))
194	return false;
195
196	auto HasError = fillAMDGPUFeatureMap(GPU: CPU, T: getTriple(), Features);
197	switch (HasError.first) {
198	default:
199	break;
200	case llvm::AMDGPU::INVALID_FEATURE_COMBINATION:
201	Diags.Report(DiagID: diag::err_invalid_feature_combination) << HasError.second;
202	return false;
203	case llvm::AMDGPU::UNSUPPORTED_TARGET_FEATURE:
204	Diags.Report(DiagID: diag::err_opt_not_valid_on_target) << HasError.second;
205	return false;
206	}
207
208	return true;
209	}
210
211	void AMDGPUTargetInfo::fillValidCPUList(
212	SmallVectorImpl<StringRef> &Values) const {
213	if (getTriple().isAMDGCN())
214	llvm::AMDGPU::fillValidArchListAMDGCN(Values);
215	else
216	llvm::AMDGPU::fillValidArchListR600(Values);
217	}
218
219	void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
220	AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
221	}
222
223	AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
224	const TargetOptions &Opts)
225	: TargetInfo (Triple),
226	GPUKind(Triple.isAMDGCN() ? llvm::AMDGPU::parseArchAMDGCN(CPU: Opts.CPU)
227	: llvm::AMDGPU::parseArchR600(CPU: Opts.CPU)),
228	GPUFeatures(Triple.isAMDGCN() ? llvm::AMDGPU::getArchAttrAMDGCN(AK: GPUKind)
229	: llvm::AMDGPU::getArchAttrR600(AK: GPUKind)) {
230	resetDataLayout();
231
232	setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D \|\|
233	!Triple.isAMDGCN());
234	UseAddrSpaceMapMangling = true;
235
236	if (Triple.isAMDGCN()) {
237	// __bf16 is always available as a load/store only type on AMDGCN.
238	BFloat16Width = BFloat16Align = `16`;
239	BFloat16Format = &llvm::APFloat::BFloat();
240	}
241
242	// TODO: This is not really true for targets without half support, but also
243	// should just be assumed true for the dummy target.
244	HasFastHalfType = true;
245	HasFloat16 = true;
246	WavefrontSize = (GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32) ? `32` : `64`;
247
248	// Set pointer width and alignment for the generic address space.
249	PointerWidth = PointerAlign = getPointerWidthV(AS: LangAS::Default);
250	if (getMaxPointerWidth() == `64`) {
251	LongWidth = LongAlign = `64`;
252	SizeType = UnsignedLong;
253	PtrDiffType = SignedLong;
254	IntPtrType = SignedLong;
255	}
256
257	MaxAtomicPromoteWidth = MaxAtomicInlineWidth = `64`;
258	CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP);
259
260	for (auto F : {"image-insts", "gws", "vmem-to-lds-load-insts"}) {
261	if (GPUKind != llvm::AMDGPU::GK_NONE)
262	ReadOnlyFeatures.insert(key: F);
263	}
264	HalfArgsAndReturns = true;
265	}
266
267	void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts,
268	const TargetInfo *Aux) {
269	TargetInfo::adjust(Diags, Opts, Aux);
270	// ToDo: There are still a few places using default address space as private
271	// address space in OpenCL, which needs to be cleaned up, then the references
272	// to OpenCL can be removed from the following line.
273	setAddressSpaceMap((Opts.OpenCL && !Opts.OpenCLGenericAddressSpace) \|\|
274	!getTriple().isAMDGCN());
275
276	AtomicOpts = AtomicOptions (Opts);
277	}
278
279	llvm::SmallVector<Builtin::InfosShard>
280	AMDGPUTargetInfo::getTargetBuiltins() const {
281	return {{.Strings: &BuiltinStrings, .Infos: BuiltinInfos}};
282	}
283
284	void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
285	MacroBuilder &Builder) const {
286	Builder.defineMacro(Name: "__AMD__");
287	Builder.defineMacro(Name: "__AMDGPU__");
288
289	if (getTriple().isAMDGCN())
290	Builder.defineMacro(Name: "__AMDGCN__");
291	else
292	Builder.defineMacro(Name: "__R600__");
293
294	// TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
295	// removed in the near future.
296	if (hasFMAF())
297	Builder.defineMacro(Name: "__HAS_FMAF__");
298	if (hasFastFMAF())
299	Builder.defineMacro(Name: "FP_FAST_FMAF");
300	if (hasLDEXPF())
301	Builder.defineMacro(Name: "__HAS_LDEXPF__");
302	if (hasFP64())
303	Builder.defineMacro(Name: "__HAS_FP64__");
304	if (hasFastFMA())
305	Builder.defineMacro(Name: "FP_FAST_FMA");
306	if (HasFastHalfType)
307	Builder.defineMacro(Name: "FP_FAST_FMA_HALF");
308
309	Builder.defineMacro(Name: "__AMDGCN_CUMODE__", Value: Twine(CUMode));
310
311	// Legacy HIP host code relies on these default attributes to be defined.
312	bool IsHIPHost = Opts.HIP && !Opts.CUDAIsDevice;
313	if (GPUKind == llvm::AMDGPU::GK_NONE && !IsHIPHost)
314	return;
315
316	llvm::SmallString<`16`> CanonName =
317	(getTriple().isAMDGCN() ? getArchNameAMDGCN(AK: GPUKind)
318	: getArchNameR600(AK: GPUKind));
319
320	// Sanitize the name of generic targets.
321	// e.g. gfx10-1-generic -> gfx10_1_generic
322	if (GPUKind >= llvm::AMDGPU::GK_AMDGCN_GENERIC_FIRST &&
323	GPUKind <= llvm::AMDGPU::GK_AMDGCN_GENERIC_LAST) {
324	llvm::replace(Range&: CanonName, OldValue: `'-'`, NewValue: `'_'`);
325	}
326
327	Builder.defineMacro(Name: Twine("__") + Twine(CanonName) + Twine("__"));
328	// Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10___
329	if (getTriple().isAMDGCN() && !IsHIPHost) {
330	assert(StringRef(CanonName).starts_with("gfx") &&
331	"Invalid amdgcn canonical name");
332	StringRef CanonFamilyName = getArchFamilyNameAMDGCN(AK: GPUKind);
333	Builder.defineMacro(Name: Twine("__") + Twine(CanonFamilyName.upper()) +
334	Twine("__"));
335	Builder.defineMacro(Name: "__amdgcn_processor__",
336	Value: Twine("\"") + Twine(CanonName) + Twine("\""));
337	Builder.defineMacro(Name: "__amdgcn_target_id__",
338	Value: Twine("\"") + Twine(*getTargetID()) + Twine("\""));
339	for (auto F : getAllPossibleTargetIDFeatures(T: getTriple(), Processor: CanonName)) {
340	auto Loc = OffloadArchFeatures.find(Key: F);
341	if (Loc != OffloadArchFeatures.end()) {
342	std::string NewF = F.str();
343	llvm::replace(Range&: NewF, OldValue: `'-'`, NewValue: `'_'`);
344	Builder.defineMacro(Name: Twine("__amdgcn_feature_") + Twine(NewF) +
345	Twine("__"),
346	Value: Loc ->second ? "1" : "0");
347	}
348	}
349	}
350
351	if (Opts.AtomicIgnoreDenormalMode)
352	Builder.defineMacro(Name: "__AMDGCN_UNSAFE_FP_ATOMICS__");
353	}
354
355	void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
356	assert(HalfFormat == Aux->HalfFormat);
357	assert(FloatFormat == Aux->FloatFormat);
358	assert(DoubleFormat == Aux->DoubleFormat);
359
360	// On x86_64 long double is 80-bit extended precision format, which is
361	// not supported by AMDGPU. 128-bit floating point format is also not
362	// supported by AMDGPU. Therefore keep its own format for these two types.
363	auto SaveLongDoubleFormat = LongDoubleFormat;
364	auto SaveFloat128Format = Float128Format;
365	auto SaveLongDoubleWidth = LongDoubleWidth;
366	auto SaveLongDoubleAlign = LongDoubleAlign;
367	copyAuxTarget(Aux);
368	LongDoubleFormat = SaveLongDoubleFormat;
369	Float128Format = SaveFloat128Format;
370	LongDoubleWidth = SaveLongDoubleWidth;
371	LongDoubleAlign = SaveLongDoubleAlign;
372	// For certain builtin types support on the host target, claim they are
373	// support to pass the compilation of the host code during the device-side
374	// compilation.
375	// FIXME: As the side effect, we also accept `__float128` uses in the device
376	// code. To rejct these builtin types supported in the host target but not in
377	// the device target, one approach would support `device_builtin` attribute
378	// so that we could tell the device builtin types from the host ones. The
379	// also solves the different representations of the same builtin type, such
380	// as `size_t` in the MSVC environment.
381	if (Aux->hasFloat128Type()) {
382	HasFloat128 = true;
383	Float128Format = DoubleFormat;
384	}
385	}
386

Browse the source code of llvm_projects/clang/lib/Basic/Targets/AMDGPU.cpp