AMDGPU.cpp source code [llvm_projects/clang/lib/Basic/Targets/AMDGPU.cpp]

1	//===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements AMDGPU TargetInfo objects.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "AMDGPU.h"
14	#include "clang/Basic/Builtins.h"
15	#include "clang/Basic/CodeGenOptions.h"
16	#include "clang/Basic/Diagnostic.h"
17	#include "clang/Basic/LangOptions.h"
18	#include "clang/Basic/MacroBuilder.h"
19	#include "clang/Basic/TargetBuiltins.h"
20	#include "llvm/ADT/SmallString.h"
21	using namespace clang;
22	using namespace clang::targets;
23
24	namespace clang {
25	namespace targets {
26
// If you edit the description strings, make sure you update
// getPointerWidthV().

// LLVM data layout description installed by the AMDGPUTargetInfo constructor
// when the triple is not amdgcn (the R600 case).
static const char *const DataLayoutStringR600 =
    "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";

// LLVM data layout description installed by the AMDGPUTargetInfo constructor
// for amdgcn triples. The adjacent literals are split on field boundaries;
// the concatenated string is what the target actually sees.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"
    "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
    "-ni:7:8:9";
40
41	const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
42	llvm::AMDGPUAS::FLAT_ADDRESS, // Default
43	llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global
44	llvm::AMDGPUAS::LOCAL_ADDRESS, // opencl_local
45	llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant
46	llvm::AMDGPUAS::PRIVATE_ADDRESS, // opencl_private
47	llvm::AMDGPUAS::FLAT_ADDRESS, // opencl_generic
48	llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_device
49	llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_host
50	llvm::AMDGPUAS::GLOBAL_ADDRESS, // cuda_device
51	llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant
52	llvm::AMDGPUAS::LOCAL_ADDRESS, // cuda_shared
53	llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global
54	llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global_device
55	llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global_host
56	llvm::AMDGPUAS::LOCAL_ADDRESS, // sycl_local
57	llvm::AMDGPUAS::PRIVATE_ADDRESS, // sycl_private
58	llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_sptr
59	llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr
60	llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64
61	llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared
62	};
63
64	const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
65	llvm::AMDGPUAS::PRIVATE_ADDRESS, // Default
66	llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global
67	llvm::AMDGPUAS::LOCAL_ADDRESS, // opencl_local
68	llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant
69	llvm::AMDGPUAS::PRIVATE_ADDRESS, // opencl_private
70	llvm::AMDGPUAS::FLAT_ADDRESS, // opencl_generic
71	llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_device
72	llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_host
73	llvm::AMDGPUAS::GLOBAL_ADDRESS, // cuda_device
74	llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant
75	llvm::AMDGPUAS::LOCAL_ADDRESS, // cuda_shared
76	// SYCL address space values for this map are dummy
77	llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global
78	llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global_device
79	llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global_host
80	llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_local
81	llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_private
82	llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_sptr
83	llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr
84	llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64
85	llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared
86
87	};
88	} // namespace targets
89	} // namespace clang
90
91	static constexpr Builtin::Info BuiltinInfo[] = {
92	#define BUILTIN(ID, TYPE, ATTRS) \
93	{#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
94	#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
95	{#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
96	#include "clang/Basic/BuiltinsAMDGPU.def"
97	};
98
99	const char *const AMDGPUTargetInfo::GCCRegNames[] = {
100	"v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
101	"v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
102	"v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
103	"v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
104	"v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
105	"v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
106	"v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
107	"v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
108	"v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
109	"v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
110	"v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
111	"v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
112	"v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
113	"v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
114	"v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
115	"v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
116	"v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
117	"v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
118	"v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
119	"v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
120	"v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
121	"v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
122	"v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
123	"v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
124	"v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
125	"v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
126	"v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
127	"v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
128	"v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
129	"s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
130	"s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
131	"s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
132	"s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
133	"s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
134	"s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
135	"s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
136	"s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
137	"s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
138	"s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
139	"s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
140	"s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
141	"s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
142	"s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
143	"m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
144	"flat_scratch_lo", "flat_scratch_hi",
145	"a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
146	"a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
147	"a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
148	"a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
149	"a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
150	"a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
151	"a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
152	"a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
153	"a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
154	"a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
155	"a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
156	"a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
157	"a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
158	"a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
159	"a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
160	"a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
161	"a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
162	"a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
163	"a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
164	"a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
165	"a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
166	"a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
167	"a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
168	"a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
169	"a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
170	"a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
171	"a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
172	"a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
173	"a252", "a253", "a254", "a255"
174	};
175
176	ArrayRef<const char > AMDGPUTargetInfo::getGCCRegNames() const* {
177	return llvm::ArrayRef(GCCRegNames);
178	}
179
180	bool AMDGPUTargetInfo::initFeatureMap(
181	llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
182	const std::vector<std::string> &FeatureVec) const {
183
184	using namespace llvm::AMDGPU;
185	fillAMDGPUFeatureMap(GPU: CPU, T: getTriple(), Features);
186	if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec))
187	return false;
188
189	// TODO: Should move this logic into TargetParser
190	auto HasError = insertWaveSizeFeature(GPU: CPU, T: getTriple(), Features);
191	switch (HasError.first) {
192	default:
193	break;
194	case llvm::AMDGPU::INVALID_FEATURE_COMBINATION:
195	Diags.Report(DiagID: diag::err_invalid_feature_combination) << HasError.second;
196	return false;
197	case llvm::AMDGPU::UNSUPPORTED_TARGET_FEATURE:
198	Diags.Report(DiagID: diag::err_opt_not_valid_on_target) << HasError.second;
199	return false;
200	}
201
202	return true;
203	}
204
205	void AMDGPUTargetInfo::fillValidCPUList(
206	SmallVectorImpl<StringRef> &Values) const {
207	if (isAMDGCN(TT: getTriple()))
208	llvm::AMDGPU::fillValidArchListAMDGCN(Values);
209	else
210	llvm::AMDGPU::fillValidArchListR600(Values);
211	}
212
213	void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
214	AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
215	}
216
217	AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
218	const TargetOptions &Opts)
219	: TargetInfo (Triple),
220	GPUKind(isAMDGCN(TT: Triple) ?
221	llvm::AMDGPU::parseArchAMDGCN(CPU: Opts.CPU) :
222	llvm::AMDGPU::parseArchR600(CPU: Opts.CPU)),
223	GPUFeatures(isAMDGCN(TT: Triple) ?
224	llvm::AMDGPU::getArchAttrAMDGCN(AK: GPUKind) :
225	llvm::AMDGPU::getArchAttrR600(AK: GPUKind)) {
226	resetDataLayout(DL: isAMDGCN(TT: getTriple()) ? DataLayoutStringAMDGCN
227	: DataLayoutStringR600);
228
229	setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D \|\|
230	!isAMDGCN(TT: Triple));
231	UseAddrSpaceMapMangling = true;
232
233	if (isAMDGCN(TT: Triple)) {
234	// __bf16 is always available as a load/store only type on AMDGCN.
235	BFloat16Width = BFloat16Align = `16`;
236	BFloat16Format = &llvm::APFloat::BFloat();
237	}
238
239	HasLegalHalfType = true;
240	HasFloat16 = true;
241	WavefrontSize = (GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32) ? `32` : `64`;
242	AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
243
244	// Set pointer width and alignment for the generic address space.
245	PointerWidth = PointerAlign = getPointerWidthV(AS: LangAS::Default);
246	if (getMaxPointerWidth() == `64`) {
247	LongWidth = LongAlign = `64`;
248	SizeType = UnsignedLong;
249	PtrDiffType = SignedLong;
250	IntPtrType = SignedLong;
251	}
252
253	MaxAtomicPromoteWidth = MaxAtomicInlineWidth = `64`;
254	CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP);
255	for (auto F : {"image-insts", "gws"})
256	ReadOnlyFeatures.insert(key: F);
257	HalfArgsAndReturns = true;
258	}
259
260	void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
261	TargetInfo::adjust(Diags, Opts);
262	// ToDo: There are still a few places using default address space as private
263	// address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
264	// can be removed from the following line.
265	setAddressSpaceMap(/DefaultIsPrivate=/Opts.OpenCL \|\|
266	!isAMDGCN(TT: getTriple()));
267	}
268
269	ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
270	return llvm::ArrayRef(BuiltinInfo,
271	clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin);
272	}
273
274	void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
275	MacroBuilder &Builder) const {
276	Builder.defineMacro(Name: "__AMD__");
277	Builder.defineMacro(Name: "__AMDGPU__");
278
279	if (isAMDGCN(TT: getTriple()))
280	Builder.defineMacro(Name: "__AMDGCN__");
281	else
282	Builder.defineMacro(Name: "__R600__");
283
284	// Legacy HIP host code relies on these default attributes to be defined.
285	bool IsHIPHost = Opts.HIP && !Opts.CUDAIsDevice;
286	if (GPUKind == llvm::AMDGPU::GK_NONE && !IsHIPHost)
287	return;
288
289	llvm::SmallString<`16`> CanonName =
290	(isAMDGCN(TT: getTriple()) ? getArchNameAMDGCN(AK: GPUKind)
291	: getArchNameR600(AK: GPUKind));
292
293	// Sanitize the name of generic targets.
294	// e.g. gfx10-1-generic -> gfx10_1_generic
295	if (GPUKind >= llvm::AMDGPU::GK_AMDGCN_GENERIC_FIRST &&
296	GPUKind <= llvm::AMDGPU::GK_AMDGCN_GENERIC_LAST) {
297	std::replace(first: CanonName.begin(), last: CanonName.end(), old_value: `'-'`, new_value: `'_'`);
298	}
299
300	Builder.defineMacro(Name: Twine ("__") + Twine (CanonName) + Twine ("__"));
301	// Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10___
302	if (isAMDGCN(TT: getTriple()) && !IsHIPHost) {
303	assert(StringRef(CanonName).starts_with("gfx") &&
304	"Invalid amdgcn canonical name");
305	StringRef CanonFamilyName = getArchFamilyNameAMDGCN(AK: GPUKind);
306	Builder.defineMacro(Name: Twine ("__") + Twine (CanonFamilyName.upper()) +
307	Twine ("__"));
308	Builder.defineMacro(Name: "__amdgcn_processor__",
309	Value: Twine ("\"") + Twine (CanonName) + Twine ("\""));
310	Builder.defineMacro(Name: "__amdgcn_target_id__",
311	Value: Twine ("\"") + Twine (*getTargetID()) + Twine ("\""));
312	for (auto F : getAllPossibleTargetIDFeatures(T: getTriple(), Processor: CanonName)) {
313	auto Loc = OffloadArchFeatures.find(Key: F);
314	if (Loc != OffloadArchFeatures.end()) {
315	std::string NewF = F.str();
316	std::replace(first: NewF.begin(), last: NewF.end(), old_value: `'-'`, new_value: `'_'`);
317	Builder.defineMacro(Name: Twine ("__amdgcn_feature_") + Twine (NewF) +
318	Twine ("__"),
319	Value: Loc ->second ? "1" : "0");
320	}
321	}
322	}
323
324	if (AllowAMDGPUUnsafeFPAtomics)
325	Builder.defineMacro(Name: "__AMDGCN_UNSAFE_FP_ATOMICS__");
326
327	// TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
328	// removed in the near future.
329	if (hasFMAF())
330	Builder.defineMacro(Name: "__HAS_FMAF__");
331	if (hasFastFMAF())
332	Builder.defineMacro(Name: "FP_FAST_FMAF");
333	if (hasLDEXPF())
334	Builder.defineMacro(Name: "__HAS_LDEXPF__");
335	if (hasFP64())
336	Builder.defineMacro(Name: "__HAS_FP64__");
337	if (hasFastFMA())
338	Builder.defineMacro(Name: "FP_FAST_FMA");
339
340	Builder.defineMacro(Name: "__AMDGCN_WAVEFRONT_SIZE__", Value: Twine (WavefrontSize));
341	// ToDo: deprecate this macro for naming consistency.
342	Builder.defineMacro(Name: "__AMDGCN_WAVEFRONT_SIZE", Value: Twine (WavefrontSize));
343	Builder.defineMacro(Name: "__AMDGCN_CUMODE__", Value: Twine (CUMode));
344	}
345
346	void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
347	assert(HalfFormat == Aux->HalfFormat);
348	assert(FloatFormat == Aux->FloatFormat);
349	assert(DoubleFormat == Aux->DoubleFormat);
350
351	// On x86_64 long double is 80-bit extended precision format, which is
352	// not supported by AMDGPU. 128-bit floating point format is also not
353	// supported by AMDGPU. Therefore keep its own format for these two types.
354	auto SaveLongDoubleFormat = LongDoubleFormat;
355	auto SaveFloat128Format = Float128Format;
356	auto SaveLongDoubleWidth = LongDoubleWidth;
357	auto SaveLongDoubleAlign = LongDoubleAlign;
358	copyAuxTarget(Aux);
359	LongDoubleFormat = SaveLongDoubleFormat;
360	Float128Format = SaveFloat128Format;
361	LongDoubleWidth = SaveLongDoubleWidth;
362	LongDoubleAlign = SaveLongDoubleAlign;
363	// For certain builtin types support on the host target, claim they are
364	// support to pass the compilation of the host code during the device-side
365	// compilation.
366	// FIXME: As the side effect, we also accept `__float128` uses in the device
367	// code. To rejct these builtin types supported in the host target but not in
368	// the device target, one approach would support `device_builtin` attribute
369	// so that we could tell the device builtin types from the host ones. The
370	// also solves the different representations of the same builtin type, such
371	// as `size_t` in the MSVC environment.
372	if (Aux->hasFloat128Type()) {
373	HasFloat128 = true;
374	Float128Format = DoubleFormat;
375	}
376	}
377

Browse the source code of llvm_projects/clang/lib/Basic/Targets/AMDGPU.cpp