1//===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements AMDGPU TargetInfo objects.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AMDGPU.h"
14#include "clang/Basic/Builtins.h"
15#include "clang/Basic/Diagnostic.h"
16#include "clang/Basic/LangOptions.h"
17#include "clang/Basic/MacroBuilder.h"
18#include "clang/Basic/TargetBuiltins.h"
19#include "llvm/ADT/SmallString.h"
20using namespace clang;
21using namespace clang::targets;
22
23namespace clang {
24namespace targets {
25
26// If you edit the description strings, make sure you update
27// getPointerWidthV().
28
29const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
30 llvm::AMDGPUAS::FLAT_ADDRESS, // Default
31 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global
32 llvm::AMDGPUAS::LOCAL_ADDRESS, // opencl_local
33 llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant
34 llvm::AMDGPUAS::PRIVATE_ADDRESS, // opencl_private
35 llvm::AMDGPUAS::FLAT_ADDRESS, // opencl_generic
36 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_device
37 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_host
38 llvm::AMDGPUAS::GLOBAL_ADDRESS, // cuda_device
39 llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant
40 llvm::AMDGPUAS::LOCAL_ADDRESS, // cuda_shared
41 llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global
42 llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global_device
43 llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global_host
44 llvm::AMDGPUAS::LOCAL_ADDRESS, // sycl_local
45 llvm::AMDGPUAS::PRIVATE_ADDRESS, // sycl_private
46 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_sptr
47 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr
48 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64
49 llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared
50 llvm::AMDGPUAS::CONSTANT_ADDRESS, // hlsl_constant
51 // FIXME(pr/122103): hlsl_private -> PRIVATE is wrong, but at least this
52 // will break loudly.
53 llvm::AMDGPUAS::PRIVATE_ADDRESS, // hlsl_private
54 llvm::AMDGPUAS::GLOBAL_ADDRESS, // hlsl_device
55 llvm::AMDGPUAS::PRIVATE_ADDRESS, // hlsl_input
56 llvm::AMDGPUAS::PRIVATE_ADDRESS, // hlsl_output
57 llvm::AMDGPUAS::GLOBAL_ADDRESS, // hlsl_push_constant
58};
59
60const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
61 llvm::AMDGPUAS::PRIVATE_ADDRESS, // Default
62 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global
63 llvm::AMDGPUAS::LOCAL_ADDRESS, // opencl_local
64 llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant
65 llvm::AMDGPUAS::PRIVATE_ADDRESS, // opencl_private
66 llvm::AMDGPUAS::FLAT_ADDRESS, // opencl_generic
67 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_device
68 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_host
69 llvm::AMDGPUAS::GLOBAL_ADDRESS, // cuda_device
70 llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant
71 llvm::AMDGPUAS::LOCAL_ADDRESS, // cuda_shared
72 // SYCL address space values for this map are dummy
73 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global
74 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global_device
75 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global_host
76 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_local
77 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_private
78 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_sptr
79 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr
80 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64
81 llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared
82 llvm::AMDGPUAS::CONSTANT_ADDRESS, // hlsl_constant
83 llvm::AMDGPUAS::PRIVATE_ADDRESS, // hlsl_private
84 llvm::AMDGPUAS::GLOBAL_ADDRESS, // hlsl_device
85 llvm::AMDGPUAS::PRIVATE_ADDRESS, // hlsl_input
86 llvm::AMDGPUAS::PRIVATE_ADDRESS, // hlsl_output
87 llvm::AMDGPUAS::GLOBAL_ADDRESS, // hlsl_push_constant
88};
89} // namespace targets
90} // namespace clang
91
92static constexpr int NumBuiltins =
93 clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin;
94
95#define GET_BUILTIN_STR_TABLE
96#include "clang/Basic/BuiltinsAMDGPU.inc"
97#undef GET_BUILTIN_STR_TABLE
98
99static constexpr Builtin::Info BuiltinInfos[] = {
100#define GET_BUILTIN_INFOS
101#include "clang/Basic/BuiltinsAMDGPU.inc"
102#undef GET_BUILTIN_INFOS
103};
104static_assert(std::size(BuiltinInfos) == NumBuiltins);
105
106const char *const AMDGPUTargetInfo::GCCRegNames[] = {
107 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
108 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
109 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
110 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
111 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
112 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
113 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
114 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
115 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
116 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
117 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
118 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
119 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
120 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
121 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
122 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
123 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
124 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
125 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
126 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
127 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
128 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
129 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
130 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
131 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
132 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
133 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
134 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
135 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
136 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
137 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
138 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
139 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
140 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
141 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
142 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
143 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
144 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
145 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
146 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
147 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
148 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
149 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
150 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
151 "flat_scratch_lo", "flat_scratch_hi",
152 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
153 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
154 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
155 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
156 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
157 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
158 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
159 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
160 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
161 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
162 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
163 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
164 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
165 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
166 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
167 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
168 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
169 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
170 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
171 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
172 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
173 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
174 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
175 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
176 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
177 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
178 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
179 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
180 "a252", "a253", "a254", "a255"
181};
182
183ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
184 return llvm::ArrayRef(GCCRegNames);
185}
186
187bool AMDGPUTargetInfo::initFeatureMap(
188 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
189 const std::vector<std::string> &FeatureVec) const {
190
191 using namespace llvm::AMDGPU;
192
193 if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec))
194 return false;
195
196 auto HasError = fillAMDGPUFeatureMap(GPU: CPU, T: getTriple(), Features);
197 switch (HasError.first) {
198 default:
199 break;
200 case llvm::AMDGPU::INVALID_FEATURE_COMBINATION:
201 Diags.Report(DiagID: diag::err_invalid_feature_combination) << HasError.second;
202 return false;
203 case llvm::AMDGPU::UNSUPPORTED_TARGET_FEATURE:
204 Diags.Report(DiagID: diag::err_opt_not_valid_on_target) << HasError.second;
205 return false;
206 }
207
208 return true;
209}
210
211void AMDGPUTargetInfo::fillValidCPUList(
212 SmallVectorImpl<StringRef> &Values) const {
213 if (getTriple().isAMDGCN())
214 llvm::AMDGPU::fillValidArchListAMDGCN(Values);
215 else
216 llvm::AMDGPU::fillValidArchListR600(Values);
217}
218
219void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
220 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
221}
222
223AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
224 const TargetOptions &Opts)
225 : TargetInfo(Triple),
226 GPUKind(Triple.isAMDGCN() ? llvm::AMDGPU::parseArchAMDGCN(CPU: Opts.CPU)
227 : llvm::AMDGPU::parseArchR600(CPU: Opts.CPU)),
228 GPUFeatures(Triple.isAMDGCN() ? llvm::AMDGPU::getArchAttrAMDGCN(AK: GPUKind)
229 : llvm::AMDGPU::getArchAttrR600(AK: GPUKind)) {
230 resetDataLayout();
231
232 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
233 !Triple.isAMDGCN());
234 UseAddrSpaceMapMangling = true;
235
236 if (Triple.isAMDGCN()) {
237 // __bf16 is always available as a load/store only type on AMDGCN.
238 BFloat16Width = BFloat16Align = 16;
239 BFloat16Format = &llvm::APFloat::BFloat();
240 }
241
242 // TODO: This is not really true for targets without half support, but also
243 // should just be assumed true for the dummy target.
244 HasFastHalfType = true;
245 HasFloat16 = true;
246 WavefrontSize = (GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32) ? 32 : 64;
247
248 // Set pointer width and alignment for the generic address space.
249 PointerWidth = PointerAlign = getPointerWidthV(AS: LangAS::Default);
250 if (getMaxPointerWidth() == 64) {
251 LongWidth = LongAlign = 64;
252 SizeType = UnsignedLong;
253 PtrDiffType = SignedLong;
254 IntPtrType = SignedLong;
255 }
256
257 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
258 CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP);
259
260 for (auto F : {"image-insts", "gws", "vmem-to-lds-load-insts"}) {
261 if (GPUKind != llvm::AMDGPU::GK_NONE)
262 ReadOnlyFeatures.insert(key: F);
263 }
264 HalfArgsAndReturns = true;
265}
266
267void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts,
268 const TargetInfo *Aux) {
269 TargetInfo::adjust(Diags, Opts, Aux);
270 // ToDo: There are still a few places using default address space as private
271 // address space in OpenCL, which needs to be cleaned up, then the references
272 // to OpenCL can be removed from the following line.
273 setAddressSpaceMap((Opts.OpenCL && !Opts.OpenCLGenericAddressSpace) ||
274 !getTriple().isAMDGCN());
275
276 AtomicOpts = AtomicOptions(Opts);
277}
278
279llvm::SmallVector<Builtin::InfosShard>
280AMDGPUTargetInfo::getTargetBuiltins() const {
281 return {{.Strings: &BuiltinStrings, .Infos: BuiltinInfos}};
282}
283
284void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
285 MacroBuilder &Builder) const {
286 Builder.defineMacro(Name: "__AMD__");
287 Builder.defineMacro(Name: "__AMDGPU__");
288
289 if (getTriple().isAMDGCN())
290 Builder.defineMacro(Name: "__AMDGCN__");
291 else
292 Builder.defineMacro(Name: "__R600__");
293
294 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
295 // removed in the near future.
296 if (hasFMAF())
297 Builder.defineMacro(Name: "__HAS_FMAF__");
298 if (hasFastFMAF())
299 Builder.defineMacro(Name: "FP_FAST_FMAF");
300 if (hasLDEXPF())
301 Builder.defineMacro(Name: "__HAS_LDEXPF__");
302 if (hasFP64())
303 Builder.defineMacro(Name: "__HAS_FP64__");
304 if (hasFastFMA())
305 Builder.defineMacro(Name: "FP_FAST_FMA");
306 if (HasFastHalfType)
307 Builder.defineMacro(Name: "FP_FAST_FMA_HALF");
308
309 Builder.defineMacro(Name: "__AMDGCN_CUMODE__", Value: Twine(CUMode));
310
311 // Legacy HIP host code relies on these default attributes to be defined.
312 bool IsHIPHost = Opts.HIP && !Opts.CUDAIsDevice;
313 if (GPUKind == llvm::AMDGPU::GK_NONE && !IsHIPHost)
314 return;
315
316 llvm::SmallString<16> CanonName =
317 (getTriple().isAMDGCN() ? getArchNameAMDGCN(AK: GPUKind)
318 : getArchNameR600(AK: GPUKind));
319
320 // Sanitize the name of generic targets.
321 // e.g. gfx10-1-generic -> gfx10_1_generic
322 if (GPUKind >= llvm::AMDGPU::GK_AMDGCN_GENERIC_FIRST &&
323 GPUKind <= llvm::AMDGPU::GK_AMDGCN_GENERIC_LAST) {
324 llvm::replace(Range&: CanonName, OldValue: '-', NewValue: '_');
325 }
326
327 Builder.defineMacro(Name: Twine("__") + Twine(CanonName) + Twine("__"));
328 // Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10___
329 if (getTriple().isAMDGCN() && !IsHIPHost) {
330 assert(StringRef(CanonName).starts_with("gfx") &&
331 "Invalid amdgcn canonical name");
332 StringRef CanonFamilyName = getArchFamilyNameAMDGCN(AK: GPUKind);
333 Builder.defineMacro(Name: Twine("__") + Twine(CanonFamilyName.upper()) +
334 Twine("__"));
335 Builder.defineMacro(Name: "__amdgcn_processor__",
336 Value: Twine("\"") + Twine(CanonName) + Twine("\""));
337 Builder.defineMacro(Name: "__amdgcn_target_id__",
338 Value: Twine("\"") + Twine(*getTargetID()) + Twine("\""));
339 for (auto F : getAllPossibleTargetIDFeatures(T: getTriple(), Processor: CanonName)) {
340 auto Loc = OffloadArchFeatures.find(Key: F);
341 if (Loc != OffloadArchFeatures.end()) {
342 std::string NewF = F.str();
343 llvm::replace(Range&: NewF, OldValue: '-', NewValue: '_');
344 Builder.defineMacro(Name: Twine("__amdgcn_feature_") + Twine(NewF) +
345 Twine("__"),
346 Value: Loc->second ? "1" : "0");
347 }
348 }
349 }
350
351 if (Opts.AtomicIgnoreDenormalMode)
352 Builder.defineMacro(Name: "__AMDGCN_UNSAFE_FP_ATOMICS__");
353}
354
355void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
356 assert(HalfFormat == Aux->HalfFormat);
357 assert(FloatFormat == Aux->FloatFormat);
358 assert(DoubleFormat == Aux->DoubleFormat);
359
360 // On x86_64 long double is 80-bit extended precision format, which is
361 // not supported by AMDGPU. 128-bit floating point format is also not
362 // supported by AMDGPU. Therefore keep its own format for these two types.
363 auto SaveLongDoubleFormat = LongDoubleFormat;
364 auto SaveFloat128Format = Float128Format;
365 auto SaveLongDoubleWidth = LongDoubleWidth;
366 auto SaveLongDoubleAlign = LongDoubleAlign;
367 copyAuxTarget(Aux);
368 LongDoubleFormat = SaveLongDoubleFormat;
369 Float128Format = SaveFloat128Format;
370 LongDoubleWidth = SaveLongDoubleWidth;
371 LongDoubleAlign = SaveLongDoubleAlign;
372 // For certain builtin types support on the host target, claim they are
373 // support to pass the compilation of the host code during the device-side
374 // compilation.
375 // FIXME: As the side effect, we also accept `__float128` uses in the device
376 // code. To rejct these builtin types supported in the host target but not in
377 // the device target, one approach would support `device_builtin` attribute
378 // so that we could tell the device builtin types from the host ones. The
379 // also solves the different representations of the same builtin type, such
380 // as `size_t` in the MSVC environment.
381 if (Aux->hasFloat128Type()) {
382 HasFloat128 = true;
383 Float128Format = DoubleFormat;
384 }
385}
386