//===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements AMDGPU TargetInfo objects.
//
//===----------------------------------------------------------------------===//
12
13#include "AMDGPU.h"
14#include "clang/Basic/Builtins.h"
15#include "clang/Basic/Diagnostic.h"
16#include "clang/Basic/LangOptions.h"
17#include "clang/Basic/MacroBuilder.h"
18#include "clang/Basic/TargetBuiltins.h"
19#include "llvm/ADT/SmallString.h"
20using namespace clang;
21using namespace clang::targets;
22
23namespace clang {
24namespace targets {
25
26// If you edit the description strings, make sure you update
27// getPointerWidthV().
28
29const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
30 llvm::AMDGPUAS::FLAT_ADDRESS, // Default
31 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global
32 llvm::AMDGPUAS::LOCAL_ADDRESS, // opencl_local
33 llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant
34 llvm::AMDGPUAS::PRIVATE_ADDRESS, // opencl_private
35 llvm::AMDGPUAS::FLAT_ADDRESS, // opencl_generic
36 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_device
37 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_host
38 llvm::AMDGPUAS::GLOBAL_ADDRESS, // cuda_device
39 llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant
40 llvm::AMDGPUAS::LOCAL_ADDRESS, // cuda_shared
41 llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global
42 llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global_device
43 llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global_host
44 llvm::AMDGPUAS::LOCAL_ADDRESS, // sycl_local
45 llvm::AMDGPUAS::PRIVATE_ADDRESS, // sycl_private
46 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_sptr
47 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr
48 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64
49 llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared
50 llvm::AMDGPUAS::CONSTANT_ADDRESS, // hlsl_constant
51 // FIXME(pr/122103): hlsl_private -> PRIVATE is wrong, but at least this
52 // will break loudly.
53 llvm::AMDGPUAS::PRIVATE_ADDRESS, // hlsl_private
54 llvm::AMDGPUAS::GLOBAL_ADDRESS, // hlsl_device
55 llvm::AMDGPUAS::PRIVATE_ADDRESS, // hlsl_input
56 llvm::AMDGPUAS::GLOBAL_ADDRESS, // hlsl_push_constant
57};
58
59const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
60 llvm::AMDGPUAS::PRIVATE_ADDRESS, // Default
61 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global
62 llvm::AMDGPUAS::LOCAL_ADDRESS, // opencl_local
63 llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant
64 llvm::AMDGPUAS::PRIVATE_ADDRESS, // opencl_private
65 llvm::AMDGPUAS::FLAT_ADDRESS, // opencl_generic
66 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_device
67 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_host
68 llvm::AMDGPUAS::GLOBAL_ADDRESS, // cuda_device
69 llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant
70 llvm::AMDGPUAS::LOCAL_ADDRESS, // cuda_shared
71 // SYCL address space values for this map are dummy
72 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global
73 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global_device
74 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global_host
75 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_local
76 llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_private
77 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_sptr
78 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr
79 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64
80 llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared
81 llvm::AMDGPUAS::CONSTANT_ADDRESS, // hlsl_constant
82 llvm::AMDGPUAS::PRIVATE_ADDRESS, // hlsl_private
83 llvm::AMDGPUAS::GLOBAL_ADDRESS, // hlsl_device
84 llvm::AMDGPUAS::PRIVATE_ADDRESS, // hlsl_input
85 llvm::AMDGPUAS::GLOBAL_ADDRESS, // hlsl_push_constant
86};
87} // namespace targets
88} // namespace clang
89
90static constexpr int NumBuiltins =
91 clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin;
92
93#define GET_BUILTIN_STR_TABLE
94#include "clang/Basic/BuiltinsAMDGPU.inc"
95#undef GET_BUILTIN_STR_TABLE
96
97static constexpr Builtin::Info BuiltinInfos[] = {
98#define GET_BUILTIN_INFOS
99#include "clang/Basic/BuiltinsAMDGPU.inc"
100#undef GET_BUILTIN_INFOS
101};
102static_assert(std::size(BuiltinInfos) == NumBuiltins);
103
104const char *const AMDGPUTargetInfo::GCCRegNames[] = {
105 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
106 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
107 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
108 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
109 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
110 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
111 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
112 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
113 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
114 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
115 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
116 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
117 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
118 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
119 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
120 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
121 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
122 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
123 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
124 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
125 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
126 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
127 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
128 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
129 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
130 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
131 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
132 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
133 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
134 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
135 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
136 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
137 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
138 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
139 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
140 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
141 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
142 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
143 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
144 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
145 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
146 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
147 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
148 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
149 "flat_scratch_lo", "flat_scratch_hi",
150 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
151 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
152 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
153 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
154 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
155 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
156 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
157 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
158 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
159 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
160 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
161 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
162 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
163 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
164 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
165 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
166 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
167 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
168 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
169 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
170 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
171 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
172 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
173 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
174 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
175 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
176 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
177 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
178 "a252", "a253", "a254", "a255"
179};
180
181ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
182 return llvm::ArrayRef(GCCRegNames);
183}
184
185bool AMDGPUTargetInfo::initFeatureMap(
186 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
187 const std::vector<std::string> &FeatureVec) const {
188
189 using namespace llvm::AMDGPU;
190
191 if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec))
192 return false;
193
194 auto HasError = fillAMDGPUFeatureMap(GPU: CPU, T: getTriple(), Features);
195 switch (HasError.first) {
196 default:
197 break;
198 case llvm::AMDGPU::INVALID_FEATURE_COMBINATION:
199 Diags.Report(DiagID: diag::err_invalid_feature_combination) << HasError.second;
200 return false;
201 case llvm::AMDGPU::UNSUPPORTED_TARGET_FEATURE:
202 Diags.Report(DiagID: diag::err_opt_not_valid_on_target) << HasError.second;
203 return false;
204 }
205
206 return true;
207}
208
209void AMDGPUTargetInfo::fillValidCPUList(
210 SmallVectorImpl<StringRef> &Values) const {
211 if (isAMDGCN(TT: getTriple()))
212 llvm::AMDGPU::fillValidArchListAMDGCN(Values);
213 else
214 llvm::AMDGPU::fillValidArchListR600(Values);
215}
216
217void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
218 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
219}
220
221AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
222 const TargetOptions &Opts)
223 : TargetInfo(Triple),
224 GPUKind(isAMDGCN(TT: Triple) ?
225 llvm::AMDGPU::parseArchAMDGCN(CPU: Opts.CPU) :
226 llvm::AMDGPU::parseArchR600(CPU: Opts.CPU)),
227 GPUFeatures(isAMDGCN(TT: Triple) ?
228 llvm::AMDGPU::getArchAttrAMDGCN(AK: GPUKind) :
229 llvm::AMDGPU::getArchAttrR600(AK: GPUKind)) {
230 resetDataLayout();
231
232 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
233 !isAMDGCN(TT: Triple));
234 UseAddrSpaceMapMangling = true;
235
236 if (isAMDGCN(TT: Triple)) {
237 // __bf16 is always available as a load/store only type on AMDGCN.
238 BFloat16Width = BFloat16Align = 16;
239 BFloat16Format = &llvm::APFloat::BFloat();
240 }
241
242 HasFastHalfType = true;
243 HasFloat16 = true;
244 WavefrontSize = (GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32) ? 32 : 64;
245
246 // Set pointer width and alignment for the generic address space.
247 PointerWidth = PointerAlign = getPointerWidthV(AS: LangAS::Default);
248 if (getMaxPointerWidth() == 64) {
249 LongWidth = LongAlign = 64;
250 SizeType = UnsignedLong;
251 PtrDiffType = SignedLong;
252 IntPtrType = SignedLong;
253 }
254
255 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
256 CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP);
257
258 for (auto F : {"image-insts", "gws", "vmem-to-lds-load-insts"}) {
259 if (GPUKind != llvm::AMDGPU::GK_NONE)
260 ReadOnlyFeatures.insert(key: F);
261 }
262 HalfArgsAndReturns = true;
263}
264
265void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts,
266 const TargetInfo *Aux) {
267 TargetInfo::adjust(Diags, Opts, Aux);
268 // ToDo: There are still a few places using default address space as private
269 // address space in OpenCL, which needs to be cleaned up, then the references
270 // to OpenCL can be removed from the following line.
271 setAddressSpaceMap((Opts.OpenCL && !Opts.OpenCLGenericAddressSpace) ||
272 !isAMDGCN(TT: getTriple()));
273
274 AtomicOpts = AtomicOptions(Opts);
275}
276
277llvm::SmallVector<Builtin::InfosShard>
278AMDGPUTargetInfo::getTargetBuiltins() const {
279 return {{.Strings: &BuiltinStrings, .Infos: BuiltinInfos}};
280}
281
282void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
283 MacroBuilder &Builder) const {
284 Builder.defineMacro(Name: "__AMD__");
285 Builder.defineMacro(Name: "__AMDGPU__");
286
287 if (isAMDGCN(TT: getTriple()))
288 Builder.defineMacro(Name: "__AMDGCN__");
289 else
290 Builder.defineMacro(Name: "__R600__");
291
292 // Legacy HIP host code relies on these default attributes to be defined.
293 bool IsHIPHost = Opts.HIP && !Opts.CUDAIsDevice;
294 if (GPUKind == llvm::AMDGPU::GK_NONE && !IsHIPHost)
295 return;
296
297 llvm::SmallString<16> CanonName =
298 (isAMDGCN(TT: getTriple()) ? getArchNameAMDGCN(AK: GPUKind)
299 : getArchNameR600(AK: GPUKind));
300
301 // Sanitize the name of generic targets.
302 // e.g. gfx10-1-generic -> gfx10_1_generic
303 if (GPUKind >= llvm::AMDGPU::GK_AMDGCN_GENERIC_FIRST &&
304 GPUKind <= llvm::AMDGPU::GK_AMDGCN_GENERIC_LAST) {
305 llvm::replace(Range&: CanonName, OldValue: '-', NewValue: '_');
306 }
307
308 Builder.defineMacro(Name: Twine("__") + Twine(CanonName) + Twine("__"));
309 // Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10___
310 if (isAMDGCN(TT: getTriple()) && !IsHIPHost) {
311 assert(StringRef(CanonName).starts_with("gfx") &&
312 "Invalid amdgcn canonical name");
313 StringRef CanonFamilyName = getArchFamilyNameAMDGCN(AK: GPUKind);
314 Builder.defineMacro(Name: Twine("__") + Twine(CanonFamilyName.upper()) +
315 Twine("__"));
316 Builder.defineMacro(Name: "__amdgcn_processor__",
317 Value: Twine("\"") + Twine(CanonName) + Twine("\""));
318 Builder.defineMacro(Name: "__amdgcn_target_id__",
319 Value: Twine("\"") + Twine(*getTargetID()) + Twine("\""));
320 for (auto F : getAllPossibleTargetIDFeatures(T: getTriple(), Processor: CanonName)) {
321 auto Loc = OffloadArchFeatures.find(Key: F);
322 if (Loc != OffloadArchFeatures.end()) {
323 std::string NewF = F.str();
324 llvm::replace(Range&: NewF, OldValue: '-', NewValue: '_');
325 Builder.defineMacro(Name: Twine("__amdgcn_feature_") + Twine(NewF) +
326 Twine("__"),
327 Value: Loc->second ? "1" : "0");
328 }
329 }
330 }
331
332 if (Opts.AtomicIgnoreDenormalMode)
333 Builder.defineMacro(Name: "__AMDGCN_UNSAFE_FP_ATOMICS__");
334
335 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
336 // removed in the near future.
337 if (hasFMAF())
338 Builder.defineMacro(Name: "__HAS_FMAF__");
339 if (hasFastFMAF())
340 Builder.defineMacro(Name: "FP_FAST_FMAF");
341 if (hasLDEXPF())
342 Builder.defineMacro(Name: "__HAS_LDEXPF__");
343 if (hasFP64())
344 Builder.defineMacro(Name: "__HAS_FP64__");
345 if (hasFastFMA())
346 Builder.defineMacro(Name: "FP_FAST_FMA");
347
348 Builder.defineMacro(Name: "__AMDGCN_CUMODE__", Value: Twine(CUMode));
349}
350
351void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
352 assert(HalfFormat == Aux->HalfFormat);
353 assert(FloatFormat == Aux->FloatFormat);
354 assert(DoubleFormat == Aux->DoubleFormat);
355
356 // On x86_64 long double is 80-bit extended precision format, which is
357 // not supported by AMDGPU. 128-bit floating point format is also not
358 // supported by AMDGPU. Therefore keep its own format for these two types.
359 auto SaveLongDoubleFormat = LongDoubleFormat;
360 auto SaveFloat128Format = Float128Format;
361 auto SaveLongDoubleWidth = LongDoubleWidth;
362 auto SaveLongDoubleAlign = LongDoubleAlign;
363 copyAuxTarget(Aux);
364 LongDoubleFormat = SaveLongDoubleFormat;
365 Float128Format = SaveFloat128Format;
366 LongDoubleWidth = SaveLongDoubleWidth;
367 LongDoubleAlign = SaveLongDoubleAlign;
368 // For certain builtin types support on the host target, claim they are
369 // support to pass the compilation of the host code during the device-side
370 // compilation.
371 // FIXME: As the side effect, we also accept `__float128` uses in the device
372 // code. To rejct these builtin types supported in the host target but not in
373 // the device target, one approach would support `device_builtin` attribute
374 // so that we could tell the device builtin types from the host ones. The
375 // also solves the different representations of the same builtin type, such
376 // as `size_t` in the MSVC environment.
377 if (Aux->hasFloat128Type()) {
378 HasFloat128 = true;
379 Float128Format = DoubleFormat;
380 }
381}
382