1//===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements AMDGPU TargetInfo objects.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AMDGPU.h"
14#include "clang/Basic/Builtins.h"
15#include "clang/Basic/Diagnostic.h"
16#include "clang/Basic/LangOptions.h"
17#include "clang/Basic/MacroBuilder.h"
18#include "clang/Basic/TargetBuiltins.h"
19#include "llvm/ADT/SmallString.h"
20#include "llvm/TargetParser/AMDGPUTargetParser.h"
21using namespace clang;
22using namespace clang::targets;
23
24namespace clang {
25namespace targets {
26
27// If you edit the description strings, make sure you update
28// getPointerWidthV().
29
30const LangASMap AMDGPUTargetInfo::AMDGPUAddrSpaceMap = {
31 llvm::AMDGPUAS::FLAT_ADDRESS, // Default
32 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global
33 llvm::AMDGPUAS::LOCAL_ADDRESS, // opencl_local
34 llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant
35 llvm::AMDGPUAS::PRIVATE_ADDRESS, // opencl_private
36 llvm::AMDGPUAS::FLAT_ADDRESS, // opencl_generic
37 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_device
38 llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_host
39 llvm::AMDGPUAS::GLOBAL_ADDRESS, // cuda_device
40 llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant
41 llvm::AMDGPUAS::LOCAL_ADDRESS, // cuda_shared
42 llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global
43 llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global_device
44 llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global_host
45 llvm::AMDGPUAS::LOCAL_ADDRESS, // sycl_local
46 llvm::AMDGPUAS::PRIVATE_ADDRESS, // sycl_private
47 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_sptr
48 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr
49 llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64
50 llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared
51 llvm::AMDGPUAS::CONSTANT_ADDRESS, // hlsl_constant
52 // FIXME(pr/122103): hlsl_private -> PRIVATE is wrong, but at least this
53 // will break loudly.
54 llvm::AMDGPUAS::PRIVATE_ADDRESS, // hlsl_private
55 llvm::AMDGPUAS::GLOBAL_ADDRESS, // hlsl_device
56 llvm::AMDGPUAS::PRIVATE_ADDRESS, // hlsl_input
57 llvm::AMDGPUAS::PRIVATE_ADDRESS, // hlsl_output
58 llvm::AMDGPUAS::GLOBAL_ADDRESS, // hlsl_push_constant
59};
60
61} // namespace targets
62} // namespace clang
63
64static constexpr int NumBuiltins =
65 clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin;
66
67#define GET_BUILTIN_STR_TABLE
68#include "clang/Basic/BuiltinsAMDGPU.inc"
69#undef GET_BUILTIN_STR_TABLE
70
71static constexpr Builtin::Info BuiltinInfos[] = {
72#define GET_BUILTIN_INFOS
73#include "clang/Basic/BuiltinsAMDGPU.inc"
74#undef GET_BUILTIN_INFOS
75};
76static_assert(std::size(BuiltinInfos) == NumBuiltins);
77
78const char *const AMDGPUTargetInfo::GCCRegNames[] = {
79 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
80 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
81 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
82 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
83 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
84 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
85 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
86 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
87 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
88 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
89 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
90 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
91 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
92 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
93 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
94 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
95 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
96 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
97 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
98 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
99 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
100 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
101 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
102 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
103 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
104 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
105 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
106 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
107 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
108 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
109 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
110 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
111 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
112 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
113 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
114 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
115 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
116 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
117 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
118 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
119 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
120 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
121 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
122 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
123 "flat_scratch_lo", "flat_scratch_hi",
124 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
125 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
126 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
127 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
128 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
129 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
130 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
131 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
132 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
133 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
134 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
135 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
136 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
137 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
138 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
139 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
140 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
141 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
142 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
143 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
144 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
145 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
146 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
147 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
148 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
149 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
150 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
151 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
152 "a252", "a253", "a254", "a255"
153};
154
155ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
156 return llvm::ArrayRef(GCCRegNames);
157}
158
159bool AMDGPUTargetInfo::initFeatureMap(
160 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
161 const std::vector<std::string> &FeatureVec) const {
162
163 using namespace llvm::AMDGPU;
164
165 if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec))
166 return false;
167
168 auto HasError = fillAMDGPUFeatureMap(GPU: CPU, T: getTriple(), Features);
169 switch (HasError.first) {
170 default:
171 break;
172 case llvm::AMDGPU::INVALID_FEATURE_COMBINATION:
173 Diags.Report(DiagID: diag::err_invalid_feature_combination) << HasError.second;
174 return false;
175 case llvm::AMDGPU::UNSUPPORTED_TARGET_FEATURE:
176 Diags.Report(DiagID: diag::err_opt_not_valid_on_target) << HasError.second;
177 return false;
178 }
179
180 return true;
181}
182
183void AMDGPUTargetInfo::fillValidCPUList(
184 SmallVectorImpl<StringRef> &Values) const {
185 if (getTriple().isAMDGCN())
186 llvm::AMDGPU::fillValidArchListAMDGCN(Values);
187 else
188 llvm::AMDGPU::fillValidArchListR600(Values);
189}
190
191AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
192 const TargetOptions &Opts)
193 : TargetInfo(Triple),
194 GPUKind(Triple.isAMDGCN() ? llvm::AMDGPU::parseArchAMDGCN(CPU: Opts.CPU)
195 : llvm::AMDGPU::parseArchR600(CPU: Opts.CPU)),
196 GPUFeatures(Triple.isAMDGCN() ? llvm::AMDGPU::getArchAttrAMDGCN(AK: GPUKind)
197 : llvm::AMDGPU::getArchAttrR600(AK: GPUKind)) {
198 resetDataLayout();
199
200 AddrSpaceMap = &AMDGPUAddrSpaceMap;
201 UseAddrSpaceMapMangling = true;
202
203 if (Triple.isAMDGCN()) {
204 // __bf16 is always available as a load/store only type on AMDGCN.
205 BFloat16Width = BFloat16Align = 16;
206 BFloat16Format = &llvm::APFloat::BFloat();
207 }
208
209 // TODO: This is not really true for targets without half support, but also
210 // should just be assumed true for the dummy target.
211 HasFastHalfType = true;
212 HasFloat16 = true;
213 WavefrontSize = (GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32) ? 32 : 64;
214
215 // Set pointer width and alignment for the generic address space.
216 PointerWidth = PointerAlign = getPointerWidthV(AS: LangAS::Default);
217 if (getMaxPointerWidth() == 64) {
218 LongWidth = LongAlign = 64;
219 SizeType = UnsignedLong;
220 PtrDiffType = SignedLong;
221 IntPtrType = SignedLong;
222 }
223
224 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
225 CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP);
226
227 for (auto F : {"image-insts", "gws", "vmem-to-lds-load-insts"}) {
228 if (GPUKind != llvm::AMDGPU::GK_NONE)
229 ReadOnlyFeatures.insert(key: F);
230 }
231 HalfArgsAndReturns = true;
232}
233
234void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts,
235 const TargetInfo *Aux) {
236 TargetInfo::adjust(Diags, Opts, Aux);
237 AtomicOpts = AtomicOptions(Opts);
238}
239
240llvm::SmallVector<Builtin::InfosShard>
241AMDGPUTargetInfo::getTargetBuiltins() const {
242 return {{.Strings: &BuiltinStrings, .Infos: BuiltinInfos}};
243}
244
245void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
246 MacroBuilder &Builder) const {
247 Builder.defineMacro(Name: "__AMD__");
248 Builder.defineMacro(Name: "__AMDGPU__");
249
250 if (getTriple().isAMDGCN())
251 Builder.defineMacro(Name: "__AMDGCN__");
252 else
253 Builder.defineMacro(Name: "__R600__");
254
255 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
256 // removed in the near future.
257 if (hasFMAF())
258 Builder.defineMacro(Name: "__HAS_FMAF__");
259 if (hasFastFMAF())
260 Builder.defineMacro(Name: "FP_FAST_FMAF");
261 if (hasLDEXPF())
262 Builder.defineMacro(Name: "__HAS_LDEXPF__");
263 if (hasFP64())
264 Builder.defineMacro(Name: "__HAS_FP64__");
265 if (hasFastFMA())
266 Builder.defineMacro(Name: "FP_FAST_FMA");
267 if (HasFastHalfType)
268 Builder.defineMacro(Name: "FP_FAST_FMA_HALF");
269
270 Builder.defineMacro(Name: "__AMDGCN_CUMODE__", Value: Twine(CUMode));
271
272 // Legacy HIP host code relies on these default attributes to be defined.
273 bool IsHIPHost = Opts.HIP && !Opts.CUDAIsDevice;
274 if (GPUKind == llvm::AMDGPU::GK_NONE && !IsHIPHost)
275 return;
276
277 llvm::SmallString<16> CanonName =
278 (getTriple().isAMDGCN() ? getArchNameAMDGCN(AK: GPUKind)
279 : getArchNameR600(AK: GPUKind));
280
281 // Sanitize the name of generic targets.
282 // e.g. gfx10-1-generic -> gfx10_1_generic
283 if (GPUKind >= llvm::AMDGPU::GK_AMDGCN_GENERIC_FIRST &&
284 GPUKind <= llvm::AMDGPU::GK_AMDGCN_GENERIC_LAST) {
285 llvm::replace(Range&: CanonName, OldValue: '-', NewValue: '_');
286 }
287
288 Builder.defineMacro(Name: Twine("__") + Twine(CanonName) + Twine("__"));
289 // Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10___
290 if (getTriple().isAMDGCN() && !IsHIPHost) {
291 assert(StringRef(CanonName).starts_with("gfx") &&
292 "Invalid amdgcn canonical name");
293 StringRef CanonFamilyName = getArchFamilyNameAMDGCN(AK: GPUKind);
294 Builder.defineMacro(Name: Twine("__") + Twine(CanonFamilyName.upper()) +
295 Twine("__"));
296 Builder.defineMacro(Name: "__amdgcn_processor__",
297 Value: Twine("\"") + Twine(CanonName) + Twine("\""));
298 Builder.defineMacro(
299 Name: "__amdgcn_target_id__",
300 Value: Twine("\"") +
301 Twine(getCanonicalTargetID(Processor: getArchNameAMDGCN(AK: GPUKind),
302 Features: OffloadArchFeatures)) +
303 Twine("\""));
304 for (auto F : getAllPossibleTargetIDFeatures(T: getTriple(), Processor: CanonName)) {
305 auto Loc = OffloadArchFeatures.find(Key: F);
306 if (Loc != OffloadArchFeatures.end()) {
307 std::string NewF = F.str();
308 llvm::replace(Range&: NewF, OldValue: '-', NewValue: '_');
309 Builder.defineMacro(Name: Twine("__amdgcn_feature_") + Twine(NewF) +
310 Twine("__"),
311 Value: Loc->second ? "1" : "0");
312 }
313 }
314 }
315
316 if (Opts.AtomicIgnoreDenormalMode)
317 Builder.defineMacro(Name: "__AMDGCN_UNSAFE_FP_ATOMICS__");
318}
319
320void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
321 assert(HalfFormat == Aux->HalfFormat);
322 assert(FloatFormat == Aux->FloatFormat);
323 assert(DoubleFormat == Aux->DoubleFormat);
324
325 // On x86_64 long double is 80-bit extended precision format, which is
326 // not supported by AMDGPU. 128-bit floating point format is also not
327 // supported by AMDGPU. Therefore keep its own format for these two types.
328 auto SaveLongDoubleFormat = LongDoubleFormat;
329 auto SaveFloat128Format = Float128Format;
330 auto SaveLongDoubleWidth = LongDoubleWidth;
331 auto SaveLongDoubleAlign = LongDoubleAlign;
332 copyAuxTarget(Aux);
333 LongDoubleFormat = SaveLongDoubleFormat;
334 Float128Format = SaveFloat128Format;
335 LongDoubleWidth = SaveLongDoubleWidth;
336 LongDoubleAlign = SaveLongDoubleAlign;
337 // For certain builtin types support on the host target, claim they are
338 // support to pass the compilation of the host code during the device-side
339 // compilation.
340 // FIXME: As the side effect, we also accept `__float128` uses in the device
341 // code. To rejct these builtin types supported in the host target but not in
342 // the device target, one approach would support `device_builtin` attribute
343 // so that we could tell the device builtin types from the host ones. The
344 // also solves the different representations of the same builtin type, such
345 // as `size_t` in the MSVC environment.
346 if (Aux->hasFloat128Type()) {
347 HasFloat128 = true;
348 Float128Format = DoubleFormat;
349 }
350}
351