1 | //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements AMDGPU TargetInfo objects. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "AMDGPU.h" |
14 | #include "clang/Basic/Builtins.h" |
15 | #include "clang/Basic/CodeGenOptions.h" |
16 | #include "clang/Basic/Diagnostic.h" |
17 | #include "clang/Basic/LangOptions.h" |
18 | #include "clang/Basic/MacroBuilder.h" |
19 | #include "clang/Basic/TargetBuiltins.h" |
20 | #include "llvm/ADT/SmallString.h" |
21 | using namespace clang; |
22 | using namespace clang::targets; |
23 | |
24 | namespace clang { |
25 | namespace targets { |
26 | |
27 | // If you edit the description strings, make sure you update |
28 | // getPointerWidthV(). |
29 | |
30 | static const char *const DataLayoutStringR600 = |
31 | "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" |
32 | "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" ; |
33 | |
34 | static const char *const DataLayoutStringAMDGCN = |
35 | "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" |
36 | "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:" |
37 | "32-v48:64-v96:128" |
38 | "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" |
39 | "-ni:7:8:9" ; |
40 | |
41 | const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { |
42 | llvm::AMDGPUAS::FLAT_ADDRESS, // Default |
43 | llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global |
44 | llvm::AMDGPUAS::LOCAL_ADDRESS, // opencl_local |
45 | llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant |
46 | llvm::AMDGPUAS::PRIVATE_ADDRESS, // opencl_private |
47 | llvm::AMDGPUAS::FLAT_ADDRESS, // opencl_generic |
48 | llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_device |
49 | llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_host |
50 | llvm::AMDGPUAS::GLOBAL_ADDRESS, // cuda_device |
51 | llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant |
52 | llvm::AMDGPUAS::LOCAL_ADDRESS, // cuda_shared |
53 | llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global |
54 | llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global_device |
55 | llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global_host |
56 | llvm::AMDGPUAS::LOCAL_ADDRESS, // sycl_local |
57 | llvm::AMDGPUAS::PRIVATE_ADDRESS, // sycl_private |
58 | llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_sptr |
59 | llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr |
60 | llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64 |
61 | llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared |
62 | }; |
63 | |
64 | const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { |
65 | llvm::AMDGPUAS::PRIVATE_ADDRESS, // Default |
66 | llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global |
67 | llvm::AMDGPUAS::LOCAL_ADDRESS, // opencl_local |
68 | llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant |
69 | llvm::AMDGPUAS::PRIVATE_ADDRESS, // opencl_private |
70 | llvm::AMDGPUAS::FLAT_ADDRESS, // opencl_generic |
71 | llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_device |
72 | llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_host |
73 | llvm::AMDGPUAS::GLOBAL_ADDRESS, // cuda_device |
74 | llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant |
75 | llvm::AMDGPUAS::LOCAL_ADDRESS, // cuda_shared |
76 | // SYCL address space values for this map are dummy |
77 | llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global |
78 | llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global_device |
79 | llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global_host |
80 | llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_local |
81 | llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_private |
82 | llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_sptr |
83 | llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr |
84 | llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64 |
85 | llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared |
86 | |
87 | }; |
88 | } // namespace targets |
89 | } // namespace clang |
90 | |
91 | static constexpr Builtin::Info BuiltinInfo[] = { |
92 | #define BUILTIN(ID, TYPE, ATTRS) \ |
93 | {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, |
94 | #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ |
95 | {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, |
96 | #include "clang/Basic/BuiltinsAMDGPU.def" |
97 | }; |
98 | |
99 | const char *const AMDGPUTargetInfo::GCCRegNames[] = { |
100 | "v0" , "v1" , "v2" , "v3" , "v4" , "v5" , "v6" , "v7" , "v8" , |
101 | "v9" , "v10" , "v11" , "v12" , "v13" , "v14" , "v15" , "v16" , "v17" , |
102 | "v18" , "v19" , "v20" , "v21" , "v22" , "v23" , "v24" , "v25" , "v26" , |
103 | "v27" , "v28" , "v29" , "v30" , "v31" , "v32" , "v33" , "v34" , "v35" , |
104 | "v36" , "v37" , "v38" , "v39" , "v40" , "v41" , "v42" , "v43" , "v44" , |
105 | "v45" , "v46" , "v47" , "v48" , "v49" , "v50" , "v51" , "v52" , "v53" , |
106 | "v54" , "v55" , "v56" , "v57" , "v58" , "v59" , "v60" , "v61" , "v62" , |
107 | "v63" , "v64" , "v65" , "v66" , "v67" , "v68" , "v69" , "v70" , "v71" , |
108 | "v72" , "v73" , "v74" , "v75" , "v76" , "v77" , "v78" , "v79" , "v80" , |
109 | "v81" , "v82" , "v83" , "v84" , "v85" , "v86" , "v87" , "v88" , "v89" , |
110 | "v90" , "v91" , "v92" , "v93" , "v94" , "v95" , "v96" , "v97" , "v98" , |
111 | "v99" , "v100" , "v101" , "v102" , "v103" , "v104" , "v105" , "v106" , "v107" , |
112 | "v108" , "v109" , "v110" , "v111" , "v112" , "v113" , "v114" , "v115" , "v116" , |
113 | "v117" , "v118" , "v119" , "v120" , "v121" , "v122" , "v123" , "v124" , "v125" , |
114 | "v126" , "v127" , "v128" , "v129" , "v130" , "v131" , "v132" , "v133" , "v134" , |
115 | "v135" , "v136" , "v137" , "v138" , "v139" , "v140" , "v141" , "v142" , "v143" , |
116 | "v144" , "v145" , "v146" , "v147" , "v148" , "v149" , "v150" , "v151" , "v152" , |
117 | "v153" , "v154" , "v155" , "v156" , "v157" , "v158" , "v159" , "v160" , "v161" , |
118 | "v162" , "v163" , "v164" , "v165" , "v166" , "v167" , "v168" , "v169" , "v170" , |
119 | "v171" , "v172" , "v173" , "v174" , "v175" , "v176" , "v177" , "v178" , "v179" , |
120 | "v180" , "v181" , "v182" , "v183" , "v184" , "v185" , "v186" , "v187" , "v188" , |
121 | "v189" , "v190" , "v191" , "v192" , "v193" , "v194" , "v195" , "v196" , "v197" , |
122 | "v198" , "v199" , "v200" , "v201" , "v202" , "v203" , "v204" , "v205" , "v206" , |
123 | "v207" , "v208" , "v209" , "v210" , "v211" , "v212" , "v213" , "v214" , "v215" , |
124 | "v216" , "v217" , "v218" , "v219" , "v220" , "v221" , "v222" , "v223" , "v224" , |
125 | "v225" , "v226" , "v227" , "v228" , "v229" , "v230" , "v231" , "v232" , "v233" , |
126 | "v234" , "v235" , "v236" , "v237" , "v238" , "v239" , "v240" , "v241" , "v242" , |
127 | "v243" , "v244" , "v245" , "v246" , "v247" , "v248" , "v249" , "v250" , "v251" , |
128 | "v252" , "v253" , "v254" , "v255" , "s0" , "s1" , "s2" , "s3" , "s4" , |
129 | "s5" , "s6" , "s7" , "s8" , "s9" , "s10" , "s11" , "s12" , "s13" , |
130 | "s14" , "s15" , "s16" , "s17" , "s18" , "s19" , "s20" , "s21" , "s22" , |
131 | "s23" , "s24" , "s25" , "s26" , "s27" , "s28" , "s29" , "s30" , "s31" , |
132 | "s32" , "s33" , "s34" , "s35" , "s36" , "s37" , "s38" , "s39" , "s40" , |
133 | "s41" , "s42" , "s43" , "s44" , "s45" , "s46" , "s47" , "s48" , "s49" , |
134 | "s50" , "s51" , "s52" , "s53" , "s54" , "s55" , "s56" , "s57" , "s58" , |
135 | "s59" , "s60" , "s61" , "s62" , "s63" , "s64" , "s65" , "s66" , "s67" , |
136 | "s68" , "s69" , "s70" , "s71" , "s72" , "s73" , "s74" , "s75" , "s76" , |
137 | "s77" , "s78" , "s79" , "s80" , "s81" , "s82" , "s83" , "s84" , "s85" , |
138 | "s86" , "s87" , "s88" , "s89" , "s90" , "s91" , "s92" , "s93" , "s94" , |
139 | "s95" , "s96" , "s97" , "s98" , "s99" , "s100" , "s101" , "s102" , "s103" , |
140 | "s104" , "s105" , "s106" , "s107" , "s108" , "s109" , "s110" , "s111" , "s112" , |
141 | "s113" , "s114" , "s115" , "s116" , "s117" , "s118" , "s119" , "s120" , "s121" , |
142 | "s122" , "s123" , "s124" , "s125" , "s126" , "s127" , "exec" , "vcc" , "scc" , |
143 | "m0" , "flat_scratch" , "exec_lo" , "exec_hi" , "vcc_lo" , "vcc_hi" , |
144 | "flat_scratch_lo" , "flat_scratch_hi" , |
145 | "a0" , "a1" , "a2" , "a3" , "a4" , "a5" , "a6" , "a7" , "a8" , |
146 | "a9" , "a10" , "a11" , "a12" , "a13" , "a14" , "a15" , "a16" , "a17" , |
147 | "a18" , "a19" , "a20" , "a21" , "a22" , "a23" , "a24" , "a25" , "a26" , |
148 | "a27" , "a28" , "a29" , "a30" , "a31" , "a32" , "a33" , "a34" , "a35" , |
149 | "a36" , "a37" , "a38" , "a39" , "a40" , "a41" , "a42" , "a43" , "a44" , |
150 | "a45" , "a46" , "a47" , "a48" , "a49" , "a50" , "a51" , "a52" , "a53" , |
151 | "a54" , "a55" , "a56" , "a57" , "a58" , "a59" , "a60" , "a61" , "a62" , |
152 | "a63" , "a64" , "a65" , "a66" , "a67" , "a68" , "a69" , "a70" , "a71" , |
153 | "a72" , "a73" , "a74" , "a75" , "a76" , "a77" , "a78" , "a79" , "a80" , |
154 | "a81" , "a82" , "a83" , "a84" , "a85" , "a86" , "a87" , "a88" , "a89" , |
155 | "a90" , "a91" , "a92" , "a93" , "a94" , "a95" , "a96" , "a97" , "a98" , |
156 | "a99" , "a100" , "a101" , "a102" , "a103" , "a104" , "a105" , "a106" , "a107" , |
157 | "a108" , "a109" , "a110" , "a111" , "a112" , "a113" , "a114" , "a115" , "a116" , |
158 | "a117" , "a118" , "a119" , "a120" , "a121" , "a122" , "a123" , "a124" , "a125" , |
159 | "a126" , "a127" , "a128" , "a129" , "a130" , "a131" , "a132" , "a133" , "a134" , |
160 | "a135" , "a136" , "a137" , "a138" , "a139" , "a140" , "a141" , "a142" , "a143" , |
161 | "a144" , "a145" , "a146" , "a147" , "a148" , "a149" , "a150" , "a151" , "a152" , |
162 | "a153" , "a154" , "a155" , "a156" , "a157" , "a158" , "a159" , "a160" , "a161" , |
163 | "a162" , "a163" , "a164" , "a165" , "a166" , "a167" , "a168" , "a169" , "a170" , |
164 | "a171" , "a172" , "a173" , "a174" , "a175" , "a176" , "a177" , "a178" , "a179" , |
165 | "a180" , "a181" , "a182" , "a183" , "a184" , "a185" , "a186" , "a187" , "a188" , |
166 | "a189" , "a190" , "a191" , "a192" , "a193" , "a194" , "a195" , "a196" , "a197" , |
167 | "a198" , "a199" , "a200" , "a201" , "a202" , "a203" , "a204" , "a205" , "a206" , |
168 | "a207" , "a208" , "a209" , "a210" , "a211" , "a212" , "a213" , "a214" , "a215" , |
169 | "a216" , "a217" , "a218" , "a219" , "a220" , "a221" , "a222" , "a223" , "a224" , |
170 | "a225" , "a226" , "a227" , "a228" , "a229" , "a230" , "a231" , "a232" , "a233" , |
171 | "a234" , "a235" , "a236" , "a237" , "a238" , "a239" , "a240" , "a241" , "a242" , |
172 | "a243" , "a244" , "a245" , "a246" , "a247" , "a248" , "a249" , "a250" , "a251" , |
173 | "a252" , "a253" , "a254" , "a255" |
174 | }; |
175 | |
176 | ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { |
177 | return llvm::ArrayRef(GCCRegNames); |
178 | } |
179 | |
180 | bool AMDGPUTargetInfo::initFeatureMap( |
181 | llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, |
182 | const std::vector<std::string> &FeatureVec) const { |
183 | |
184 | using namespace llvm::AMDGPU; |
185 | fillAMDGPUFeatureMap(GPU: CPU, T: getTriple(), Features); |
186 | if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec)) |
187 | return false; |
188 | |
189 | // TODO: Should move this logic into TargetParser |
190 | auto HasError = insertWaveSizeFeature(GPU: CPU, T: getTriple(), Features); |
191 | switch (HasError.first) { |
192 | default: |
193 | break; |
194 | case llvm::AMDGPU::INVALID_FEATURE_COMBINATION: |
195 | Diags.Report(DiagID: diag::err_invalid_feature_combination) << HasError.second; |
196 | return false; |
197 | case llvm::AMDGPU::UNSUPPORTED_TARGET_FEATURE: |
198 | Diags.Report(DiagID: diag::err_opt_not_valid_on_target) << HasError.second; |
199 | return false; |
200 | } |
201 | |
202 | return true; |
203 | } |
204 | |
205 | void AMDGPUTargetInfo::fillValidCPUList( |
206 | SmallVectorImpl<StringRef> &Values) const { |
207 | if (isAMDGCN(TT: getTriple())) |
208 | llvm::AMDGPU::fillValidArchListAMDGCN(Values); |
209 | else |
210 | llvm::AMDGPU::fillValidArchListR600(Values); |
211 | } |
212 | |
213 | void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { |
214 | AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; |
215 | } |
216 | |
217 | AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, |
218 | const TargetOptions &Opts) |
219 | : TargetInfo(Triple), |
220 | GPUKind(isAMDGCN(TT: Triple) ? |
221 | llvm::AMDGPU::parseArchAMDGCN(CPU: Opts.CPU) : |
222 | llvm::AMDGPU::parseArchR600(CPU: Opts.CPU)), |
223 | GPUFeatures(isAMDGCN(TT: Triple) ? |
224 | llvm::AMDGPU::getArchAttrAMDGCN(AK: GPUKind) : |
225 | llvm::AMDGPU::getArchAttrR600(AK: GPUKind)) { |
226 | resetDataLayout(DL: isAMDGCN(TT: getTriple()) ? DataLayoutStringAMDGCN |
227 | : DataLayoutStringR600); |
228 | |
229 | setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || |
230 | !isAMDGCN(TT: Triple)); |
231 | UseAddrSpaceMapMangling = true; |
232 | |
233 | if (isAMDGCN(TT: Triple)) { |
234 | // __bf16 is always available as a load/store only type on AMDGCN. |
235 | BFloat16Width = BFloat16Align = 16; |
236 | BFloat16Format = &llvm::APFloat::BFloat(); |
237 | } |
238 | |
239 | HasLegalHalfType = true; |
240 | HasFloat16 = true; |
241 | WavefrontSize = (GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32) ? 32 : 64; |
242 | AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics; |
243 | |
244 | // Set pointer width and alignment for the generic address space. |
245 | PointerWidth = PointerAlign = getPointerWidthV(AS: LangAS::Default); |
246 | if (getMaxPointerWidth() == 64) { |
247 | LongWidth = LongAlign = 64; |
248 | SizeType = UnsignedLong; |
249 | PtrDiffType = SignedLong; |
250 | IntPtrType = SignedLong; |
251 | } |
252 | |
253 | MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; |
254 | CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP); |
255 | for (auto F : {"image-insts" , "gws" }) |
256 | ReadOnlyFeatures.insert(key: F); |
257 | HalfArgsAndReturns = true; |
258 | } |
259 | |
260 | void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { |
261 | TargetInfo::adjust(Diags, Opts); |
262 | // ToDo: There are still a few places using default address space as private |
263 | // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL |
264 | // can be removed from the following line. |
265 | setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL || |
266 | !isAMDGCN(TT: getTriple())); |
267 | } |
268 | |
269 | ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const { |
270 | return llvm::ArrayRef(BuiltinInfo, |
271 | clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin); |
272 | } |
273 | |
274 | void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, |
275 | MacroBuilder &Builder) const { |
276 | Builder.defineMacro(Name: "__AMD__" ); |
277 | Builder.defineMacro(Name: "__AMDGPU__" ); |
278 | |
279 | if (isAMDGCN(TT: getTriple())) |
280 | Builder.defineMacro(Name: "__AMDGCN__" ); |
281 | else |
282 | Builder.defineMacro(Name: "__R600__" ); |
283 | |
284 | // Legacy HIP host code relies on these default attributes to be defined. |
285 | bool IsHIPHost = Opts.HIP && !Opts.CUDAIsDevice; |
286 | if (GPUKind == llvm::AMDGPU::GK_NONE && !IsHIPHost) |
287 | return; |
288 | |
289 | llvm::SmallString<16> CanonName = |
290 | (isAMDGCN(TT: getTriple()) ? getArchNameAMDGCN(AK: GPUKind) |
291 | : getArchNameR600(AK: GPUKind)); |
292 | |
293 | // Sanitize the name of generic targets. |
294 | // e.g. gfx10-1-generic -> gfx10_1_generic |
295 | if (GPUKind >= llvm::AMDGPU::GK_AMDGCN_GENERIC_FIRST && |
296 | GPUKind <= llvm::AMDGPU::GK_AMDGCN_GENERIC_LAST) { |
297 | std::replace(first: CanonName.begin(), last: CanonName.end(), old_value: '-', new_value: '_'); |
298 | } |
299 | |
300 | Builder.defineMacro(Name: Twine("__" ) + Twine(CanonName) + Twine("__" )); |
301 | // Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10___ |
302 | if (isAMDGCN(TT: getTriple()) && !IsHIPHost) { |
303 | assert(StringRef(CanonName).starts_with("gfx" ) && |
304 | "Invalid amdgcn canonical name" ); |
305 | StringRef CanonFamilyName = getArchFamilyNameAMDGCN(AK: GPUKind); |
306 | Builder.defineMacro(Name: Twine("__" ) + Twine(CanonFamilyName.upper()) + |
307 | Twine("__" )); |
308 | Builder.defineMacro(Name: "__amdgcn_processor__" , |
309 | Value: Twine("\"" ) + Twine(CanonName) + Twine("\"" )); |
310 | Builder.defineMacro(Name: "__amdgcn_target_id__" , |
311 | Value: Twine("\"" ) + Twine(*getTargetID()) + Twine("\"" )); |
312 | for (auto F : getAllPossibleTargetIDFeatures(T: getTriple(), Processor: CanonName)) { |
313 | auto Loc = OffloadArchFeatures.find(Key: F); |
314 | if (Loc != OffloadArchFeatures.end()) { |
315 | std::string NewF = F.str(); |
316 | std::replace(first: NewF.begin(), last: NewF.end(), old_value: '-', new_value: '_'); |
317 | Builder.defineMacro(Name: Twine("__amdgcn_feature_" ) + Twine(NewF) + |
318 | Twine("__" ), |
319 | Value: Loc->second ? "1" : "0" ); |
320 | } |
321 | } |
322 | } |
323 | |
324 | if (AllowAMDGPUUnsafeFPAtomics) |
325 | Builder.defineMacro(Name: "__AMDGCN_UNSAFE_FP_ATOMICS__" ); |
326 | |
327 | // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be |
328 | // removed in the near future. |
329 | if (hasFMAF()) |
330 | Builder.defineMacro(Name: "__HAS_FMAF__" ); |
331 | if (hasFastFMAF()) |
332 | Builder.defineMacro(Name: "FP_FAST_FMAF" ); |
333 | if (hasLDEXPF()) |
334 | Builder.defineMacro(Name: "__HAS_LDEXPF__" ); |
335 | if (hasFP64()) |
336 | Builder.defineMacro(Name: "__HAS_FP64__" ); |
337 | if (hasFastFMA()) |
338 | Builder.defineMacro(Name: "FP_FAST_FMA" ); |
339 | |
340 | Builder.defineMacro(Name: "__AMDGCN_WAVEFRONT_SIZE__" , Value: Twine(WavefrontSize)); |
341 | // ToDo: deprecate this macro for naming consistency. |
342 | Builder.defineMacro(Name: "__AMDGCN_WAVEFRONT_SIZE" , Value: Twine(WavefrontSize)); |
343 | Builder.defineMacro(Name: "__AMDGCN_CUMODE__" , Value: Twine(CUMode)); |
344 | } |
345 | |
346 | void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { |
347 | assert(HalfFormat == Aux->HalfFormat); |
348 | assert(FloatFormat == Aux->FloatFormat); |
349 | assert(DoubleFormat == Aux->DoubleFormat); |
350 | |
351 | // On x86_64 long double is 80-bit extended precision format, which is |
352 | // not supported by AMDGPU. 128-bit floating point format is also not |
353 | // supported by AMDGPU. Therefore keep its own format for these two types. |
354 | auto SaveLongDoubleFormat = LongDoubleFormat; |
355 | auto SaveFloat128Format = Float128Format; |
356 | auto SaveLongDoubleWidth = LongDoubleWidth; |
357 | auto SaveLongDoubleAlign = LongDoubleAlign; |
358 | copyAuxTarget(Aux); |
359 | LongDoubleFormat = SaveLongDoubleFormat; |
360 | Float128Format = SaveFloat128Format; |
361 | LongDoubleWidth = SaveLongDoubleWidth; |
362 | LongDoubleAlign = SaveLongDoubleAlign; |
363 | // For certain builtin types support on the host target, claim they are |
364 | // support to pass the compilation of the host code during the device-side |
365 | // compilation. |
366 | // FIXME: As the side effect, we also accept `__float128` uses in the device |
367 | // code. To rejct these builtin types supported in the host target but not in |
368 | // the device target, one approach would support `device_builtin` attribute |
369 | // so that we could tell the device builtin types from the host ones. The |
370 | // also solves the different representations of the same builtin type, such |
371 | // as `size_t` in the MSVC environment. |
372 | if (Aux->hasFloat128Type()) { |
373 | HasFloat128 = true; |
374 | Float128Format = DoubleFormat; |
375 | } |
376 | } |
377 | |