| 1 | //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements AMDGPU TargetInfo objects. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "AMDGPU.h" |
| 14 | #include "clang/Basic/Builtins.h" |
| 15 | #include "clang/Basic/Diagnostic.h" |
| 16 | #include "clang/Basic/LangOptions.h" |
| 17 | #include "clang/Basic/MacroBuilder.h" |
| 18 | #include "clang/Basic/TargetBuiltins.h" |
| 19 | #include "llvm/ADT/SmallString.h" |
| 20 | using namespace clang; |
| 21 | using namespace clang::targets; |
| 22 | |
| 23 | namespace clang { |
| 24 | namespace targets { |
| 25 | |
| 26 | // If you edit the description strings, make sure you update |
| 27 | // getPointerWidthV(). |
| 28 | |
| 29 | static const char *const DataLayoutStringR600 = |
| 30 | "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" |
| 31 | "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" ; |
| 32 | |
/// LLVM data layout description installed by the AMDGPUTargetInfo constructor
/// when the triple is amdgcn. Any change to the pointer entries must be
/// mirrored in getPointerWidthV().
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512"
    "-v1024:1024-v2048:2048"
    "-n32:64-S32-A5-G1-ni:7:8:9";
| 38 | |
| 39 | const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { |
| 40 | llvm::AMDGPUAS::FLAT_ADDRESS, // Default |
| 41 | llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global |
| 42 | llvm::AMDGPUAS::LOCAL_ADDRESS, // opencl_local |
| 43 | llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant |
| 44 | llvm::AMDGPUAS::PRIVATE_ADDRESS, // opencl_private |
| 45 | llvm::AMDGPUAS::FLAT_ADDRESS, // opencl_generic |
| 46 | llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_device |
| 47 | llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_host |
| 48 | llvm::AMDGPUAS::GLOBAL_ADDRESS, // cuda_device |
| 49 | llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant |
| 50 | llvm::AMDGPUAS::LOCAL_ADDRESS, // cuda_shared |
| 51 | llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global |
| 52 | llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global_device |
| 53 | llvm::AMDGPUAS::GLOBAL_ADDRESS, // sycl_global_host |
| 54 | llvm::AMDGPUAS::LOCAL_ADDRESS, // sycl_local |
| 55 | llvm::AMDGPUAS::PRIVATE_ADDRESS, // sycl_private |
| 56 | llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_sptr |
| 57 | llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr |
| 58 | llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64 |
| 59 | llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared |
| 60 | llvm::AMDGPUAS::CONSTANT_ADDRESS, // hlsl_constant |
| 61 | // FIXME(pr/122103): hlsl_private -> PRIVATE is wrong, but at least this |
| 62 | // will break loudly. |
| 63 | llvm::AMDGPUAS::PRIVATE_ADDRESS, // hlsl_private |
| 64 | llvm::AMDGPUAS::GLOBAL_ADDRESS, // hlsl_device |
| 65 | llvm::AMDGPUAS::PRIVATE_ADDRESS, // hlsl_input |
| 66 | }; |
| 67 | |
| 68 | const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { |
| 69 | llvm::AMDGPUAS::PRIVATE_ADDRESS, // Default |
| 70 | llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global |
| 71 | llvm::AMDGPUAS::LOCAL_ADDRESS, // opencl_local |
| 72 | llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant |
| 73 | llvm::AMDGPUAS::PRIVATE_ADDRESS, // opencl_private |
| 74 | llvm::AMDGPUAS::FLAT_ADDRESS, // opencl_generic |
| 75 | llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_device |
| 76 | llvm::AMDGPUAS::GLOBAL_ADDRESS, // opencl_global_host |
| 77 | llvm::AMDGPUAS::GLOBAL_ADDRESS, // cuda_device |
| 78 | llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant |
| 79 | llvm::AMDGPUAS::LOCAL_ADDRESS, // cuda_shared |
| 80 | // SYCL address space values for this map are dummy |
| 81 | llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global |
| 82 | llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global_device |
| 83 | llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global_host |
| 84 | llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_local |
| 85 | llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_private |
| 86 | llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_sptr |
| 87 | llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr |
| 88 | llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64 |
| 89 | llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared |
| 90 | llvm::AMDGPUAS::CONSTANT_ADDRESS, // hlsl_constant |
| 91 | llvm::AMDGPUAS::PRIVATE_ADDRESS, // hlsl_private |
| 92 | llvm::AMDGPUAS::GLOBAL_ADDRESS, // hlsl_device |
| 93 | llvm::AMDGPUAS::PRIVATE_ADDRESS, // hlsl_input |
| 94 | }; |
| 95 | } // namespace targets |
| 96 | } // namespace clang |
| 97 | |
| 98 | static constexpr int NumBuiltins = |
| 99 | clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin; |
| 100 | |
| 101 | static constexpr llvm::StringTable BuiltinStrings = |
| 102 | CLANG_BUILTIN_STR_TABLE_START |
| 103 | #define BUILTIN CLANG_BUILTIN_STR_TABLE |
| 104 | #define TARGET_BUILTIN CLANG_TARGET_BUILTIN_STR_TABLE |
| 105 | #include "clang/Basic/BuiltinsAMDGPU.def" |
| 106 | ; |
| 107 | |
| 108 | static constexpr auto BuiltinInfos = Builtin::MakeInfos<NumBuiltins>(Infos: { |
| 109 | #define BUILTIN CLANG_BUILTIN_ENTRY |
| 110 | #define TARGET_BUILTIN CLANG_TARGET_BUILTIN_ENTRY |
| 111 | #include "clang/Basic/BuiltinsAMDGPU.def" |
| 112 | }); |
| 113 | |
| 114 | const char *const AMDGPUTargetInfo::GCCRegNames[] = { |
| 115 | "v0" , "v1" , "v2" , "v3" , "v4" , "v5" , "v6" , "v7" , "v8" , |
| 116 | "v9" , "v10" , "v11" , "v12" , "v13" , "v14" , "v15" , "v16" , "v17" , |
| 117 | "v18" , "v19" , "v20" , "v21" , "v22" , "v23" , "v24" , "v25" , "v26" , |
| 118 | "v27" , "v28" , "v29" , "v30" , "v31" , "v32" , "v33" , "v34" , "v35" , |
| 119 | "v36" , "v37" , "v38" , "v39" , "v40" , "v41" , "v42" , "v43" , "v44" , |
| 120 | "v45" , "v46" , "v47" , "v48" , "v49" , "v50" , "v51" , "v52" , "v53" , |
| 121 | "v54" , "v55" , "v56" , "v57" , "v58" , "v59" , "v60" , "v61" , "v62" , |
| 122 | "v63" , "v64" , "v65" , "v66" , "v67" , "v68" , "v69" , "v70" , "v71" , |
| 123 | "v72" , "v73" , "v74" , "v75" , "v76" , "v77" , "v78" , "v79" , "v80" , |
| 124 | "v81" , "v82" , "v83" , "v84" , "v85" , "v86" , "v87" , "v88" , "v89" , |
| 125 | "v90" , "v91" , "v92" , "v93" , "v94" , "v95" , "v96" , "v97" , "v98" , |
| 126 | "v99" , "v100" , "v101" , "v102" , "v103" , "v104" , "v105" , "v106" , "v107" , |
| 127 | "v108" , "v109" , "v110" , "v111" , "v112" , "v113" , "v114" , "v115" , "v116" , |
| 128 | "v117" , "v118" , "v119" , "v120" , "v121" , "v122" , "v123" , "v124" , "v125" , |
| 129 | "v126" , "v127" , "v128" , "v129" , "v130" , "v131" , "v132" , "v133" , "v134" , |
| 130 | "v135" , "v136" , "v137" , "v138" , "v139" , "v140" , "v141" , "v142" , "v143" , |
| 131 | "v144" , "v145" , "v146" , "v147" , "v148" , "v149" , "v150" , "v151" , "v152" , |
| 132 | "v153" , "v154" , "v155" , "v156" , "v157" , "v158" , "v159" , "v160" , "v161" , |
| 133 | "v162" , "v163" , "v164" , "v165" , "v166" , "v167" , "v168" , "v169" , "v170" , |
| 134 | "v171" , "v172" , "v173" , "v174" , "v175" , "v176" , "v177" , "v178" , "v179" , |
| 135 | "v180" , "v181" , "v182" , "v183" , "v184" , "v185" , "v186" , "v187" , "v188" , |
| 136 | "v189" , "v190" , "v191" , "v192" , "v193" , "v194" , "v195" , "v196" , "v197" , |
| 137 | "v198" , "v199" , "v200" , "v201" , "v202" , "v203" , "v204" , "v205" , "v206" , |
| 138 | "v207" , "v208" , "v209" , "v210" , "v211" , "v212" , "v213" , "v214" , "v215" , |
| 139 | "v216" , "v217" , "v218" , "v219" , "v220" , "v221" , "v222" , "v223" , "v224" , |
| 140 | "v225" , "v226" , "v227" , "v228" , "v229" , "v230" , "v231" , "v232" , "v233" , |
| 141 | "v234" , "v235" , "v236" , "v237" , "v238" , "v239" , "v240" , "v241" , "v242" , |
| 142 | "v243" , "v244" , "v245" , "v246" , "v247" , "v248" , "v249" , "v250" , "v251" , |
| 143 | "v252" , "v253" , "v254" , "v255" , "s0" , "s1" , "s2" , "s3" , "s4" , |
| 144 | "s5" , "s6" , "s7" , "s8" , "s9" , "s10" , "s11" , "s12" , "s13" , |
| 145 | "s14" , "s15" , "s16" , "s17" , "s18" , "s19" , "s20" , "s21" , "s22" , |
| 146 | "s23" , "s24" , "s25" , "s26" , "s27" , "s28" , "s29" , "s30" , "s31" , |
| 147 | "s32" , "s33" , "s34" , "s35" , "s36" , "s37" , "s38" , "s39" , "s40" , |
| 148 | "s41" , "s42" , "s43" , "s44" , "s45" , "s46" , "s47" , "s48" , "s49" , |
| 149 | "s50" , "s51" , "s52" , "s53" , "s54" , "s55" , "s56" , "s57" , "s58" , |
| 150 | "s59" , "s60" , "s61" , "s62" , "s63" , "s64" , "s65" , "s66" , "s67" , |
| 151 | "s68" , "s69" , "s70" , "s71" , "s72" , "s73" , "s74" , "s75" , "s76" , |
| 152 | "s77" , "s78" , "s79" , "s80" , "s81" , "s82" , "s83" , "s84" , "s85" , |
| 153 | "s86" , "s87" , "s88" , "s89" , "s90" , "s91" , "s92" , "s93" , "s94" , |
| 154 | "s95" , "s96" , "s97" , "s98" , "s99" , "s100" , "s101" , "s102" , "s103" , |
| 155 | "s104" , "s105" , "s106" , "s107" , "s108" , "s109" , "s110" , "s111" , "s112" , |
| 156 | "s113" , "s114" , "s115" , "s116" , "s117" , "s118" , "s119" , "s120" , "s121" , |
| 157 | "s122" , "s123" , "s124" , "s125" , "s126" , "s127" , "exec" , "vcc" , "scc" , |
| 158 | "m0" , "flat_scratch" , "exec_lo" , "exec_hi" , "vcc_lo" , "vcc_hi" , |
| 159 | "flat_scratch_lo" , "flat_scratch_hi" , |
| 160 | "a0" , "a1" , "a2" , "a3" , "a4" , "a5" , "a6" , "a7" , "a8" , |
| 161 | "a9" , "a10" , "a11" , "a12" , "a13" , "a14" , "a15" , "a16" , "a17" , |
| 162 | "a18" , "a19" , "a20" , "a21" , "a22" , "a23" , "a24" , "a25" , "a26" , |
| 163 | "a27" , "a28" , "a29" , "a30" , "a31" , "a32" , "a33" , "a34" , "a35" , |
| 164 | "a36" , "a37" , "a38" , "a39" , "a40" , "a41" , "a42" , "a43" , "a44" , |
| 165 | "a45" , "a46" , "a47" , "a48" , "a49" , "a50" , "a51" , "a52" , "a53" , |
| 166 | "a54" , "a55" , "a56" , "a57" , "a58" , "a59" , "a60" , "a61" , "a62" , |
| 167 | "a63" , "a64" , "a65" , "a66" , "a67" , "a68" , "a69" , "a70" , "a71" , |
| 168 | "a72" , "a73" , "a74" , "a75" , "a76" , "a77" , "a78" , "a79" , "a80" , |
| 169 | "a81" , "a82" , "a83" , "a84" , "a85" , "a86" , "a87" , "a88" , "a89" , |
| 170 | "a90" , "a91" , "a92" , "a93" , "a94" , "a95" , "a96" , "a97" , "a98" , |
| 171 | "a99" , "a100" , "a101" , "a102" , "a103" , "a104" , "a105" , "a106" , "a107" , |
| 172 | "a108" , "a109" , "a110" , "a111" , "a112" , "a113" , "a114" , "a115" , "a116" , |
| 173 | "a117" , "a118" , "a119" , "a120" , "a121" , "a122" , "a123" , "a124" , "a125" , |
| 174 | "a126" , "a127" , "a128" , "a129" , "a130" , "a131" , "a132" , "a133" , "a134" , |
| 175 | "a135" , "a136" , "a137" , "a138" , "a139" , "a140" , "a141" , "a142" , "a143" , |
| 176 | "a144" , "a145" , "a146" , "a147" , "a148" , "a149" , "a150" , "a151" , "a152" , |
| 177 | "a153" , "a154" , "a155" , "a156" , "a157" , "a158" , "a159" , "a160" , "a161" , |
| 178 | "a162" , "a163" , "a164" , "a165" , "a166" , "a167" , "a168" , "a169" , "a170" , |
| 179 | "a171" , "a172" , "a173" , "a174" , "a175" , "a176" , "a177" , "a178" , "a179" , |
| 180 | "a180" , "a181" , "a182" , "a183" , "a184" , "a185" , "a186" , "a187" , "a188" , |
| 181 | "a189" , "a190" , "a191" , "a192" , "a193" , "a194" , "a195" , "a196" , "a197" , |
| 182 | "a198" , "a199" , "a200" , "a201" , "a202" , "a203" , "a204" , "a205" , "a206" , |
| 183 | "a207" , "a208" , "a209" , "a210" , "a211" , "a212" , "a213" , "a214" , "a215" , |
| 184 | "a216" , "a217" , "a218" , "a219" , "a220" , "a221" , "a222" , "a223" , "a224" , |
| 185 | "a225" , "a226" , "a227" , "a228" , "a229" , "a230" , "a231" , "a232" , "a233" , |
| 186 | "a234" , "a235" , "a236" , "a237" , "a238" , "a239" , "a240" , "a241" , "a242" , |
| 187 | "a243" , "a244" , "a245" , "a246" , "a247" , "a248" , "a249" , "a250" , "a251" , |
| 188 | "a252" , "a253" , "a254" , "a255" |
| 189 | }; |
| 190 | |
| 191 | ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const { |
| 192 | return llvm::ArrayRef(GCCRegNames); |
| 193 | } |
| 194 | |
| 195 | bool AMDGPUTargetInfo::initFeatureMap( |
| 196 | llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU, |
| 197 | const std::vector<std::string> &FeatureVec) const { |
| 198 | |
| 199 | using namespace llvm::AMDGPU; |
| 200 | fillAMDGPUFeatureMap(GPU: CPU, T: getTriple(), Features); |
| 201 | if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec)) |
| 202 | return false; |
| 203 | |
| 204 | // TODO: Should move this logic into TargetParser |
| 205 | auto HasError = insertWaveSizeFeature(GPU: CPU, T: getTriple(), Features); |
| 206 | switch (HasError.first) { |
| 207 | default: |
| 208 | break; |
| 209 | case llvm::AMDGPU::INVALID_FEATURE_COMBINATION: |
| 210 | Diags.Report(DiagID: diag::err_invalid_feature_combination) << HasError.second; |
| 211 | return false; |
| 212 | case llvm::AMDGPU::UNSUPPORTED_TARGET_FEATURE: |
| 213 | Diags.Report(DiagID: diag::err_opt_not_valid_on_target) << HasError.second; |
| 214 | return false; |
| 215 | } |
| 216 | |
| 217 | return true; |
| 218 | } |
| 219 | |
| 220 | void AMDGPUTargetInfo::fillValidCPUList( |
| 221 | SmallVectorImpl<StringRef> &Values) const { |
| 222 | if (isAMDGCN(TT: getTriple())) |
| 223 | llvm::AMDGPU::fillValidArchListAMDGCN(Values); |
| 224 | else |
| 225 | llvm::AMDGPU::fillValidArchListR600(Values); |
| 226 | } |
| 227 | |
| 228 | void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) { |
| 229 | AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap; |
| 230 | } |
| 231 | |
| 232 | AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple, |
| 233 | const TargetOptions &Opts) |
| 234 | : TargetInfo(Triple), |
| 235 | GPUKind(isAMDGCN(TT: Triple) ? |
| 236 | llvm::AMDGPU::parseArchAMDGCN(CPU: Opts.CPU) : |
| 237 | llvm::AMDGPU::parseArchR600(CPU: Opts.CPU)), |
| 238 | GPUFeatures(isAMDGCN(TT: Triple) ? |
| 239 | llvm::AMDGPU::getArchAttrAMDGCN(AK: GPUKind) : |
| 240 | llvm::AMDGPU::getArchAttrR600(AK: GPUKind)) { |
| 241 | resetDataLayout(DL: isAMDGCN(TT: getTriple()) ? DataLayoutStringAMDGCN |
| 242 | : DataLayoutStringR600); |
| 243 | |
| 244 | setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || |
| 245 | !isAMDGCN(TT: Triple)); |
| 246 | UseAddrSpaceMapMangling = true; |
| 247 | |
| 248 | if (isAMDGCN(TT: Triple)) { |
| 249 | // __bf16 is always available as a load/store only type on AMDGCN. |
| 250 | BFloat16Width = BFloat16Align = 16; |
| 251 | BFloat16Format = &llvm::APFloat::BFloat(); |
| 252 | } |
| 253 | |
| 254 | HasLegalHalfType = true; |
| 255 | HasFloat16 = true; |
| 256 | WavefrontSize = (GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32) ? 32 : 64; |
| 257 | |
| 258 | // Set pointer width and alignment for the generic address space. |
| 259 | PointerWidth = PointerAlign = getPointerWidthV(AS: LangAS::Default); |
| 260 | if (getMaxPointerWidth() == 64) { |
| 261 | LongWidth = LongAlign = 64; |
| 262 | SizeType = UnsignedLong; |
| 263 | PtrDiffType = SignedLong; |
| 264 | IntPtrType = SignedLong; |
| 265 | } |
| 266 | |
| 267 | MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; |
| 268 | CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP); |
| 269 | for (auto F : {"image-insts" , "gws" , "vmem-to-lds-load-insts" }) |
| 270 | ReadOnlyFeatures.insert(key: F); |
| 271 | HalfArgsAndReturns = true; |
| 272 | } |
| 273 | |
| 274 | void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) { |
| 275 | TargetInfo::adjust(Diags, Opts); |
| 276 | // ToDo: There are still a few places using default address space as private |
| 277 | // address space in OpenCL, which needs to be cleaned up, then the references |
| 278 | // to OpenCL can be removed from the following line. |
| 279 | setAddressSpaceMap((Opts.OpenCL && !Opts.OpenCLGenericAddressSpace) || |
| 280 | !isAMDGCN(TT: getTriple())); |
| 281 | |
| 282 | AtomicOpts = AtomicOptions(Opts); |
| 283 | } |
| 284 | |
| 285 | llvm::SmallVector<Builtin::InfosShard> |
| 286 | AMDGPUTargetInfo::getTargetBuiltins() const { |
| 287 | return {{.Strings: &BuiltinStrings, .Infos: BuiltinInfos}}; |
| 288 | } |
| 289 | |
| 290 | void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, |
| 291 | MacroBuilder &Builder) const { |
| 292 | Builder.defineMacro(Name: "__AMD__" ); |
| 293 | Builder.defineMacro(Name: "__AMDGPU__" ); |
| 294 | |
| 295 | if (isAMDGCN(TT: getTriple())) |
| 296 | Builder.defineMacro(Name: "__AMDGCN__" ); |
| 297 | else |
| 298 | Builder.defineMacro(Name: "__R600__" ); |
| 299 | |
| 300 | // Legacy HIP host code relies on these default attributes to be defined. |
| 301 | bool IsHIPHost = Opts.HIP && !Opts.CUDAIsDevice; |
| 302 | if (GPUKind == llvm::AMDGPU::GK_NONE && !IsHIPHost) |
| 303 | return; |
| 304 | |
| 305 | llvm::SmallString<16> CanonName = |
| 306 | (isAMDGCN(TT: getTriple()) ? getArchNameAMDGCN(AK: GPUKind) |
| 307 | : getArchNameR600(AK: GPUKind)); |
| 308 | |
| 309 | // Sanitize the name of generic targets. |
| 310 | // e.g. gfx10-1-generic -> gfx10_1_generic |
| 311 | if (GPUKind >= llvm::AMDGPU::GK_AMDGCN_GENERIC_FIRST && |
| 312 | GPUKind <= llvm::AMDGPU::GK_AMDGCN_GENERIC_LAST) { |
| 313 | llvm::replace(Range&: CanonName, OldValue: '-', NewValue: '_'); |
| 314 | } |
| 315 | |
| 316 | Builder.defineMacro(Name: Twine("__" ) + Twine(CanonName) + Twine("__" )); |
| 317 | // Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10___ |
| 318 | if (isAMDGCN(TT: getTriple()) && !IsHIPHost) { |
| 319 | assert(StringRef(CanonName).starts_with("gfx" ) && |
| 320 | "Invalid amdgcn canonical name" ); |
| 321 | StringRef CanonFamilyName = getArchFamilyNameAMDGCN(AK: GPUKind); |
| 322 | Builder.defineMacro(Name: Twine("__" ) + Twine(CanonFamilyName.upper()) + |
| 323 | Twine("__" )); |
| 324 | Builder.defineMacro(Name: "__amdgcn_processor__" , |
| 325 | Value: Twine("\"" ) + Twine(CanonName) + Twine("\"" )); |
| 326 | Builder.defineMacro(Name: "__amdgcn_target_id__" , |
| 327 | Value: Twine("\"" ) + Twine(*getTargetID()) + Twine("\"" )); |
| 328 | for (auto F : getAllPossibleTargetIDFeatures(T: getTriple(), Processor: CanonName)) { |
| 329 | auto Loc = OffloadArchFeatures.find(Key: F); |
| 330 | if (Loc != OffloadArchFeatures.end()) { |
| 331 | std::string NewF = F.str(); |
| 332 | llvm::replace(Range&: NewF, OldValue: '-', NewValue: '_'); |
| 333 | Builder.defineMacro(Name: Twine("__amdgcn_feature_" ) + Twine(NewF) + |
| 334 | Twine("__" ), |
| 335 | Value: Loc->second ? "1" : "0" ); |
| 336 | } |
| 337 | } |
| 338 | } |
| 339 | |
| 340 | if (Opts.AtomicIgnoreDenormalMode) |
| 341 | Builder.defineMacro(Name: "__AMDGCN_UNSAFE_FP_ATOMICS__" ); |
| 342 | |
| 343 | // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be |
| 344 | // removed in the near future. |
| 345 | if (hasFMAF()) |
| 346 | Builder.defineMacro(Name: "__HAS_FMAF__" ); |
| 347 | if (hasFastFMAF()) |
| 348 | Builder.defineMacro(Name: "FP_FAST_FMAF" ); |
| 349 | if (hasLDEXPF()) |
| 350 | Builder.defineMacro(Name: "__HAS_LDEXPF__" ); |
| 351 | if (hasFP64()) |
| 352 | Builder.defineMacro(Name: "__HAS_FP64__" ); |
| 353 | if (hasFastFMA()) |
| 354 | Builder.defineMacro(Name: "FP_FAST_FMA" ); |
| 355 | |
| 356 | Builder.defineMacro(Name: "__AMDGCN_WAVEFRONT_SIZE__" , Value: Twine(WavefrontSize), |
| 357 | DeprecationMsg: "compile-time-constant access to the wavefront size will " |
| 358 | "be removed in a future release" ); |
| 359 | Builder.defineMacro(Name: "__AMDGCN_WAVEFRONT_SIZE" , Value: Twine(WavefrontSize), |
| 360 | DeprecationMsg: "compile-time-constant access to the wavefront size will " |
| 361 | "be removed in a future release" ); |
| 362 | Builder.defineMacro(Name: "__AMDGCN_CUMODE__" , Value: Twine(CUMode)); |
| 363 | } |
| 364 | |
| 365 | void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { |
| 366 | assert(HalfFormat == Aux->HalfFormat); |
| 367 | assert(FloatFormat == Aux->FloatFormat); |
| 368 | assert(DoubleFormat == Aux->DoubleFormat); |
| 369 | |
| 370 | // On x86_64 long double is 80-bit extended precision format, which is |
| 371 | // not supported by AMDGPU. 128-bit floating point format is also not |
| 372 | // supported by AMDGPU. Therefore keep its own format for these two types. |
| 373 | auto SaveLongDoubleFormat = LongDoubleFormat; |
| 374 | auto SaveFloat128Format = Float128Format; |
| 375 | auto SaveLongDoubleWidth = LongDoubleWidth; |
| 376 | auto SaveLongDoubleAlign = LongDoubleAlign; |
| 377 | copyAuxTarget(Aux); |
| 378 | LongDoubleFormat = SaveLongDoubleFormat; |
| 379 | Float128Format = SaveFloat128Format; |
| 380 | LongDoubleWidth = SaveLongDoubleWidth; |
| 381 | LongDoubleAlign = SaveLongDoubleAlign; |
| 382 | // For certain builtin types support on the host target, claim they are |
| 383 | // support to pass the compilation of the host code during the device-side |
| 384 | // compilation. |
| 385 | // FIXME: As the side effect, we also accept `__float128` uses in the device |
| 386 | // code. To rejct these builtin types supported in the host target but not in |
| 387 | // the device target, one approach would support `device_builtin` attribute |
| 388 | // so that we could tell the device builtin types from the host ones. The |
| 389 | // also solves the different representations of the same builtin type, such |
| 390 | // as `size_t` in the MSVC environment. |
| 391 | if (Aux->hasFloat128Type()) { |
| 392 | HasFloat128 = true; |
| 393 | Float128Format = DoubleFormat; |
| 394 | } |
| 395 | } |
| 396 | |