1 | //===--- NVPTX.cpp - Implement NVPTX target feature support ---------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements NVPTX TargetInfo objects. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "NVPTX.h" |
14 | #include "clang/Basic/Builtins.h" |
15 | #include "clang/Basic/MacroBuilder.h" |
16 | #include "clang/Basic/TargetBuiltins.h" |
17 | #include "llvm/ADT/StringSwitch.h" |
18 | |
19 | using namespace clang; |
20 | using namespace clang::targets; |
21 | |
// Number of NVPTX target-specific builtins, derived from the TableGen'd
// builtin ID range.
static constexpr int NumBuiltins =
    clang::NVPTX::LastTSBuiltin - Builtin::FirstTSBuiltin;

// Pull in the generated string table holding builtin names/type signatures.
#define GET_BUILTIN_STR_TABLE
#include "clang/Basic/BuiltinsNVPTX.inc"
#undef GET_BUILTIN_STR_TABLE

// Per-builtin metadata records generated from BuiltinsNVPTX.td.
static constexpr Builtin::Info BuiltinInfos[] = {
#define GET_BUILTIN_INFOS
#include "clang/Basic/BuiltinsNVPTX.inc"
#undef GET_BUILTIN_INFOS
};
// The generated table must cover exactly the target builtin ID range.
static_assert(std::size(BuiltinInfos) == NumBuiltins);
35 | |
36 | const char *const NVPTXTargetInfo::GCCRegNames[] = {"r0" }; |
37 | |
// Construct the NVPTX target description. TargetPointerWidth selects the
// 32- or 64-bit variant; most integer/pointer properties are mirrored from
// the host target (if one can be built from Opts.HostTriple) so that host
// and device agree on type layout across the CUDA/OpenMP offload boundary.
NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
                                 const TargetOptions &Opts,
                                 unsigned TargetPointerWidth)
    : TargetInfo(Triple) {
  assert((TargetPointerWidth == 32 || TargetPointerWidth == 64) &&
         "NVPTX only supports 32- and 64-bit modes.");

  // Default PTX ISA version; any explicitly written "+ptxNN" feature below
  // overrides it (last one written wins).
  PTXVersion = 32;
  for (const StringRef Feature : Opts.FeaturesAsWritten) {
    int PTXV;
    if (!Feature.starts_with("+ptx") ||
        Feature.drop_front(4).getAsInteger(10, PTXV))
      continue;
    PTXVersion = PTXV; // TODO: should it be max(PTXVersion, PTXV)?
  }

  TLSSupported = false;
  VLASupported = false;
  AddrSpaceMap = &NVPTXAddrSpaceMap;
  UseAddrSpaceMapMangling = true;
  // __bf16 is always available as a load/store only type.
  BFloat16Width = BFloat16Align = 16;
  BFloat16Format = &llvm::APFloat::BFloat();

  // Define available target features
  // These must be defined in sorted order!
  NoAsmVariants = true;
  GPU = OffloadArch::UNUSED;

  // PTX supports f16 as a fundamental type.
  HasLegalHalfType = true;
  HasFloat16 = true;

  if (TargetPointerWidth == 32)
    resetDataLayout(
        "e-p:32:32-p6:32:32-p7:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
  else if (Opts.NVPTXUseShortPointers)
    // 64-bit generic pointers, but 32-bit pointers for the shared/const/local
    // (p3/p4/p5) address spaces.
    resetDataLayout(
        "e-p3:32:32-p4:32:32-p5:32:32-p6:32:32-p7:32:32-i64:64-i128:128-v16:"
        "16-v32:32-n16:32:64");
  else
    resetDataLayout("e-p6:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");

  // If possible, get a TargetInfo for our host triple, so we can match its
  // types.
  llvm::Triple HostTriple(Opts.HostTriple);
  if (!HostTriple.isNVPTX())
    HostTarget = AllocateTarget(llvm::Triple(Opts.HostTriple), Opts);

  // If no host target, make some guesses about the data layout and return.
  if (!HostTarget) {
    LongWidth = LongAlign = TargetPointerWidth;
    PointerWidth = PointerAlign = TargetPointerWidth;
    switch (TargetPointerWidth) {
    case 32:
      SizeType = TargetInfo::UnsignedInt;
      PtrDiffType = TargetInfo::SignedInt;
      IntPtrType = TargetInfo::SignedInt;
      break;
    case 64:
      SizeType = TargetInfo::UnsignedLong;
      PtrDiffType = TargetInfo::SignedLong;
      IntPtrType = TargetInfo::SignedLong;
      break;
    default:
      llvm_unreachable("TargetPointerWidth must be 32 or 64");
    }

    MaxAtomicInlineWidth = TargetPointerWidth;
    return;
  }

  // Copy properties from host target.
  PointerWidth = HostTarget->getPointerWidth(LangAS::Default);
  PointerAlign = HostTarget->getPointerAlign(LangAS::Default);
  BoolWidth = HostTarget->getBoolWidth();
  BoolAlign = HostTarget->getBoolAlign();
  IntWidth = HostTarget->getIntWidth();
  IntAlign = HostTarget->getIntAlign();
  HalfWidth = HostTarget->getHalfWidth();
  HalfAlign = HostTarget->getHalfAlign();
  FloatWidth = HostTarget->getFloatWidth();
  FloatAlign = HostTarget->getFloatAlign();
  DoubleWidth = HostTarget->getDoubleWidth();
  DoubleAlign = HostTarget->getDoubleAlign();
  LongWidth = HostTarget->getLongWidth();
  LongAlign = HostTarget->getLongAlign();
  LongLongWidth = HostTarget->getLongLongWidth();
  LongLongAlign = HostTarget->getLongLongAlign();
  MinGlobalAlign = HostTarget->getMinGlobalAlign(/* TypeSize = */ 0,
                                                 /* HasNonWeakDef = */ true);
  NewAlign = HostTarget->getNewAlign();
  DefaultAlignForAttributeAligned =
      HostTarget->getDefaultAlignForAttributeAligned();
  SizeType = HostTarget->getSizeType();
  IntMaxType = HostTarget->getIntMaxType();
  PtrDiffType = HostTarget->getPtrDiffType(LangAS::Default);
  IntPtrType = HostTarget->getIntPtrType();
  WCharType = HostTarget->getWCharType();
  WIntType = HostTarget->getWIntType();
  Char16Type = HostTarget->getChar16Type();
  Char32Type = HostTarget->getChar32Type();
  Int64Type = HostTarget->getInt64Type();
  SigAtomicType = HostTarget->getSigAtomicType();
  ProcessIDType = HostTarget->getProcessIDType();

  UseBitFieldTypeAlignment = HostTarget->useBitFieldTypeAlignment();
  UseZeroLengthBitfieldAlignment = HostTarget->useZeroLengthBitfieldAlignment();
  UseExplicitBitFieldAlignment = HostTarget->useExplicitBitFieldAlignment();
  ZeroLengthBitfieldBoundary = HostTarget->getZeroLengthBitfieldBoundary();

  // This is a bit of a lie, but it controls __GCC_ATOMIC_XXX_LOCK_FREE, and
  // we need those macros to be identical on host and device, because (among
  // other things) they affect which standard library classes are defined, and
  // we need all classes to be defined on both the host and device.
  MaxAtomicInlineWidth = HostTarget->getMaxAtomicInlineWidth();

  // Properties intentionally not copied from host:
  // - LargeArrayMinWidth, LargeArrayAlign: Not visible across the
  //   host/device boundary.
  // - SuitableAlign: Not visible across the host/device boundary, and may
  //   correctly be different on host/device, e.g. if host has wider vector
  //   types than device.
  // - LongDoubleWidth, LongDoubleAlign: nvptx's long double type is the same
  //   as its double type, but that's not necessarily true on the host.
  //   TODO: nvcc emits a warning when using long double on device; we should
  //   do the same.
}
166 | |
167 | ArrayRef<const char *> NVPTXTargetInfo::getGCCRegNames() const { |
168 | return llvm::ArrayRef(GCCRegNames); |
169 | } |
170 | |
171 | bool NVPTXTargetInfo::hasFeature(StringRef Feature) const { |
172 | return llvm::StringSwitch<bool>(Feature) |
173 | .Cases(S0: "ptx" , S1: "nvptx" , Value: true) |
174 | .Default(Value: false); |
175 | } |
176 | |
// Define the NVPTX preprocessor macros. __PTX__/__NVPTX__ are always set;
// __CUDA_ARCH__ (and, for sm_XXa variants, __CUDA_ARCH_FEAT_SMXX_ALL) is
// set only for device-side compilation (or when there is no host target).
void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
                                       MacroBuilder &Builder) const {
  Builder.defineMacro("__PTX__");
  Builder.defineMacro("__NVPTX__");

  // Skip setting architecture dependent macros if undefined.
  if (GPU == OffloadArch::UNUSED && !HostTarget)
    return;

  if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {
    // Set __CUDA_ARCH__ for the GPU specified.
    llvm::StringRef CUDAArchCode = [this] {
      switch (GPU) {
      // Non-NVPTX offload architectures (AMDGCN, SPIR-V, x86/Intel) can
      // never be selected for this target; breaking out of the switch
      // reaches the llvm_unreachable below.
      case OffloadArch::GFX600:
      case OffloadArch::GFX601:
      case OffloadArch::GFX602:
      case OffloadArch::GFX700:
      case OffloadArch::GFX701:
      case OffloadArch::GFX702:
      case OffloadArch::GFX703:
      case OffloadArch::GFX704:
      case OffloadArch::GFX705:
      case OffloadArch::GFX801:
      case OffloadArch::GFX802:
      case OffloadArch::GFX803:
      case OffloadArch::GFX805:
      case OffloadArch::GFX810:
      case OffloadArch::GFX9_GENERIC:
      case OffloadArch::GFX900:
      case OffloadArch::GFX902:
      case OffloadArch::GFX904:
      case OffloadArch::GFX906:
      case OffloadArch::GFX908:
      case OffloadArch::GFX909:
      case OffloadArch::GFX90a:
      case OffloadArch::GFX90c:
      case OffloadArch::GFX9_4_GENERIC:
      case OffloadArch::GFX942:
      case OffloadArch::GFX950:
      case OffloadArch::GFX10_1_GENERIC:
      case OffloadArch::GFX1010:
      case OffloadArch::GFX1011:
      case OffloadArch::GFX1012:
      case OffloadArch::GFX1013:
      case OffloadArch::GFX10_3_GENERIC:
      case OffloadArch::GFX1030:
      case OffloadArch::GFX1031:
      case OffloadArch::GFX1032:
      case OffloadArch::GFX1033:
      case OffloadArch::GFX1034:
      case OffloadArch::GFX1035:
      case OffloadArch::GFX1036:
      case OffloadArch::GFX11_GENERIC:
      case OffloadArch::GFX1100:
      case OffloadArch::GFX1101:
      case OffloadArch::GFX1102:
      case OffloadArch::GFX1103:
      case OffloadArch::GFX1150:
      case OffloadArch::GFX1151:
      case OffloadArch::GFX1152:
      case OffloadArch::GFX1153:
      case OffloadArch::GFX12_GENERIC:
      case OffloadArch::GFX1200:
      case OffloadArch::GFX1201:
      case OffloadArch::GFX1250:
      case OffloadArch::AMDGCNSPIRV:
      case OffloadArch::Generic:
      case OffloadArch::GRANITERAPIDS:
      case OffloadArch::BMG_G21:
      case OffloadArch::LAST:
        break;
      case OffloadArch::UNKNOWN:
        assert(false && "No GPU arch when compiling CUDA device code.");
        return "";
      // UNUSED deliberately maps to the lowest arch code here (sm_20).
      case OffloadArch::UNUSED:
      case OffloadArch::SM_20:
        return "200";
      case OffloadArch::SM_21:
        return "210";
      case OffloadArch::SM_30:
        return "300";
      case OffloadArch::SM_32_:
        return "320";
      case OffloadArch::SM_35:
        return "350";
      case OffloadArch::SM_37:
        return "370";
      case OffloadArch::SM_50:
        return "500";
      case OffloadArch::SM_52:
        return "520";
      case OffloadArch::SM_53:
        return "530";
      case OffloadArch::SM_60:
        return "600";
      case OffloadArch::SM_61:
        return "610";
      case OffloadArch::SM_62:
        return "620";
      case OffloadArch::SM_70:
        return "700";
      case OffloadArch::SM_72:
        return "720";
      case OffloadArch::SM_75:
        return "750";
      case OffloadArch::SM_80:
        return "800";
      case OffloadArch::SM_86:
        return "860";
      case OffloadArch::SM_87:
        return "870";
      case OffloadArch::SM_89:
        return "890";
      // Architecture-specific "a" variants share the base arch code.
      case OffloadArch::SM_90:
      case OffloadArch::SM_90a:
        return "900";
      case OffloadArch::SM_100:
      case OffloadArch::SM_100a:
        return "1000";
      case OffloadArch::SM_101:
      case OffloadArch::SM_101a:
        return "1010";
      case OffloadArch::SM_120:
      case OffloadArch::SM_120a:
        return "1200";
      }
      llvm_unreachable("unhandled OffloadArch");
    }();
    Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
    // For the feature-enhanced sm_XXa variants, additionally advertise the
    // full feature set, e.g. __CUDA_ARCH_FEAT_SM90_ALL for sm_90a
    // (drop_back strips the trailing '0' of the arch code).
    switch(GPU) {
      case OffloadArch::SM_90a:
      case OffloadArch::SM_100a:
      case OffloadArch::SM_101a:
      case OffloadArch::SM_120a:
        Builder.defineMacro("__CUDA_ARCH_FEAT_SM" + CUDAArchCode.drop_back() + "_ALL", "1");
        break;
      default:
        // Do nothing if this is not an enhanced architecture.
        break;
    }
  }
}
319 | |
// Return the builtin tables as a single shard: names resolve against
// BuiltinStrings, per-builtin metadata against BuiltinInfos.
llvm::SmallVector<Builtin::InfosShard>
NVPTXTargetInfo::getTargetBuiltins() const {
  return {{&BuiltinStrings, BuiltinInfos}};
}
324 | |