1 | //===--- NVPTX.cpp - Implement NVPTX target feature support ---------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements NVPTX TargetInfo objects. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "NVPTX.h" |
14 | #include "Targets.h" |
15 | #include "clang/Basic/Builtins.h" |
16 | #include "clang/Basic/MacroBuilder.h" |
17 | #include "clang/Basic/TargetBuiltins.h" |
18 | #include "llvm/ADT/StringSwitch.h" |
19 | |
20 | using namespace clang; |
21 | using namespace clang::targets; |
22 | |
// Table of all NVPTX builtins, materialized from BuiltinsNVPTX.def by
// expanding the three macro forms below. TARGET_BUILTIN entries carry the
// feature string (e.g. required SM/PTX version); the others have none.
static constexpr Builtin::Info BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER)                                    \
  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::HEADER, ALL_LANGUAGES},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#include "clang/Basic/BuiltinsNVPTX.def"
};
32 | |
33 | const char *const NVPTXTargetInfo::GCCRegNames[] = {"r0" }; |
34 | |
/// Construct the NVPTX target for a 32- or 64-bit device pointer width.
///
/// Device-side type layout is copied from the host target (when one can be
/// built from Opts.HostTriple) so that host and device agree on the ABI of
/// types crossing the offload boundary; otherwise sensible defaults are
/// derived from \p TargetPointerWidth.
NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
                                 const TargetOptions &Opts,
                                 unsigned TargetPointerWidth)
    : TargetInfo(Triple) {
  assert((TargetPointerWidth == 32 || TargetPointerWidth == 64) &&
         "NVPTX only supports 32- and 64-bit modes.");

  // Default PTX ISA version is 3.2; the last "+ptxNN" feature written on the
  // command line (if any) overrides it. Non-matching features are skipped.
  PTXVersion = 32;
  for (const StringRef Feature : Opts.FeaturesAsWritten) {
    int PTXV;
    if (!Feature.starts_with("+ptx") ||
        Feature.drop_front(4).getAsInteger(10, PTXV))
      continue;
    PTXVersion = PTXV; // TODO: should it be max(PTXVersion, PTXV)?
  }

  TLSSupported = false;
  VLASupported = false;
  // Map clang language address spaces onto NVPTX address spaces and mangle
  // pointer types accordingly.
  AddrSpaceMap = &NVPTXAddrSpaceMap;
  UseAddrSpaceMapMangling = true;
  // __bf16 is always available as a load/store only type.
  BFloat16Width = BFloat16Align = 16;
  BFloat16Format = &llvm::APFloat::BFloat();

  // Define available target features
  // These must be defined in sorted order!
  NoAsmVariants = true;
  // The concrete SM architecture is selected later via setCPU; start unset.
  GPU = OffloadArch::UNUSED;

  // PTX supports f16 as a fundamental type.
  HasLegalHalfType = true;
  HasFloat16 = true;

  // Pick the data layout: 32-bit pointers everywhere, or 64-bit generic
  // pointers with optionally shortened (32-bit) shared/const/local pointers.
  if (TargetPointerWidth == 32)
    resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
  else if (Opts.NVPTXUseShortPointers)
    resetDataLayout(
        "e-p3:32:32-p4:32:32-p5:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
  else
    resetDataLayout("e-i64:64-i128:128-v16:16-v32:32-n16:32:64");

  // If possible, get a TargetInfo for our host triple, so we can match its
  // types.
  llvm::Triple HostTriple(Opts.HostTriple);
  if (!HostTriple.isNVPTX())
    HostTarget = AllocateTarget(llvm::Triple(Opts.HostTriple), Opts);

  // If no host target, make some guesses about the data layout and return.
  if (!HostTarget) {
    LongWidth = LongAlign = TargetPointerWidth;
    PointerWidth = PointerAlign = TargetPointerWidth;
    switch (TargetPointerWidth) {
    case 32:
      SizeType = TargetInfo::UnsignedInt;
      PtrDiffType = TargetInfo::SignedInt;
      IntPtrType = TargetInfo::SignedInt;
      break;
    case 64:
      SizeType = TargetInfo::UnsignedLong;
      PtrDiffType = TargetInfo::SignedLong;
      IntPtrType = TargetInfo::SignedLong;
      break;
    default:
      llvm_unreachable("TargetPointerWidth must be 32 or 64");
    }

    MaxAtomicInlineWidth = TargetPointerWidth;
    return;
  }

  // Copy properties from host target so host/device ABIs agree.
  PointerWidth = HostTarget->getPointerWidth(LangAS::Default);
  PointerAlign = HostTarget->getPointerAlign(LangAS::Default);
  BoolWidth = HostTarget->getBoolWidth();
  BoolAlign = HostTarget->getBoolAlign();
  IntWidth = HostTarget->getIntWidth();
  IntAlign = HostTarget->getIntAlign();
  HalfWidth = HostTarget->getHalfWidth();
  HalfAlign = HostTarget->getHalfAlign();
  FloatWidth = HostTarget->getFloatWidth();
  FloatAlign = HostTarget->getFloatAlign();
  DoubleWidth = HostTarget->getDoubleWidth();
  DoubleAlign = HostTarget->getDoubleAlign();
  LongWidth = HostTarget->getLongWidth();
  LongAlign = HostTarget->getLongAlign();
  LongLongWidth = HostTarget->getLongLongWidth();
  LongLongAlign = HostTarget->getLongLongAlign();
  MinGlobalAlign = HostTarget->getMinGlobalAlign(/* TypeSize = */ 0,
                                                 /* HasNonWeakDef = */ true);
  NewAlign = HostTarget->getNewAlign();
  DefaultAlignForAttributeAligned =
      HostTarget->getDefaultAlignForAttributeAligned();
  SizeType = HostTarget->getSizeType();
  IntMaxType = HostTarget->getIntMaxType();
  PtrDiffType = HostTarget->getPtrDiffType(LangAS::Default);
  IntPtrType = HostTarget->getIntPtrType();
  WCharType = HostTarget->getWCharType();
  WIntType = HostTarget->getWIntType();
  Char16Type = HostTarget->getChar16Type();
  Char32Type = HostTarget->getChar32Type();
  Int64Type = HostTarget->getInt64Type();
  SigAtomicType = HostTarget->getSigAtomicType();
  ProcessIDType = HostTarget->getProcessIDType();

  UseBitFieldTypeAlignment = HostTarget->useBitFieldTypeAlignment();
  UseZeroLengthBitfieldAlignment = HostTarget->useZeroLengthBitfieldAlignment();
  UseExplicitBitFieldAlignment = HostTarget->useExplicitBitFieldAlignment();
  ZeroLengthBitfieldBoundary = HostTarget->getZeroLengthBitfieldBoundary();

  // This is a bit of a lie, but it controls __GCC_ATOMIC_XXX_LOCK_FREE, and
  // we need those macros to be identical on host and device, because (among
  // other things) they affect which standard library classes are defined, and
  // we need all classes to be defined on both the host and device.
  MaxAtomicInlineWidth = HostTarget->getMaxAtomicInlineWidth();

  // Properties intentionally not copied from host:
  // - LargeArrayMinWidth, LargeArrayAlign: Not visible across the
  //   host/device boundary.
  // - SuitableAlign: Not visible across the host/device boundary, and may
  //   correctly be different on host/device, e.g. if host has wider vector
  //   types than device.
  // - LongDoubleWidth, LongDoubleAlign: nvptx's long double type is the same
  //   as its double type, but that's not necessarily true on the host.
  //   TODO: nvcc emits a warning when using long double on device; we should
  //   do the same.
}
161 | |
162 | ArrayRef<const char *> NVPTXTargetInfo::getGCCRegNames() const { |
163 | return llvm::ArrayRef(GCCRegNames); |
164 | } |
165 | |
166 | bool NVPTXTargetInfo::hasFeature(StringRef Feature) const { |
167 | return llvm::StringSwitch<bool>(Feature) |
168 | .Cases(S0: "ptx" , S1: "nvptx" , Value: true) |
169 | .Default(Value: false); |
170 | } |
171 | |
/// Define NVPTX preprocessor macros: always __PTX__/__NVPTX__, plus
/// __CUDA_ARCH__ (and __CUDA_ARCH_FEAT_SM90_ALL for sm_90a) when compiling
/// for the device side.
void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
                                       MacroBuilder &Builder) const {
  Builder.defineMacro("__PTX__");
  Builder.defineMacro("__NVPTX__");

  // Skip setting architecture dependent macros if undefined.
  if (GPU == OffloadArch::UNUSED && !HostTarget)
    return;

  if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {
    // Set __CUDA_ARCH__ for the GPU specified.
    std::string CUDAArchCode = [this] {
      switch (GPU) {
      // OffloadArch is shared with AMDGPU: every GFX/SPIRV/Generic value is
      // not a valid NVPTX arch, so fall out of the switch and hit the
      // unreachable below.
      case OffloadArch::GFX600:
      case OffloadArch::GFX601:
      case OffloadArch::GFX602:
      case OffloadArch::GFX700:
      case OffloadArch::GFX701:
      case OffloadArch::GFX702:
      case OffloadArch::GFX703:
      case OffloadArch::GFX704:
      case OffloadArch::GFX705:
      case OffloadArch::GFX801:
      case OffloadArch::GFX802:
      case OffloadArch::GFX803:
      case OffloadArch::GFX805:
      case OffloadArch::GFX810:
      case OffloadArch::GFX9_GENERIC:
      case OffloadArch::GFX900:
      case OffloadArch::GFX902:
      case OffloadArch::GFX904:
      case OffloadArch::GFX906:
      case OffloadArch::GFX908:
      case OffloadArch::GFX909:
      case OffloadArch::GFX90a:
      case OffloadArch::GFX90c:
      case OffloadArch::GFX940:
      case OffloadArch::GFX941:
      case OffloadArch::GFX942:
      case OffloadArch::GFX10_1_GENERIC:
      case OffloadArch::GFX1010:
      case OffloadArch::GFX1011:
      case OffloadArch::GFX1012:
      case OffloadArch::GFX1013:
      case OffloadArch::GFX10_3_GENERIC:
      case OffloadArch::GFX1030:
      case OffloadArch::GFX1031:
      case OffloadArch::GFX1032:
      case OffloadArch::GFX1033:
      case OffloadArch::GFX1034:
      case OffloadArch::GFX1035:
      case OffloadArch::GFX1036:
      case OffloadArch::GFX11_GENERIC:
      case OffloadArch::GFX1100:
      case OffloadArch::GFX1101:
      case OffloadArch::GFX1102:
      case OffloadArch::GFX1103:
      case OffloadArch::GFX1150:
      case OffloadArch::GFX1151:
      case OffloadArch::GFX1152:
      case OffloadArch::GFX12_GENERIC:
      case OffloadArch::GFX1200:
      case OffloadArch::GFX1201:
      case OffloadArch::AMDGCNSPIRV:
      case OffloadArch::Generic:
      case OffloadArch::LAST:
        break;
      case OffloadArch::UNKNOWN:
        assert(false && "No GPU arch when compiling CUDA device code.");
        return "";
      // UNUSED (no arch selected) deliberately falls through to the sm_20
      // default of "200".
      case OffloadArch::UNUSED:
      case OffloadArch::SM_20:
        return "200";
      case OffloadArch::SM_21:
        return "210";
      case OffloadArch::SM_30:
        return "300";
      case OffloadArch::SM_32_:
        return "320";
      case OffloadArch::SM_35:
        return "350";
      case OffloadArch::SM_37:
        return "370";
      case OffloadArch::SM_50:
        return "500";
      case OffloadArch::SM_52:
        return "520";
      case OffloadArch::SM_53:
        return "530";
      case OffloadArch::SM_60:
        return "600";
      case OffloadArch::SM_61:
        return "610";
      case OffloadArch::SM_62:
        return "620";
      case OffloadArch::SM_70:
        return "700";
      case OffloadArch::SM_72:
        return "720";
      case OffloadArch::SM_75:
        return "750";
      case OffloadArch::SM_80:
        return "800";
      case OffloadArch::SM_86:
        return "860";
      case OffloadArch::SM_87:
        return "870";
      case OffloadArch::SM_89:
        return "890";
      // sm_90 and its feature-complete variant sm_90a share the same
      // __CUDA_ARCH__ value; sm_90a additionally gets the FEAT macro below.
      case OffloadArch::SM_90:
      case OffloadArch::SM_90a:
        return "900";
      }
      llvm_unreachable("unhandled OffloadArch");
    }();
    Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
    if (GPU == OffloadArch::SM_90a)
      Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");
  }
}
292 | |
293 | ArrayRef<Builtin::Info> NVPTXTargetInfo::getTargetBuiltins() const { |
294 | return llvm::ArrayRef(BuiltinInfo, |
295 | clang::NVPTX::LastTSBuiltin - Builtin::FirstTSBuiltin); |
296 | } |
297 | |