//===--- NVPTX.cpp - Implement NVPTX target feature support ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements NVPTX TargetInfo objects.
//
//===----------------------------------------------------------------------===//

#include "NVPTX.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/ADT/StringSwitch.h"

using namespace clang;
using namespace clang::targets;

static constexpr int NumBuiltins =
    clang::NVPTX::LastTSBuiltin - Builtin::FirstTSBuiltin;

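// The builtin string table and the Builtin::Info records below are generated
// by TableGen from the NVPTX builtins definitions.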
#define GET_BUILTIN_STR_TABLE
#include "clang/Basic/BuiltinsNVPTX.inc"
#undef GET_BUILTIN_STR_TABLE

static constexpr Builtin::Info BuiltinInfos[] = {
#define GET_BUILTIN_INFOS
#include "clang/Basic/BuiltinsNVPTX.inc"
#undef GET_BUILTIN_INFOS
};
static_assert(std::size(BuiltinInfos) == NumBuiltins);

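// NVPTX does not expose GCC-style named registers; a single dummy register
// keeps the generic inline-asm register-name machinery satisfied.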
const char *const NVPTXTargetInfo::GCCRegNames[] = {"r0"};

NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
                                 const TargetOptions &Opts,
                                 unsigned TargetPointerWidth)
    : TargetInfo(Triple) {
  assert((TargetPointerWidth == 32 || TargetPointerWidth == 64) &&
         "NVPTX only supports 32- and 64-bit modes.");

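  // PTXVersion encodes the PTX ISA version as major * 10 + minor, so the
  // default of 32 means PTX 3.2. A target feature of the form "+ptxNN"
  // (e.g. "+ptx70") overrides it.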
  PTXVersion = 32;
  for (const StringRef Feature : Opts.FeaturesAsWritten) {
    int PTXV;
    if (!Feature.starts_with("+ptx") ||
        Feature.drop_front(4).getAsInteger(10, PTXV))
      continue;
    PTXVersion = PTXV; // TODO: should it be max(PTXVersion, PTXV)?
  }

  TLSSupported = false;
  VLASupported = false;
  AddrSpaceMap = &NVPTXAddrSpaceMap;
  UseAddrSpaceMapMangling = true;
  // __bf16 is always available as a load/store only type.
  BFloat16Width = BFloat16Align = 16;
  BFloat16Format = &llvm::APFloat::BFloat();

  // Define available target features
  // These must be defined in sorted order!
  NoAsmVariants = true;
  GPU = OffloadArch::UNUSED;

  // PTX supports f16 as a fundamental type.
  HasLegalHalfType = true;
  HasFloat16 = true;

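  // The data layout records pointer widths per address space. In 64-bit mode
  // pointers default to 64 bits; with the short-pointer option, the shared
  // (addrspace 3), const (4), and local (5) spaces keep 32-bit pointers.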
  if (TargetPointerWidth == 32)
    resetDataLayout(
        "e-p:32:32-p6:32:32-p7:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
  else if (Opts.NVPTXUseShortPointers)
    resetDataLayout(
        "e-p3:32:32-p4:32:32-p5:32:32-p6:32:32-p7:32:32-i64:64-i128:128-v16:"
        "16-v32:32-n16:32:64");
  else
    resetDataLayout("e-p6:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");

  // If possible, get a TargetInfo for our host triple, so we can match its
  // types.
  llvm::Triple HostTriple(Opts.HostTriple);
  if (!HostTriple.isNVPTX())
    HostTarget = AllocateTarget(llvm::Triple(Opts.HostTriple), Opts);

  // If no host target, make some guesses about the data layout and return.
  if (!HostTarget) {
    LongWidth = LongAlign = TargetPointerWidth;
    PointerWidth = PointerAlign = TargetPointerWidth;
    switch (TargetPointerWidth) {
    case 32:
      SizeType = TargetInfo::UnsignedInt;
      PtrDiffType = TargetInfo::SignedInt;
      IntPtrType = TargetInfo::SignedInt;
      break;
    case 64:
      SizeType = TargetInfo::UnsignedLong;
      PtrDiffType = TargetInfo::SignedLong;
      IntPtrType = TargetInfo::SignedLong;
      break;
    default:
      llvm_unreachable("TargetPointerWidth must be 32 or 64");
    }

    MaxAtomicInlineWidth = TargetPointerWidth;
    return;
  }

  // Copy properties from host target.
  PointerWidth = HostTarget->getPointerWidth(LangAS::Default);
  PointerAlign = HostTarget->getPointerAlign(LangAS::Default);
  BoolWidth = HostTarget->getBoolWidth();
  BoolAlign = HostTarget->getBoolAlign();
  IntWidth = HostTarget->getIntWidth();
  IntAlign = HostTarget->getIntAlign();
  HalfWidth = HostTarget->getHalfWidth();
  HalfAlign = HostTarget->getHalfAlign();
  FloatWidth = HostTarget->getFloatWidth();
  FloatAlign = HostTarget->getFloatAlign();
  DoubleWidth = HostTarget->getDoubleWidth();
  DoubleAlign = HostTarget->getDoubleAlign();
  LongWidth = HostTarget->getLongWidth();
  LongAlign = HostTarget->getLongAlign();
  LongLongWidth = HostTarget->getLongLongWidth();
  LongLongAlign = HostTarget->getLongLongAlign();
  MinGlobalAlign = HostTarget->getMinGlobalAlign(/* TypeSize = */ 0,
                                                 /* HasNonWeakDef = */ true);
  NewAlign = HostTarget->getNewAlign();
  DefaultAlignForAttributeAligned =
      HostTarget->getDefaultAlignForAttributeAligned();
  SizeType = HostTarget->getSizeType();
  IntMaxType = HostTarget->getIntMaxType();
  PtrDiffType = HostTarget->getPtrDiffType(LangAS::Default);
  IntPtrType = HostTarget->getIntPtrType();
  WCharType = HostTarget->getWCharType();
  WIntType = HostTarget->getWIntType();
  Char16Type = HostTarget->getChar16Type();
  Char32Type = HostTarget->getChar32Type();
  Int64Type = HostTarget->getInt64Type();
  SigAtomicType = HostTarget->getSigAtomicType();
  ProcessIDType = HostTarget->getProcessIDType();

  UseBitFieldTypeAlignment = HostTarget->useBitFieldTypeAlignment();
  UseZeroLengthBitfieldAlignment = HostTarget->useZeroLengthBitfieldAlignment();
  UseExplicitBitFieldAlignment = HostTarget->useExplicitBitFieldAlignment();
  ZeroLengthBitfieldBoundary = HostTarget->getZeroLengthBitfieldBoundary();

  // This is a bit of a lie, but it controls __GCC_ATOMIC_XXX_LOCK_FREE, and
  // we need those macros to be identical on host and device, because (among
  // other things) they affect which standard library classes are defined, and
  // we need all classes to be defined on both the host and device.
  MaxAtomicInlineWidth = HostTarget->getMaxAtomicInlineWidth();

  // Properties intentionally not copied from host:
  // - LargeArrayMinWidth, LargeArrayAlign: Not visible across the
  //   host/device boundary.
  // - SuitableAlign: Not visible across the host/device boundary, and may
  //   correctly be different on host/device, e.g. if host has wider vector
  //   types than device.
  // - LongDoubleWidth, LongDoubleAlign: nvptx's long double type is the same
  //   as its double type, but that's not necessarily true on the host.
  // TODO: nvcc emits a warning when using long double on device; we should
  // do the same.
}

ArrayRef<const char *> NVPTXTargetInfo::getGCCRegNames() const {
  return llvm::ArrayRef(GCCRegNames);
}

bool NVPTXTargetInfo::hasFeature(StringRef Feature) const {
  return llvm::StringSwitch<bool>(Feature)
      .Cases("ptx", "nvptx", true)
      .Default(false);
}

void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
                                       MacroBuilder &Builder) const {
  Builder.defineMacro("__PTX__");
  Builder.defineMacro("__NVPTX__");

  // Skip setting architecture dependent macros if undefined.
  if (GPU == OffloadArch::UNUSED && !HostTarget)
    return;

  if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {
    // Set __CUDA_ARCH__ for the GPU specified.
    llvm::StringRef CUDAArchCode = [this] {
      switch (GPU) {
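      // OffloadArch is shared with the other offloading targets; the AMDGPU
      // and Intel architectures below are unreachable for NVPTX and are
      // listed only to keep this switch exhaustive.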
      case OffloadArch::GFX600:
      case OffloadArch::GFX601:
      case OffloadArch::GFX602:
      case OffloadArch::GFX700:
      case OffloadArch::GFX701:
      case OffloadArch::GFX702:
      case OffloadArch::GFX703:
      case OffloadArch::GFX704:
      case OffloadArch::GFX705:
      case OffloadArch::GFX801:
      case OffloadArch::GFX802:
      case OffloadArch::GFX803:
      case OffloadArch::GFX805:
      case OffloadArch::GFX810:
      case OffloadArch::GFX9_GENERIC:
      case OffloadArch::GFX900:
      case OffloadArch::GFX902:
      case OffloadArch::GFX904:
      case OffloadArch::GFX906:
      case OffloadArch::GFX908:
      case OffloadArch::GFX909:
      case OffloadArch::GFX90a:
      case OffloadArch::GFX90c:
      case OffloadArch::GFX9_4_GENERIC:
      case OffloadArch::GFX942:
      case OffloadArch::GFX950:
      case OffloadArch::GFX10_1_GENERIC:
      case OffloadArch::GFX1010:
      case OffloadArch::GFX1011:
      case OffloadArch::GFX1012:
      case OffloadArch::GFX1013:
      case OffloadArch::GFX10_3_GENERIC:
      case OffloadArch::GFX1030:
      case OffloadArch::GFX1031:
      case OffloadArch::GFX1032:
      case OffloadArch::GFX1033:
      case OffloadArch::GFX1034:
      case OffloadArch::GFX1035:
      case OffloadArch::GFX1036:
      case OffloadArch::GFX11_GENERIC:
      case OffloadArch::GFX1100:
      case OffloadArch::GFX1101:
      case OffloadArch::GFX1102:
      case OffloadArch::GFX1103:
      case OffloadArch::GFX1150:
      case OffloadArch::GFX1151:
      case OffloadArch::GFX1152:
      case OffloadArch::GFX1153:
      case OffloadArch::GFX12_GENERIC:
      case OffloadArch::GFX1200:
      case OffloadArch::GFX1201:
      case OffloadArch::GFX1250:
      case OffloadArch::AMDGCNSPIRV:
      case OffloadArch::Generic:
      case OffloadArch::GRANITERAPIDS:
      case OffloadArch::BMG_G21:
      case OffloadArch::LAST:
        break;
      case OffloadArch::UNKNOWN:
        assert(false && "No GPU arch when compiling CUDA device code.");
        return "";
      case OffloadArch::UNUSED:
      case OffloadArch::SM_20:
        return "200";
      case OffloadArch::SM_21:
        return "210";
      case OffloadArch::SM_30:
        return "300";
      case OffloadArch::SM_32_:
        return "320";
      case OffloadArch::SM_35:
        return "350";
      case OffloadArch::SM_37:
        return "370";
      case OffloadArch::SM_50:
        return "500";
      case OffloadArch::SM_52:
        return "520";
      case OffloadArch::SM_53:
        return "530";
      case OffloadArch::SM_60:
        return "600";
      case OffloadArch::SM_61:
        return "610";
      case OffloadArch::SM_62:
        return "620";
      case OffloadArch::SM_70:
        return "700";
      case OffloadArch::SM_72:
        return "720";
      case OffloadArch::SM_75:
        return "750";
      case OffloadArch::SM_80:
        return "800";
      case OffloadArch::SM_86:
        return "860";
      case OffloadArch::SM_87:
        return "870";
      case OffloadArch::SM_89:
        return "890";
      case OffloadArch::SM_90:
      case OffloadArch::SM_90a:
        return "900";
      case OffloadArch::SM_100:
      case OffloadArch::SM_100a:
        return "1000";
      case OffloadArch::SM_101:
      case OffloadArch::SM_101a:
        return "1010";
      case OffloadArch::SM_120:
      case OffloadArch::SM_120a:
        return "1200";
      }
      llvm_unreachable("unhandled OffloadArch");
    }();
    Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
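    // Variant architectures such as sm_90a expose the complete feature set of
    // their base architecture; advertise that via an extra macro, e.g.
    // __CUDA_ARCH_FEAT_SM90_ALL for sm_90a.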
    switch (GPU) {
    case OffloadArch::SM_90a:
    case OffloadArch::SM_100a:
    case OffloadArch::SM_101a:
    case OffloadArch::SM_120a:
      Builder.defineMacro("__CUDA_ARCH_FEAT_SM" + CUDAArchCode.drop_back() +
                              "_ALL",
                          "1");
      break;
    default:
      // Do nothing if this is not an enhanced architecture.
      break;
    }
  }
}

llvm::SmallVector<Builtin::InfosShard>
NVPTXTargetInfo::getTargetBuiltins() const {
  return {{&BuiltinStrings, BuiltinInfos}};
}