//===--- NVPTX.cpp - Implement NVPTX target feature support ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements NVPTX TargetInfo objects.
//
//===----------------------------------------------------------------------===//

#include "NVPTX.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/ADT/StringSwitch.h"

using namespace clang;
using namespace clang::targets;

static constexpr int NumBuiltins =
    clang::NVPTX::LastTSBuiltin - Builtin::FirstTSBuiltin;

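// BuiltinsNVPTX.inc is generated by TableGen; the first include pulls in the
// shared builtin string table, the second the per-builtin info records.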
#define GET_BUILTIN_STR_TABLE
#include "clang/Basic/BuiltinsNVPTX.inc"
#undef GET_BUILTIN_STR_TABLE

static constexpr Builtin::Info BuiltinInfos[] = {
#define GET_BUILTIN_INFOS
#include "clang/Basic/BuiltinsNVPTX.inc"
#undef GET_BUILTIN_INFOS
};
static_assert(std::size(BuiltinInfos) == NumBuiltins);

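// PTX is a virtual ISA with no fixed, user-visible registers, so a single
// placeholder name is enough to satisfy the register-name interface.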
const char *const NVPTXTargetInfo::GCCRegNames[] = {"r0"};

NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
                                 const TargetOptions &Opts,
                                 unsigned TargetPointerWidth)
    : TargetInfo(Triple) {
  assert((TargetPointerWidth == 32 || TargetPointerWidth == 64) &&
         "NVPTX only supports 32- and 64-bit modes.");

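  // Default to PTX ISA 3.2; an explicit "+ptxNN" target feature overrides it.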
  PTXVersion = 32;
  for (const StringRef Feature : Opts.FeaturesAsWritten) {
    int PTXV;
    if (!Feature.starts_with("+ptx") ||
        Feature.drop_front(4).getAsInteger(10, PTXV))
      continue;
    PTXVersion = PTXV; // TODO: should it be max(PTXVersion, PTXV)?
  }

  TLSSupported = false;
  VLASupported = false;
  AddrSpaceMap = &NVPTXAddrSpaceMap;
  UseAddrSpaceMapMangling = true;
  // __bf16 is always available as a load/store only type.
  BFloat16Width = BFloat16Align = 16;
  BFloat16Format = &llvm::APFloat::BFloat();

  // Define available target features
  // These must be defined in sorted order!
  NoAsmVariants = true;
  GPU = OffloadArch::UNUSED;

  // PTX supports f16 as a fundamental type.
  HasLegalHalfType = true;
  HasFloat16 = true;

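  // Address spaces 3, 4, and 5 are NVPTX's shared, const, and local spaces;
  // in "short pointer" mode they keep 32-bit pointers even on 64-bit targets.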
  if (TargetPointerWidth == 32)
    resetDataLayout(
        "e-p:32:32-p6:32:32-p7:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
  else if (Opts.NVPTXUseShortPointers)
    resetDataLayout(
        "e-p3:32:32-p4:32:32-p5:32:32-p6:32:32-p7:32:32-i64:64-i128:128-v16:"
        "16-v32:32-n16:32:64");
  else
    resetDataLayout("e-p6:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");

  // If possible, get a TargetInfo for our host triple, so we can match its
  // types.
  llvm::Triple HostTriple(Opts.HostTriple);
  if (!HostTriple.isNVPTX())
    HostTarget = AllocateTarget(llvm::Triple(Opts.HostTriple), Opts);

  // If no host target, make some guesses about the data layout and return.
  if (!HostTarget) {
    LongWidth = LongAlign = TargetPointerWidth;
    PointerWidth = PointerAlign = TargetPointerWidth;
    switch (TargetPointerWidth) {
    case 32:
      SizeType = TargetInfo::UnsignedInt;
      PtrDiffType = TargetInfo::SignedInt;
      IntPtrType = TargetInfo::SignedInt;
      break;
    case 64:
      SizeType = TargetInfo::UnsignedLong;
      PtrDiffType = TargetInfo::SignedLong;
      IntPtrType = TargetInfo::SignedLong;
      break;
    default:
      llvm_unreachable("TargetPointerWidth must be 32 or 64");
    }

    MaxAtomicInlineWidth = TargetPointerWidth;
    return;
  }

  // Copy properties from host target.
  PointerWidth = HostTarget->getPointerWidth(LangAS::Default);
  PointerAlign = HostTarget->getPointerAlign(LangAS::Default);
  BoolWidth = HostTarget->getBoolWidth();
  BoolAlign = HostTarget->getBoolAlign();
  IntWidth = HostTarget->getIntWidth();
  IntAlign = HostTarget->getIntAlign();
  HalfWidth = HostTarget->getHalfWidth();
  HalfAlign = HostTarget->getHalfAlign();
  FloatWidth = HostTarget->getFloatWidth();
  FloatAlign = HostTarget->getFloatAlign();
  DoubleWidth = HostTarget->getDoubleWidth();
  DoubleAlign = HostTarget->getDoubleAlign();
  LongWidth = HostTarget->getLongWidth();
  LongAlign = HostTarget->getLongAlign();
  LongLongWidth = HostTarget->getLongLongWidth();
  LongLongAlign = HostTarget->getLongLongAlign();
  MinGlobalAlign = HostTarget->getMinGlobalAlign(/* TypeSize = */ 0,
                                                 /* HasNonWeakDef = */ true);
  NewAlign = HostTarget->getNewAlign();
  DefaultAlignForAttributeAligned =
      HostTarget->getDefaultAlignForAttributeAligned();
  SizeType = HostTarget->getSizeType();
  IntMaxType = HostTarget->getIntMaxType();
  PtrDiffType = HostTarget->getPtrDiffType(LangAS::Default);
  IntPtrType = HostTarget->getIntPtrType();
  WCharType = HostTarget->getWCharType();
  WIntType = HostTarget->getWIntType();
  Char16Type = HostTarget->getChar16Type();
  Char32Type = HostTarget->getChar32Type();
  Int64Type = HostTarget->getInt64Type();
  SigAtomicType = HostTarget->getSigAtomicType();
  ProcessIDType = HostTarget->getProcessIDType();

  UseBitFieldTypeAlignment = HostTarget->useBitFieldTypeAlignment();
  UseZeroLengthBitfieldAlignment = HostTarget->useZeroLengthBitfieldAlignment();
  UseExplicitBitFieldAlignment = HostTarget->useExplicitBitFieldAlignment();
  ZeroLengthBitfieldBoundary = HostTarget->getZeroLengthBitfieldBoundary();

  // This is a bit of a lie, but it controls __GCC_ATOMIC_XXX_LOCK_FREE, and
  // we need those macros to be identical on host and device, because (among
  // other things) they affect which standard library classes are defined, and
  // we need all classes to be defined on both the host and device.
  MaxAtomicInlineWidth = HostTarget->getMaxAtomicInlineWidth();

  // Properties intentionally not copied from host:
  // - LargeArrayMinWidth, LargeArrayAlign: Not visible across the
  //   host/device boundary.
  // - SuitableAlign: Not visible across the host/device boundary, and may
  //   correctly be different on host/device, e.g. if host has wider vector
  //   types than device.
  // - LongDoubleWidth, LongDoubleAlign: nvptx's long double type is the same
  //   as its double type, but that's not necessarily true on the host.
  //   TODO: nvcc emits a warning when using long double on device; we should
  //   do the same.
}

ArrayRef<const char *> NVPTXTargetInfo::getGCCRegNames() const {
  return llvm::ArrayRef(GCCRegNames);
}

bool NVPTXTargetInfo::hasFeature(StringRef Feature) const {
  return llvm::StringSwitch<bool>(Feature)
      .Cases("ptx", "nvptx", true)
      .Default(false);
}

void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
                                       MacroBuilder &Builder) const {
  Builder.defineMacro("__PTX__");
  Builder.defineMacro("__NVPTX__");

  // Skip setting architecture dependent macros if undefined.
  if (GPU == OffloadArch::UNUSED && !HostTarget)
    return;

  if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {
    // Set __CUDA_ARCH__ for the GPU specified.
    llvm::StringRef CUDAArchCode = [this] {
      switch (GPU) {
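      // OffloadArch is shared with other offloading targets; the AMDGPU,
      // SPIR-V, and Intel enumerators below can never name an NVPTX GPU and
      // fall through to the llvm_unreachable after the switch.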
      case OffloadArch::GFX600:
      case OffloadArch::GFX601:
      case OffloadArch::GFX602:
      case OffloadArch::GFX700:
      case OffloadArch::GFX701:
      case OffloadArch::GFX702:
      case OffloadArch::GFX703:
      case OffloadArch::GFX704:
      case OffloadArch::GFX705:
      case OffloadArch::GFX801:
      case OffloadArch::GFX802:
      case OffloadArch::GFX803:
      case OffloadArch::GFX805:
      case OffloadArch::GFX810:
      case OffloadArch::GFX9_GENERIC:
      case OffloadArch::GFX900:
      case OffloadArch::GFX902:
      case OffloadArch::GFX904:
      case OffloadArch::GFX906:
      case OffloadArch::GFX908:
      case OffloadArch::GFX909:
      case OffloadArch::GFX90a:
      case OffloadArch::GFX90c:
      case OffloadArch::GFX9_4_GENERIC:
      case OffloadArch::GFX942:
      case OffloadArch::GFX950:
      case OffloadArch::GFX10_1_GENERIC:
      case OffloadArch::GFX1010:
      case OffloadArch::GFX1011:
      case OffloadArch::GFX1012:
      case OffloadArch::GFX1013:
      case OffloadArch::GFX10_3_GENERIC:
      case OffloadArch::GFX1030:
      case OffloadArch::GFX1031:
      case OffloadArch::GFX1032:
      case OffloadArch::GFX1033:
      case OffloadArch::GFX1034:
      case OffloadArch::GFX1035:
      case OffloadArch::GFX1036:
      case OffloadArch::GFX11_GENERIC:
      case OffloadArch::GFX1100:
      case OffloadArch::GFX1101:
      case OffloadArch::GFX1102:
      case OffloadArch::GFX1103:
      case OffloadArch::GFX1150:
      case OffloadArch::GFX1151:
      case OffloadArch::GFX1152:
      case OffloadArch::GFX1153:
      case OffloadArch::GFX12_GENERIC:
      case OffloadArch::GFX1200:
      case OffloadArch::GFX1201:
      case OffloadArch::GFX1250:
      case OffloadArch::AMDGCNSPIRV:
      case OffloadArch::Generic:
      case OffloadArch::GRANITERAPIDS:
      case OffloadArch::BMG_G21:
      case OffloadArch::LAST:
        break;
      case OffloadArch::UNKNOWN:
        assert(false && "No GPU arch when compiling CUDA device code.");
        return "";
      case OffloadArch::UNUSED:
      case OffloadArch::SM_20:
        return "200";
      case OffloadArch::SM_21:
        return "210";
      case OffloadArch::SM_30:
        return "300";
      case OffloadArch::SM_32_:
        return "320";
      case OffloadArch::SM_35:
        return "350";
      case OffloadArch::SM_37:
        return "370";
      case OffloadArch::SM_50:
        return "500";
      case OffloadArch::SM_52:
        return "520";
      case OffloadArch::SM_53:
        return "530";
      case OffloadArch::SM_60:
        return "600";
      case OffloadArch::SM_61:
        return "610";
      case OffloadArch::SM_62:
        return "620";
      case OffloadArch::SM_70:
        return "700";
      case OffloadArch::SM_72:
        return "720";
      case OffloadArch::SM_75:
        return "750";
      case OffloadArch::SM_80:
        return "800";
      case OffloadArch::SM_86:
        return "860";
      case OffloadArch::SM_87:
        return "870";
      case OffloadArch::SM_89:
        return "890";
      case OffloadArch::SM_90:
      case OffloadArch::SM_90a:
        return "900";
      case OffloadArch::SM_100:
      case OffloadArch::SM_100a:
        return "1000";
      case OffloadArch::SM_101:
      case OffloadArch::SM_101a:
        return "1010";
      case OffloadArch::SM_120:
      case OffloadArch::SM_120a:
        return "1200";
      }
      llvm_unreachable("unhandled OffloadArch");
    }();
    Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
    switch (GPU) {
    case OffloadArch::SM_90a:
    case OffloadArch::SM_100a:
    case OffloadArch::SM_101a:
    case OffloadArch::SM_120a:
      // Drop the trailing '0' from the arch code, so e.g. "900" yields
      // __CUDA_ARCH_FEAT_SM90_ALL.
      Builder.defineMacro("__CUDA_ARCH_FEAT_SM" + CUDAArchCode.drop_back() +
                              "_ALL",
                          "1");
      break;
    default:
      // Do nothing if this is not an enhanced architecture.
      break;
    }
  }
}

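// Builtins are exposed as a single shard backed by the TableGen-generated
// string table and info records defined above.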
llvm::SmallVector<Builtin::InfosShard>
NVPTXTargetInfo::getTargetBuiltins() const {
  return {{&BuiltinStrings, BuiltinInfos}};
}
