//===- NVPTX.cpp ----------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "llvm/IR/IntrinsicsNVPTX.h"

using namespace clang;
using namespace clang::CodeGen;

//===----------------------------------------------------------------------===//
// NVPTX ABI Implementation
//===----------------------------------------------------------------------===//

namespace {

class NVPTXTargetCodeGenInfo;

class NVPTXABIInfo : public ABIInfo {
  NVPTXTargetCodeGenInfo &CGInfo;

public:
  NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
      : ABIInfo(CGT), CGInfo(Info) {}

  ABIArgInfo classifyReturnType(QualType RetTy) const;
  ABIArgInfo classifyArgumentType(QualType Ty) const;

  void computeInfo(CGFunctionInfo &FI) const override;
  RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                   AggValueSlot Slot) const override;
  bool isUnsupportedType(QualType T) const;
  ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const;
};

class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
public:
  NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
      : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {}

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &M) const override;
  bool shouldEmitStaticExternCAliases() const override;

  llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
                                 llvm::PointerType *T,
                                 QualType QT) const override;

  llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override {
    // On the device side, a surface reference is represented as an object
    // handle in a 64-bit integer.
    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
  }

  llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override {
    // On the device side, a texture reference is represented as an object
    // handle in a 64-bit integer.
    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
  }

  bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                              LValue Src) const override {
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
    return true;
  }

  bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                              LValue Src) const override {
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
    return true;
  }

  // Adds an MDNode with GV, Name, and Operand as operands to the module-level
  // nvvm.annotations named metadata.
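  // For example, addNVVMMetadata(F, "kernel", 1) appends the node
  //   !{ptr @F, !"kernel", i32 1}
  // to !nvvm.annotations.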
  static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
                              int Operand);

private:
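  // Copies a surface/texture object handle from Src to Dst. When Src refers
  // to a global surface/texture variable, the handle is materialized with the
  // nvvm.texsurf.handle.internal intrinsic; otherwise it is loaded as a
  // scalar.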
  static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                           LValue Src) {
    llvm::Value *Handle = nullptr;
    llvm::Constant *C =
        llvm::dyn_cast<llvm::Constant>(Src.getAddress().emitRawPointer(CGF));
    // Look through an `addrspacecast` on the constant pointer, if any.
    if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
      C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
    if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
      // Load the handle from the specific global variable using the
      // `nvvm.texsurf.handle.internal` intrinsic.
      Handle = CGF.EmitRuntimeCall(
          CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal,
                               {GV->getType()}),
          {GV}, "texsurf_handle");
    } else
      Handle = CGF.EmitLoadOfScalar(Src, SourceLocation());
    CGF.EmitStoreOfScalar(Handle, Dst);
  }
};

/// Checks if the type is unsupported directly by the current target.
bool NVPTXABIInfo::isUnsupportedType(QualType T) const {
  ASTContext &Context = getContext();
  if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())
    return true;
  if (!Context.getTargetInfo().hasFloat128Type() &&
      (T->isFloat128Type() ||
       (T->isRealFloatingType() && Context.getTypeSize(T) == 128)))
    return true;
  if (const auto *EIT = T->getAs<BitIntType>())
    return EIT->getNumBits() >
           (Context.getTargetInfo().hasInt128Type() ? 128U : 64U);
  if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() &&
      Context.getTypeSize(T) > 64U)
    return true;
  if (const auto *AT = T->getAsArrayTypeUnsafe())
    return isUnsupportedType(AT->getElementType());
  const auto *RT = T->getAs<RecordType>();
  if (!RT)
    return false;
  const RecordDecl *RD = RT->getDecl();

  // If this is a C++ record, check the bases first.
  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
    for (const CXXBaseSpecifier &I : CXXRD->bases())
      if (isUnsupportedType(I.getType()))
        return true;

  for (const FieldDecl *I : RD->fields())
    if (isUnsupportedType(I->getType()))
      return true;
  return false;
}

/// Coerce the given type into an array of integers whose elements are no
/// wider than MaxSize bits each.
ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
                                                   unsigned MaxSize) const {
  // Alignment and Size are measured in bits.
  const uint64_t Size = getContext().getTypeSize(Ty);
  const uint64_t Alignment = getContext().getTypeAlign(Ty);
  const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
  llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div);
  const uint64_t NumElements = (Size + Div - 1) / Div;
  return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
}

ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
  if (RetTy->isVoidType())
    return ABIArgInfo::getIgnore();

  if (getContext().getLangOpts().OpenMP &&
      getContext().getLangOpts().OpenMPIsTargetDevice &&
      isUnsupportedType(RetTy))
    return coerceToIntArrayWithLimit(RetTy, 64);

  // Note: Unlike the default ABI, non-scalar types are returned directly.
  if (!RetTy->isScalarType())
    return ABIArgInfo::getDirect();

  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
    RetTy = EnumTy->getDecl()->getIntegerType();

  return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
                                               : ABIArgInfo::getDirect());
}

ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = Ty->getAs<EnumType>())
    Ty = EnumTy->getDecl()->getIntegerType();

  // Pass aggregate types indirectly, by value.
  if (isAggregateTypeForABI(Ty)) {
    // Under CUDA device compilation, tex/surf builtin types are replaced with
    // object types and passed directly.
    if (getContext().getLangOpts().CUDAIsDevice) {
      if (Ty->isCUDADeviceBuiltinSurfaceType())
        return ABIArgInfo::getDirect(
            CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
      if (Ty->isCUDADeviceBuiltinTextureType())
        return ABIArgInfo::getDirect(
            CGInfo.getCUDADeviceBuiltinTextureDeviceType());
    }
    return getNaturalAlignIndirect(Ty, /* byval */ true);
  }

  if (const auto *EIT = Ty->getAs<BitIntType>()) {
    if ((EIT->getNumBits() > 128) ||
        (!getContext().getTargetInfo().hasInt128Type() &&
         EIT->getNumBits() > 64))
      return getNaturalAlignIndirect(Ty, /* byval */ true);
  }

  return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
                                            : ABIArgInfo::getDirect());
}

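/// Classifies the return type and each required argument. Extra variadic
/// arguments beyond the required count are passed directly. For the default C
/// calling convention, the effective convention is switched to the NVPTX
/// runtime calling convention.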
void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
  if (!getCXXABI().classifyReturnType(FI))
    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());

  for (auto &&[ArgumentsCount, I] : llvm::enumerate(FI.arguments()))
    I.info = ArgumentsCount < FI.getNumRequiredArgs()
                 ? classifyArgumentType(I.type)
                 : ABIArgInfo::getDirect();

  // Always honor user-specified calling convention.
  if (FI.getCallingConvention() != llvm::CallingConv::C)
    return;

  FI.setEffectiveCallingConvention(getRuntimeCC());
}

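/// Lowers va_arg through the generic void-pointer scheme: slots have a
/// minimum alignment of one byte, and each value is read at its natural
/// alignment (AllowHigherAlign).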
RValue NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                               QualType Ty, AggValueSlot Slot) const {
  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*IsIndirect=*/false,
                          getContext().getTypeInfoInChars(Ty),
                          CharUnits::fromQuantity(1),
                          /*AllowHigherAlign=*/true, Slot);
}

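// Attaches NVVM annotations to global definitions: surface/texture variables
// in CUDA mode, and kernel entry points (plus launch bounds) for OpenCL,
// CUDA, and direct NVPTX kernels.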
void NVPTXTargetCodeGenInfo::setTargetAttributes(
    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
  if (GV->isDeclaration())
    return;
  const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
  if (VD) {
    if (M.getLangOpts().CUDA) {
      if (VD->getType()->isCUDADeviceBuiltinSurfaceType())
        addNVVMMetadata(GV, "surface", 1);
      else if (VD->getType()->isCUDADeviceBuiltinTextureType())
        addNVVMMetadata(GV, "texture", 1);
      return;
    }
  }

  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
  if (!FD) return;

  llvm::Function *F = cast<llvm::Function>(GV);

  // Perform special handling in OpenCL mode.
  if (M.getLangOpts().OpenCL) {
    // Use OpenCL function attributes to check for kernel functions.
    // By default, all functions are device functions.
    if (FD->hasAttr<OpenCLKernelAttr>()) {
      // OpenCL __kernel functions get kernel metadata.
      // Create a !{<func-ref>, metadata !"kernel", i32 1} node.
      addNVVMMetadata(F, "kernel", 1);
      // And kernel functions are not subject to inlining.
      F->addFnAttr(llvm::Attribute::NoInline);
    }
  }

  // Perform special handling in CUDA mode.
  if (M.getLangOpts().CUDA) {
    // CUDA __global__ functions get a kernel metadata entry. Since
    // __global__ functions cannot be called from the device, we do not
    // need to set the noinline attribute.
    if (FD->hasAttr<CUDAGlobalAttr>()) {
      // Create a !{<func-ref>, metadata !"kernel", i32 1} node.
      addNVVMMetadata(F, "kernel", 1);
    }
    if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
      M.handleCUDALaunchBoundsAttr(F, Attr);
  }

  // Attach kernel metadata directly if compiling for NVPTX.
  if (FD->hasAttr<NVPTXKernelAttr>()) {
    addNVVMMetadata(F, "kernel", 1);
  }
}
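
// As an illustration (hypothetical kernel name; the actual symbol is
// mangled), a CUDA device compilation of
//   __global__ void __launch_bounds__(256, 2) kern() {}
// yields annotations roughly of the form:
//   !nvvm.annotations = !{!0, !1, !2}
//   !0 = !{ptr @kern, !"kernel", i32 1}
//   !1 = !{ptr @kern, !"maxntidx", i32 256}
//   !2 = !{ptr @kern, !"minctasm", i32 2}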

void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
                                             StringRef Name, int Operand) {
  llvm::Module *M = GV->getParent();
  llvm::LLVMContext &Ctx = M->getContext();

  // Get the "nvvm.annotations" metadata node.
  llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");

  llvm::Metadata *MDVals[] = {
      llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
      llvm::ConstantAsMetadata::get(
          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
  // Append the new node to nvvm.annotations.
  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}

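// Don't emit aliases for static extern "C" functions; PTX has, at best,
// limited support for aliases.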
bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
  return false;
}

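// For the OpenCL local address space, the null pointer is represented as a
// generic null pointer addrspace-cast into the local address space rather
// than a literal zero in that space.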
llvm::Constant *
NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
                                       llvm::PointerType *PT,
                                       QualType QT) const {
  auto &Ctx = CGM.getContext();
  if (PT->getAddressSpace() != Ctx.getTargetAddressSpace(LangAS::opencl_local))
    return llvm::ConstantPointerNull::get(PT);

  auto NPT = llvm::PointerType::get(
      PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));
  return llvm::ConstantExpr::getAddrSpaceCast(
      llvm::ConstantPointerNull::get(NPT), PT);
}
} // namespace

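/// Translates a __launch_bounds__(maxThreads[, minBlocks[, maxBlocks]])
/// attribute into the "maxntidx", "minctasm", and "maxclusterrank" NVVM
/// annotations, and optionally reports the evaluated values through the
/// out-parameters.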
void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
                                               const CUDALaunchBoundsAttr *Attr,
                                               int32_t *MaxThreadsVal,
                                               int32_t *MinBlocksVal,
                                               int32_t *MaxClusterRankVal) {
  llvm::APSInt MaxThreads(32);
  MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(getContext());
  if (MaxThreads > 0) {
    if (MaxThreadsVal)
      *MaxThreadsVal = MaxThreads.getExtValue();
    if (F) {
      // Create a !{<func-ref>, metadata !"maxntidx", i32 <val>} node.
      NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxntidx",
                                              MaxThreads.getExtValue());
    }
  }

  // MinBlocks and MaxBlocks are optional arguments for CUDALaunchBoundsAttr.
  // If they were not specified in __launch_bounds__ or the user specified a
  // value of 0, we don't have to add a PTX directive.
  if (Attr->getMinBlocks()) {
    llvm::APSInt MinBlocks(32);
    MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(getContext());
    if (MinBlocks > 0) {
      if (MinBlocksVal)
        *MinBlocksVal = MinBlocks.getExtValue();
      if (F) {
        // Create a !{<func-ref>, metadata !"minctasm", i32 <val>} node.
        NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm",
                                                MinBlocks.getExtValue());
      }
    }
  }
  if (Attr->getMaxBlocks()) {
    llvm::APSInt MaxBlocks(32);
    MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext());
    if (MaxBlocks > 0) {
      if (MaxClusterRankVal)
        *MaxClusterRankVal = MaxBlocks.getExtValue();
      if (F) {
        // Create a !{<func-ref>, metadata !"maxclusterrank", i32 <val>} node.
        NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxclusterrank",
                                                MaxBlocks.getExtValue());
      }
    }
  }
}

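/// Creates the NVPTX-specific TargetCodeGenInfo for the given module.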
std::unique_ptr<TargetCodeGenInfo>
CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) {
  return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.getTypes());
}