//===- NVPTX.cpp ----------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/IntrinsicsNVPTX.h"

using namespace clang;
using namespace clang::CodeGen;

//===----------------------------------------------------------------------===//
// NVPTX ABI Implementation
//===----------------------------------------------------------------------===//

namespace {

class NVPTXTargetCodeGenInfo;

class NVPTXABIInfo : public ABIInfo {
  NVPTXTargetCodeGenInfo &CGInfo;

public:
  NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
      : ABIInfo(CGT), CGInfo(Info) {}

  ABIArgInfo classifyReturnType(QualType RetTy) const;
  ABIArgInfo classifyArgumentType(QualType Ty) const;

  void computeInfo(CGFunctionInfo &FI) const override;
  RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                   AggValueSlot Slot) const override;
  bool isUnsupportedType(QualType T) const;
  ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const;
};

class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
public:
  NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
      : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {}

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &M) const override;
  bool shouldEmitStaticExternCAliases() const override;

  llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
                                 llvm::PointerType *T,
                                 QualType QT) const override;

  llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override {
    // On the device side, a surface reference is represented as an object
    // handle in a 64-bit integer.
    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
  }

  llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override {
    // On the device side, a texture reference is represented as an object
    // handle in a 64-bit integer.
    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
  }
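
  // Illustrative example (not part of this file): under CUDA device
  // compilation, a hypothetical kernel taking a builtin texture type, e.g.
  //   __global__ void k(texture<float, 2> tex);
  // receives the argument as a plain 64-bit handle, matching the i64 type
  // returned above.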

  bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                              LValue Src) const override {
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
    return true;
  }

  bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                              LValue Src) const override {
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
    return true;
  }

  unsigned getDeviceKernelCallingConv() const override {
    return llvm::CallingConv::PTX_Kernel;
  }

  // Creates an MDNode with GV, Name, and Operand as operands, and adds the
  // resulting node to the nvvm.annotations named metadata.
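  // For example, addNVVMMetadata(GV, "texture", 1) appends a node of the form
  //   !{ptr @tex, !"texture", i32 1}
  // (illustrative IR) to the !nvvm.annotations list.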
  static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
                              int Operand);

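  // Records the 1-based indices of a kernel's __grid_constant__ parameters in
  // !nvvm.annotations; e.g. (illustrative) for grid-constant parameters 1 and
  // 3 it appends:
  //   !{ptr @kernel, !"grid_constant", !{i32 1, i32 3}}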
  static void
  addGridConstantNVVMMetadata(llvm::GlobalValue *GV,
                              const SmallVectorImpl<int> &GridConstantArgs);

private:
  static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                           LValue Src) {
    llvm::Value *Handle = nullptr;
    llvm::Constant *C =
        llvm::dyn_cast<llvm::Constant>(Src.getAddress().emitRawPointer(CGF));
    // Look through an `addrspacecast` on the constant pointer, if any.
    if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
      C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
    if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
      // Load the handle from the specific global variable using the
      // `nvvm.texsurf.handle.internal` intrinsic.
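      // The emitted IR looks roughly like (illustrative; the exact overload
      // suffix and address space depend on the global):
      //   %texsurf_handle = call i64
      //       @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @tex)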
      Handle = CGF.EmitRuntimeCall(
          CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal,
                               {GV->getType()}),
          {GV}, "texsurf_handle");
    } else
      Handle = CGF.EmitLoadOfScalar(Src, SourceLocation());
    CGF.EmitStoreOfScalar(Handle, Dst);
  }
};

/// Checks whether the type is not directly supported by the current target.
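/// Examples (illustrative): _Float16 when the target lacks f16 support,
/// __float128 (or a 128-bit long double) without f128 support, a _BitInt
/// wider than the widest supported integer, or any array or record that
/// contains such a type.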
bool NVPTXABIInfo::isUnsupportedType(QualType T) const {
  ASTContext &Context = getContext();
  if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())
    return true;
  if (!Context.getTargetInfo().hasFloat128Type() &&
      (T->isFloat128Type() ||
       (T->isRealFloatingType() && Context.getTypeSize(T) == 128)))
    return true;
  if (const auto *EIT = T->getAs<BitIntType>())
    return EIT->getNumBits() >
           (Context.getTargetInfo().hasInt128Type() ? 128U : 64U);
  if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() &&
      Context.getTypeSize(T) > 64U)
    return true;
  if (const auto *AT = T->getAsArrayTypeUnsafe())
    return isUnsupportedType(AT->getElementType());
  const auto *RT = T->getAs<RecordType>();
  if (!RT)
    return false;
  const RecordDecl *RD = RT->getDecl();

  // If this is a C++ record, check the bases first.
  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
    for (const CXXBaseSpecifier &I : CXXRD->bases())
      if (isUnsupportedType(I.getType()))
        return true;

  for (const FieldDecl *I : RD->fields())
    if (isUnsupportedType(I->getType()))
      return true;
  return false;
}

/// Coerce the given type into an array of integers whose element width is
/// capped at MaxSize bits.
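/// Illustrative example: a 96-bit type with 32-bit alignment and MaxSize = 64
/// gives Div = min(64, 32) = 32, so the type is coerced to [3 x i32].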
ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
                                                   unsigned MaxSize) const {
  // Alignment and Size are measured in bits.
  const uint64_t Size = getContext().getTypeSize(Ty);
  const uint64_t Alignment = getContext().getTypeAlign(Ty);
  const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
  llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div);
  const uint64_t NumElements = (Size + Div - 1) / Div;
  return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
}

ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
  if (RetTy->isVoidType())
    return ABIArgInfo::getIgnore();

  if (getContext().getLangOpts().OpenMP &&
      getContext().getLangOpts().OpenMPIsTargetDevice &&
      isUnsupportedType(RetTy))
    return coerceToIntArrayWithLimit(RetTy, 64);

  // Note: this differs from the default ABI; non-scalar types are returned
  // directly rather than indirectly.
  if (!RetTy->isScalarType())
    return ABIArgInfo::getDirect();

  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
    RetTy = EnumTy->getDecl()->getIntegerType();

  return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
                                               : ABIArgInfo::getDirect());
}

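// Illustrative examples: an aggregate argument is passed indirectly (byval in
// the alloca address space), and _BitInt(256) is also passed indirectly since
// it exceeds the 128-bit limit checked below.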
ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = Ty->getAs<EnumType>())
    Ty = EnumTy->getDecl()->getIntegerType();

  // Return aggregate types as indirect by value.
  if (isAggregateTypeForABI(Ty)) {
    // Under CUDA device compilation, tex/surf builtin types are replaced with
    // object types and passed directly.
    if (getContext().getLangOpts().CUDAIsDevice) {
      if (Ty->isCUDADeviceBuiltinSurfaceType())
        return ABIArgInfo::getDirect(
            CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
      if (Ty->isCUDADeviceBuiltinTextureType())
        return ABIArgInfo::getDirect(
            CGInfo.getCUDADeviceBuiltinTextureDeviceType());
    }
    return getNaturalAlignIndirect(
        Ty, /* AddrSpace */ getDataLayout().getAllocaAddrSpace(),
        /* byval */ true);
  }

  if (const auto *EIT = Ty->getAs<BitIntType>()) {
    if ((EIT->getNumBits() > 128) ||
        (!getContext().getTargetInfo().hasInt128Type() &&
         EIT->getNumBits() > 64))
      return getNaturalAlignIndirect(
          Ty, /* AddrSpace */ getDataLayout().getAllocaAddrSpace(),
          /* byval */ true);
  }

  return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
                                            : ABIArgInfo::getDirect());
}

void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
  if (!getCXXABI().classifyReturnType(FI))
    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());

  for (auto &&[ArgumentsCount, I] : llvm::enumerate(FI.arguments()))
    I.info = ArgumentsCount < FI.getNumRequiredArgs()
                 ? classifyArgumentType(I.type)
                 : ABIArgInfo::getDirect();

  // Always honor user-specified calling convention.
  if (FI.getCallingConvention() != llvm::CallingConv::C)
    return;

  FI.setEffectiveCallingConvention(getRuntimeCC());
}

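// Variadic arguments are read from a byte-aligned buffer: the slot size and
// minimum alignment are one byte, and each value may be loaded at its natural
// (higher) alignment, as the AllowHigherAlign argument below indicates.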
RValue NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                               QualType Ty, AggValueSlot Slot) const {
  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*IsIndirect=*/false,
                          getContext().getTypeInfoInChars(Ty),
                          CharUnits::fromQuantity(1),
                          /*AllowHigherAlign=*/true, Slot);
}

void NVPTXTargetCodeGenInfo::setTargetAttributes(
    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
  if (GV->isDeclaration())
    return;
  const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
  if (VD) {
    if (M.getLangOpts().CUDA) {
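      // E.g. (illustrative) a module-scope `surface<void, 2> surf;` under
      // CUDA is annotated as !{ptr @surf, !"surface", i32 1}.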
      if (VD->getType()->isCUDADeviceBuiltinSurfaceType())
        addNVVMMetadata(GV, "surface", 1);
      else if (VD->getType()->isCUDADeviceBuiltinTextureType())
        addNVVMMetadata(GV, "texture", 1);
      return;
    }
  }

  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
  if (!FD)
    return;

  llvm::Function *F = cast<llvm::Function>(GV);

  // Perform special handling in OpenCL/CUDA mode.
  if (M.getLangOpts().OpenCL || M.getLangOpts().CUDA) {
    // Use function attributes to check for kernel functions.
    // By default, all functions are device functions.
    if (FD->hasAttr<DeviceKernelAttr>() || FD->hasAttr<CUDAGlobalAttr>()) {
      // OpenCL/CUDA kernel functions are not subject to inlining.
      F->addFnAttr(llvm::Attribute::NoInline);
      if (FD->hasAttr<CUDAGlobalAttr>()) {
        SmallVector<int, 10> GCI;
        for (auto IV : llvm::enumerate(FD->parameters()))
          if (IV.value()->hasAttr<CUDAGridConstantAttr>())
            // For some reason arg indices are 1-based in NVVM.
            GCI.push_back(IV.index() + 1);
        // Mark the kernel with the PTX_Kernel calling convention and record
        // its grid-constant arguments.
        F->setCallingConv(llvm::CallingConv::PTX_Kernel);
        addGridConstantNVVMMetadata(F, GCI);
      }
      if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
        M.handleCUDALaunchBoundsAttr(F, Attr);
    }
  }
  // Attach the kernel calling convention directly if compiling for NVPTX.
  if (FD->hasAttr<DeviceKernelAttr>()) {
    F->setCallingConv(llvm::CallingConv::PTX_Kernel);
  }
}

void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
                                             StringRef Name, int Operand) {
  llvm::Module *M = GV->getParent();
  llvm::LLVMContext &Ctx = M->getContext();

  // Get the "nvvm.annotations" metadata node.
  llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");

  SmallVector<llvm::Metadata *, 5> MDVals = {
      llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
      llvm::ConstantAsMetadata::get(
          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};

  // Append the metadata to nvvm.annotations.
  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}

void NVPTXTargetCodeGenInfo::addGridConstantNVVMMetadata(
    llvm::GlobalValue *GV, const SmallVectorImpl<int> &GridConstantArgs) {

  llvm::Module *M = GV->getParent();
  llvm::LLVMContext &Ctx = M->getContext();

  // Get the "nvvm.annotations" metadata node.
  llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");

  SmallVector<llvm::Metadata *, 5> MDVals = {llvm::ConstantAsMetadata::get(GV)};
  if (!GridConstantArgs.empty()) {
    SmallVector<llvm::Metadata *, 10> GCM;
    for (int I : GridConstantArgs)
      GCM.push_back(llvm::ConstantAsMetadata::get(
          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), I)));
    MDVals.append({llvm::MDString::get(Ctx, "grid_constant"),
                   llvm::MDNode::get(Ctx, GCM)});
  }

  // Append the metadata to nvvm.annotations.
  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}

bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
  return false;
}

llvm::Constant *
NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
                                       llvm::PointerType *PT,
                                       QualType QT) const {
  auto &Ctx = CGM.getContext();
  if (PT->getAddressSpace() != Ctx.getTargetAddressSpace(LangAS::opencl_local))
    return llvm::ConstantPointerNull::get(PT);

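  // A local (shared) address-space null pointer is represented as a
  // generic-address-space null cast into the local space, roughly
  // (illustrative IR, assuming shared memory is addrspace(3)):
  //   addrspacecast (ptr null to ptr addrspace(3))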
  auto NPT = llvm::PointerType::get(
      PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));
  return llvm::ConstantExpr::getAddrSpaceCast(
      llvm::ConstantPointerNull::get(NPT), PT);
}
} // namespace

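// Illustrative example: __launch_bounds__(128, 2) yields the function
// attributes "nvvm.maxntid"="128" and "nvvm.minctasm"="2" via the logic
// below.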
void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
                                               const CUDALaunchBoundsAttr *Attr,
                                               int32_t *MaxThreadsVal,
                                               int32_t *MinBlocksVal,
                                               int32_t *MaxClusterRankVal) {
  llvm::APSInt MaxThreads(32);
  MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(getContext());
  if (MaxThreads > 0) {
    if (MaxThreadsVal)
      *MaxThreadsVal = MaxThreads.getExtValue();
    if (F)
      F->addFnAttr("nvvm.maxntid", llvm::utostr(MaxThreads.getExtValue()));
  }

  // The min/max blocks arguments are optional for CUDALaunchBoundsAttr. If
  // they were not specified in __launch_bounds__ or the user specified a 0
  // value, we don't have to add a PTX directive.
  if (Attr->getMinBlocks()) {
    llvm::APSInt MinBlocks(32);
    MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(getContext());
    if (MinBlocks > 0) {
      if (MinBlocksVal)
        *MinBlocksVal = MinBlocks.getExtValue();
      if (F)
        F->addFnAttr("nvvm.minctasm", llvm::utostr(MinBlocks.getExtValue()));
    }
  }
  if (Attr->getMaxBlocks()) {
    llvm::APSInt MaxBlocks(32);
    MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext());
    if (MaxBlocks > 0) {
      if (MaxClusterRankVal)
        *MaxClusterRankVal = MaxBlocks.getExtValue();
      if (F)
        F->addFnAttr("nvvm.maxclusterrank",
                     llvm::utostr(MaxBlocks.getExtValue()));
    }
  }
}

std::unique_ptr<TargetCodeGenInfo>
CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) {
  return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.getTypes());
}