NVPTX.cpp source code [llvm_projects/clang/lib/CodeGen/Targets/NVPTX.cpp]

1	//===- NVPTX.cpp ----------------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "ABIInfoImpl.h"
10	#include "TargetInfo.h"
11	#include "clang/Basic/SyncScope.h"
12	#include "llvm/ADT/STLExtras.h"
13	#include "llvm/ADT/StringExtras.h"
14	#include "llvm/IR/CallingConv.h"
15	#include "llvm/IR/IntrinsicsNVPTX.h"
16
17	using namespace clang;
18	using namespace clang::CodeGen;
19
20	//===----------------------------------------------------------------------===//
21	// NVPTX ABI Implementation
22	//===----------------------------------------------------------------------===//
23
24	namespace {
25
26	class NVPTXTargetCodeGenInfo;
27
28	class NVPTXABIInfo : public ABIInfo {
29	NVPTXTargetCodeGenInfo &CGInfo;
30
31	public:
32	NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
33	: ABIInfo (CGT), CGInfo(Info) {}
34
35	ABIArgInfo classifyReturnType(QualType RetTy) const;
36	ABIArgInfo classifyArgumentType(QualType Ty) const;
37
38	void computeInfo(CGFunctionInfo &FI) const override;
39	RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
40	AggValueSlot Slot) const override;
41	bool isUnsupportedType(QualType T) const;
42	ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const;
43	};
44
45	class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
46	public:
47	NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
48	: TargetCodeGenInfo (std::make_unique<NVPTXABIInfo>(args&: CGT, args&: *this)) {}
49
50	void setTargetAttributes(const Decl D, llvm::GlobalValue GV,
51	CodeGen::CodeGenModule &M) const override;
52	bool shouldEmitStaticExternCAliases() const override;
53
54	StringRef getLLVMSyncScopeStr(const LangOptions &LangOpts, SyncScope Scope,
55	llvm::AtomicOrdering Ordering) const override;
56
57	llvm::Constant getNullPointer(const* CodeGen::CodeGenModule &CGM,
58	llvm::PointerType *T,
59	QualType QT) const override;
60
61	llvm::Type getCUDADeviceBuiltinSurfaceDeviceType() const* override {
62	// On the device side, surface reference is represented as an object handle
63	// in 64-bit integer.
64	return llvm::Type::getInt64Ty(C&: getABIInfo().getVMContext());
65	}
66
67	llvm::Type getCUDADeviceBuiltinTextureDeviceType() const* override {
68	// On the device side, texture reference is represented as an object handle
69	// in 64-bit integer.
70	return llvm::Type::getInt64Ty(C&: getABIInfo().getVMContext());
71	}
72
73	bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
74	LValue Src) const override {
75	emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
76	return true;
77	}
78
79	bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
80	LValue Src) const override {
81	emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
82	return true;
83	}
84
85	unsigned getDeviceKernelCallingConv() const override {
86	return llvm::CallingConv::PTX_Kernel;
87	}
88
89	// Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
90	// resulting MDNode to the nvvm.annotations MDNode.
91	static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
92	int Operand);
93
94	private:
95	static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
96	LValue Src) {
97	llvm::Value Handle = nullptr*;
98	llvm::Constant *C =
99	llvm::dyn_cast<llvm::Constant>(Val: Src.getAddress().emitRawPointer(CGF));
100	// Lookup `addrspacecast` through the constant pointer if any.
101	if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(Val: C))
102	C = llvm::cast<llvm::Constant>(Val: ASC->getPointerOperand());
103	if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(Val: C)) {
104	// Load the handle from the specific global variable using
105	// `nvvm.texsurf.handle.internal` intrinsic.
106	Handle = CGF.EmitRuntimeCall(
107	callee: CGF.CGM.getIntrinsic(IID: llvm::Intrinsic::nvvm_texsurf_handle_internal,
108	Tys: {GV->getType()}),
109	args: {GV}, name: "texsurf_handle");
110	} else
111	Handle = CGF.EmitLoadOfScalar(lvalue: Src, Loc: SourceLocation ());
112	CGF.EmitStoreOfScalar(value: Handle, lvalue: Dst);
113	}
114	};
115
116	/// Checks if the type is unsupported directly by the current target.
117	bool NVPTXABIInfo::isUnsupportedType(QualType T) const {
118	ASTContext &Context = getContext();
119	if (!Context.getTargetInfo().hasFloat16Type() && T ->isFloat16Type())
120	return true;
121	if (!Context.getTargetInfo().hasFloat128Type() &&
122	(T ->isFloat128Type() \|\|
123	(T ->isRealFloatingType() && Context.getTypeSize(T) == `128`)))
124	return true;
125	if (const auto *EIT = T ->getAs<BitIntType>())
126	return EIT->getNumBits() >
127	(Context.getTargetInfo().hasInt128Type() ? `128U` : `64U`);
128	if (!Context.getTargetInfo().hasInt128Type() && T ->isIntegerType() &&
129	Context.getTypeSize(T) > `64U`)
130	return true;
131	if (const auto *AT = T ->getAsArrayTypeUnsafe())
132	return isUnsupportedType(T: AT->getElementType());
133	const auto *RD = T ->getAsRecordDecl();
134	if (!RD)
135	return false;
136
137	// If this is a C++ record, check the bases first.
138	if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(Val: RD))
139	for (const CXXBaseSpecifier &I : CXXRD->bases())
140	if (isUnsupportedType(T: I.getType()))
141	return true;
142
143	for (const FieldDecl *I : RD->fields())
144	if (isUnsupportedType(T: I->getType()))
145	return true;
146	return false;
147	}
148
149	/// Coerce the given type into an array with maximum allowed size of elements.
150	ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
151	unsigned MaxSize) const {
152	// Alignment and Size are measured in bits.
153	const uint64_t Size = getContext().getTypeSize(T: Ty);
154	const uint64_t Alignment = getContext().getTypeAlign(T: Ty);
155	const unsigned Div = std::min<unsigned>(a: MaxSize, b: Alignment);
156	llvm::Type *IntType = llvm::Type::getIntNTy(C&: getVMContext(), N: Div);
157	const uint64_t NumElements = (Size + Div - `1`) / Div;
158	return ABIArgInfo::getDirect(T: llvm::ArrayType::get(ElementType: IntType, NumElements));
159	}
160
161	ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
162	if (RetTy ->isVoidType())
163	return ABIArgInfo::getIgnore();
164
165	if (getContext().getLangOpts().OpenMP &&
166	getContext().getLangOpts().OpenMPIsTargetDevice &&
167	isUnsupportedType(T: RetTy))
168	return coerceToIntArrayWithLimit(Ty: RetTy, MaxSize: `64`);
169
170	// note: this is different from default ABI
171	if (!RetTy ->isScalarType())
172	return ABIArgInfo::getDirect();
173
174	// Treat an enum type as its underlying type.
175	if (const auto *ED = RetTy ->getAsEnumDecl())
176	RetTy = ED->getIntegerType();
177
178	return (isPromotableIntegerTypeForABI(Ty: RetTy) ? ABIArgInfo::getExtend(Ty: RetTy)
179	: ABIArgInfo::getDirect());
180	}
181
182	ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
183	// Treat an enum type as its underlying type.
184	if (const auto *ED = Ty ->getAsEnumDecl())
185	Ty = ED->getIntegerType();
186
187	// Return aggregates type as indirect by value
188	if (isAggregateTypeForABI(T: Ty)) {
189	// Under CUDA device compilation, tex/surf builtin types are replaced with
190	// object types and passed directly.
191	if (getContext().getLangOpts().CUDAIsDevice) {
192	if (Ty ->isCUDADeviceBuiltinSurfaceType())
193	return ABIArgInfo::getDirect(
194	T: CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
195	if (Ty ->isCUDADeviceBuiltinTextureType())
196	return ABIArgInfo::getDirect(
197	T: CGInfo.getCUDADeviceBuiltinTextureDeviceType());
198	}
199	return getNaturalAlignIndirect(
200	Ty, / AddrSpace / getDataLayout().getAllocaAddrSpace(),
201	/ byval / ByVal: true);
202	}
203
204	if (const auto *EIT = Ty ->getAs<BitIntType>()) {
205	if ((EIT->getNumBits() > `128`) \|\|
206	(!getContext().getTargetInfo().hasInt128Type() &&
207	EIT->getNumBits() > `64`))
208	return getNaturalAlignIndirect(
209	Ty, / AddrSpace / getDataLayout().getAllocaAddrSpace(),
210	/ byval / ByVal: true);
211	}
212
213	return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
214	: ABIArgInfo::getDirect());
215	}
216
217	void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
218	if (!getCXXABI().classifyReturnType(FI))
219	FI.getReturnInfo() = classifyReturnType(RetTy: FI.getReturnType());
220
221	for (auto &&[ArgumentsCount, I] : llvm::enumerate(First: FI.arguments()))
222	I.info = ArgumentsCount < FI.getNumRequiredArgs()
223	? classifyArgumentType(Ty: I.type)
224	: ABIArgInfo::getDirect();
225
226	// Always honor user-specified calling convention.
227	if (FI.getCallingConvention() != llvm::CallingConv::C)
228	return;
229
230	FI.setEffectiveCallingConvention(getRuntimeCC());
231	}
232
233	RValue NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
234	QualType Ty, AggValueSlot Slot) const {
235	return emitVoidPtrVAArg(CGF, VAListAddr, ValueTy: Ty, /IsIndirect=/false,
236	ValueInfo: getContext().getTypeInfoInChars(T: Ty),
237	SlotSizeAndAlign: CharUnits::fromQuantity(Quantity: `1`),
238	/AllowHigherAlign=/true, Slot);
239	}
240
241	void NVPTXTargetCodeGenInfo::setTargetAttributes(
242	const Decl D, llvm::GlobalValue GV, CodeGen::CodeGenModule &M) const {
243	if (GV->isDeclaration())
244	return;
245	const VarDecl *VD = dyn_cast_or_null<VarDecl>(Val: D);
246	if (VD) {
247	if (M.getLangOpts().CUDA) {
248	if (VD->getType()->isCUDADeviceBuiltinSurfaceType())
249	addNVVMMetadata(GV, Name: "surface", Operand: `1`);
250	else if (VD->getType()->isCUDADeviceBuiltinTextureType())
251	addNVVMMetadata(GV, Name: "texture", Operand: `1`);
252	return;
253	}
254	}
255
256	const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Val: D);
257	if (!FD)
258	return;
259
260	llvm::Function *F = cast<llvm::Function>(Val: GV);
261
262	// Perform special handling in OpenCL/CUDA mode
263	if (M.getLangOpts().OpenCL \|\| M.getLangOpts().CUDA) {
264	// Use function attributes to check for kernel functions
265	// By default, all functions are device functions
266	if (FD->hasAttr<DeviceKernelAttr>() \|\| FD->hasAttr<CUDAGlobalAttr>()) {
267	// OpenCL/CUDA kernel functions get kernel metadata
268	// And kernel functions are not subject to inlining
269	F->addFnAttr(Kind: llvm::Attribute::NoInline);
270	if (FD->hasAttr<CUDAGlobalAttr>()) {
271	F->setCallingConv(getDeviceKernelCallingConv());
272
273	for (auto IV : llvm::enumerate(First: FD->parameters()))
274	if (IV.value()->hasAttr<CUDAGridConstantAttr>())
275	F->addParamAttr(
276	ArgNo: IV.index(),
277	Attr: llvm::Attribute::get(Context&: F->getContext(), Kind: "nvvm.grid_constant"));
278	}
279	if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
280	M.handleCUDALaunchBoundsAttr(F, A: Attr);
281	}
282	}
283	}
284
285	void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
286	StringRef Name, int Operand) {
287	llvm::Module *M = GV->getParent();
288	llvm::LLVMContext &Ctx = M->getContext();
289
290	// Get "nvvm.annotations" metadata node
291	llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata(Name: "nvvm.annotations");
292
293	SmallVector<llvm::Metadata *, `5`> MDVals = {
294	llvm::ConstantAsMetadata::get(C: GV), llvm::MDString::get(Context&: Ctx, Str: Name),
295	llvm::ConstantAsMetadata::get(
296	C: llvm::ConstantInt::get(Ty: llvm::Type::getInt32Ty(C&: Ctx), V: Operand))};
297
298	// Append metadata to nvvm.annotations
299	MD->addOperand(M: llvm::MDNode::get(Context&: Ctx, MDs: MDVals));
300	}
301
302	bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
303	return false;
304	}
305
306	StringRef NVPTXTargetCodeGenInfo::getLLVMSyncScopeStr(
307	const LangOptions &LangOpts, SyncScope Scope,
308	llvm::AtomicOrdering Ordering) const {
309	switch (Scope) {
310	case SyncScope::HIPSingleThread:
311	case SyncScope::SingleScope:
312	return "singlethread";
313	case SyncScope::HIPWavefront:
314	case SyncScope::OpenCLSubGroup:
315	case SyncScope::WavefrontScope:
316	case SyncScope::HIPWorkgroup:
317	case SyncScope::OpenCLWorkGroup:
318	case SyncScope::WorkgroupScope:
319	return "block";
320	case SyncScope::HIPCluster:
321	case SyncScope::ClusterScope:
322	return "cluster";
323	case SyncScope::HIPAgent:
324	case SyncScope::OpenCLDevice:
325	case SyncScope::DeviceScope:
326	return "device";
327	case SyncScope::SystemScope:
328	case SyncScope::HIPSystem:
329	case SyncScope::OpenCLAllSVMDevices:
330	return "";
331	}
332	llvm_unreachable("Unknown SyncScope enum");
333	}
334
335	llvm::Constant *
336	NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
337	llvm::PointerType *PT,
338	QualType QT) const {
339	auto &Ctx = CGM.getContext();
340	if (PT->getAddressSpace() != Ctx.getTargetAddressSpace(AS: LangAS::opencl_local))
341	return llvm::ConstantPointerNull::get(T: PT);
342
343	auto NPT = llvm::PointerType::get(
344	C&: PT->getContext(), AddressSpace: Ctx.getTargetAddressSpace(AS: LangAS::opencl_generic));
345	return llvm::ConstantExpr::getAddrSpaceCast(
346	C: llvm::ConstantPointerNull::get(T: NPT), Ty: PT);
347	}
348	} // namespace
349
350	void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
351	const CUDALaunchBoundsAttr *Attr,
352	int32_t *MaxThreadsVal,
353	int32_t *MinBlocksVal,
354	int32_t *MaxClusterRankVal) {
355	llvm::APSInt MaxThreads(`32`);
356	MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(Ctx: getContext());
357	if (MaxThreads > `0`) {
358	if (MaxThreadsVal)
359	*MaxThreadsVal = MaxThreads.getExtValue();
360	if (F)
361	F->addFnAttr(Kind: "nvvm.maxntid", Val: llvm::utostr(X: MaxThreads.getExtValue()));
362	}
363
364	// min and max blocks is an optional argument for CUDALaunchBoundsAttr. If it
365	// was not specified in __launch_bounds__ or if the user specified a 0 value,
366	// we don't have to add a PTX directive.
367	if (Attr->getMinBlocks()) {
368	llvm::APSInt MinBlocks(`32`);
369	MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(Ctx: getContext());
370	if (MinBlocks > `0`) {
371	if (MinBlocksVal)
372	*MinBlocksVal = MinBlocks.getExtValue();
373	if (F)
374	F->addFnAttr(Kind: "nvvm.minctasm", Val: llvm::utostr(X: MinBlocks.getExtValue()));
375	}
376	}
377	if (Attr->getMaxBlocks()) {
378	llvm::APSInt MaxBlocks(`32`);
379	MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(Ctx: getContext());
380	if (MaxBlocks > `0`) {
381	if (MaxClusterRankVal)
382	*MaxClusterRankVal = MaxBlocks.getExtValue();
383	if (F)
384	F->addFnAttr(Kind: "nvvm.maxclusterrank",
385	Val: llvm::utostr(X: MaxBlocks.getExtValue()));
386	}
387	}
388	}
389
390	std::unique_ptr<TargetCodeGenInfo>
391	CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) {
392	return std::make_unique<NVPTXTargetCodeGenInfo>(args&: CGM.getTypes());
393	}
394

Browse the source code of llvm_projects/clang/lib/CodeGen/Targets/NVPTX.cpp