//===- NVPTX.cpp ----------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/IntrinsicsNVPTX.h"

using namespace clang;
using namespace clang::CodeGen;

//===----------------------------------------------------------------------===//
// NVPTX ABI Implementation
//===----------------------------------------------------------------------===//

namespace {

class NVPTXTargetCodeGenInfo;

class NVPTXABIInfo : public ABIInfo {
  NVPTXTargetCodeGenInfo &CGInfo;

public:
  NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
      : ABIInfo(CGT), CGInfo(Info) {}

  ABIArgInfo classifyReturnType(QualType RetTy) const;
  ABIArgInfo classifyArgumentType(QualType Ty) const;

  void computeInfo(CGFunctionInfo &FI) const override;
  RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                   AggValueSlot Slot) const override;
  bool isUnsupportedType(QualType T) const;
  ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const;
};

class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
public:
  NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
      : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {}

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &M) const override;
  bool shouldEmitStaticExternCAliases() const override;

  llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
                                 llvm::PointerType *T,
                                 QualType QT) const override;

  llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override {
    // On the device side, a surface reference is represented as an object
    // handle in a 64-bit integer.
    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
  }

  llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override {
    // On the device side, a texture reference is represented as an object
    // handle in a 64-bit integer.
    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
  }

  bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                              LValue Src) const override {
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
    return true;
  }

  bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                              LValue Src) const override {
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
    return true;
  }

  unsigned getDeviceKernelCallingConv() const override {
    return llvm::CallingConv::PTX_Kernel;
  }

  // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
  // resulting MDNode to the nvvm.annotations MDNode.
  static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
                              int Operand);

private:
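  // Illustrative sketch only (not verbatim compiler output): when `Src` refers
  // to a global texture/surface reference variable @tex, the helper below
  // loads the handle with a call along the lines of
  //   %texsurf_handle = call i64 @llvm.nvvm.texsurf.handle.internal(ptr @tex)
  // and stores the resulting i64 handle into `Dst`; otherwise it simply copies
  // the already-materialized handle.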
  static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                           LValue Src) {
    llvm::Value *Handle = nullptr;
    llvm::Constant *C =
        llvm::dyn_cast<llvm::Constant>(Src.getAddress().emitRawPointer(CGF));
    // Look up `addrspacecast` through the constant pointer if any.
    if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
      C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
    if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
      // Load the handle from the specific global variable using the
      // `nvvm.texsurf.handle.internal` intrinsic.
      Handle = CGF.EmitRuntimeCall(
          CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal,
                               {GV->getType()}),
          {GV}, "texsurf_handle");
    } else
      Handle = CGF.EmitLoadOfScalar(Src, SourceLocation());
    CGF.EmitStoreOfScalar(Handle, Dst);
  }
};

/// Checks if the type is unsupported directly by the current target.
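/// For example, on a target without __int128 support, a record containing an
/// __int128 field (or any integer wider than 64 bits) is reported as
/// unsupported, as is __float128 when the target lacks a 128-bit float type.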
bool NVPTXABIInfo::isUnsupportedType(QualType T) const {
  ASTContext &Context = getContext();
  if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())
    return true;
  if (!Context.getTargetInfo().hasFloat128Type() &&
      (T->isFloat128Type() ||
       (T->isRealFloatingType() && Context.getTypeSize(T) == 128)))
    return true;
  if (const auto *EIT = T->getAs<BitIntType>())
    return EIT->getNumBits() >
           (Context.getTargetInfo().hasInt128Type() ? 128U : 64U);
  if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() &&
      Context.getTypeSize(T) > 64U)
    return true;
  if (const auto *AT = T->getAsArrayTypeUnsafe())
    return isUnsupportedType(AT->getElementType());
  const auto *RD = T->getAsRecordDecl();
  if (!RD)
    return false;

  // If this is a C++ record, check the bases first.
  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
    for (const CXXBaseSpecifier &I : CXXRD->bases())
      if (isUnsupportedType(I.getType()))
        return true;

  for (const FieldDecl *I : RD->fields())
    if (isUnsupportedType(I->getType()))
      return true;
  return false;
}

/// Coerce the given type into an array with maximum allowed size of elements.
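/// For instance (illustrative): a 128-bit record with 64-bit alignment and
/// MaxSize == 64 gives Div == 64, so it is coerced to the IR type [2 x i64].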
ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
                                                   unsigned MaxSize) const {
  // Alignment and Size are measured in bits.
  const uint64_t Size = getContext().getTypeSize(Ty);
  const uint64_t Alignment = getContext().getTypeAlign(Ty);
  const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
  llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div);
  const uint64_t NumElements = (Size + Div - 1) / Div;
  return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
}

ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
  if (RetTy->isVoidType())
    return ABIArgInfo::getIgnore();

  if (getContext().getLangOpts().OpenMP &&
      getContext().getLangOpts().OpenMPIsTargetDevice &&
      isUnsupportedType(RetTy))
    return coerceToIntArrayWithLimit(RetTy, 64);

  // Note: This differs from the default ABI, which would return aggregates
  // indirectly.
  if (!RetTy->isScalarType())
    return ABIArgInfo::getDirect();

  // Treat an enum type as its underlying type.
  if (const auto *ED = RetTy->getAsEnumDecl())
    RetTy = ED->getIntegerType();

  return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
                                               : ABIArgInfo::getDirect());
}

ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
  // Treat an enum type as its underlying type.
  if (const auto *ED = Ty->getAsEnumDecl())
    Ty = ED->getIntegerType();

  // Pass aggregate types indirectly, by value.
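  // (Illustrative: a `struct { int v[4]; }` parameter is lowered to a pointer
  // to a caller-made copy in the alloca address space.)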
  if (isAggregateTypeForABI(Ty)) {
    // Under CUDA device compilation, tex/surf builtin types are replaced with
    // object types and passed directly.
    if (getContext().getLangOpts().CUDAIsDevice) {
      if (Ty->isCUDADeviceBuiltinSurfaceType())
        return ABIArgInfo::getDirect(
            CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
      if (Ty->isCUDADeviceBuiltinTextureType())
        return ABIArgInfo::getDirect(
            CGInfo.getCUDADeviceBuiltinTextureDeviceType());
    }
    return getNaturalAlignIndirect(
        Ty, /*AddrSpace=*/getDataLayout().getAllocaAddrSpace(),
        /*ByVal=*/true);
  }

  if (const auto *EIT = Ty->getAs<BitIntType>()) {
    if ((EIT->getNumBits() > 128) ||
        (!getContext().getTargetInfo().hasInt128Type() &&
         EIT->getNumBits() > 64))
      return getNaturalAlignIndirect(
          Ty, /*AddrSpace=*/getDataLayout().getAllocaAddrSpace(),
          /*ByVal=*/true);
  }

  return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
                                            : ABIArgInfo::getDirect());
}

void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
  if (!getCXXABI().classifyReturnType(FI))
    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());

  for (auto &&[ArgumentsCount, I] : llvm::enumerate(FI.arguments()))
    I.info = ArgumentsCount < FI.getNumRequiredArgs()
                 ? classifyArgumentType(I.type)
                 : ABIArgInfo::getDirect();

  // Always honor user-specified calling convention.
  if (FI.getCallingConvention() != llvm::CallingConv::C)
    return;

  FI.setEffectiveCallingConvention(getRuntimeCC());
}

RValue NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                               QualType Ty, AggValueSlot Slot) const {
  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*IsIndirect=*/false,
                          getContext().getTypeInfoInChars(Ty),
                          CharUnits::fromQuantity(1),
                          /*AllowHigherAlign=*/true, Slot);
}

void NVPTXTargetCodeGenInfo::setTargetAttributes(
    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
  if (GV->isDeclaration())
    return;
  const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
  if (VD) {
    if (M.getLangOpts().CUDA) {
      if (VD->getType()->isCUDADeviceBuiltinSurfaceType())
        addNVVMMetadata(GV, "surface", 1);
      else if (VD->getType()->isCUDADeviceBuiltinTextureType())
        addNVVMMetadata(GV, "texture", 1);
      return;
    }
  }

  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
  if (!FD)
    return;

  llvm::Function *F = cast<llvm::Function>(GV);

  // Perform special handling in OpenCL/CUDA mode
  if (M.getLangOpts().OpenCL || M.getLangOpts().CUDA) {
    // Use function attributes to check for kernel functions
    // By default, all functions are device functions
    if (FD->hasAttr<DeviceKernelAttr>() || FD->hasAttr<CUDAGlobalAttr>()) {
      // OpenCL/CUDA kernel functions get kernel metadata
      // And kernel functions are not subject to inlining
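      // Illustrative end result for a CUDA `__global__` kernel (sketch, not
      // exact compiler output):
      //   define ptx_kernel void @kern(...) #0 { ... }
      //   attributes #0 = { noinline ... }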
      F->addFnAttr(llvm::Attribute::NoInline);
      if (FD->hasAttr<CUDAGlobalAttr>()) {
        F->setCallingConv(getDeviceKernelCallingConv());

        for (auto IV : llvm::enumerate(FD->parameters()))
          if (IV.value()->hasAttr<CUDAGridConstantAttr>())
            F->addParamAttr(
                IV.index(),
                llvm::Attribute::get(F->getContext(), "nvvm.grid_constant"));
      }
      if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
        M.handleCUDALaunchBoundsAttr(F, Attr);
    }
  }
}

void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
                                             StringRef Name, int Operand) {
  llvm::Module *M = GV->getParent();
  llvm::LLVMContext &Ctx = M->getContext();

  // Get "nvvm.annotations" metadata node
  llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");

  SmallVector<llvm::Metadata *, 5> MDVals = {
      llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
      llvm::ConstantAsMetadata::get(
          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};

  // Append metadata to nvvm.annotations
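  // For example (illustrative), addNVVMMetadata(GV, "surface", 1) appends an
  // entry of the form !{ptr @GV, !"surface", i32 1}, with the pointer type
  // matching the global's address space.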
  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}

bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
  return false;
}

llvm::Constant *
NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
                                       llvm::PointerType *PT,
                                       QualType QT) const {
  auto &Ctx = CGM.getContext();
  if (PT->getAddressSpace() != Ctx.getTargetAddressSpace(LangAS::opencl_local))
    return llvm::ConstantPointerNull::get(PT);

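  // For OpenCL local pointers, emit the null value as a generic-address-space
  // null cast to the target pointer type. On NVPTX, where the local/shared
  // space is addrspace(3), the constant is roughly (illustrative):
  //   addrspacecast (ptr null to ptr addrspace(3))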
  auto NPT = llvm::PointerType::get(
      PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));
  return llvm::ConstantExpr::getAddrSpaceCast(
      llvm::ConstantPointerNull::get(NPT), PT);
}
} // namespace

void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
                                               const CUDALaunchBoundsAttr *Attr,
                                               int32_t *MaxThreadsVal,
                                               int32_t *MinBlocksVal,
                                               int32_t *MaxClusterRankVal) {
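  // For example (illustrative): `__launch_bounds__(256, 2)` results in the
  // function attributes "nvvm.maxntid"="256" and "nvvm.minctasm"="2"; a third
  // argument, when present, is emitted as "nvvm.maxclusterrank".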
  llvm::APSInt MaxThreads(32);
  MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(getContext());
  if (MaxThreads > 0) {
    if (MaxThreadsVal)
      *MaxThreadsVal = MaxThreads.getExtValue();
    if (F)
      F->addFnAttr("nvvm.maxntid", llvm::utostr(MaxThreads.getExtValue()));
  }

  // MinBlocks and MaxBlocks are optional arguments for CUDALaunchBoundsAttr.
  // If they were not specified in __launch_bounds__ or the user specified a 0
  // value, we don't have to add a PTX directive.
  if (Attr->getMinBlocks()) {
    llvm::APSInt MinBlocks(32);
    MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(getContext());
    if (MinBlocks > 0) {
      if (MinBlocksVal)
        *MinBlocksVal = MinBlocks.getExtValue();
      if (F)
        F->addFnAttr("nvvm.minctasm", llvm::utostr(MinBlocks.getExtValue()));
    }
  }
  if (Attr->getMaxBlocks()) {
    llvm::APSInt MaxBlocks(32);
    MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext());
    if (MaxBlocks > 0) {
      if (MaxClusterRankVal)
        *MaxClusterRankVal = MaxBlocks.getExtValue();
      if (F)
        F->addFnAttr("nvvm.maxclusterrank",
                     llvm::utostr(MaxBlocks.getExtValue()));
    }
  }
}

std::unique_ptr<TargetCodeGenInfo>
CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) {
  return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.getTypes());
}