1//===- NVPTX.cpp ----------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "ABIInfoImpl.h"
10#include "TargetInfo.h"
11#include "clang/Basic/SyncScope.h"
12#include "llvm/ADT/STLExtras.h"
13#include "llvm/ADT/StringExtras.h"
14#include "llvm/IR/CallingConv.h"
15#include "llvm/IR/IntrinsicsNVPTX.h"
16
17using namespace clang;
18using namespace clang::CodeGen;
19
20//===----------------------------------------------------------------------===//
21// NVPTX ABI Implementation
22//===----------------------------------------------------------------------===//
23
24namespace {
25
26class NVPTXTargetCodeGenInfo;
27
/// ABI classification for NVPTX: decides how return values and arguments
/// are lowered for device functions.
class NVPTXABIInfo : public ABIInfo {
  // Back-reference to the owning target info; used to obtain the device-side
  // handle types for CUDA surface/texture builtin types.
  NVPTXTargetCodeGenInfo &CGInfo;

public:
  NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
      : ABIInfo(CGT), CGInfo(Info) {}

  /// Classify how a function result of type \p RetTy is returned.
  ABIArgInfo classifyReturnType(QualType RetTy) const;
  /// Classify how a single argument of type \p Ty is passed.
  ABIArgInfo classifyArgumentType(QualType Ty) const;

  void computeInfo(CGFunctionInfo &FI) const override;
  RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                   AggValueSlot Slot) const override;
  /// Returns true if \p T (or any base/field/element type it contains) is not
  /// supported by the current target (e.g. __float128 without
  /// hasFloat128Type(), or integers wider than the target allows).
  bool isUnsupportedType(QualType T) const;
  /// Coerce \p Ty into an array of integers whose element width is at most
  /// \p MaxSize bits.
  ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const;
};
44
/// Target hooks for NVPTX code generation: kernel attributes/metadata, CUDA
/// surface/texture lowering, sync-scope names, and address-space-aware null
/// pointers.
class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
public:
  NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
      : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(args&: CGT, args&: *this)) {}

  // Attaches NVPTX-specific attributes/metadata to globals: surface/texture
  // annotations for CUDA variables, and kernel calling convention, noinline,
  // grid_constant params, and launch bounds for kernel functions.
  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &M) const override;
  // NVPTX does not emit aliases for static extern "C" functions.
  bool shouldEmitStaticExternCAliases() const override;

  // Maps a language-level SyncScope onto the NVPTX LLVM sync-scope string
  // ("singlethread", "block", "cluster", "device", or "" for system scope).
  StringRef getLLVMSyncScopeStr(const LangOptions &LangOpts, SyncScope Scope,
                                llvm::AtomicOrdering Ordering) const override;

  // Null in the OpenCL local address space is not the all-zero pattern; see
  // the out-of-line definition.
  llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
                                 llvm::PointerType *T,
                                 QualType QT) const override;

  llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override {
    // On the device side, surface reference is represented as an object handle
    // in 64-bit integer.
    return llvm::Type::getInt64Ty(C&: getABIInfo().getVMContext());
  }

  llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override {
    // On the device side, texture reference is represented as an object handle
    // in 64-bit integer.
    return llvm::Type::getInt64Ty(C&: getABIInfo().getVMContext());
  }

  bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                              LValue Src) const override {
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
    return true;
  }

  bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                              LValue Src) const override {
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
    return true;
  }

  unsigned getDeviceKernelCallingConv() const override {
    return llvm::CallingConv::PTX_Kernel;
  }

  // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
  // resulting MDNode to the nvvm.annotations MDNode.
  static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
                              int Operand);

private:
  // Copies a surface/texture handle from Src to Dst. When the source is a
  // (possibly addrspacecast'ed) global variable, the handle is produced by
  // the nvvm.texsurf.handle.internal intrinsic; otherwise it is loaded as a
  // plain scalar.
  static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                           LValue Src) {
    llvm::Value *Handle = nullptr;
    llvm::Constant *C =
        llvm::dyn_cast<llvm::Constant>(Val: Src.getAddress().emitRawPointer(CGF));
    // Lookup `addrspacecast` through the constant pointer if any.
    if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(Val: C))
      C = llvm::cast<llvm::Constant>(Val: ASC->getPointerOperand());
    if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(Val: C)) {
      // Load the handle from the specific global variable using
      // `nvvm.texsurf.handle.internal` intrinsic.
      Handle = CGF.EmitRuntimeCall(
          callee: CGF.CGM.getIntrinsic(IID: llvm::Intrinsic::nvvm_texsurf_handle_internal,
                               Tys: {GV->getType()}),
          args: {GV}, name: "texsurf_handle");
    } else
      Handle = CGF.EmitLoadOfScalar(lvalue: Src, Loc: SourceLocation());
    CGF.EmitStoreOfScalar(value: Handle, lvalue: Dst);
  }
};
115
116/// Checks if the type is unsupported directly by the current target.
117bool NVPTXABIInfo::isUnsupportedType(QualType T) const {
118 ASTContext &Context = getContext();
119 if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())
120 return true;
121 if (!Context.getTargetInfo().hasFloat128Type() &&
122 (T->isFloat128Type() ||
123 (T->isRealFloatingType() && Context.getTypeSize(T) == 128)))
124 return true;
125 if (const auto *EIT = T->getAs<BitIntType>())
126 return EIT->getNumBits() >
127 (Context.getTargetInfo().hasInt128Type() ? 128U : 64U);
128 if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() &&
129 Context.getTypeSize(T) > 64U)
130 return true;
131 if (const auto *AT = T->getAsArrayTypeUnsafe())
132 return isUnsupportedType(T: AT->getElementType());
133 const auto *RD = T->getAsRecordDecl();
134 if (!RD)
135 return false;
136
137 // If this is a C++ record, check the bases first.
138 if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(Val: RD))
139 for (const CXXBaseSpecifier &I : CXXRD->bases())
140 if (isUnsupportedType(T: I.getType()))
141 return true;
142
143 for (const FieldDecl *I : RD->fields())
144 if (isUnsupportedType(T: I->getType()))
145 return true;
146 return false;
147}
148
149/// Coerce the given type into an array with maximum allowed size of elements.
150ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
151 unsigned MaxSize) const {
152 // Alignment and Size are measured in bits.
153 const uint64_t Size = getContext().getTypeSize(T: Ty);
154 const uint64_t Alignment = getContext().getTypeAlign(T: Ty);
155 const unsigned Div = std::min<unsigned>(a: MaxSize, b: Alignment);
156 llvm::Type *IntType = llvm::Type::getIntNTy(C&: getVMContext(), N: Div);
157 const uint64_t NumElements = (Size + Div - 1) / Div;
158 return ABIArgInfo::getDirect(T: llvm::ArrayType::get(ElementType: IntType, NumElements));
159}
160
161ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
162 if (RetTy->isVoidType())
163 return ABIArgInfo::getIgnore();
164
165 if (getContext().getLangOpts().OpenMP &&
166 getContext().getLangOpts().OpenMPIsTargetDevice &&
167 isUnsupportedType(T: RetTy))
168 return coerceToIntArrayWithLimit(Ty: RetTy, MaxSize: 64);
169
170 // note: this is different from default ABI
171 if (!RetTy->isScalarType())
172 return ABIArgInfo::getDirect();
173
174 // Treat an enum type as its underlying type.
175 if (const auto *ED = RetTy->getAsEnumDecl())
176 RetTy = ED->getIntegerType();
177
178 return (isPromotableIntegerTypeForABI(Ty: RetTy) ? ABIArgInfo::getExtend(Ty: RetTy)
179 : ABIArgInfo::getDirect());
180}
181
182ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
183 // Treat an enum type as its underlying type.
184 if (const auto *ED = Ty->getAsEnumDecl())
185 Ty = ED->getIntegerType();
186
187 // Return aggregates type as indirect by value
188 if (isAggregateTypeForABI(T: Ty)) {
189 // Under CUDA device compilation, tex/surf builtin types are replaced with
190 // object types and passed directly.
191 if (getContext().getLangOpts().CUDAIsDevice) {
192 if (Ty->isCUDADeviceBuiltinSurfaceType())
193 return ABIArgInfo::getDirect(
194 T: CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
195 if (Ty->isCUDADeviceBuiltinTextureType())
196 return ABIArgInfo::getDirect(
197 T: CGInfo.getCUDADeviceBuiltinTextureDeviceType());
198 }
199 return getNaturalAlignIndirect(
200 Ty, /* AddrSpace */ getDataLayout().getAllocaAddrSpace(),
201 /* byval */ ByVal: true);
202 }
203
204 if (const auto *EIT = Ty->getAs<BitIntType>()) {
205 if ((EIT->getNumBits() > 128) ||
206 (!getContext().getTargetInfo().hasInt128Type() &&
207 EIT->getNumBits() > 64))
208 return getNaturalAlignIndirect(
209 Ty, /* AddrSpace */ getDataLayout().getAllocaAddrSpace(),
210 /* byval */ ByVal: true);
211 }
212
213 return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
214 : ABIArgInfo::getDirect());
215}
216
217void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
218 if (!getCXXABI().classifyReturnType(FI))
219 FI.getReturnInfo() = classifyReturnType(RetTy: FI.getReturnType());
220
221 for (auto &&[ArgumentsCount, I] : llvm::enumerate(First: FI.arguments()))
222 I.info = ArgumentsCount < FI.getNumRequiredArgs()
223 ? classifyArgumentType(Ty: I.type)
224 : ABIArgInfo::getDirect();
225
226 // Always honor user-specified calling convention.
227 if (FI.getCallingConvention() != llvm::CallingConv::C)
228 return;
229
230 FI.setEffectiveCallingConvention(getRuntimeCC());
231}
232
233RValue NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
234 QualType Ty, AggValueSlot Slot) const {
235 return emitVoidPtrVAArg(CGF, VAListAddr, ValueTy: Ty, /*IsIndirect=*/false,
236 ValueInfo: getContext().getTypeInfoInChars(T: Ty),
237 SlotSizeAndAlign: CharUnits::fromQuantity(Quantity: 1),
238 /*AllowHigherAlign=*/true, Slot);
239}
240
241void NVPTXTargetCodeGenInfo::setTargetAttributes(
242 const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
243 if (GV->isDeclaration())
244 return;
245 const VarDecl *VD = dyn_cast_or_null<VarDecl>(Val: D);
246 if (VD) {
247 if (M.getLangOpts().CUDA) {
248 if (VD->getType()->isCUDADeviceBuiltinSurfaceType())
249 addNVVMMetadata(GV, Name: "surface", Operand: 1);
250 else if (VD->getType()->isCUDADeviceBuiltinTextureType())
251 addNVVMMetadata(GV, Name: "texture", Operand: 1);
252 return;
253 }
254 }
255
256 const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Val: D);
257 if (!FD)
258 return;
259
260 llvm::Function *F = cast<llvm::Function>(Val: GV);
261
262 // Perform special handling in OpenCL/CUDA mode
263 if (M.getLangOpts().OpenCL || M.getLangOpts().CUDA) {
264 // Use function attributes to check for kernel functions
265 // By default, all functions are device functions
266 if (FD->hasAttr<DeviceKernelAttr>() || FD->hasAttr<CUDAGlobalAttr>()) {
267 // OpenCL/CUDA kernel functions get kernel metadata
268 // And kernel functions are not subject to inlining
269 F->addFnAttr(Kind: llvm::Attribute::NoInline);
270 if (FD->hasAttr<CUDAGlobalAttr>()) {
271 F->setCallingConv(getDeviceKernelCallingConv());
272
273 for (auto IV : llvm::enumerate(First: FD->parameters()))
274 if (IV.value()->hasAttr<CUDAGridConstantAttr>())
275 F->addParamAttr(
276 ArgNo: IV.index(),
277 Attr: llvm::Attribute::get(Context&: F->getContext(), Kind: "nvvm.grid_constant"));
278 }
279 if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
280 M.handleCUDALaunchBoundsAttr(F, A: Attr);
281 }
282 }
283}
284
285void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
286 StringRef Name, int Operand) {
287 llvm::Module *M = GV->getParent();
288 llvm::LLVMContext &Ctx = M->getContext();
289
290 // Get "nvvm.annotations" metadata node
291 llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata(Name: "nvvm.annotations");
292
293 SmallVector<llvm::Metadata *, 5> MDVals = {
294 llvm::ConstantAsMetadata::get(C: GV), llvm::MDString::get(Context&: Ctx, Str: Name),
295 llvm::ConstantAsMetadata::get(
296 C: llvm::ConstantInt::get(Ty: llvm::Type::getInt32Ty(C&: Ctx), V: Operand))};
297
298 // Append metadata to nvvm.annotations
299 MD->addOperand(M: llvm::MDNode::get(Context&: Ctx, MDs: MDVals));
300}
301
// NVPTX never emits aliases for static extern "C" declarations.
bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
  return false;
}
305
306StringRef NVPTXTargetCodeGenInfo::getLLVMSyncScopeStr(
307 const LangOptions &LangOpts, SyncScope Scope,
308 llvm::AtomicOrdering Ordering) const {
309 switch (Scope) {
310 case SyncScope::HIPSingleThread:
311 case SyncScope::SingleScope:
312 return "singlethread";
313 case SyncScope::HIPWavefront:
314 case SyncScope::OpenCLSubGroup:
315 case SyncScope::WavefrontScope:
316 case SyncScope::HIPWorkgroup:
317 case SyncScope::OpenCLWorkGroup:
318 case SyncScope::WorkgroupScope:
319 return "block";
320 case SyncScope::HIPCluster:
321 case SyncScope::ClusterScope:
322 return "cluster";
323 case SyncScope::HIPAgent:
324 case SyncScope::OpenCLDevice:
325 case SyncScope::DeviceScope:
326 return "device";
327 case SyncScope::SystemScope:
328 case SyncScope::HIPSystem:
329 case SyncScope::OpenCLAllSVMDevices:
330 return "";
331 }
332 llvm_unreachable("Unknown SyncScope enum");
333}
334
335llvm::Constant *
336NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
337 llvm::PointerType *PT,
338 QualType QT) const {
339 auto &Ctx = CGM.getContext();
340 if (PT->getAddressSpace() != Ctx.getTargetAddressSpace(AS: LangAS::opencl_local))
341 return llvm::ConstantPointerNull::get(T: PT);
342
343 auto NPT = llvm::PointerType::get(
344 C&: PT->getContext(), AddressSpace: Ctx.getTargetAddressSpace(AS: LangAS::opencl_generic));
345 return llvm::ConstantExpr::getAddrSpaceCast(
346 C: llvm::ConstantPointerNull::get(T: NPT), Ty: PT);
347}
348} // namespace
349
350void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
351 const CUDALaunchBoundsAttr *Attr,
352 int32_t *MaxThreadsVal,
353 int32_t *MinBlocksVal,
354 int32_t *MaxClusterRankVal) {
355 llvm::APSInt MaxThreads(32);
356 MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(Ctx: getContext());
357 if (MaxThreads > 0) {
358 if (MaxThreadsVal)
359 *MaxThreadsVal = MaxThreads.getExtValue();
360 if (F)
361 F->addFnAttr(Kind: "nvvm.maxntid", Val: llvm::utostr(X: MaxThreads.getExtValue()));
362 }
363
364 // min and max blocks is an optional argument for CUDALaunchBoundsAttr. If it
365 // was not specified in __launch_bounds__ or if the user specified a 0 value,
366 // we don't have to add a PTX directive.
367 if (Attr->getMinBlocks()) {
368 llvm::APSInt MinBlocks(32);
369 MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(Ctx: getContext());
370 if (MinBlocks > 0) {
371 if (MinBlocksVal)
372 *MinBlocksVal = MinBlocks.getExtValue();
373 if (F)
374 F->addFnAttr(Kind: "nvvm.minctasm", Val: llvm::utostr(X: MinBlocks.getExtValue()));
375 }
376 }
377 if (Attr->getMaxBlocks()) {
378 llvm::APSInt MaxBlocks(32);
379 MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(Ctx: getContext());
380 if (MaxBlocks > 0) {
381 if (MaxClusterRankVal)
382 *MaxClusterRankVal = MaxBlocks.getExtValue();
383 if (F)
384 F->addFnAttr(Kind: "nvvm.maxclusterrank",
385 Val: llvm::utostr(X: MaxBlocks.getExtValue()));
386 }
387 }
388}
389
/// Factory entry point: build the NVPTX TargetCodeGenInfo for \p CGM.
std::unique_ptr<TargetCodeGenInfo>
CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) {
  return std::make_unique<NVPTXTargetCodeGenInfo>(args&: CGM.getTypes());
}
394