//===- NVPTX.cpp ----------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "llvm/IR/IntrinsicsNVPTX.h"

using namespace clang;
using namespace clang::CodeGen;

//===----------------------------------------------------------------------===//
// NVPTX ABI Implementation
//===----------------------------------------------------------------------===//

namespace {

class NVPTXTargetCodeGenInfo;

class NVPTXABIInfo : public ABIInfo {
  NVPTXTargetCodeGenInfo &CGInfo;

public:
  NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
      : ABIInfo(CGT), CGInfo(Info) {}

  ABIArgInfo classifyReturnType(QualType RetTy) const;
  ABIArgInfo classifyArgumentType(QualType Ty) const;

  void computeInfo(CGFunctionInfo &FI) const override;
  RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                   AggValueSlot Slot) const override;
  bool isUnsupportedType(QualType T) const;
  ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const;
};

class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
public:
  NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
      : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {}

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &M) const override;
  bool shouldEmitStaticExternCAliases() const override;

  llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
                                 llvm::PointerType *T,
                                 QualType QT) const override;

  llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override {
    // On the device side, a surface reference is represented as an object
    // handle in a 64-bit integer.
    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
  }

  llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override {
    // On the device side, a texture reference is represented as an object
    // handle in a 64-bit integer.
    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
  }

  bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                              LValue Src) const override {
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
    return true;
  }

  bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                              LValue Src) const override {
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
    return true;
  }

  // Adds an MDNode with GV, Name, and Operand as operands to the module-level
  // nvvm.annotations named metadata.
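  // For example, addNVVMMetadata(F, "kernel", 1) appends the node
  //   !{ptr @F, !"kernel", i32 1}
  // to !nvvm.annotations.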
  static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
                              int Operand);

private:
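  // Copies a surface/texture object handle from Src to Dst. When Src refers
  // to a global surface/texture variable, the handle is materialized with the
  // nvvm.texsurf.handle.internal intrinsic; otherwise it is loaded as a
  // scalar.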
  static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                           LValue Src) {
    llvm::Value *Handle = nullptr;
    llvm::Constant *C =
        llvm::dyn_cast<llvm::Constant>(Src.getAddress().emitRawPointer(CGF));
    // Look through an `addrspacecast` on the constant pointer, if any.
    if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
      C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
    if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
      // Load the handle from the specific global variable using the
      // `nvvm.texsurf.handle.internal` intrinsic.
      Handle = CGF.EmitRuntimeCall(
          CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal,
                               {GV->getType()}),
          {GV}, "texsurf_handle");
    } else
      Handle = CGF.EmitLoadOfScalar(Src, SourceLocation());
    CGF.EmitStoreOfScalar(Handle, Dst);
  }
};

/// Checks if the type is unsupported directly by the current target.
bool NVPTXABIInfo::isUnsupportedType(QualType T) const {
  ASTContext &Context = getContext();
  if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())
    return true;
  if (!Context.getTargetInfo().hasFloat128Type() &&
      (T->isFloat128Type() ||
       (T->isRealFloatingType() && Context.getTypeSize(T) == 128)))
    return true;
  if (const auto *EIT = T->getAs<BitIntType>())
    return EIT->getNumBits() >
           (Context.getTargetInfo().hasInt128Type() ? 128U : 64U);
  if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() &&
      Context.getTypeSize(T) > 64U)
    return true;
  if (const auto *AT = T->getAsArrayTypeUnsafe())
    return isUnsupportedType(AT->getElementType());
  const auto *RT = T->getAs<RecordType>();
  if (!RT)
    return false;
  const RecordDecl *RD = RT->getDecl();

  // If this is a C++ record, check the bases first.
  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
    for (const CXXBaseSpecifier &I : CXXRD->bases())
      if (isUnsupportedType(I.getType()))
        return true;

  for (const FieldDecl *I : RD->fields())
    if (isUnsupportedType(I->getType()))
      return true;
  return false;
}

/// Coerce the given type into an array of integers whose elements are no
/// wider than MaxSize bits each.
ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
                                                   unsigned MaxSize) const {
  // Alignment and Size are measured in bits.
  const uint64_t Size = getContext().getTypeSize(Ty);
  const uint64_t Alignment = getContext().getTypeAlign(Ty);
  const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
  llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div);
  const uint64_t NumElements = (Size + Div - 1) / Div;
  return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
}

ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
  if (RetTy->isVoidType())
    return ABIArgInfo::getIgnore();

  if (getContext().getLangOpts().OpenMP &&
      getContext().getLangOpts().OpenMPIsTargetDevice &&
      isUnsupportedType(RetTy))
    return coerceToIntArrayWithLimit(RetTy, 64);

  // Note: Unlike the default ABI, non-scalar types are returned directly.
  if (!RetTy->isScalarType())
    return ABIArgInfo::getDirect();

  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
    RetTy = EnumTy->getDecl()->getIntegerType();

  return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
                                               : ABIArgInfo::getDirect());
}

ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = Ty->getAs<EnumType>())
    Ty = EnumTy->getDecl()->getIntegerType();

  // Pass aggregate types indirectly, by value.
  if (isAggregateTypeForABI(Ty)) {
    // Under CUDA device compilation, tex/surf builtin types are replaced with
    // object types and passed directly.
    if (getContext().getLangOpts().CUDAIsDevice) {
      if (Ty->isCUDADeviceBuiltinSurfaceType())
        return ABIArgInfo::getDirect(
            CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
      if (Ty->isCUDADeviceBuiltinTextureType())
        return ABIArgInfo::getDirect(
            CGInfo.getCUDADeviceBuiltinTextureDeviceType());
    }
    return getNaturalAlignIndirect(Ty, /* byval */ true);
  }

  if (const auto *EIT = Ty->getAs<BitIntType>()) {
    if ((EIT->getNumBits() > 128) ||
        (!getContext().getTargetInfo().hasInt128Type() &&
         EIT->getNumBits() > 64))
      return getNaturalAlignIndirect(Ty, /* byval */ true);
  }

  return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
                                            : ABIArgInfo::getDirect());
}

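/// Classifies the return type and each required argument. Extra variadic
/// arguments beyond the required count are passed directly. For the default C
/// calling convention, the effective convention is switched to the NVPTX
/// runtime calling convention.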
void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
  if (!getCXXABI().classifyReturnType(FI))
    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());

  for (auto &&[ArgumentsCount, I] : llvm::enumerate(FI.arguments()))
    I.info = ArgumentsCount < FI.getNumRequiredArgs()
                 ? classifyArgumentType(I.type)
                 : ABIArgInfo::getDirect();

  // Always honor user-specified calling convention.
  if (FI.getCallingConvention() != llvm::CallingConv::C)
    return;

  FI.setEffectiveCallingConvention(getRuntimeCC());
}

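/// Lowers va_arg through the generic void-pointer scheme: slots have a
/// minimum alignment of one byte, and each value is read at its natural
/// alignment (AllowHigherAlign).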
RValue NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                               QualType Ty, AggValueSlot Slot) const {
  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*IsIndirect=*/false,
                          getContext().getTypeInfoInChars(Ty),
                          CharUnits::fromQuantity(1),
                          /*AllowHigherAlign=*/true, Slot);
}

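// Attaches NVVM annotations to global definitions: surface/texture variables
// in CUDA mode, and kernel entry points (plus launch bounds) for OpenCL,
// CUDA, and direct NVPTX kernels.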
void NVPTXTargetCodeGenInfo::setTargetAttributes(
    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
  if (GV->isDeclaration())
    return;
  const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
  if (VD) {
    if (M.getLangOpts().CUDA) {
      if (VD->getType()->isCUDADeviceBuiltinSurfaceType())
        addNVVMMetadata(GV, "surface", 1);
      else if (VD->getType()->isCUDADeviceBuiltinTextureType())
        addNVVMMetadata(GV, "texture", 1);
      return;
    }
  }

  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
  if (!FD) return;

  llvm::Function *F = cast<llvm::Function>(GV);

  // Perform special handling in OpenCL mode.
  if (M.getLangOpts().OpenCL) {
    // Use OpenCL function attributes to check for kernel functions.
    // By default, all functions are device functions.
    if (FD->hasAttr<OpenCLKernelAttr>()) {
      // OpenCL __kernel functions get kernel metadata.
      // Create a !{<func-ref>, metadata !"kernel", i32 1} node.
      addNVVMMetadata(F, "kernel", 1);
      // And kernel functions are not subject to inlining.
      F->addFnAttr(llvm::Attribute::NoInline);
    }
  }

  // Perform special handling in CUDA mode.
  if (M.getLangOpts().CUDA) {
    // CUDA __global__ functions get a kernel metadata entry. Since
    // __global__ functions cannot be called from the device, we do not
    // need to set the noinline attribute.
    if (FD->hasAttr<CUDAGlobalAttr>()) {
      // Create a !{<func-ref>, metadata !"kernel", i32 1} node.
      addNVVMMetadata(F, "kernel", 1);
    }
    if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
      M.handleCUDALaunchBoundsAttr(F, Attr);
  }

  // Attach kernel metadata directly if compiling for NVPTX.
  if (FD->hasAttr<NVPTXKernelAttr>()) {
    addNVVMMetadata(F, "kernel", 1);
  }
}
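
// As an illustration (hypothetical kernel name; the actual symbol is
// mangled), a CUDA device compilation of
//   __global__ void __launch_bounds__(256, 2) kern() {}
// yields annotations roughly of the form:
//   !nvvm.annotations = !{!0, !1, !2}
//   !0 = !{ptr @kern, !"kernel", i32 1}
//   !1 = !{ptr @kern, !"maxntidx", i32 256}
//   !2 = !{ptr @kern, !"minctasm", i32 2}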

void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
                                             StringRef Name, int Operand) {
  llvm::Module *M = GV->getParent();
  llvm::LLVMContext &Ctx = M->getContext();

  // Get the "nvvm.annotations" metadata node.
  llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");

  llvm::Metadata *MDVals[] = {
      llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
      llvm::ConstantAsMetadata::get(
          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
  // Append the new node to nvvm.annotations.
  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}

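// Don't emit aliases for static extern "C" functions; PTX has, at best,
// limited support for aliases.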
bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
  return false;
}

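// For the OpenCL local address space, the null pointer is represented as a
// generic null pointer addrspace-cast into the local address space rather
// than a literal zero in that space.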
llvm::Constant *
NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
                                       llvm::PointerType *PT,
                                       QualType QT) const {
  auto &Ctx = CGM.getContext();
  if (PT->getAddressSpace() != Ctx.getTargetAddressSpace(LangAS::opencl_local))
    return llvm::ConstantPointerNull::get(PT);

  auto NPT = llvm::PointerType::get(
      PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));
  return llvm::ConstantExpr::getAddrSpaceCast(
      llvm::ConstantPointerNull::get(NPT), PT);
}
} // namespace

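/// Translates a __launch_bounds__(maxThreads[, minBlocks[, maxBlocks]])
/// attribute into the "maxntidx", "minctasm", and "maxclusterrank" NVVM
/// annotations, and optionally reports the evaluated values through the
/// out-parameters.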
void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
                                               const CUDALaunchBoundsAttr *Attr,
                                               int32_t *MaxThreadsVal,
                                               int32_t *MinBlocksVal,
                                               int32_t *MaxClusterRankVal) {
  llvm::APSInt MaxThreads(32);
  MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(getContext());
  if (MaxThreads > 0) {
    if (MaxThreadsVal)
      *MaxThreadsVal = MaxThreads.getExtValue();
    if (F) {
      // Create a !{<func-ref>, metadata !"maxntidx", i32 <val>} node.
      NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxntidx",
                                              MaxThreads.getExtValue());
    }
  }

  // MinBlocks and MaxBlocks are optional arguments for CUDALaunchBoundsAttr.
  // If they were not specified in __launch_bounds__ or the user specified a
  // value of 0, we don't have to add a PTX directive.
  if (Attr->getMinBlocks()) {
    llvm::APSInt MinBlocks(32);
    MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(getContext());
    if (MinBlocks > 0) {
      if (MinBlocksVal)
        *MinBlocksVal = MinBlocks.getExtValue();
      if (F) {
        // Create a !{<func-ref>, metadata !"minctasm", i32 <val>} node.
        NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm",
                                                MinBlocks.getExtValue());
      }
    }
  }
  if (Attr->getMaxBlocks()) {
    llvm::APSInt MaxBlocks(32);
    MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext());
    if (MaxBlocks > 0) {
      if (MaxClusterRankVal)
        *MaxClusterRankVal = MaxBlocks.getExtValue();
      if (F) {
        // Create a !{<func-ref>, metadata !"maxclusterrank", i32 <val>} node.
        NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxclusterrank",
                                                MaxBlocks.getExtValue());
      }
    }
  }
}

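/// Creates the NVPTX-specific TargetCodeGenInfo for the given module.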
std::unique_ptr<TargetCodeGenInfo>
CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) {
  return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.getTypes());
}