//===- NVPTX.cpp ----------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/IntrinsicsNVPTX.h"

using namespace clang;
using namespace clang::CodeGen;

//===----------------------------------------------------------------------===//
// NVPTX ABI Implementation
//===----------------------------------------------------------------------===//

namespace {

class NVPTXTargetCodeGenInfo;

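// ABI info for NVPTX: decides how function arguments and return values are
// lowered for the PTX target.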
class NVPTXABIInfo : public ABIInfo {
  NVPTXTargetCodeGenInfo &CGInfo;

public:
  NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
      : ABIInfo(CGT), CGInfo(Info) {}

  ABIArgInfo classifyReturnType(QualType RetTy) const;
  ABIArgInfo classifyArgumentType(QualType Ty) const;

  void computeInfo(CGFunctionInfo &FI) const override;
  RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                   AggValueSlot Slot) const override;
  bool isUnsupportedType(QualType T) const;
  ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const;
};

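// TargetCodeGenInfo for NVPTX: emits NVVM annotations, maps CUDA
// texture/surface builtin types to device handles, and selects the PTX
// kernel calling convention.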
class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
public:
  NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
      : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {}

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &M) const override;
  bool shouldEmitStaticExternCAliases() const override;

  llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
                                 llvm::PointerType *T,
                                 QualType QT) const override;

  llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override {
    // On the device side, a surface reference is represented as an object
    // handle in a 64-bit integer.
    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
  }

  llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override {
    // On the device side, a texture reference is represented as an object
    // handle in a 64-bit integer.
    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
  }

  bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                              LValue Src) const override {
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
    return true;
  }

  bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                              LValue Src) const override {
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
    return true;
  }

  unsigned getDeviceKernelCallingConv() const override {
    return llvm::CallingConv::PTX_Kernel;
  }

  // Creates an MDNode with GV, Name, and Operand as operands, and appends it
  // to the nvvm.annotations named metadata.
  static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
                              int Operand);

  static void
  addGridConstantNVVMMetadata(llvm::GlobalValue *GV,
                              const SmallVectorImpl<int> &GridConstantArgs);

private:
  static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                           LValue Src) {
    llvm::Value *Handle = nullptr;
    llvm::Constant *C =
        llvm::dyn_cast<llvm::Constant>(Src.getAddress().emitRawPointer(CGF));
    // Look through an `addrspacecast` on the constant pointer, if any.
    if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
      C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
    if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
      // Load the handle from the global variable using the
      // `nvvm.texsurf.handle.internal` intrinsic.
      Handle = CGF.EmitRuntimeCall(
          CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal,
                               {GV->getType()}),
          {GV}, "texsurf_handle");
    } else
      Handle = CGF.EmitLoadOfScalar(Src, SourceLocation());
    CGF.EmitStoreOfScalar(Handle, Dst);
  }
};

/// Checks if the type is unsupported directly by the current target.
bool NVPTXABIInfo::isUnsupportedType(QualType T) const {
  ASTContext &Context = getContext();
  if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())
    return true;
  if (!Context.getTargetInfo().hasFloat128Type() &&
      (T->isFloat128Type() ||
       (T->isRealFloatingType() && Context.getTypeSize(T) == 128)))
    return true;
  if (const auto *EIT = T->getAs<BitIntType>())
    return EIT->getNumBits() >
           (Context.getTargetInfo().hasInt128Type() ? 128U : 64U);
  if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() &&
      Context.getTypeSize(T) > 64U)
    return true;
  if (const auto *AT = T->getAsArrayTypeUnsafe())
    return isUnsupportedType(AT->getElementType());
  const auto *RT = T->getAs<RecordType>();
  if (!RT)
    return false;
  const RecordDecl *RD = RT->getDecl();

  // If this is a C++ record, check the bases first.
  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
    for (const CXXBaseSpecifier &I : CXXRD->bases())
      if (isUnsupportedType(I.getType()))
        return true;

  for (const FieldDecl *I : RD->fields())
    if (isUnsupportedType(I->getType()))
      return true;
  return false;
}

/// Coerce the given type into an array of integers, each no wider than
/// MaxSize bits.
ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
                                                   unsigned MaxSize) const {
  // Alignment and Size are measured in bits.
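  // For example, with MaxSize = 64, a 96-bit type aligned to 32 bits
  // coerces to [3 x i32].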
  const uint64_t Size = getContext().getTypeSize(Ty);
  const uint64_t Alignment = getContext().getTypeAlign(Ty);
  const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
  llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div);
  const uint64_t NumElements = (Size + Div - 1) / Div;
  return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
}

ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
  if (RetTy->isVoidType())
    return ABIArgInfo::getIgnore();

  if (getContext().getLangOpts().OpenMP &&
      getContext().getLangOpts().OpenMPIsTargetDevice &&
      isUnsupportedType(RetTy))
    return coerceToIntArrayWithLimit(RetTy, 64);

  // Note: unlike the default ABI, non-scalar return types are returned
  // directly.
  if (!RetTy->isScalarType())
    return ABIArgInfo::getDirect();

  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
    RetTy = EnumTy->getDecl()->getIntegerType();

  return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
                                               : ABIArgInfo::getDirect());
}

ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = Ty->getAs<EnumType>())
    Ty = EnumTy->getDecl()->getIntegerType();

  // Pass aggregate types indirectly, by value.
  if (isAggregateTypeForABI(Ty)) {
    // Under CUDA device compilation, tex/surf builtin types are replaced with
    // object types and passed directly.
    if (getContext().getLangOpts().CUDAIsDevice) {
      if (Ty->isCUDADeviceBuiltinSurfaceType())
        return ABIArgInfo::getDirect(
            CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
      if (Ty->isCUDADeviceBuiltinTextureType())
        return ABIArgInfo::getDirect(
            CGInfo.getCUDADeviceBuiltinTextureDeviceType());
    }
    return getNaturalAlignIndirect(
        Ty, /*AddrSpace=*/getDataLayout().getAllocaAddrSpace(),
        /*ByVal=*/true);
  }

  if (const auto *EIT = Ty->getAs<BitIntType>()) {
    if ((EIT->getNumBits() > 128) ||
        (!getContext().getTargetInfo().hasInt128Type() &&
         EIT->getNumBits() > 64))
      return getNaturalAlignIndirect(
          Ty, /*AddrSpace=*/getDataLayout().getAllocaAddrSpace(),
          /*ByVal=*/true);
  }

  return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
                                            : ABIArgInfo::getDirect());
}

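// Classify the return type and each argument. Variadic arguments beyond the
// required ones are passed directly without classification.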
void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
  if (!getCXXABI().classifyReturnType(FI))
    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());

  for (auto &&[ArgumentsCount, I] : llvm::enumerate(FI.arguments()))
    I.info = ArgumentsCount < FI.getNumRequiredArgs()
                 ? classifyArgumentType(I.type)
                 : ABIArgInfo::getDirect();

  // Always honor a user-specified calling convention.
  if (FI.getCallingConvention() != llvm::CallingConv::C)
    return;

  FI.setEffectiveCallingConvention(getRuntimeCC());
}

RValue NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                               QualType Ty, AggValueSlot Slot) const {
  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*IsIndirect=*/false,
                          getContext().getTypeInfoInChars(Ty),
                          CharUnits::fromQuantity(1),
                          /*AllowHigherAlign=*/true, Slot);
}

void NVPTXTargetCodeGenInfo::setTargetAttributes(
    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
  if (GV->isDeclaration())
    return;
  const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
  if (VD) {
    if (M.getLangOpts().CUDA) {
      if (VD->getType()->isCUDADeviceBuiltinSurfaceType())
        addNVVMMetadata(GV, "surface", 1);
      else if (VD->getType()->isCUDADeviceBuiltinTextureType())
        addNVVMMetadata(GV, "texture", 1);
      return;
    }
  }

  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
  if (!FD)
    return;

  llvm::Function *F = cast<llvm::Function>(GV);

  // Perform special handling in OpenCL/CUDA mode.
  if (M.getLangOpts().OpenCL || M.getLangOpts().CUDA) {
    // Use function attributes to check for kernel functions. By default, all
    // functions are device functions.
    if (FD->hasAttr<DeviceKernelAttr>() || FD->hasAttr<CUDAGlobalAttr>()) {
      // Kernel functions are not subject to inlining.
      F->addFnAttr(llvm::Attribute::NoInline);
      if (FD->hasAttr<CUDAGlobalAttr>()) {
        SmallVector<int, 10> GCI;
        for (auto IV : llvm::enumerate(FD->parameters()))
          if (IV.value()->hasAttr<CUDAGridConstantAttr>())
            // Note: grid_constant argument indices are 1-based in NVVM.
            GCI.push_back(IV.index() + 1);
        // Mark the function as a kernel via the PTX_Kernel calling
        // convention.
        F->setCallingConv(llvm::CallingConv::PTX_Kernel);
        addGridConstantNVVMMetadata(F, GCI);
      }
      if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
        M.handleCUDALaunchBoundsAttr(F, Attr);
    }
  }
  // Attach the kernel calling convention directly if compiling for NVPTX.
  if (FD->hasAttr<DeviceKernelAttr>()) {
    F->setCallingConv(llvm::CallingConv::PTX_Kernel);
  }
}

void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
                                             StringRef Name, int Operand) {
  llvm::Module *M = GV->getParent();
  llvm::LLVMContext &Ctx = M->getContext();

  // Get "nvvm.annotations" metadata node.
  llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");

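  // The appended operand has the form !{ptr @gv, !"<name>", i32 <operand>},
  // e.g. !{ptr @surf, !"surface", i32 1}.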
  SmallVector<llvm::Metadata *, 5> MDVals = {
      llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
      llvm::ConstantAsMetadata::get(
          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};

  // Append metadata to nvvm.annotations.
  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}

void NVPTXTargetCodeGenInfo::addGridConstantNVVMMetadata(
    llvm::GlobalValue *GV, const SmallVectorImpl<int> &GridConstantArgs) {

  llvm::Module *M = GV->getParent();
  llvm::LLVMContext &Ctx = M->getContext();

  // Get "nvvm.annotations" metadata node.
  llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");

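  // The appended operand has the form
  // !{ptr @kernel, !"grid_constant", !{i32 <idx>, ...}},
  // listing the 1-based indices of __grid_constant__ parameters.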
  SmallVector<llvm::Metadata *, 5> MDVals = {llvm::ConstantAsMetadata::get(GV)};
  if (!GridConstantArgs.empty()) {
    SmallVector<llvm::Metadata *, 10> GCM;
    for (int I : GridConstantArgs)
      GCM.push_back(llvm::ConstantAsMetadata::get(
          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), I)));
    MDVals.append({llvm::MDString::get(Ctx, "grid_constant"),
                   llvm::MDNode::get(Ctx, GCM)});
  }

  // Append metadata to nvvm.annotations.
  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}

bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
  return false;
}

llvm::Constant *
NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
                                       llvm::PointerType *PT,
                                       QualType QT) const {
  auto &Ctx = CGM.getContext();
  if (PT->getAddressSpace() != Ctx.getTargetAddressSpace(LangAS::opencl_local))
    return llvm::ConstantPointerNull::get(PT);

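  // A null pointer in the local address space is materialized as the generic
  // null pointer address-space-cast into the local address space.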
  auto NPT = llvm::PointerType::get(
      PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));
  return llvm::ConstantExpr::getAddrSpaceCast(
      llvm::ConstantPointerNull::get(NPT), PT);
}
} // namespace

void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
                                               const CUDALaunchBoundsAttr *Attr,
                                               int32_t *MaxThreadsVal,
                                               int32_t *MinBlocksVal,
                                               int32_t *MaxClusterRankVal) {
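  // For example, __launch_bounds__(256, 2) yields the function attributes
  // "nvvm.maxntid"="256" and "nvvm.minctasm"="2".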
  llvm::APSInt MaxThreads(32);
  MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(getContext());
  if (MaxThreads > 0) {
    if (MaxThreadsVal)
      *MaxThreadsVal = MaxThreads.getExtValue();
    if (F)
      F->addFnAttr("nvvm.maxntid", llvm::utostr(MaxThreads.getExtValue()));
  }

  // MinBlocks and MaxBlocks are optional arguments for CUDALaunchBoundsAttr.
  // If a value was not specified in __launch_bounds__, or the user specified
  // a 0 value, we don't have to add a PTX directive.
  if (Attr->getMinBlocks()) {
    llvm::APSInt MinBlocks(32);
    MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(getContext());
    if (MinBlocks > 0) {
      if (MinBlocksVal)
        *MinBlocksVal = MinBlocks.getExtValue();
      if (F)
        F->addFnAttr("nvvm.minctasm", llvm::utostr(MinBlocks.getExtValue()));
    }
  }
  if (Attr->getMaxBlocks()) {
    llvm::APSInt MaxBlocks(32);
    MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext());
    if (MaxBlocks > 0) {
      if (MaxClusterRankVal)
        *MaxClusterRankVal = MaxBlocks.getExtValue();
      if (F)
        F->addFnAttr("nvvm.maxclusterrank",
                     llvm::utostr(MaxBlocks.getExtValue()));
    }
  }
}

std::unique_ptr<TargetCodeGenInfo>
CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) {
  return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.getTypes());
}