//===- NVPTX.cpp ----------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/IntrinsicsNVPTX.h"

using namespace clang;
using namespace clang::CodeGen;

//===----------------------------------------------------------------------===//
// NVPTX ABI Implementation
//===----------------------------------------------------------------------===//

namespace {

class NVPTXTargetCodeGenInfo;

class NVPTXABIInfo : public ABIInfo {
  NVPTXTargetCodeGenInfo &CGInfo;

public:
  NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
      : ABIInfo(CGT), CGInfo(Info) {}

  ABIArgInfo classifyReturnType(QualType RetTy) const;
  ABIArgInfo classifyArgumentType(QualType Ty) const;

  void computeInfo(CGFunctionInfo &FI) const override;
  RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                   AggValueSlot Slot) const override;
  bool isUnsupportedType(QualType T) const;
  ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const;
};

class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
public:
  NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
      : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {}

  void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                           CodeGen::CodeGenModule &M) const override;
  bool shouldEmitStaticExternCAliases() const override;

  llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
                                 llvm::PointerType *T,
                                 QualType QT) const override;

  llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override {
    // On the device side, a surface reference is represented as an object
    // handle in a 64-bit integer.
    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
  }

  llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override {
    // On the device side, a texture reference is represented as an object
    // handle in a 64-bit integer.
    return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
  }
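
  // Illustrative example (not part of this file): under CUDA device
  // compilation, a hypothetical kernel taking a builtin texture type, e.g.
  //   __global__ void k(texture<float, 2> tex);
  // receives the argument as a plain 64-bit handle, matching the i64 type
  // returned above.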

  bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                              LValue Src) const override {
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
    return true;
  }

  bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                              LValue Src) const override {
    emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
    return true;
  }

  unsigned getDeviceKernelCallingConv() const override {
    return llvm::CallingConv::PTX_Kernel;
  }

  // Creates an MDNode with GV, Name, and Operand as operands, and adds the
  // resulting node to the nvvm.annotations named metadata.
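  // For example, addNVVMMetadata(GV, "texture", 1) appends a node of the form
  //   !{ptr @tex, !"texture", i32 1}
  // (illustrative IR) to the !nvvm.annotations list.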
  static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
                              int Operand);

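  // Records the 1-based indices of a kernel's __grid_constant__ parameters in
  // !nvvm.annotations; e.g. (illustrative) for grid-constant parameters 1 and
  // 3 it appends:
  //   !{ptr @kernel, !"grid_constant", !{i32 1, i32 3}}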
  static void
  addGridConstantNVVMMetadata(llvm::GlobalValue *GV,
                              const SmallVectorImpl<int> &GridConstantArgs);

private:
  static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
                                           LValue Src) {
    llvm::Value *Handle = nullptr;
    llvm::Constant *C =
        llvm::dyn_cast<llvm::Constant>(Src.getAddress().emitRawPointer(CGF));
    // Look through an `addrspacecast` on the constant pointer, if any.
    if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
      C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
    if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
      // Load the handle from the specific global variable using the
      // `nvvm.texsurf.handle.internal` intrinsic.
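      // The emitted IR looks roughly like (illustrative; the exact overload
      // suffix and address space depend on the global):
      //   %texsurf_handle = call i64
      //       @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1) @tex)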
      Handle = CGF.EmitRuntimeCall(
          CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal,
                               {GV->getType()}),
          {GV}, "texsurf_handle");
    } else
      Handle = CGF.EmitLoadOfScalar(Src, SourceLocation());
    CGF.EmitStoreOfScalar(Handle, Dst);
  }
};

/// Checks whether the type is not directly supported by the current target.
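/// Examples (illustrative): _Float16 when the target lacks f16 support,
/// __float128 (or a 128-bit long double) without f128 support, a _BitInt
/// wider than the widest supported integer, or any array or record that
/// contains such a type.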
bool NVPTXABIInfo::isUnsupportedType(QualType T) const {
  ASTContext &Context = getContext();
  if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())
    return true;
  if (!Context.getTargetInfo().hasFloat128Type() &&
      (T->isFloat128Type() ||
       (T->isRealFloatingType() && Context.getTypeSize(T) == 128)))
    return true;
  if (const auto *EIT = T->getAs<BitIntType>())
    return EIT->getNumBits() >
           (Context.getTargetInfo().hasInt128Type() ? 128U : 64U);
  if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() &&
      Context.getTypeSize(T) > 64U)
    return true;
  if (const auto *AT = T->getAsArrayTypeUnsafe())
    return isUnsupportedType(AT->getElementType());
  const auto *RT = T->getAs<RecordType>();
  if (!RT)
    return false;
  const RecordDecl *RD = RT->getDecl();

  // If this is a C++ record, check the bases first.
  if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
    for (const CXXBaseSpecifier &I : CXXRD->bases())
      if (isUnsupportedType(I.getType()))
        return true;

  for (const FieldDecl *I : RD->fields())
    if (isUnsupportedType(I->getType()))
      return true;
  return false;
}

/// Coerce the given type into an array of integers whose element width is
/// capped at MaxSize bits.
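/// Illustrative example: a 96-bit type with 32-bit alignment and MaxSize = 64
/// gives Div = min(64, 32) = 32, so the type is coerced to [3 x i32].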
ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
                                                   unsigned MaxSize) const {
  // Alignment and Size are measured in bits.
  const uint64_t Size = getContext().getTypeSize(Ty);
  const uint64_t Alignment = getContext().getTypeAlign(Ty);
  const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
  llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div);
  const uint64_t NumElements = (Size + Div - 1) / Div;
  return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
}

ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
  if (RetTy->isVoidType())
    return ABIArgInfo::getIgnore();

  if (getContext().getLangOpts().OpenMP &&
      getContext().getLangOpts().OpenMPIsTargetDevice &&
      isUnsupportedType(RetTy))
    return coerceToIntArrayWithLimit(RetTy, 64);

  // Note: this differs from the default ABI; non-scalar types are returned
  // directly rather than indirectly.
  if (!RetTy->isScalarType())
    return ABIArgInfo::getDirect();

  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
    RetTy = EnumTy->getDecl()->getIntegerType();

  return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
                                               : ABIArgInfo::getDirect());
}

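// Illustrative examples: an aggregate argument is passed indirectly (byval in
// the alloca address space), and _BitInt(256) is also passed indirectly since
// it exceeds the 128-bit limit checked below.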
ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
  // Treat an enum type as its underlying type.
  if (const EnumType *EnumTy = Ty->getAs<EnumType>())
    Ty = EnumTy->getDecl()->getIntegerType();

  // Return aggregate types as indirect by value.
  if (isAggregateTypeForABI(Ty)) {
    // Under CUDA device compilation, tex/surf builtin types are replaced with
    // object types and passed directly.
    if (getContext().getLangOpts().CUDAIsDevice) {
      if (Ty->isCUDADeviceBuiltinSurfaceType())
        return ABIArgInfo::getDirect(
            CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
      if (Ty->isCUDADeviceBuiltinTextureType())
        return ABIArgInfo::getDirect(
            CGInfo.getCUDADeviceBuiltinTextureDeviceType());
    }
    return getNaturalAlignIndirect(
        Ty, /* AddrSpace */ getDataLayout().getAllocaAddrSpace(),
        /* byval */ true);
  }

  if (const auto *EIT = Ty->getAs<BitIntType>()) {
    if ((EIT->getNumBits() > 128) ||
        (!getContext().getTargetInfo().hasInt128Type() &&
         EIT->getNumBits() > 64))
      return getNaturalAlignIndirect(
          Ty, /* AddrSpace */ getDataLayout().getAllocaAddrSpace(),
          /* byval */ true);
  }

  return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
                                            : ABIArgInfo::getDirect());
}

void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
  if (!getCXXABI().classifyReturnType(FI))
    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());

  for (auto &&[ArgumentsCount, I] : llvm::enumerate(FI.arguments()))
    I.info = ArgumentsCount < FI.getNumRequiredArgs()
                 ? classifyArgumentType(I.type)
                 : ABIArgInfo::getDirect();

  // Always honor user-specified calling convention.
  if (FI.getCallingConvention() != llvm::CallingConv::C)
    return;

  FI.setEffectiveCallingConvention(getRuntimeCC());
}

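// Variadic arguments are read from a byte-aligned buffer: the slot size and
// minimum alignment are one byte, and each value may be loaded at its natural
// (higher) alignment, as the AllowHigherAlign argument below indicates.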
RValue NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                               QualType Ty, AggValueSlot Slot) const {
  return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*IsIndirect=*/false,
                          getContext().getTypeInfoInChars(Ty),
                          CharUnits::fromQuantity(1),
                          /*AllowHigherAlign=*/true, Slot);
}

void NVPTXTargetCodeGenInfo::setTargetAttributes(
    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
  if (GV->isDeclaration())
    return;
  const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
  if (VD) {
    if (M.getLangOpts().CUDA) {
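      // E.g. (illustrative) a module-scope `surface<void, 2> surf;` under
      // CUDA is annotated as !{ptr @surf, !"surface", i32 1}.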
      if (VD->getType()->isCUDADeviceBuiltinSurfaceType())
        addNVVMMetadata(GV, "surface", 1);
      else if (VD->getType()->isCUDADeviceBuiltinTextureType())
        addNVVMMetadata(GV, "texture", 1);
      return;
    }
  }

  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
  if (!FD)
    return;

  llvm::Function *F = cast<llvm::Function>(GV);

  // Perform special handling in OpenCL/CUDA mode.
  if (M.getLangOpts().OpenCL || M.getLangOpts().CUDA) {
    // Use function attributes to check for kernel functions.
    // By default, all functions are device functions.
    if (FD->hasAttr<DeviceKernelAttr>() || FD->hasAttr<CUDAGlobalAttr>()) {
      // OpenCL/CUDA kernel functions are not subject to inlining.
      F->addFnAttr(llvm::Attribute::NoInline);
      if (FD->hasAttr<CUDAGlobalAttr>()) {
        SmallVector<int, 10> GCI;
        for (auto IV : llvm::enumerate(FD->parameters()))
          if (IV.value()->hasAttr<CUDAGridConstantAttr>())
            // For some reason arg indices are 1-based in NVVM.
            GCI.push_back(IV.index() + 1);
        // Mark the kernel with the PTX_Kernel calling convention and record
        // its grid-constant arguments.
        F->setCallingConv(llvm::CallingConv::PTX_Kernel);
        addGridConstantNVVMMetadata(F, GCI);
      }
      if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
        M.handleCUDALaunchBoundsAttr(F, Attr);
    }
  }
  // Attach the kernel calling convention directly if compiling for NVPTX.
  if (FD->hasAttr<DeviceKernelAttr>()) {
    F->setCallingConv(llvm::CallingConv::PTX_Kernel);
  }
}

void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
                                             StringRef Name, int Operand) {
  llvm::Module *M = GV->getParent();
  llvm::LLVMContext &Ctx = M->getContext();

  // Get the "nvvm.annotations" metadata node.
  llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");

  SmallVector<llvm::Metadata *, 5> MDVals = {
      llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
      llvm::ConstantAsMetadata::get(
          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};

  // Append the metadata to nvvm.annotations.
  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}

void NVPTXTargetCodeGenInfo::addGridConstantNVVMMetadata(
    llvm::GlobalValue *GV, const SmallVectorImpl<int> &GridConstantArgs) {

  llvm::Module *M = GV->getParent();
  llvm::LLVMContext &Ctx = M->getContext();

  // Get the "nvvm.annotations" metadata node.
  llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");

  SmallVector<llvm::Metadata *, 5> MDVals = {llvm::ConstantAsMetadata::get(GV)};
  if (!GridConstantArgs.empty()) {
    SmallVector<llvm::Metadata *, 10> GCM;
    for (int I : GridConstantArgs)
      GCM.push_back(llvm::ConstantAsMetadata::get(
          llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), I)));
    MDVals.append({llvm::MDString::get(Ctx, "grid_constant"),
                   llvm::MDNode::get(Ctx, GCM)});
  }

  // Append the metadata to nvvm.annotations.
  MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}

bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
  return false;
}

llvm::Constant *
NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
                                       llvm::PointerType *PT,
                                       QualType QT) const {
  auto &Ctx = CGM.getContext();
  if (PT->getAddressSpace() != Ctx.getTargetAddressSpace(LangAS::opencl_local))
    return llvm::ConstantPointerNull::get(PT);

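  // A local (shared) address-space null pointer is represented as a
  // generic-address-space null cast into the local space, roughly
  // (illustrative IR, assuming shared memory is addrspace(3)):
  //   addrspacecast (ptr null to ptr addrspace(3))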
  auto NPT = llvm::PointerType::get(
      PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));
  return llvm::ConstantExpr::getAddrSpaceCast(
      llvm::ConstantPointerNull::get(NPT), PT);
}
} // namespace

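// Illustrative example: __launch_bounds__(128, 2) yields the function
// attributes "nvvm.maxntid"="128" and "nvvm.minctasm"="2" via the logic
// below.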
void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
                                               const CUDALaunchBoundsAttr *Attr,
                                               int32_t *MaxThreadsVal,
                                               int32_t *MinBlocksVal,
                                               int32_t *MaxClusterRankVal) {
  llvm::APSInt MaxThreads(32);
  MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(getContext());
  if (MaxThreads > 0) {
    if (MaxThreadsVal)
      *MaxThreadsVal = MaxThreads.getExtValue();
    if (F)
      F->addFnAttr("nvvm.maxntid", llvm::utostr(MaxThreads.getExtValue()));
  }

  // The min/max blocks arguments are optional for CUDALaunchBoundsAttr. If
  // they were not specified in __launch_bounds__ or the user specified a 0
  // value, we don't have to add a PTX directive.
  if (Attr->getMinBlocks()) {
    llvm::APSInt MinBlocks(32);
    MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(getContext());
    if (MinBlocks > 0) {
      if (MinBlocksVal)
        *MinBlocksVal = MinBlocks.getExtValue();
      if (F)
        F->addFnAttr("nvvm.minctasm", llvm::utostr(MinBlocks.getExtValue()));
    }
  }
  if (Attr->getMaxBlocks()) {
    llvm::APSInt MaxBlocks(32);
    MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext());
    if (MaxBlocks > 0) {
      if (MaxClusterRankVal)
        *MaxClusterRankVal = MaxBlocks.getExtValue();
      if (F)
        F->addFnAttr("nvvm.maxclusterrank",
                     llvm::utostr(MaxBlocks.getExtValue()));
    }
  }
}

std::unique_ptr<TargetCodeGenInfo>
CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) {
  return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.getTypes());
}