//===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements semantic analysis for CUDA constructs.
///
//===----------------------------------------------------------------------===//

#include "clang/Sema/SemaCUDA.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/ExprCXX.h"
#include "clang/Basic/Cuda.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Overload.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/SmallVector.h"
#include <optional>
using namespace clang;

SemaCUDA::SemaCUDA(Sema &S) : SemaBase(S) {}

template <typename AttrT> static bool hasExplicitAttr(const VarDecl *D) {
  if (!D)
    return false;
  if (auto *A = D->getAttr<AttrT>())
    return !A->isImplicit();
  return false;
}

void SemaCUDA::PushForceHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  ForceHostDeviceDepth++;
}

bool SemaCUDA::PopForceHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (ForceHostDeviceDepth == 0)
    return false;
  ForceHostDeviceDepth--;
  return true;
}

ExprResult SemaCUDA::ActOnExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                         MultiExprArg ExecConfig,
                                         SourceLocation GGGLoc) {
  FunctionDecl *ConfigDecl = getASTContext().getcudaConfigureCallDecl();
  if (!ConfigDecl)
    return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
                     << getConfigureFuncName());
  QualType ConfigQTy = ConfigDecl->getType();

  DeclRefExpr *ConfigDR = new (getASTContext()) DeclRefExpr(
      getASTContext(), ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
  SemaRef.MarkFunctionReferenced(LLLLoc, ConfigDecl);

  return SemaRef.BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
                               /*IsExecConfig=*/true);
}
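
// Illustrative sketch (not part of the implementation): for a kernel launch
// written as
//   kernel<<<Grid, Block, SharedMem, Stream>>>(Args...);
// the expressions between '<<<' (LLLLoc) and '>>>' (GGGLoc) arrive here as
// ExecConfig and are turned into a call to the configuration function named
// by getConfigureFuncName() (e.g. cudaConfigureCall or
// __cudaPushCallConfiguration), which BuildCallExpr attaches to the kernel
// call as its execution-config expression.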

CUDAFunctionTarget SemaCUDA::IdentifyTarget(const ParsedAttributesView &Attrs) {
  bool HasHostAttr = false;
  bool HasDeviceAttr = false;
  bool HasGlobalAttr = false;
  bool HasInvalidTargetAttr = false;
  for (const ParsedAttr &AL : Attrs) {
    switch (AL.getKind()) {
    case ParsedAttr::AT_CUDAGlobal:
      HasGlobalAttr = true;
      break;
    case ParsedAttr::AT_CUDAHost:
      HasHostAttr = true;
      break;
    case ParsedAttr::AT_CUDADevice:
      HasDeviceAttr = true;
      break;
    case ParsedAttr::AT_CUDAInvalidTarget:
      HasInvalidTargetAttr = true;
      break;
    default:
      break;
    }
  }

  if (HasInvalidTargetAttr)
    return CUDAFunctionTarget::InvalidTarget;

  if (HasGlobalAttr)
    return CUDAFunctionTarget::Global;

  if (HasHostAttr && HasDeviceAttr)
    return CUDAFunctionTarget::HostDevice;

  if (HasDeviceAttr)
    return CUDAFunctionTarget::Device;

  return CUDAFunctionTarget::Host;
}

template <typename A>
static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) {
  return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
           return isa<A>(Attribute) &&
                  !(IgnoreImplicitAttr && Attribute->isImplicit());
         });
}

SemaCUDA::CUDATargetContextRAII::CUDATargetContextRAII(
    SemaCUDA &S_, SemaCUDA::CUDATargetContextKind K, Decl *D)
    : S(S_) {
  SavedCtx = S.CurCUDATargetCtx;
  assert(K == SemaCUDA::CTCK_InitGlobalVar);
  auto *VD = dyn_cast_or_null<VarDecl>(D);
  if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) {
    auto Target = CUDAFunctionTarget::Host;
    if ((hasAttr<CUDADeviceAttr>(VD, /*IgnoreImplicit=*/true) &&
         !hasAttr<CUDAHostAttr>(VD, /*IgnoreImplicit=*/true)) ||
        hasAttr<CUDASharedAttr>(VD, /*IgnoreImplicit=*/true) ||
        hasAttr<CUDAConstantAttr>(VD, /*IgnoreImplicit=*/true))
      Target = CUDAFunctionTarget::Device;
    S.CurCUDATargetCtx = {Target, K, VD};
  }
}

/// IdentifyTarget - Determine the CUDA compilation target for this function.
CUDAFunctionTarget SemaCUDA::IdentifyTarget(const FunctionDecl *D,
                                            bool IgnoreImplicitHDAttr) {
  // Code that lives outside a function gets the target from CurCUDATargetCtx.
  if (D == nullptr)
    return CurCUDATargetCtx.Target;

  if (D->hasAttr<CUDAInvalidTargetAttr>())
    return CUDAFunctionTarget::InvalidTarget;

  if (D->hasAttr<CUDAGlobalAttr>())
    return CUDAFunctionTarget::Global;

  if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
    if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
      return CUDAFunctionTarget::HostDevice;
    return CUDAFunctionTarget::Device;
  } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
    return CUDAFunctionTarget::Host;
  } else if ((D->isImplicit() || !D->isUserProvided()) &&
             !IgnoreImplicitHDAttr) {
    // Some implicit declarations (like intrinsic functions) are not marked.
    // Set the most lenient target on them for maximal flexibility.
    return CUDAFunctionTarget::HostDevice;
  }

  return CUDAFunctionTarget::Host;
}
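
// Illustrative examples of the mapping implemented above (not part of the
// implementation):
//   __global__ void K();            // CUDAFunctionTarget::Global
//   __device__ void D();            // CUDAFunctionTarget::Device
//   __host__ __device__ void HD();  // CUDAFunctionTarget::HostDevice
//   void H();                       // CUDAFunctionTarget::Host (no attributes)
// Implicit, non-user-provided declarations without attributes are treated as
// HostDevice unless IgnoreImplicitHDAttr is set.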

/// IdentifyTarget - Determine the CUDA compilation target for this variable.
SemaCUDA::CUDAVariableTarget SemaCUDA::IdentifyTarget(const VarDecl *Var) {
  if (Var->hasAttr<HIPManagedAttr>())
    return CVT_Unified;
  // Only constexpr and const variables with implicit constant attribute
  // are emitted on both sides. Such variables are promoted to device side
  // only if they have static constant initializers on device side.
  if ((Var->isConstexpr() || Var->getType().isConstQualified()) &&
      Var->hasAttr<CUDAConstantAttr>() &&
      !hasExplicitAttr<CUDAConstantAttr>(Var))
    return CVT_Both;
  if (Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>() ||
      Var->hasAttr<CUDASharedAttr>() ||
      Var->getType()->isCUDADeviceBuiltinSurfaceType() ||
      Var->getType()->isCUDADeviceBuiltinTextureType())
    return CVT_Device;
  // Function-scope static variables without an explicit device or constant
  // attribute are emitted
  //  - on both sides in host device functions
  //  - on device side in device or global functions
  if (auto *FD = dyn_cast<FunctionDecl>(Var->getDeclContext())) {
    switch (IdentifyTarget(FD)) {
    case CUDAFunctionTarget::HostDevice:
      return CVT_Both;
    case CUDAFunctionTarget::Device:
    case CUDAFunctionTarget::Global:
      return CVT_Device;
    default:
      return CVT_Host;
    }
  }
  return CVT_Host;
}

// * CUDA Call preference table
//
// F  - from,
// T  - to
// Ph - preference in host mode
// Pd - preference in device mode
// H  - handled in (x)
// Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
//
// | F  | T  | Ph  | Pd  |  H  |
// |----+----+-----+-----+-----+
// | d  | d  | N   | N   | (c) |
// | d  | g  | --  | --  | (a) |
// | d  | h  | --  | --  | (e) |
// | d  | hd | HD  | HD  | (b) |
// | g  | d  | N   | N   | (c) |
// | g  | g  | --  | --  | (a) |
// | g  | h  | --  | --  | (e) |
// | g  | hd | HD  | HD  | (b) |
// | h  | d  | --  | --  | (e) |
// | h  | g  | N   | N   | (c) |
// | h  | h  | N   | N   | (c) |
// | h  | hd | HD  | HD  | (b) |
// | hd | d  | WS  | SS  | (d) |
// | hd | g  | SS  | --  |(d/a)|
// | hd | h  | SS  | WS  | (d) |
// | hd | hd | HD  | HD  | (b) |

SemaCUDA::CUDAFunctionPreference
SemaCUDA::IdentifyPreference(const FunctionDecl *Caller,
                             const FunctionDecl *Callee) {
  assert(Callee && "Callee must be valid.");

  // Treat ctor/dtor as host device function in device var initializer to allow
  // trivial ctor/dtor without device attr to be used. Non-trivial ctor/dtor
  // will be diagnosed by checkAllowedInitializer.
  if (Caller == nullptr && CurCUDATargetCtx.Kind == CTCK_InitGlobalVar &&
      CurCUDATargetCtx.Target == CUDAFunctionTarget::Device &&
      (isa<CXXConstructorDecl>(Callee) || isa<CXXDestructorDecl>(Callee)))
    return CFP_HostDevice;

  CUDAFunctionTarget CallerTarget = IdentifyTarget(Caller);
  CUDAFunctionTarget CalleeTarget = IdentifyTarget(Callee);

  // If one of the targets is invalid, the check always fails, no matter what
  // the other target is.
  if (CallerTarget == CUDAFunctionTarget::InvalidTarget ||
      CalleeTarget == CUDAFunctionTarget::InvalidTarget)
    return CFP_Never;

  // (a) Can't call global from some contexts until we support CUDA's
  // dynamic parallelism.
  if (CalleeTarget == CUDAFunctionTarget::Global &&
      (CallerTarget == CUDAFunctionTarget::Global ||
       CallerTarget == CUDAFunctionTarget::Device))
    return CFP_Never;

  // (b) Calling HostDevice is OK for everyone.
  if (CalleeTarget == CUDAFunctionTarget::HostDevice)
    return CFP_HostDevice;

  // (c) Best case scenarios
  if (CalleeTarget == CallerTarget ||
      (CallerTarget == CUDAFunctionTarget::Host &&
       CalleeTarget == CUDAFunctionTarget::Global) ||
      (CallerTarget == CUDAFunctionTarget::Global &&
       CalleeTarget == CUDAFunctionTarget::Device))
    return CFP_Native;

  // HipStdPar mode is special: deciding whether a device-side call to a host
  // target is valid is deferred to a subsequent pass and cannot be
  // unambiguously adjudicated in the AST, hence we optimistically allow such
  // calls to pass here.
  if (getLangOpts().HIPStdPar &&
      (CallerTarget == CUDAFunctionTarget::Global ||
       CallerTarget == CUDAFunctionTarget::Device ||
       CallerTarget == CUDAFunctionTarget::HostDevice) &&
      CalleeTarget == CUDAFunctionTarget::Host)
    return CFP_HostDevice;

  // (d) HostDevice behavior depends on compilation mode.
  if (CallerTarget == CUDAFunctionTarget::HostDevice) {
    // It's OK to call a compilation-mode matching function from an HD one.
    if ((getLangOpts().CUDAIsDevice &&
         CalleeTarget == CUDAFunctionTarget::Device) ||
        (!getLangOpts().CUDAIsDevice &&
         (CalleeTarget == CUDAFunctionTarget::Host ||
          CalleeTarget == CUDAFunctionTarget::Global)))
      return CFP_SameSide;

    // Calls from HD to non-mode-matching functions (i.e., to host functions
    // when compiling in device mode or to device functions when compiling in
    // host mode) are allowed at the sema level, but eventually rejected if
    // they're ever codegened. TODO: Reject said calls earlier.
    return CFP_WrongSide;
  }

  // (e) Calling across device/host boundary is not something you should do.
  if ((CallerTarget == CUDAFunctionTarget::Host &&
       CalleeTarget == CUDAFunctionTarget::Device) ||
      (CallerTarget == CUDAFunctionTarget::Device &&
       CalleeTarget == CUDAFunctionTarget::Host) ||
      (CallerTarget == CUDAFunctionTarget::Global &&
       CalleeTarget == CUDAFunctionTarget::Host))
    return CFP_Never;

  llvm_unreachable("All cases should've been handled by now.");
}
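
// Illustrative sketch of how the preference table above plays out in user
// code (not part of the implementation):
//   __device__ void D();
//   void H();                        // host
//   __host__ __device__ void HD() {
//     D();  // CFP_SameSide when compiling for device, CFP_WrongSide for host
//     H();  // CFP_SameSide when compiling for host, CFP_WrongSide for device
//   }
// Wrong-side calls are accepted during Sema and only rejected later if the
// calling function is actually emitted for that side; see case (d) above and
// SemaCUDA::CheckCall below.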

template <typename AttrT> static bool hasImplicitAttr(const FunctionDecl *D) {
  if (!D)
    return false;
  if (auto *A = D->getAttr<AttrT>())
    return A->isImplicit();
  return D->isImplicit();
}

bool SemaCUDA::isImplicitHostDeviceFunction(const FunctionDecl *D) {
  bool IsImplicitDevAttr = hasImplicitAttr<CUDADeviceAttr>(D);
  bool IsImplicitHostAttr = hasImplicitAttr<CUDAHostAttr>(D);
  return IsImplicitDevAttr && IsImplicitHostAttr;
}

void SemaCUDA::EraseUnwantedMatches(
    const FunctionDecl *Caller,
    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
  if (Matches.size() <= 1)
    return;

  using Pair = std::pair<DeclAccessPair, FunctionDecl *>;

  // Gets the CUDA function preference for a call from Caller to Match.
  auto GetCFP = [&](const Pair &Match) {
    return IdentifyPreference(Caller, Match.second);
  };

  // Find the best call preference among the functions in Matches.
  CUDAFunctionPreference BestCFP =
      GetCFP(*llvm::max_element(Matches, [&](const Pair &M1, const Pair &M2) {
        return GetCFP(M1) < GetCFP(M2);
      }));

  // Erase all functions with lower priority.
  llvm::erase_if(Matches,
                 [&](const Pair &Match) { return GetCFP(Match) < BestCFP; });
}

/// When an implicitly-declared special member has to invoke more than one
/// base/field special member, conflicts may occur in the targets of these
/// members. For example, if one base's member is __host__ and another's is
/// __device__, it's a conflict.
/// This function figures out if the given targets \param Target1 and
/// \param Target2 conflict, and if they do not it fills in
/// \param ResolvedTarget with a target that resolves for both calls.
/// \return true if there's a conflict, false otherwise.
static bool
resolveCalleeCUDATargetConflict(CUDAFunctionTarget Target1,
                                CUDAFunctionTarget Target2,
                                CUDAFunctionTarget *ResolvedTarget) {
  // Only free functions and static member functions may be global.
  assert(Target1 != CUDAFunctionTarget::Global);
  assert(Target2 != CUDAFunctionTarget::Global);

  if (Target1 == CUDAFunctionTarget::HostDevice) {
    *ResolvedTarget = Target2;
  } else if (Target2 == CUDAFunctionTarget::HostDevice) {
    *ResolvedTarget = Target1;
  } else if (Target1 != Target2) {
    return true;
  } else {
    *ResolvedTarget = Target1;
  }

  return false;
}
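
// Illustrative example (not part of the implementation): when the implicit
// default constructor of
//   struct A { __host__ A(); };
//   struct B { __device__ B(); };
//   struct C : A, B {};
// is inferred by inferTargetForImplicitSpecialMember below, the Host and
// Device base-constructor targets conflict, so C's constructor is given
// CUDAInvalidTargetAttr. A HostDevice callee never conflicts; it simply
// adopts the other call's target.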

bool SemaCUDA::inferTargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                                   CXXSpecialMemberKind CSM,
                                                   CXXMethodDecl *MemberDecl,
                                                   bool ConstRHS,
                                                   bool Diagnose) {
  // If MemberDecl is a virtual destructor of an explicit template class
  // instantiation, it must be emitted, therefore it needs to be inferred
  // conservatively by ignoring implicit host/device attrs of member and parent
  // dtors called by it. Also, it needs to be checked by the deferred diag
  // visitor.
  bool IsExpVDtor = false;
  if (isa<CXXDestructorDecl>(MemberDecl) && MemberDecl->isVirtual()) {
    if (auto *Spec = dyn_cast<ClassTemplateSpecializationDecl>(ClassDecl)) {
      TemplateSpecializationKind TSK = Spec->getTemplateSpecializationKind();
      IsExpVDtor = TSK == TSK_ExplicitInstantiationDeclaration ||
                   TSK == TSK_ExplicitInstantiationDefinition;
    }
  }
  if (IsExpVDtor)
    SemaRef.DeclsToCheckForDeferredDiags.insert(MemberDecl);

  // If the defaulted special member is defined lexically outside of its
  // owning class, or the special member already has explicit device or host
  // attributes, do not infer.
  bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent();
  bool HasH = MemberDecl->hasAttr<CUDAHostAttr>();
  bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>();
  bool HasExplicitAttr =
      (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) ||
      (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit());
  if (!InClass || HasExplicitAttr)
    return false;

  std::optional<CUDAFunctionTarget> InferredTarget;

  // We're going to invoke special member lookup; mark that these special
  // members are called from this one, and not from its caller.
  Sema::ContextRAII MethodContext(SemaRef, MemberDecl);

  // Look for special members in base classes that should be invoked from here.
  // Infer the target of this member based on the ones it should call.
  // Skip direct and indirect virtual bases for abstract classes.
  llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
  for (const auto &B : ClassDecl->bases()) {
    if (!B.isVirtual()) {
      Bases.push_back(&B);
    }
  }

  if (!ClassDecl->isAbstract()) {
    llvm::append_range(Bases, llvm::make_pointer_range(ClassDecl->vbases()));
  }

  for (const auto *B : Bases) {
    const RecordType *BaseType = B->getType()->getAs<RecordType>();
    if (!BaseType) {
      continue;
    }

    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        SemaRef.LookupSpecialMember(BaseClassDecl, CSM,
                                    /* ConstArg */ ConstRHS,
                                    /* VolatileArg */ false,
                                    /* RValueThis */ false,
                                    /* ConstThis */ false,
                                    /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget BaseMethodTarget =
        IdentifyTarget(SMOR.getMethod(), IsExpVDtor);

    if (!InferredTarget) {
      InferredTarget = BaseMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          *InferredTarget, BaseMethodTarget, &*InferredTarget);
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << *InferredTarget << BaseMethodTarget;
        }
        MemberDecl->addAttr(
            CUDAInvalidTargetAttr::CreateImplicit(getASTContext()));
        return true;
      }
    }
  }

  // Same as for bases, but now for special members of fields.
  for (const auto *F : ClassDecl->fields()) {
    if (F->isInvalidDecl()) {
      continue;
    }

    const RecordType *FieldType =
        getASTContext().getBaseElementType(F->getType())->getAs<RecordType>();
    if (!FieldType) {
      continue;
    }

    CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        SemaRef.LookupSpecialMember(FieldRecDecl, CSM,
                                    /* ConstArg */ ConstRHS && !F->isMutable(),
                                    /* VolatileArg */ false,
                                    /* RValueThis */ false,
                                    /* ConstThis */ false,
                                    /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget FieldMethodTarget =
        IdentifyTarget(SMOR.getMethod(), IsExpVDtor);

    if (!InferredTarget) {
      InferredTarget = FieldMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          *InferredTarget, FieldMethodTarget, &*InferredTarget);
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << *InferredTarget << FieldMethodTarget;
        }
        MemberDecl->addAttr(
            CUDAInvalidTargetAttr::CreateImplicit(getASTContext()));
        return true;
      }
    }
  }

  // If no target was inferred, mark this member as __host__ __device__;
  // it's the least restrictive option that can be invoked from any target.
  bool NeedsH = true, NeedsD = true;
  if (InferredTarget) {
    if (*InferredTarget == CUDAFunctionTarget::Device)
      NeedsH = false;
    else if (*InferredTarget == CUDAFunctionTarget::Host)
      NeedsD = false;
  }

  // We are either setting the attributes for the first time, or the inferred
  // ones must match the previously set ones.
  if (NeedsD && !HasD)
    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
  if (NeedsH && !HasH)
    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));

  return false;
}

bool SemaCUDA::isEmptyConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
  if (!CD->isDefined() && CD->isTemplateInstantiation())
    SemaRef.InstantiateFunctionDefinition(Loc, CD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial constructor
  if (CD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The constructor function has been defined.
  // The constructor function has no parameters,
  // and the function body is an empty compound statement.
  if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
    return false;

  // Its class has no virtual functions and no virtual base classes.
  if (CD->getParent()->isDynamicClass())
    return false;

  // Union ctor does not call ctors of its data members.
  if (CD->getParent()->isUnion())
    return true;

  // The only form of initializer allowed is an empty constructor.
  // This will recursively check all base classes and member initializers.
  if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
        if (const CXXConstructExpr *CE =
                dyn_cast<CXXConstructExpr>(CI->getInit()))
          return isEmptyConstructor(Loc, CE->getConstructor());
        return false;
      }))
    return false;

  return true;
}
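
// Illustrative examples (not part of the implementation) of the "empty
// constructor" notion above:
//   struct Empty { Empty() {} };                       // empty: no params,
//                                                      // empty body, no inits
//   struct NonEmpty { int X; NonEmpty() : X(42) {} };  // not empty: X(42) is
//                                                      // not an empty ctor
// Only such "empty" constructors (and, below, destructors) are permitted for
// device-side global variables; see HasAllowedCUDADeviceStaticInitializer.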

bool SemaCUDA::isEmptyDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
  // No destructor -> no problem.
  if (!DD)
    return true;

  if (!DD->isDefined() && DD->isTemplateInstantiation())
    SemaRef.InstantiateFunctionDefinition(Loc, DD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial destructor
  if (DD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The destructor function has been defined.
  // and the function body is an empty compound statement.
  if (!DD->hasTrivialBody())
    return false;

  const CXXRecordDecl *ClassDecl = DD->getParent();

  // Its class has no virtual functions and no virtual base classes.
  if (ClassDecl->isDynamicClass())
    return false;

  // Union does not have base class and union dtor does not call dtors of its
  // data members.
  if (DD->getParent()->isUnion())
    return true;

  // Only empty destructors are allowed. This will recursively check
  // destructors for all base classes...
  if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
        if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
          return isEmptyDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  // ... and member fields.
  if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
        if (CXXRecordDecl *RD = Field->getType()
                                    ->getBaseElementTypeUnsafe()
                                    ->getAsCXXRecordDecl())
          return isEmptyDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  return true;
}

namespace {
enum CUDAInitializerCheckKind {
  CICK_DeviceOrConstant, // Check initializer for device/constant variable
  CICK_Shared,           // Check initializer for shared variable
};

bool IsDependentVar(VarDecl *VD) {
  if (VD->getType()->isDependentType())
    return true;
  if (const auto *Init = VD->getInit())
    return Init->isValueDependent();
  return false;
}

// Check whether a variable has an allowed initializer for a CUDA device side
// variable with global storage. \p VD may be a host variable to be checked for
// potential promotion to device side variable.
//
// CUDA/HIP allows only empty constructors as initializers for global
// variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all
// __shared__ variables whether they are local or not (they all are implicitly
// static in CUDA). One exception is that CUDA allows constant initializers
// for __constant__ and __device__ variables.
bool HasAllowedCUDADeviceStaticInitializer(SemaCUDA &S, VarDecl *VD,
                                           CUDAInitializerCheckKind CheckKind) {
  assert(!VD->isInvalidDecl() && VD->hasGlobalStorage());
  assert(!IsDependentVar(VD) && "do not check dependent var");
  const Expr *Init = VD->getInit();
  auto IsEmptyInit = [&](const Expr *Init) {
    if (!Init)
      return true;
    if (const auto *CE = dyn_cast<CXXConstructExpr>(Init)) {
      return S.isEmptyConstructor(VD->getLocation(), CE->getConstructor());
    }
    return false;
  };
  auto IsConstantInit = [&](const Expr *Init) {
    assert(Init);
    ASTContext::CUDAConstantEvalContextRAII EvalCtx(S.getASTContext(),
                                                    /*NoWrongSidedVars=*/true);
    return Init->isConstantInitializer(S.getASTContext(),
                                       VD->getType()->isReferenceType());
  };
  auto HasEmptyDtor = [&](VarDecl *VD) {
    if (const auto *RD = VD->getType()->getAsCXXRecordDecl())
      return S.isEmptyDestructor(VD->getLocation(), RD->getDestructor());
    return true;
  };
  if (CheckKind == CICK_Shared)
    return IsEmptyInit(Init) && HasEmptyDtor(VD);
  return S.getLangOpts().GPUAllowDeviceInit ||
         ((IsEmptyInit(Init) || IsConstantInit(Init)) && HasEmptyDtor(VD));
}
} // namespace
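
// Illustrative examples (not part of the implementation) for
// HasAllowedCUDADeviceStaticInitializer above:
//   __device__ int A;          // OK: no initializer
//   __device__ int B = 42;     // OK: constant initializer is allowed for
//                              //     __device__/__constant__ variables
//   __device__ int C = Host(); // rejected (dynamic initialization) unless
//                              //     LangOpts.GPUAllowDeviceInit is set;
//                              //     Host() is a hypothetical host function
//   __shared__ int D = 1;      // rejected: CICK_Shared requires an empty
//                              //     initializer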

void SemaCUDA::checkAllowedInitializer(VarDecl *VD) {
  // Return early if VD is inside a non-instantiated template function since
  // the implicit constructor is not defined yet.
  if (const FunctionDecl *FD =
          dyn_cast_or_null<FunctionDecl>(VD->getDeclContext());
      FD && FD->isDependentContext())
    return;

  bool IsSharedVar = VD->hasAttr<CUDASharedAttr>();
  bool IsDeviceOrConstantVar =
      !IsSharedVar &&
      (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>());
  if ((IsSharedVar || IsDeviceOrConstantVar) &&
      VD->getType().getQualifiers().getAddressSpace() != LangAS::Default) {
    Diag(VD->getLocation(), diag::err_cuda_address_space_gpuvar);
    VD->setInvalidDecl();
    return;
  }
  // Do not check dependent variables since the ctor/dtor/initializer are not
  // determined. Do it after instantiation.
  if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage() ||
      IsDependentVar(VD))
    return;
  const Expr *Init = VD->getInit();
  if (IsDeviceOrConstantVar || IsSharedVar) {
    if (HasAllowedCUDADeviceStaticInitializer(
            *this, VD, IsSharedVar ? CICK_Shared : CICK_DeviceOrConstant))
      return;
    Diag(VD->getLocation(),
         IsSharedVar ? diag::err_shared_var_init : diag::err_dynamic_var_init)
        << Init->getSourceRange();
    VD->setInvalidDecl();
  } else {
    // This is a host-side global variable. Check that the initializer is
    // callable from the host side.
    const FunctionDecl *InitFn = nullptr;
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) {
      InitFn = CE->getConstructor();
    } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) {
      InitFn = CE->getDirectCallee();
    }
    if (InitFn) {
      CUDAFunctionTarget InitFnTarget = IdentifyTarget(InitFn);
      if (InitFnTarget != CUDAFunctionTarget::Host &&
          InitFnTarget != CUDAFunctionTarget::HostDevice) {
        Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer)
            << InitFnTarget << InitFn;
        Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn;
        VD->setInvalidDecl();
      }
    }
  }
}

void SemaCUDA::RecordImplicitHostDeviceFuncUsedByDevice(
    const FunctionDecl *Callee) {
  FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  if (!Caller)
    return;

  if (!isImplicitHostDeviceFunction(Callee))
    return;

  CUDAFunctionTarget CallerTarget = IdentifyTarget(Caller);

  // Record whether an implicit host device function is used on device side.
  if (CallerTarget != CUDAFunctionTarget::Device &&
      CallerTarget != CUDAFunctionTarget::Global &&
      (CallerTarget != CUDAFunctionTarget::HostDevice ||
       (isImplicitHostDeviceFunction(Caller) &&
        !getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.count(Caller))))
    return;

  getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.insert(Callee);
}

// With -fcuda-host-device-constexpr, an unattributed constexpr function is
// treated as implicitly __host__ __device__, unless:
// * it is a variadic function (device-side variadic functions are not
//   allowed), or
// * a __device__ function with this signature was already declared, in which
//   case we output an error, unless the __device__ decl is in a system header,
//   in which case we leave the constexpr function unattributed.
//
// In addition, all function decls are treated as __host__ __device__ when
// ForceHostDeviceDepth > 0 (corresponding to code within a
//   #pragma clang force_cuda_host_device_begin/end
// pair).
void SemaCUDA::maybeAddHostDeviceAttrs(FunctionDecl *NewD,
                                       const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");

  if (ForceHostDeviceDepth > 0) {
    if (!NewD->hasAttr<CUDAHostAttr>())
      NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
    if (!NewD->hasAttr<CUDADeviceAttr>())
      NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
    return;
  }

  // If a template function has no host/device/global attributes,
  // make it an implicit host device function.
  if (getLangOpts().OffloadImplicitHostDeviceTemplates &&
      !NewD->hasAttr<CUDAHostAttr>() && !NewD->hasAttr<CUDADeviceAttr>() &&
      !NewD->hasAttr<CUDAGlobalAttr>() &&
      (NewD->getDescribedFunctionTemplate() ||
       NewD->isFunctionTemplateSpecialization())) {
    NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
    NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
    return;
  }

  if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
      NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
      NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
    return;

  // Is D a __device__ function with the same signature as NewD, ignoring CUDA
  // attributes?
  auto IsMatchingDeviceFn = [&](NamedDecl *D) {
    if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
      D = Using->getTargetDecl();
    FunctionDecl *OldD = D->getAsFunction();
    return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
           !OldD->hasAttr<CUDAHostAttr>() &&
           !SemaRef.IsOverload(NewD, OldD,
                               /* UseMemberUsingDeclRules = */ false,
                               /* ConsiderCudaAttrs = */ false);
  };
  auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
  if (It != Previous.end()) {
    // We found a __device__ function with the same name and signature as NewD
    // (ignoring CUDA attrs). This is an error unless that function is defined
    // in a system header, in which case we simply return without making NewD
    // host+device.
    NamedDecl *Match = *It;
    if (!SemaRef.getSourceManager().isInSystemHeader(Match->getLocation())) {
      Diag(NewD->getLocation(),
           diag::err_cuda_unattributed_constexpr_cannot_overload_device)
          << NewD;
      Diag(Match->getLocation(),
           diag::note_cuda_conflicting_device_function_declared_here);
    }
    return;
  }

  NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
  NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
}
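
// Illustrative sketch (not part of the implementation) of the rule above,
// with -fcuda-host-device-constexpr (the default):
//   __device__ int Twice(int X);                  // pre-existing __device__
//                                                 // declaration
//   constexpr int Twice(int X) { return 2 * X; }  // not promoted: a same-
//       // signature __device__ overload exists, so this is diagnosed unless
//       // that declaration came from a system header.
// An unattributed constexpr function with no such conflicting __device__
// declaration simply receives implicit __host__ __device__ attributes, and
// inside a force_cuda_host_device pragma region (see the comment above) every
// function declaration receives them regardless of constexpr-ness.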

// TODO: `__constant__` memory may be a limited resource for certain targets.
// A safeguard may be needed at the end of the compilation pipeline if
// `__constant__` memory usage goes beyond the limit.
void SemaCUDA::MaybeAddConstantAttr(VarDecl *VD) {
  // Do not promote dependent variables since the ctor/dtor/initializer are
  // not determined. Do it after instantiation.
  if (getLangOpts().CUDAIsDevice && !VD->hasAttr<CUDAConstantAttr>() &&
      !VD->hasAttr<CUDASharedAttr>() &&
      (VD->isFileVarDecl() || VD->isStaticDataMember()) &&
      !IsDependentVar(VD) &&
      ((VD->isConstexpr() || VD->getType().isConstQualified()) &&
       HasAllowedCUDADeviceStaticInitializer(*this, VD,
                                             CICK_DeviceOrConstant))) {
    VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext()));
  }
}

SemaBase::SemaDiagnosticBuilder SemaCUDA::DiagIfDeviceCode(SourceLocation Loc,
                                                           unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  FunctionDecl *CurFunContext =
      SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  SemaDiagnosticBuilder::Kind DiagKind = [&] {
    if (!CurFunContext)
      return SemaDiagnosticBuilder::K_Nop;
    switch (CurrentTarget()) {
    case CUDAFunctionTarget::Global:
    case CUDAFunctionTarget::Device:
      return SemaDiagnosticBuilder::K_Immediate;
    case CUDAFunctionTarget::HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in device
      // mode until the function is known-emitted.
      if (!getLangOpts().CUDAIsDevice)
        return SemaDiagnosticBuilder::K_Nop;
      if (SemaRef.IsLastErrorImmediate &&
          getDiagnostics().getDiagnosticIDs()->isNote(DiagID))
        return SemaDiagnosticBuilder::K_Immediate;
      return (SemaRef.getEmissionStatus(CurFunContext) ==
              Sema::FunctionEmissionStatus::Emitted)
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();
  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, SemaRef);
}

Sema::SemaDiagnosticBuilder SemaCUDA::DiagIfHostCode(SourceLocation Loc,
                                                     unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  FunctionDecl *CurFunContext =
      SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  SemaDiagnosticBuilder::Kind DiagKind = [&] {
    if (!CurFunContext)
      return SemaDiagnosticBuilder::K_Nop;
    switch (CurrentTarget()) {
    case CUDAFunctionTarget::Host:
      return SemaDiagnosticBuilder::K_Immediate;
    case CUDAFunctionTarget::HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in device
      // mode until the function is known-emitted.
      if (getLangOpts().CUDAIsDevice)
        return SemaDiagnosticBuilder::K_Nop;
      if (SemaRef.IsLastErrorImmediate &&
          getDiagnostics().getDiagnosticIDs()->isNote(DiagID))
        return SemaDiagnosticBuilder::K_Immediate;
      return (SemaRef.getEmissionStatus(CurFunContext) ==
              Sema::FunctionEmissionStatus::Emitted)
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();
  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, SemaRef);
}

bool SemaCUDA::CheckCall(SourceLocation Loc, FunctionDecl *Callee) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  assert(Callee && "Callee may not be null.");

  const auto &ExprEvalCtx = SemaRef.currentEvaluationContext();
  if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
    return true;

  // FIXME: Is bailing out early correct here? Should we instead assume that
  // the caller is a global initializer?
  FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  if (!Caller)
    return true;

  // If the caller is known-emitted, mark the callee as known-emitted.
  // Otherwise, mark the call in our call graph so we can traverse it later.
  bool CallerKnownEmitted = SemaRef.getEmissionStatus(Caller) ==
                            Sema::FunctionEmissionStatus::Emitted;
  SemaDiagnosticBuilder::Kind DiagKind = [this, Caller, Callee,
                                          CallerKnownEmitted] {
    switch (IdentifyPreference(Caller, Callee)) {
    case CFP_Never:
    case CFP_WrongSide:
      assert(Caller && "Never/wrongSide calls require a non-null caller");
      // If we know the caller will be emitted, we know this wrong-side call
      // will be emitted, so it's an immediate error. Otherwise, defer the
      // error until we know the caller is emitted.
      return CallerKnownEmitted
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();

  if (DiagKind == SemaDiagnosticBuilder::K_Nop) {
    // For -fgpu-rdc, keep track of external kernels used by host functions.
    if (getLangOpts().CUDAIsDevice && getLangOpts().GPURelocatableDeviceCode &&
        Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined() &&
        (!Caller || (!Caller->getDescribedFunctionTemplate() &&
                     getASTContext().GetGVALinkageForFunction(Caller) ==
                         GVA_StrongExternal)))
      getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Callee);
    return true;
  }

  // Avoid emitting this error twice for the same location. Using a hashtable
  // like this is unfortunate, but because we must continue parsing as normal
  // after encountering a deferred error, it's otherwise very tricky for us to
  // ensure that we only emit this deferred error once.
  if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
    return true;

  SemaDiagnosticBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller,
                        SemaRef)
      << IdentifyTarget(Callee) << /*function*/ 0 << Callee
      << IdentifyTarget(Caller);
  if (!Callee->getBuiltinID())
    SemaDiagnosticBuilder(DiagKind, Callee->getLocation(),
                          diag::note_previous_decl, Caller, SemaRef)
        << Callee;
  return DiagKind != SemaDiagnosticBuilder::K_Immediate &&
         DiagKind != SemaDiagnosticBuilder::K_ImmediateWithCallStack;
}

// Check the wrong-sided reference capture of lambda for CUDA/HIP.
// A lambda function may capture a stack variable by reference when it is
// defined and use the capture by reference when the lambda is called. When
// the capture and use happen on different sides, the capture is invalid and
// should be diagnosed.
void SemaCUDA::CheckLambdaCapture(CXXMethodDecl *Callee,
                                  const sema::Capture &Capture) {
  // In host compilation we only need to check lambda functions emitted on the
  // host side. In such lambda functions, a reference capture is invalid only
  // if the lambda structure is populated by a device function or kernel and
  // then passed to and called by a host function. However, that is impossible:
  // a device function or kernel can only call a device function, and a kernel
  // cannot pass a lambda back to a host function, since we cannot define a
  // kernel argument type which can hold the lambda before the lambda itself
  // is defined.
  if (!getLangOpts().CUDAIsDevice)
    return;

  // File-scope lambdas can only do init captures for global variables, which
  // results in passing by value for these global variables.
  FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  if (!Caller)
    return;

  // In device compilation, we only need to check lambda functions which are
  // emitted on the device side. For such lambdas, a reference capture is
  // invalid only if the lambda structure is populated by a host function and
  // then passed to and called in a device function or kernel.
  bool CalleeIsDevice = Callee->hasAttr<CUDADeviceAttr>();
  bool CallerIsHost =
      !Caller->hasAttr<CUDAGlobalAttr>() && !Caller->hasAttr<CUDADeviceAttr>();
  bool ShouldCheck = CalleeIsDevice && CallerIsHost;
  if (!ShouldCheck || !Capture.isReferenceCapture())
    return;
  auto DiagKind = SemaDiagnosticBuilder::K_Deferred;
  if (Capture.isVariableCapture() && !getLangOpts().HIPStdPar) {
    SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
                          diag::err_capture_bad_target, Callee, SemaRef)
        << Capture.getVariable();
  } else if (Capture.isThisCapture()) {
    // Capture of the this pointer is allowed, since the this pointer may be
    // pointing to managed memory which is accessible on both the device and
    // host sides. It only results in an invalid memory access if the this
    // pointer points to memory not accessible on the device side.
    SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
                          diag::warn_maybe_capture_bad_target_this_ptr, Callee,
                          SemaRef);
  }
}
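
// Illustrative sketch (not part of the implementation) of the capture being
// diagnosed above, assuming a hypothetical Kernel that invokes its functor
// argument on the device:
//   void HostFn() {
//     int N = 42;
//     auto L = [&N] __device__ () { return N; };  // reference capture of a
//         // host stack variable in a __device__ lambda: diagnosed with a
//         // deferred err_capture_bad_target if the lambda is emitted on the
//         // device side.
//     Kernel<<<1, 1>>>(L);
//   }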

void SemaCUDA::SetLambdaAttrs(CXXMethodDecl *Method) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
    return;
  Method->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
  Method->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
}

void SemaCUDA::checkTargetOverload(FunctionDecl *NewFD,
                                   const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  CUDAFunctionTarget NewTarget = IdentifyTarget(NewFD);
  for (NamedDecl *OldND : Previous) {
    FunctionDecl *OldFD = OldND->getAsFunction();
    if (!OldFD)
      continue;

    CUDAFunctionTarget OldTarget = IdentifyTarget(OldFD);
    // Don't allow HD and global functions to overload other functions with the
    // same signature. We allow overloading based on CUDA attributes so that
    // functions can have different implementations on the host and device, but
    // HD/global functions "exist" in some sense on both the host and device,
    // so should have the same implementation on both sides.
    if (NewTarget != OldTarget &&
        !SemaRef.IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
                            /* ConsiderCudaAttrs = */ false)) {
      if ((NewTarget == CUDAFunctionTarget::HostDevice &&
           !(getLangOpts().OffloadImplicitHostDeviceTemplates &&
             isImplicitHostDeviceFunction(NewFD) &&
             OldTarget == CUDAFunctionTarget::Device)) ||
          (OldTarget == CUDAFunctionTarget::HostDevice &&
           !(getLangOpts().OffloadImplicitHostDeviceTemplates &&
             isImplicitHostDeviceFunction(OldFD) &&
             NewTarget == CUDAFunctionTarget::Device)) ||
          (NewTarget == CUDAFunctionTarget::Global) ||
          (OldTarget == CUDAFunctionTarget::Global)) {
        Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
            << NewTarget << NewFD->getDeclName() << OldTarget << OldFD;
        Diag(OldFD->getLocation(), diag::note_previous_declaration);
        NewFD->setInvalidDecl();
        break;
      }
      if ((NewTarget == CUDAFunctionTarget::Host &&
           OldTarget == CUDAFunctionTarget::Device) ||
          (NewTarget == CUDAFunctionTarget::Device &&
           OldTarget == CUDAFunctionTarget::Host)) {
        Diag(NewFD->getLocation(), diag::warn_offload_incompatible_redeclare)
            << NewTarget << OldTarget;
        Diag(OldFD->getLocation(), diag::note_previous_declaration);
      }
    }
  }
}

template <typename AttrTy>
static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
                              const FunctionDecl &TemplateFD) {
  if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
    AttrTy *Clone = Attribute->clone(S.Context);
    Clone->setInherited(true);
    FD->addAttr(Clone);
  }
}

void SemaCUDA::inheritTargetAttrs(FunctionDecl *FD,
                                  const FunctionTemplateDecl &TD) {
  const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
  copyAttrIfPresent<CUDAGlobalAttr>(SemaRef, FD, TemplateFD);
  copyAttrIfPresent<CUDAHostAttr>(SemaRef, FD, TemplateFD);
  copyAttrIfPresent<CUDADeviceAttr>(SemaRef, FD, TemplateFD);
}

std::string SemaCUDA::getConfigureFuncName() const {
  if (getLangOpts().OffloadViaLLVM)
    return "__llvmPushCallConfiguration";

  if (getLangOpts().HIP)
    return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
                                            : "hipConfigureCall";

  // New CUDA kernel launch sequence.
  if (CudaFeatureEnabled(getASTContext().getTargetInfo().getSDKVersion(),
                         CudaFeature::CUDA_USES_NEW_LAUNCH))
    return "__cudaPushCallConfiguration";

  // Legacy CUDA kernel configuration call.
  return "cudaConfigureCall";
}

// Record any local constexpr variables that are passed one way on the host
// and another on the device.
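// Illustrative sketch (not part of the implementation), assuming hypothetical
// overloads of F:
//   __host__ void F(int X);           // host overload takes X by value
//   __device__ void F(const int &X);  // device overload takes X by reference
//   void HostFn() {
//     constexpr int N = 8;
//     auto L = [=] __device__ () { F(N); };  // N is ODR-used only by the
//                                            // device overload, so it is
//                                            // recorded as potentially
//                                            // ODR-used for the lambda.
//   }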
void SemaCUDA::recordPotentialODRUsedVariable(
    MultiExprArg Arguments, OverloadCandidateSet &Candidates) {
  sema::LambdaScopeInfo *LambdaInfo = SemaRef.getCurLambda();
  if (!LambdaInfo)
    return;

  for (unsigned I = 0; I < Arguments.size(); ++I) {
    auto *DeclRef = dyn_cast<DeclRefExpr>(Arguments[I]);
    if (!DeclRef)
      continue;
    auto *Variable = dyn_cast<VarDecl>(DeclRef->getDecl());
    if (!Variable || !Variable->isLocalVarDecl() || !Variable->isConstexpr())
      continue;

    bool HostByValue = false, HostByRef = false;
    bool DeviceByValue = false, DeviceByRef = false;

    for (OverloadCandidate &Candidate : Candidates) {
      FunctionDecl *Callee = Candidate.Function;
      if (!Callee || I >= Callee->getNumParams())
        continue;

      CUDAFunctionTarget Target = IdentifyTarget(Callee);
      if (Target == CUDAFunctionTarget::InvalidTarget ||
          Target == CUDAFunctionTarget::Global)
        continue;

      bool CoversHost = (Target == CUDAFunctionTarget::Host ||
                         Target == CUDAFunctionTarget::HostDevice);
      bool CoversDevice = (Target == CUDAFunctionTarget::Device ||
                           Target == CUDAFunctionTarget::HostDevice);

      bool IsRef = Callee->getParamDecl(I)->getType()->isReferenceType();
      HostByValue |= CoversHost && !IsRef;
      HostByRef |= CoversHost && IsRef;
      DeviceByValue |= CoversDevice && !IsRef;
      DeviceByRef |= CoversDevice && IsRef;
    }

    if ((HostByValue && DeviceByRef) || (HostByRef && DeviceByValue))
      LambdaInfo->CUDAPotentialODRUsedVars.insert(Variable);
  }
}