SemaCUDA.cpp source code [llvm_projects/clang/lib/Sema/SemaCUDA.cpp]

1	//===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	/// \file
9	/// This file implements semantic analysis for CUDA constructs.
10	///
11	//===----------------------------------------------------------------------===//
12
13	#include "clang/Sema/SemaCUDA.h"
14	#include "clang/AST/ASTContext.h"
15	#include "clang/AST/Decl.h"
16	#include "clang/AST/ExprCXX.h"
17	#include "clang/Basic/Cuda.h"
18	#include "clang/Basic/TargetInfo.h"
19	#include "clang/Lex/Preprocessor.h"
20	#include "clang/Sema/Lookup.h"
21	#include "clang/Sema/Overload.h"
22	#include "clang/Sema/ScopeInfo.h"
23	#include "clang/Sema/Sema.h"
24	#include "clang/Sema/Template.h"
25	#include "llvm/ADT/SmallVector.h"
26	#include <optional>
27	using namespace clang;
28
29	SemaCUDA::SemaCUDA(Sema &S) : SemaBase (S) {}
30
31	template <typename AttrT> static bool hasExplicitAttr(const VarDecl *D) {
32	if (!D)
33	return false;
34	if (auto *A = D->getAttr<AttrT>())
35	return !A->isImplicit();
36	return false;
37	}
38
39	void SemaCUDA::PushForceHostDevice() {
40	assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
41	ForceHostDeviceDepth++;
42	}
43
44	bool SemaCUDA::PopForceHostDevice() {
45	assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
46	if (ForceHostDeviceDepth == `0`)
47	return false;
48	ForceHostDeviceDepth--;
49	return true;
50	}
51
52	ExprResult SemaCUDA::ActOnExecConfigExpr(Scope *S, SourceLocation LLLLoc,
53	MultiExprArg ExecConfig,
54	SourceLocation GGGLoc) {
55	bool IsDeviceKernelCall = false;
56	switch (CurrentTarget()) {
57	case CUDAFunctionTarget::Global:
58	case CUDAFunctionTarget::Device:
59	IsDeviceKernelCall = true;
60	break;
61	case CUDAFunctionTarget::HostDevice:
62	if (getLangOpts().CUDAIsDevice) {
63	IsDeviceKernelCall = true;
64	if (FunctionDecl *Caller =
65	SemaRef.getCurFunctionDecl(/AllowLambda=/true);
66	Caller && isImplicitHostDeviceFunction(D: Caller)) {
67	// Under the device compilation, config call under an HD function should
68	// be treated as a device kernel call. But, for implicit HD ones (such
69	// as lambdas), need to check whether RDC is enabled or not.
70	if (!getLangOpts().GPURelocatableDeviceCode)
71	IsDeviceKernelCall = false;
72	// HIP doesn't support device-side kernel call yet. Still treat it as
73	// the host-side kernel call.
74	if (getLangOpts().HIP)
75	IsDeviceKernelCall = false;
76	}
77	}
78	break;
79	default:
80	break;
81	}
82
83	if (IsDeviceKernelCall && getLangOpts().HIP)
84	return ExprError(
85	Diag(Loc: LLLLoc, DiagID: diag::err_cuda_device_kernel_launch_not_supported));
86
87	if (IsDeviceKernelCall && !getLangOpts().GPURelocatableDeviceCode)
88	return ExprError(
89	Diag(Loc: LLLLoc, DiagID: diag::err_cuda_device_kernel_launch_require_rdc));
90
91	FunctionDecl *ConfigDecl = IsDeviceKernelCall
92	? getASTContext().getcudaLaunchDeviceDecl()
93	: getASTContext().getcudaConfigureCallDecl();
94	if (!ConfigDecl)
95	return ExprError(Diag(Loc: LLLLoc, DiagID: diag::err_undeclared_var_use)
96	<< (IsDeviceKernelCall ? getLaunchDeviceFuncName()
97	: getConfigureFuncName()));
98	// Additional check on the launch function if it's a device kernel call.
99	if (IsDeviceKernelCall) {
100	auto *GetParamBuf = getASTContext().getcudaGetParameterBufferDecl();
101	if (!GetParamBuf)
102	return ExprError(Diag(Loc: LLLLoc, DiagID: diag::err_undeclared_var_use)
103	<< getGetParameterBufferFuncName());
104	}
105
106	QualType ConfigQTy = ConfigDecl->getType();
107
108	DeclRefExpr ConfigDR = new* (getASTContext()) DeclRefExpr (
109	getASTContext(), ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
110	SemaRef.MarkFunctionReferenced(Loc: LLLLoc, Func: ConfigDecl);
111
112	if (IsDeviceKernelCall) {
113	SmallVector<Expr *> Args;
114	// Use a null pointer as the kernel function, which may not be resolvable
115	// here. For example, resolving that kernel function may need additional
116	// kernel arguments.
117	llvm::APInt Zero(SemaRef.Context.getTypeSize(T: SemaRef.Context.IntTy), `0`);
118	Args.push_back(Elt: IntegerLiteral::Create(C: SemaRef.Context, V: Zero,
119	type: SemaRef.Context.IntTy, l: LLLLoc));
120	// Use a null pointer as the placeholder of the parameter buffer, which
121	// should be replaced with the actual allocation later, in the codegen.
122	Args.push_back(Elt: IntegerLiteral::Create(C: SemaRef.Context, V: Zero,
123	type: SemaRef.Context.IntTy, l: LLLLoc));
124	// Add the original config arguments.
125	llvm::append_range(C&: Args, R&: ExecConfig);
126	// Add the default blockDim if it's missing.
127	if (Args.size() < `4`) {
128	llvm::APInt One(SemaRef.Context.getTypeSize(T: SemaRef.Context.IntTy), `1`);
129	Args.push_back(Elt: IntegerLiteral::Create(C: SemaRef.Context, V: One,
130	type: SemaRef.Context.IntTy, l: LLLLoc));
131	}
132	// Add the default sharedMemSize if it's missing.
133	if (Args.size() < `5`)
134	Args.push_back(Elt: IntegerLiteral::Create(C: SemaRef.Context, V: Zero,
135	type: SemaRef.Context.IntTy, l: LLLLoc));
136	// Add the default stream if it's missing.
137	if (Args.size() < `6`)
138	Args.push_back(Elt: new (SemaRef.Context) CXXNullPtrLiteralExpr (
139	SemaRef.Context.NullPtrTy, LLLLoc));
140	return SemaRef.BuildCallExpr(S, Fn: ConfigDR, LParenLoc: LLLLoc, ArgExprs: Args, RParenLoc: GGGLoc, ExecConfig: nullptr,
141	/IsExecConfig=/true);
142	}
143	return SemaRef.BuildCallExpr(S, Fn: ConfigDR, LParenLoc: LLLLoc, ArgExprs: ExecConfig, RParenLoc: GGGLoc, ExecConfig: nullptr,
144	/IsExecConfig=/true);
145	}
146
147	CUDAFunctionTarget SemaCUDA::IdentifyTarget(const ParsedAttributesView &Attrs) {
148	bool HasHostAttr = false;
149	bool HasDeviceAttr = false;
150	bool HasGlobalAttr = false;
151	bool HasInvalidTargetAttr = false;
152	for (const ParsedAttr &AL : Attrs) {
153	switch (AL.getKind()) {
154	case ParsedAttr::AT_CUDAGlobal:
155	HasGlobalAttr = true;
156	break;
157	case ParsedAttr::AT_CUDAHost:
158	HasHostAttr = true;
159	break;
160	case ParsedAttr::AT_CUDADevice:
161	HasDeviceAttr = true;
162	break;
163	case ParsedAttr::AT_CUDAInvalidTarget:
164	HasInvalidTargetAttr = true;
165	break;
166	default:
167	break;
168	}
169	}
170
171	if (HasInvalidTargetAttr)
172	return CUDAFunctionTarget::InvalidTarget;
173
174	if (HasGlobalAttr)
175	return CUDAFunctionTarget::Global;
176
177	if (HasHostAttr && HasDeviceAttr)
178	return CUDAFunctionTarget::HostDevice;
179
180	if (HasDeviceAttr)
181	return CUDAFunctionTarget::Device;
182
183	return CUDAFunctionTarget::Host;
184	}
185
186	template <typename A>
187	static bool hasAttr(const Decl D, bool* IgnoreImplicitAttr) {
188	return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
189	return isa<A>(Attribute) &&
190	!(IgnoreImplicitAttr && Attribute->isImplicit());
191	});
192	}
193
194	SemaCUDA::CUDATargetContextRAII::CUDATargetContextRAII(
195	SemaCUDA &S_, SemaCUDA::CUDATargetContextKind K, Decl *D)
196	: S(S_) {
197	SavedCtx = S.CurCUDATargetCtx;
198	assert(K == SemaCUDA::CTCK_InitGlobalVar);
199	auto *VD = dyn_cast_or_null<VarDecl>(Val: D);
200	if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) {
201	auto Target = CUDAFunctionTarget::Host;
202	if ((hasAttr<CUDADeviceAttr>(D: VD, /IgnoreImplicit=/IgnoreImplicitAttr: true) &&
203	!hasAttr<CUDAHostAttr>(D: VD, /IgnoreImplicit=/IgnoreImplicitAttr: true)) \|\|
204	hasAttr<CUDASharedAttr>(D: VD, /IgnoreImplicit=/IgnoreImplicitAttr: true) \|\|
205	hasAttr<CUDAConstantAttr>(D: VD, /IgnoreImplicit=/IgnoreImplicitAttr: true))
206	Target = CUDAFunctionTarget::Device;
207	S.CurCUDATargetCtx = {.Target: Target, .Kind: K, .D: VD};
208	}
209	}
210
211	/// IdentifyTarget - Determine the CUDA compilation target for this function
212	CUDAFunctionTarget SemaCUDA::IdentifyTarget(const FunctionDecl *D,
213	bool IgnoreImplicitHDAttr) {
214	// Code that lives outside a function gets the target from CurCUDATargetCtx.
215	if (D == nullptr)
216	return CurCUDATargetCtx.Target;
217
218	// C++ deduction guides are never codegen'ed and only participate in template
219	// argument deduction. Treat them as if they were always host+device so that
220	// CUDA/HIP target checking never rejects their use based solely on target.
221	if (isa<CXXDeductionGuideDecl>(Val: D))
222	return CUDAFunctionTarget::HostDevice;
223
224	if (D->hasAttr<CUDAInvalidTargetAttr>())
225	return CUDAFunctionTarget::InvalidTarget;
226
227	if (D->hasAttr<CUDAGlobalAttr>())
228	return CUDAFunctionTarget::Global;
229
230	if (D->isConsteval())
231	return CUDAFunctionTarget::HostDevice;
232
233	if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitAttr: IgnoreImplicitHDAttr)) {
234	if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitAttr: IgnoreImplicitHDAttr))
235	return CUDAFunctionTarget::HostDevice;
236	return CUDAFunctionTarget::Device;
237	} else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitAttr: IgnoreImplicitHDAttr)) {
238	return CUDAFunctionTarget::Host;
239	} else if ((D->isImplicit() \|\| !D->isUserProvided()) &&
240	!IgnoreImplicitHDAttr) {
241	// Some implicit declarations (like intrinsic functions) are not marked.
242	// Set the most lenient target on them for maximal flexibility.
243	return CUDAFunctionTarget::HostDevice;
244	}
245
246	return CUDAFunctionTarget::Host;
247	}
248
249	/// IdentifyTarget - Determine the CUDA compilation target for this variable.
250	SemaCUDA::CUDAVariableTarget SemaCUDA::IdentifyTarget(const VarDecl *Var) {
251	if (Var->hasAttr<HIPManagedAttr>())
252	return CVT_Unified;
253	// Only constexpr and const variabless with implicit constant attribute
254	// are emitted on both sides. Such variables are promoted to device side
255	// only if they have static constant initializers on device side.
256	if ((Var->isConstexpr() \|\| Var->getType().isConstQualified()) &&
257	Var->hasAttr<CUDAConstantAttr>() &&
258	!hasExplicitAttr<CUDAConstantAttr>(D: Var))
259	return CVT_Both;
260	if (Var->hasAttr<CUDADeviceAttr>() \|\| Var->hasAttr<CUDAConstantAttr>() \|\|
261	Var->hasAttr<CUDASharedAttr>() \|\|
262	Var->getType()->isCUDADeviceBuiltinSurfaceType() \|\|
263	Var->getType()->isCUDADeviceBuiltinTextureType())
264	return CVT_Device;
265	// Function-scope static variable without explicit device or constant
266	// attribute are emitted
267	// - on both sides in host device functions
268	// - on device side in device or global functions
269	if (auto *FD = dyn_cast<FunctionDecl>(Val: Var->getDeclContext())) {
270	switch (IdentifyTarget(D: FD)) {
271	case CUDAFunctionTarget::HostDevice:
272	return CVT_Both;
273	case CUDAFunctionTarget::Device:
274	case CUDAFunctionTarget::Global:
275	return CVT_Device;
276	default:
277	return CVT_Host;
278	}
279	}
280	return CVT_Host;
281	}
282
283	// CUDA Call preference table*
284	//
285	// F - from,
286	// T - to
287	// Ph - preference in host mode
288	// Pd - preference in device mode
289	// H - handled in (x)
290	// Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
291	//
292	// \| F \| T \| Ph \| Pd \| H \|
293	// \|----+----+-----+-----+-----+
294	// \| d \| d \| N \| N \| (c) \|
295	// \| d \| g \| -- \| -- \| (a) \|
296	// \| d \| h \| -- \| -- \| (e) \|
297	// \| d \| hd \| HD \| HD \| (b) \|
298	// \| g \| d \| N \| N \| (c) \|
299	// \| g \| g \| -- \| -- \| (a) \|
300	// \| g \| h \| -- \| -- \| (e) \|
301	// \| g \| hd \| HD \| HD \| (b) \|
302	// \| h \| d \| -- \| -- \| (e) \|
303	// \| h \| g \| N \| N \| (c) \|
304	// \| h \| h \| N \| N \| (c) \|
305	// \| h \| hd \| HD \| HD \| (b) \|
306	// \| hd \| d \| WS \| SS \| (d) \|
307	// \| hd \| g \| SS \| -- \|(d/a)\|
308	// \| hd \| h \| SS \| WS \| (d) \|
309	// \| hd \| hd \| HD \| HD \| (b) \|
310
311	SemaCUDA::CUDAFunctionPreference
312	SemaCUDA::IdentifyPreference(const FunctionDecl *Caller,
313	const FunctionDecl *Callee) {
314	assert(Callee && "Callee must be valid.");
315
316	// Treat ctor/dtor as host device function in device var initializer to allow
317	// trivial ctor/dtor without device attr to be used. Non-trivial ctor/dtor
318	// will be diagnosed by checkAllowedInitializer.
319	if (Caller == nullptr && CurCUDATargetCtx.Kind == CTCK_InitGlobalVar &&
320	CurCUDATargetCtx.Target == CUDAFunctionTarget::Device &&
321	(isa<CXXConstructorDecl>(Val: Callee) \|\| isa<CXXDestructorDecl>(Val: Callee)))
322	return CFP_HostDevice;
323
324	CUDAFunctionTarget CallerTarget = IdentifyTarget(D: Caller);
325	CUDAFunctionTarget CalleeTarget = IdentifyTarget(D: Callee);
326
327	// If one of the targets is invalid, the check always fails, no matter what
328	// the other target is.
329	if (CallerTarget == CUDAFunctionTarget::InvalidTarget \|\|
330	CalleeTarget == CUDAFunctionTarget::InvalidTarget)
331	return CFP_Never;
332
333	// (a) Call global from either global or device contexts is allowed as part
334	// of CUDA's dynamic parallelism support.
335	if (CalleeTarget == CUDAFunctionTarget::Global &&
336	(CallerTarget == CUDAFunctionTarget::Global \|\|
337	CallerTarget == CUDAFunctionTarget::Device))
338	return CFP_Native;
339
340	// (b) Calling HostDevice is OK for everyone.
341	if (CalleeTarget == CUDAFunctionTarget::HostDevice)
342	return CFP_HostDevice;
343
344	// (c) Best case scenarios
345	if (CalleeTarget == CallerTarget \|\|
346	(CallerTarget == CUDAFunctionTarget::Host &&
347	CalleeTarget == CUDAFunctionTarget::Global) \|\|
348	(CallerTarget == CUDAFunctionTarget::Global &&
349	CalleeTarget == CUDAFunctionTarget::Device))
350	return CFP_Native;
351
352	// HipStdPar mode is special, in that assessing whether a device side call to
353	// a host target is deferred to a subsequent pass, and cannot unambiguously be
354	// adjudicated in the AST, hence we optimistically allow them to pass here.
355	if (getLangOpts().HIPStdPar &&
356	(CallerTarget == CUDAFunctionTarget::Global \|\|
357	CallerTarget == CUDAFunctionTarget::Device \|\|
358	CallerTarget == CUDAFunctionTarget::HostDevice) &&
359	CalleeTarget == CUDAFunctionTarget::Host)
360	return CFP_HostDevice;
361
362	// (d) HostDevice behavior depends on compilation mode.
363	if (CallerTarget == CUDAFunctionTarget::HostDevice) {
364	// It's OK to call a compilation-mode matching function from an HD one.
365	if ((getLangOpts().CUDAIsDevice &&
366	(CalleeTarget == CUDAFunctionTarget::Device \|\|
367	CalleeTarget == CUDAFunctionTarget::Global)) \|\|
368	(!getLangOpts().CUDAIsDevice &&
369	(CalleeTarget == CUDAFunctionTarget::Host \|\|
370	CalleeTarget == CUDAFunctionTarget::Global)))
371	return CFP_SameSide;
372
373	// Calls from HD to non-mode-matching functions (i.e., to host functions
374	// when compiling in device mode or to device functions when compiling in
375	// host mode) are allowed at the sema level, but eventually rejected if
376	// they're ever codegened. TODO: Reject said calls earlier.
377	return CFP_WrongSide;
378	}
379
380	// (e) Calling across device/host boundary is not something you should do.
381	if ((CallerTarget == CUDAFunctionTarget::Host &&
382	CalleeTarget == CUDAFunctionTarget::Device) \|\|
383	(CallerTarget == CUDAFunctionTarget::Device &&
384	CalleeTarget == CUDAFunctionTarget::Host) \|\|
385	(CallerTarget == CUDAFunctionTarget::Global &&
386	CalleeTarget == CUDAFunctionTarget::Host))
387	return CFP_Never;
388
389	llvm_unreachable("All cases should've been handled by now.");
390	}
391
392	template <typename AttrT> static bool hasImplicitAttr(const FunctionDecl *D) {
393	if (!D)
394	return false;
395	if (auto *A = D->getAttr<AttrT>())
396	return A->isImplicit();
397	return D->isImplicit();
398	}
399
400	bool SemaCUDA::isImplicitHostDeviceFunction(const FunctionDecl *D) {
401	bool IsImplicitDevAttr = hasImplicitAttr<CUDADeviceAttr>(D);
402	bool IsImplicitHostAttr = hasImplicitAttr<CUDAHostAttr>(D);
403	return IsImplicitDevAttr && IsImplicitHostAttr;
404	}
405
406	void SemaCUDA::EraseUnwantedMatches(
407	const FunctionDecl *Caller,
408	SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
409	if (Matches.size() <= `1`)
410	return;
411
412	using Pair = std::pair<DeclAccessPair, FunctionDecl *>;
413
414	// Gets the CUDA function preference for a call from Caller to Match.
415	auto GetCFP = [&](const Pair &Match) {
416	return IdentifyPreference(Caller, Callee: Match.second);
417	};
418
419	// Find the best call preference among the functions in Matches.
420	CUDAFunctionPreference BestCFP =
421	GetCFP (llvm::max_element(Range&: Matches, C: [&](const* Pair &M1, const Pair &M2) {
422	return GetCFP (M1) < GetCFP (M2);
423	}));
424
425	// Erase all functions with lower priority.
426	llvm::erase_if(C&: Matches,
427	P: [&](const Pair &Match) { return GetCFP (Match) < BestCFP; });
428	}
429
430	/// When an implicitly-declared special member has to invoke more than one
431	/// base/field special member, conflicts may occur in the targets of these
432	/// members. For example, if one base's member __host__ and another's is
433	/// __device__, it's a conflict.
434	/// This function figures out if the given targets \param Target1 and
435	/// \param Target2 conflict, and if they do not it fills in
436	/// \param ResolvedTarget with a target that resolves for both calls.
437	/// \return true if there's a conflict, false otherwise.
438	static bool
439	resolveCalleeCUDATargetConflict(CUDAFunctionTarget Target1,
440	CUDAFunctionTarget Target2,
441	CUDAFunctionTarget *ResolvedTarget) {
442	// Only free functions and static member functions may be global.
443	assert(Target1 != CUDAFunctionTarget::Global);
444	assert(Target2 != CUDAFunctionTarget::Global);
445
446	if (Target1 == CUDAFunctionTarget::HostDevice) {
447	*ResolvedTarget = Target2;
448	} else if (Target2 == CUDAFunctionTarget::HostDevice) {
449	*ResolvedTarget = Target1;
450	} else if (Target1 != Target2) {
451	return true;
452	} else {
453	*ResolvedTarget = Target1;
454	}
455
456	return false;
457	}
458
459	bool SemaCUDA::inferTargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
460	CXXSpecialMemberKind CSM,
461	CXXMethodDecl *MemberDecl,
462	bool ConstRHS,
463	bool Diagnose) {
464	// If MemberDecl is virtual destructor of an explicit template class
465	// instantiation, it must be emitted, therefore it needs to be inferred
466	// conservatively by ignoring implicit host/device attrs of member and parent
467	// dtors called by it. Also, it needs to be checed by deferred diag visitor.
468	bool IsExpVDtor = false;
469	if (isa<CXXDestructorDecl>(Val: MemberDecl) && MemberDecl->isVirtual()) {
470	if (auto *Spec = dyn_cast<ClassTemplateSpecializationDecl>(Val: ClassDecl)) {
471	TemplateSpecializationKind TSK = Spec->getTemplateSpecializationKind();
472	IsExpVDtor = TSK == TSK_ExplicitInstantiationDeclaration \|\|
473	TSK == TSK_ExplicitInstantiationDefinition;
474	}
475	}
476	if (IsExpVDtor)
477	SemaRef.DeclsToCheckForDeferredDiags.insert(X: MemberDecl);
478
479	// If the defaulted special member is defined lexically outside of its
480	// owning class, or the special member already has explicit device or host
481	// attributes, do not infer.
482	bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent();
483	bool HasH = MemberDecl->hasAttr<CUDAHostAttr>();
484	bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>();
485	bool HasExplicitAttr =
486	(HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) \|\|
487	(HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit());
488	if (!InClass \|\| HasExplicitAttr)
489	return false;
490
491	std::optional<CUDAFunctionTarget> InferredTarget;
492
493	// We're going to invoke special member lookup; mark that these special
494	// members are called from this one, and not from its caller.
495	Sema::ContextRAII MethodContext(SemaRef, MemberDecl);
496
497	// Look for special members in base classes that should be invoked from here.
498	// Infer the target of this member base on the ones it should call.
499	// Skip direct and indirect virtual bases for abstract classes, except for
500	// destructors — the complete destructor variant destroys virtual bases
501	// regardless of whether the class is abstract.
502	llvm::SmallVector<const CXXBaseSpecifier *, `16`> Bases;
503	for (const auto &B : ClassDecl->bases()) {
504	if (!B.isVirtual()) {
505	Bases.push_back(Elt: &B);
506	}
507	}
508
509	if (!ClassDecl->isAbstract() \|\| CSM == CXXSpecialMemberKind::Destructor)
510	llvm::append_range(C&: Bases, R: llvm::make_pointer_range(Range: ClassDecl->vbases()));
511
512	for (const auto *B : Bases) {
513	auto *BaseClassDecl = B->getType()->getAsCXXRecordDecl();
514	if (!BaseClassDecl)
515	continue;
516
517	Sema::SpecialMemberOverloadResult SMOR =
518	SemaRef.LookupSpecialMember(D: BaseClassDecl, SM: CSM,
519	/ ConstArg / ConstRHS,
520	/ VolatileArg / false,
521	/ RValueThis / false,
522	/ ConstThis / false,
523	/ VolatileThis / false);
524
525	if (!SMOR.getMethod())
526	continue;
527
528	CUDAFunctionTarget BaseMethodTarget =
529	IdentifyTarget(D: SMOR.getMethod(), IgnoreImplicitHDAttr: IsExpVDtor);
530
531	if (!InferredTarget) {
532	InferredTarget = BaseMethodTarget;
533	} else {
534	bool ResolutionError = resolveCalleeCUDATargetConflict(
535	Target1: InferredTarget, Target2: BaseMethodTarget, ResolvedTarget: &InferredTarget);
536	if (ResolutionError) {
537	if (Diagnose) {
538	Diag(Loc: ClassDecl->getLocation(),
539	DiagID: diag::note_implicit_member_target_infer_collision)
540	<< (unsigned)CSM << *InferredTarget << BaseMethodTarget;
541	}
542	MemberDecl->addAttr(
543	A: CUDAInvalidTargetAttr::CreateImplicit(Ctx&: getASTContext()));
544	return true;
545	}
546	}
547	}
548
549	// Same as for bases, but now for special members of fields.
550	for (const auto *F : ClassDecl->fields()) {
551	if (F->isInvalidDecl()) {
552	continue;
553	}
554
555	auto *FieldRecDecl =
556	getASTContext().getBaseElementType(QT: F->getType())->getAsCXXRecordDecl();
557	if (!FieldRecDecl)
558	continue;
559
560	Sema::SpecialMemberOverloadResult SMOR =
561	SemaRef.LookupSpecialMember(D: FieldRecDecl, SM: CSM,
562	/ ConstArg / ConstRHS && !F->isMutable(),
563	/ VolatileArg / false,
564	/ RValueThis / false,
565	/ ConstThis / false,
566	/ VolatileThis / false);
567
568	if (!SMOR.getMethod())
569	continue;
570
571	CUDAFunctionTarget FieldMethodTarget =
572	IdentifyTarget(D: SMOR.getMethod(), IgnoreImplicitHDAttr: IsExpVDtor);
573
574	if (!InferredTarget) {
575	InferredTarget = FieldMethodTarget;
576	} else {
577	bool ResolutionError = resolveCalleeCUDATargetConflict(
578	Target1: InferredTarget, Target2: FieldMethodTarget, ResolvedTarget: &InferredTarget);
579	if (ResolutionError) {
580	if (Diagnose) {
581	Diag(Loc: ClassDecl->getLocation(),
582	DiagID: diag::note_implicit_member_target_infer_collision)
583	<< (unsigned)CSM << *InferredTarget << FieldMethodTarget;
584	}
585	MemberDecl->addAttr(
586	A: CUDAInvalidTargetAttr::CreateImplicit(Ctx&: getASTContext()));
587	return true;
588	}
589	}
590	}
591
592	// If no target was inferred, mark this member as __host__ __device__;
593	// it's the least restrictive option that can be invoked from any target.
594	bool NeedsH = true, NeedsD = true;
595	if (InferredTarget) {
596	if (*InferredTarget == CUDAFunctionTarget::Device)
597	NeedsH = false;
598	else if (*InferredTarget == CUDAFunctionTarget::Host)
599	NeedsD = false;
600	}
601
602	// We either setting attributes first time, or the inferred ones must match
603	// previously set ones.
604	if (NeedsD && !HasD)
605	MemberDecl->addAttr(A: CUDADeviceAttr::CreateImplicit(Ctx&: getASTContext()));
606	if (NeedsH && !HasH)
607	MemberDecl->addAttr(A: CUDAHostAttr::CreateImplicit(Ctx&: getASTContext()));
608
609	return false;
610	}
611
612	bool SemaCUDA::isEmptyConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
613	if (!CD->isDefined() && CD->isTemplateInstantiation())
614	SemaRef.InstantiateFunctionDefinition(PointOfInstantiation: Loc, Function: CD->getFirstDecl());
615
616	// (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
617	// empty at a point in the translation unit, if it is either a
618	// trivial constructor
619	if (CD->isTrivial())
620	return true;
621
622	// ... or it satisfies all of the following conditions:
623	// The constructor function has been defined.
624	// The constructor function has no parameters,
625	// and the function body is an empty compound statement.
626	if (!(CD->hasTrivialBody() && CD->getNumParams() == `0`))
627	return false;
628
629	// Its class has no virtual functions and no virtual base classes.
630	if (CD->getParent()->isDynamicClass())
631	return false;
632
633	// Union ctor does not call ctors of its data members.
634	if (CD->getParent()->isUnion())
635	return true;
636
637	// The only form of initializer allowed is an empty constructor.
638	// This will recursively check all base classes and member initializers
639	if (!llvm::all_of(Range: CD->inits(), P: [&](const CXXCtorInitializer *CI) {
640	if (const CXXConstructExpr *CE =
641	dyn_cast<CXXConstructExpr>(Val: CI->getInit()))
642	return isEmptyConstructor(Loc, CD: CE->getConstructor());
643	return false;
644	}))
645	return false;
646
647	return true;
648	}
649
650	bool SemaCUDA::isEmptyDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
651	// No destructor -> no problem.
652	if (!DD)
653	return true;
654
655	if (!DD->isDefined() && DD->isTemplateInstantiation())
656	SemaRef.InstantiateFunctionDefinition(PointOfInstantiation: Loc, Function: DD->getFirstDecl());
657
658	// (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
659	// empty at a point in the translation unit, if it is either a
660	// trivial constructor
661	if (DD->isTrivial())
662	return true;
663
664	// ... or it satisfies all of the following conditions:
665	// The destructor function has been defined.
666	// and the function body is an empty compound statement.
667	if (!DD->hasTrivialBody())
668	return false;
669
670	const CXXRecordDecl *ClassDecl = DD->getParent();
671
672	// Its class has no virtual functions and no virtual base classes.
673	if (ClassDecl->isDynamicClass())
674	return false;
675
676	// Union does not have base class and union dtor does not call dtors of its
677	// data members.
678	if (DD->getParent()->isUnion())
679	return true;
680
681	// Only empty destructors are allowed. This will recursively check
682	// destructors for all base classes...
683	if (!llvm::all_of(Range: ClassDecl->bases(), P: [&](const CXXBaseSpecifier &BS) {
684	if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
685	return isEmptyDestructor(Loc, DD: RD->getDestructor());
686	return true;
687	}))
688	return false;
689
690	// ... and member fields.
691	if (!llvm::all_of(Range: ClassDecl->fields(), P: [&](const FieldDecl *Field) {
692	if (CXXRecordDecl *RD = Field->getType()
693	->getBaseElementTypeUnsafe()
694	->getAsCXXRecordDecl())
695	return isEmptyDestructor(Loc, DD: RD->getDestructor());
696	return true;
697	}))
698	return false;
699
700	return true;
701	}
702
703	namespace {
704	enum CUDAInitializerCheckKind {
705	CICK_DeviceOrConstant, // Check initializer for device/constant variable
706	CICK_Shared, // Check initializer for shared variable
707	};
708
709	bool IsDependentVar(VarDecl *VD) {
710	if (VD->getType()->isDependentType())
711	return true;
712	if (const auto *Init = VD->getInit())
713	return Init->isValueDependent();
714	return false;
715	}
716
717	// Check whether a variable has an allowed initializer for a CUDA device side
718	// variable with global storage. \p VD may be a host variable to be checked for
719	// potential promotion to device side variable.
720	//
721	// CUDA/HIP allows only empty constructors as initializers for global
722	// variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all
723	// __shared__ variables whether they are local or not (they all are implicitly
724	// static in CUDA). One exception is that CUDA allows constant initializers
725	// for __constant__ and __device__ variables.
726	bool HasAllowedCUDADeviceStaticInitializer(SemaCUDA &S, VarDecl *VD,
727	CUDAInitializerCheckKind CheckKind) {
728	assert(!VD->isInvalidDecl() && VD->hasGlobalStorage());
729	assert(!IsDependentVar(VD) && "do not check dependent var");
730	const Expr *Init = VD->getInit();
731	auto IsEmptyInit = [&](const Expr *Init) {
732	if (!Init)
733	return true;
734	if (const auto *CE = dyn_cast<CXXConstructExpr>(Val: Init)) {
735	return S.isEmptyConstructor(Loc: VD->getLocation(), CD: CE->getConstructor());
736	}
737	return false;
738	};
739	auto IsConstantInit = [&](const Expr *Init) {
740	assert(Init);
741	ASTContext::CUDAConstantEvalContextRAII EvalCtx(S.getASTContext(),
742	/NoWronSidedVars=/true);
743	return Init->isConstantInitializer(Ctx&: S.getASTContext(),
744	ForRef: VD->getType()->isReferenceType());
745	};
746	auto HasEmptyDtor = [&](VarDecl *VD) {
747	if (const auto *RD = VD->getType()->getAsCXXRecordDecl())
748	return S.isEmptyDestructor(Loc: VD->getLocation(), DD: RD->getDestructor());
749	return true;
750	};
751	if (CheckKind == CICK_Shared)
752	return IsEmptyInit (Init) && HasEmptyDtor (VD);
753	return S.getLangOpts().GPUAllowDeviceInit \|\|
754	((IsEmptyInit (Init) \|\| IsConstantInit (Init)) && HasEmptyDtor (VD));
755	}
756	} // namespace
757
758	void SemaCUDA::checkAllowedInitializer(VarDecl *VD) {
759	// Return early if VD is inside a non-instantiated template function since
760	// the implicit constructor is not defined yet.
761	if (const FunctionDecl *FD =
762	dyn_cast_or_null<FunctionDecl>(Val: VD->getDeclContext());
763	FD && FD->isDependentContext())
764	return;
765
766	bool IsSharedVar = VD->hasAttr<CUDASharedAttr>();
767	bool IsDeviceOrConstantVar =
768	!IsSharedVar &&
769	(VD->hasAttr<CUDADeviceAttr>() \|\| VD->hasAttr<CUDAConstantAttr>());
770	if ((IsSharedVar \|\| IsDeviceOrConstantVar) &&
771	VD->getType().getQualifiers().getAddressSpace() != LangAS::Default) {
772	Diag(Loc: VD->getLocation(), DiagID: diag::err_cuda_address_space_gpuvar);
773	VD->setInvalidDecl();
774	return;
775	}
776	// Do not check dependent variables since the ctor/dtor/initializer are not
777	// determined. Do it after instantiation.
778	if (VD->isInvalidDecl() \|\| !VD->hasInit() \|\| !VD->hasGlobalStorage() \|\|
779	IsDependentVar(VD))
780	return;
781	const Expr *Init = VD->getInit();
782	if (IsDeviceOrConstantVar \|\| IsSharedVar) {
783	if (HasAllowedCUDADeviceStaticInitializer(
784	S&: *this, VD, CheckKind: IsSharedVar ? CICK_Shared : CICK_DeviceOrConstant))
785	return;
786	Diag(Loc: VD->getLocation(),
787	DiagID: IsSharedVar ? diag::err_shared_var_init : diag::err_dynamic_var_init)
788	<< Init->getSourceRange();
789	VD->setInvalidDecl();
790	} else {
791	// This is a host-side global variable. Check that the initializer is
792	// callable from the host side.
793	const FunctionDecl InitFn = nullptr*;
794	if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Val: Init)) {
795	InitFn = CE->getConstructor();
796	} else if (const CallExpr *CE = dyn_cast<CallExpr>(Val: Init)) {
797	InitFn = CE->getDirectCallee();
798	}
799	if (InitFn) {
800	CUDAFunctionTarget InitFnTarget = IdentifyTarget(D: InitFn);
801	if (InitFnTarget != CUDAFunctionTarget::Host &&
802	InitFnTarget != CUDAFunctionTarget::HostDevice) {
803	Diag(Loc: VD->getLocation(), DiagID: diag::err_ref_bad_target_global_initializer)
804	<< InitFnTarget << InitFn;
805	Diag(Loc: InitFn->getLocation(), DiagID: diag::note_previous_decl) << InitFn;
806	VD->setInvalidDecl();
807	}
808	}
809	}
810	}
811
812	void SemaCUDA::RecordImplicitHostDeviceFuncUsedByDevice(
813	const FunctionDecl *Callee) {
814	FunctionDecl Caller = SemaRef.getCurFunctionDecl(/AllowLambda=/*true);
815	if (!Caller)
816	return;
817
818	if (!isImplicitHostDeviceFunction(D: Callee))
819	return;
820
821	CUDAFunctionTarget CallerTarget = IdentifyTarget(D: Caller);
822
823	// Record whether an implicit host device function is used on device side.
824	if (CallerTarget != CUDAFunctionTarget::Device &&
825	CallerTarget != CUDAFunctionTarget::Global &&
826	(CallerTarget != CUDAFunctionTarget::HostDevice \|\|
827	(isImplicitHostDeviceFunction(D: Caller) &&
828	!getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.count(V: Caller))))
829	return;
830
831	getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.insert(V: Callee);
832	}
833
834	// With -fcuda-host-device-constexpr, an unattributed constexpr function is
835	// treated as implicitly __host__ __device__, unless:
836	// it is a variadic function (device-side variadic functions are not*
837	// allowed), or
838	// a __device__ function with this signature was already declared, in which*
839	// case in which case we output an error, unless the __device__ decl is in a
840	// system header, in which case we leave the constexpr function unattributed.
841	//
842	// In addition, all function decls are treated as __host__ __device__ when
843	// ForceHostDeviceDepth > 0 (corresponding to code within a
844	// #pragma clang force_cuda_host_device_begin/end
845	// pair).
846	void SemaCUDA::maybeAddHostDeviceAttrs(FunctionDecl *NewD,
847	const LookupResult &Previous) {
848	assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
849
850	if (ForceHostDeviceDepth > `0`) {
851	if (!NewD->hasAttr<CUDAHostAttr>())
852	NewD->addAttr(A: CUDAHostAttr::CreateImplicit(Ctx&: getASTContext()));
853	if (!NewD->hasAttr<CUDADeviceAttr>())
854	NewD->addAttr(A: CUDADeviceAttr::CreateImplicit(Ctx&: getASTContext()));
855	return;
856	}
857
858	// If a template function has no host/device/global attributes,
859	// make it implicitly host device function.
860	if (getLangOpts().OffloadImplicitHostDeviceTemplates &&
861	!NewD->hasAttr<CUDAHostAttr>() && !NewD->hasAttr<CUDADeviceAttr>() &&
862	!NewD->hasAttr<CUDAGlobalAttr>() &&
863	(NewD->getDescribedFunctionTemplate() \|\|
864	NewD->isFunctionTemplateSpecialization())) {
865	NewD->addAttr(A: CUDAHostAttr::CreateImplicit(Ctx&: getASTContext()));
866	NewD->addAttr(A: CUDADeviceAttr::CreateImplicit(Ctx&: getASTContext()));
867	return;
868	}
869
870	if (!getLangOpts().CUDAHostDeviceConstexpr \|\| !NewD->isConstexpr() \|\|
871	NewD->isVariadic() \|\| NewD->hasAttr<CUDAHostAttr>() \|\|
872	NewD->hasAttr<CUDADeviceAttr>() \|\| NewD->hasAttr<CUDAGlobalAttr>())
873	return;
874
875	// Is D a __device__ function with the same signature as NewD, ignoring CUDA
876	// attributes?
877	auto IsMatchingDeviceFn = [&](NamedDecl *D) {
878	if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(Val: D))
879	D = Using->getTargetDecl();
880	FunctionDecl *OldD = D->getAsFunction();
881	return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
882	!OldD->hasAttr<CUDAHostAttr>() &&
883	!SemaRef.IsOverload(New: NewD, Old: OldD,
884	/ UseMemberUsingDeclRules = / false,
885	/ ConsiderCudaAttrs = / false);
886	};
887	auto It = llvm::find_if(Range: Previous, P: IsMatchingDeviceFn);
888	if (It != Previous.end()) {
889	// We found a __device__ function with the same name and signature as NewD
890	// (ignoring CUDA attrs). This is an error unless that function is defined
891	// in a system header, in which case we simply return without making NewD
892	// host+device.
893	NamedDecl Match = It;
894	if (!SemaRef.getSourceManager().isInSystemHeader(Loc: Match->getLocation())) {
895	Diag(Loc: NewD->getLocation(),
896	DiagID: diag::err_cuda_unattributed_constexpr_cannot_overload_device)
897	<< NewD;
898	Diag(Loc: Match->getLocation(),
899	DiagID: diag::note_cuda_conflicting_device_function_declared_here);
900	}
901	return;
902	}
903
904	NewD->addAttr(A: CUDAHostAttr::CreateImplicit(Ctx&: getASTContext()));
905	NewD->addAttr(A: CUDADeviceAttr::CreateImplicit(Ctx&: getASTContext()));
906	}
907
908	// TODO: `__constant__` memory may be a limited resource for certain targets.
909	// A safeguard may be needed at the end of compilation pipeline if
910	// `__constant__` memory usage goes beyond limit.
911	void SemaCUDA::MaybeAddConstantAttr(VarDecl *VD) {
912	// Do not promote dependent variables since the cotr/dtor/initializer are
913	// not determined. Do it after instantiation.
914	if (getLangOpts().CUDAIsDevice && !VD->hasAttr<CUDAConstantAttr>() &&
915	!VD->hasAttr<CUDASharedAttr>() &&
916	(VD->isFileVarDecl() \|\| VD->isStaticDataMember()) &&
917	!IsDependentVar(VD) &&
918	((VD->isConstexpr() \|\| VD->getType().isConstQualified()) &&
919	HasAllowedCUDADeviceStaticInitializer(S&: *this, VD,
920	CheckKind: CICK_DeviceOrConstant))) {
921	VD->addAttr(A: CUDAConstantAttr::CreateImplicit(Ctx&: getASTContext()));
922	}
923	}
924
925	SemaBase::SemaDiagnosticBuilder SemaCUDA::DiagIfDeviceCode(SourceLocation Loc,
926	unsigned DiagID) {
927	assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
928	FunctionDecl *CurFunContext =
929	SemaRef.getCurFunctionDecl(/AllowLambda=/true);
930	SemaDiagnosticBuilder::Kind DiagKind = [&] {
931	if (!CurFunContext)
932	return SemaDiagnosticBuilder::K_Nop;
933	switch (CurrentTarget()) {
934	case CUDAFunctionTarget::Global:
935	case CUDAFunctionTarget::Device:
936	return SemaDiagnosticBuilder::K_Immediate;
937	case CUDAFunctionTarget::HostDevice:
938	// An HD function counts as host code if we're compiling for host, and
939	// device code if we're compiling for device. Defer any errors in device
940	// mode until the function is known-emitted.
941	if (!getLangOpts().CUDAIsDevice)
942	return SemaDiagnosticBuilder::K_Nop;
943	if (SemaRef.IsLastErrorImmediate &&
944	getDiagnostics().getDiagnosticIDs()->isNote(DiagID))
945	return SemaDiagnosticBuilder::K_Immediate;
946	return (SemaRef.getEmissionStatus(Decl: CurFunContext) ==
947	Sema::FunctionEmissionStatus::Emitted)
948	? SemaDiagnosticBuilder::K_ImmediateWithCallStack
949	: SemaDiagnosticBuilder::K_Deferred;
950	default:
951	return SemaDiagnosticBuilder::K_Nop;
952	}
953	}();
954	return SemaDiagnosticBuilder (DiagKind, Loc, DiagID, CurFunContext, SemaRef);
955	}
956
957	Sema::SemaDiagnosticBuilder SemaCUDA::DiagIfHostCode(SourceLocation Loc,
958	unsigned DiagID) {
959	assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
960	FunctionDecl *CurFunContext =
961	SemaRef.getCurFunctionDecl(/AllowLambda=/true);
962	SemaDiagnosticBuilder::Kind DiagKind = [&] {
963	if (!CurFunContext)
964	return SemaDiagnosticBuilder::K_Nop;
965	switch (CurrentTarget()) {
966	case CUDAFunctionTarget::Host:
967	return SemaDiagnosticBuilder::K_Immediate;
968	case CUDAFunctionTarget::HostDevice:
969	// An HD function counts as host code if we're compiling for host, and
970	// device code if we're compiling for device. Defer any errors in device
971	// mode until the function is known-emitted.
972	if (getLangOpts().CUDAIsDevice)
973	return SemaDiagnosticBuilder::K_Nop;
974	if (SemaRef.IsLastErrorImmediate &&
975	getDiagnostics().getDiagnosticIDs()->isNote(DiagID))
976	return SemaDiagnosticBuilder::K_Immediate;
977	return (SemaRef.getEmissionStatus(Decl: CurFunContext) ==
978	Sema::FunctionEmissionStatus::Emitted)
979	? SemaDiagnosticBuilder::K_ImmediateWithCallStack
980	: SemaDiagnosticBuilder::K_Deferred;
981	default:
982	return SemaDiagnosticBuilder::K_Nop;
983	}
984	}();
985	return SemaDiagnosticBuilder (DiagKind, Loc, DiagID, CurFunContext, SemaRef);
986	}
987
988	bool SemaCUDA::CheckCall(SourceLocation Loc, FunctionDecl *Callee) {
989	assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
990	assert(Callee && "Callee may not be null.");
991
992	const auto &ExprEvalCtx = SemaRef.currentEvaluationContext();
993	if (ExprEvalCtx.isUnevaluated() \|\| ExprEvalCtx.isConstantEvaluated())
994	return true;
995
996	// C++ deduction guides participate in overload resolution but are not
997	// callable functions and are never codegen'ed. Treat them as always
998	// allowed for CUDA/HIP compatibility checking.
999	if (isa<CXXDeductionGuideDecl>(Val: Callee))
1000	return true;
1001
1002	// FIXME: Is bailing out early correct here? Should we instead assume that
1003	// the caller is a global initializer?
1004	FunctionDecl Caller = SemaRef.getCurFunctionDecl(/AllowLambda=/*true);
1005	if (!Caller)
1006	return true;
1007
1008	// If the caller is known-emitted, mark the callee as known-emitted.
1009	// Otherwise, mark the call in our call graph so we can traverse it later.
1010	bool CallerKnownEmitted = SemaRef.getEmissionStatus(Decl: Caller) ==
1011	Sema::FunctionEmissionStatus::Emitted;
1012	SemaDiagnosticBuilder::Kind DiagKind = [this, Caller, Callee,
1013	CallerKnownEmitted] {
1014	switch (IdentifyPreference(Caller, Callee)) {
1015	case CFP_Never:
1016	case CFP_WrongSide:
1017	assert(Caller && "Never/wrongSide calls require a non-null caller");
1018	// If we know the caller will be emitted, we know this wrong-side call
1019	// will be emitted, so it's an immediate error. Otherwise, defer the
1020	// error until we know the caller is emitted.
1021	return CallerKnownEmitted
1022	? SemaDiagnosticBuilder::K_ImmediateWithCallStack
1023	: SemaDiagnosticBuilder::K_Deferred;
1024	default:
1025	return SemaDiagnosticBuilder::K_Nop;
1026	}
1027	}();
1028
1029	if (DiagKind == SemaDiagnosticBuilder::K_Nop) {
1030	// For -fgpu-rdc, keep track of external kernels used by host functions.
1031	if (getLangOpts().CUDAIsDevice && getLangOpts().GPURelocatableDeviceCode &&
1032	Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined() &&
1033	(!Caller \|\| (!Caller->getDescribedFunctionTemplate() &&
1034	getASTContext().GetGVALinkageForFunction(FD: Caller) ==
1035	GVA_StrongExternal)))
1036	getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(X: Callee);
1037	return true;
1038	}
1039
1040	// Avoid emitting this error twice for the same location. Using a hashtable
1041	// like this is unfortunate, but because we must continue parsing as normal
1042	// after encountering a deferred error, it's otherwise very tricky for us to
1043	// ensure that we only emit this deferred error once.
1044	if (!LocsWithCUDACallDiags.insert(V: {.FD: Caller, .Loc: Loc}).second)
1045	return true;
1046
1047	SemaDiagnosticBuilder (DiagKind, Loc, diag::err_ref_bad_target, Caller,
1048	SemaRef)
1049	<< IdentifyTarget(D: Callee) << /function/ `0` << Callee
1050	<< IdentifyTarget(D: Caller);
1051	if (!Callee->getBuiltinID())
1052	SemaDiagnosticBuilder (DiagKind, Callee->getLocation(),
1053	diag::note_previous_decl, Caller, SemaRef)
1054	<< Callee;
1055	return DiagKind != SemaDiagnosticBuilder::K_Immediate &&
1056	DiagKind != SemaDiagnosticBuilder::K_ImmediateWithCallStack;
1057	}
1058
1059	// Check the wrong-sided reference capture of lambda for CUDA/HIP.
1060	// A lambda function may capture a stack variable by reference when it is
1061	// defined and uses the capture by reference when the lambda is called. When
1062	// the capture and use happen on different sides, the capture is invalid and
1063	// should be diagnosed.
1064	void SemaCUDA::CheckLambdaCapture(CXXMethodDecl *Callee,
1065	const sema::Capture &Capture) {
1066	// In host compilation we only need to check lambda functions emitted on host
1067	// side. In such lambda functions, a reference capture is invalid only
1068	// if the lambda structure is populated by a device function or kernel then
1069	// is passed to and called by a host function. However that is impossible,
1070	// since a device function or kernel can only call a device function, also a
1071	// kernel cannot pass a lambda back to a host function since we cannot
1072	// define a kernel argument type which can hold the lambda before the lambda
1073	// itself is defined.
1074	if (!getLangOpts().CUDAIsDevice)
1075	return;
1076
1077	// File-scope lambda can only do init captures for global variables, which
1078	// results in passing by value for these global variables.
1079	FunctionDecl Caller = SemaRef.getCurFunctionDecl(/AllowLambda=/*true);
1080	if (!Caller)
1081	return;
1082
1083	// In device compilation, we only need to check lambda functions which are
1084	// emitted on device side. For such lambdas, a reference capture is invalid
1085	// only if the lambda structure is populated by a host function then passed
1086	// to and called in a device function or kernel.
1087	bool CalleeIsDevice = Callee->hasAttr<CUDADeviceAttr>();
1088	bool CallerIsHost =
1089	!Caller->hasAttr<CUDAGlobalAttr>() && !Caller->hasAttr<CUDADeviceAttr>();
1090	bool ShouldCheck = CalleeIsDevice && CallerIsHost;
1091	if (!ShouldCheck \|\| !Capture.isReferenceCapture())
1092	return;
1093	auto DiagKind = SemaDiagnosticBuilder::K_Deferred;
1094	if (Capture.isVariableCapture() && !getLangOpts().HIPStdPar) {
1095	SemaDiagnosticBuilder (DiagKind, Capture.getLocation(),
1096	diag::err_capture_bad_target, Callee, SemaRef)
1097	<< Capture.getVariable();
1098	} else if (Capture.isThisCapture()) {
1099	// Capture of this pointer is allowed since this pointer may be pointing to
1100	// managed memory which is accessible on both device and host sides. It only
1101	// results in invalid memory access if this pointer points to memory not
1102	// accessible on device side.
1103	SemaDiagnosticBuilder (DiagKind, Capture.getLocation(),
1104	diag::warn_maybe_capture_bad_target_this_ptr, Callee,
1105	SemaRef);
1106	}
1107	}
1108
1109	void SemaCUDA::SetLambdaAttrs(CXXMethodDecl *Method) {
1110	assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
1111	if (Method->hasAttr<CUDAHostAttr>() \|\| Method->hasAttr<CUDADeviceAttr>())
1112	return;
1113	Method->addAttr(A: CUDADeviceAttr::CreateImplicit(Ctx&: getASTContext()));
1114	Method->addAttr(A: CUDAHostAttr::CreateImplicit(Ctx&: getASTContext()));
1115	}
1116
1117	void SemaCUDA::checkTargetOverload(FunctionDecl *NewFD,
1118	const LookupResult &Previous) {
1119	assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
1120	CUDAFunctionTarget NewTarget = IdentifyTarget(D: NewFD);
1121	for (NamedDecl *OldND : Previous) {
1122	FunctionDecl *OldFD = OldND->getAsFunction();
1123	if (!OldFD)
1124	continue;
1125
1126	CUDAFunctionTarget OldTarget = IdentifyTarget(D: OldFD);
1127	// Don't allow HD and global functions to overload other functions with the
1128	// same signature. We allow overloading based on CUDA attributes so that
1129	// functions can have different implementations on the host and device, but
1130	// HD/global functions "exist" in some sense on both the host and device, so
1131	// should have the same implementation on both sides.
1132	if (NewTarget != OldTarget &&
1133	!SemaRef.IsOverload(New: NewFD, Old: OldFD, / UseMemberUsingDeclRules = / false,
1134	/ ConsiderCudaAttrs = / false)) {
1135	if ((NewTarget == CUDAFunctionTarget::HostDevice &&
1136	!(getLangOpts().OffloadImplicitHostDeviceTemplates &&
1137	isImplicitHostDeviceFunction(D: NewFD) &&
1138	OldTarget == CUDAFunctionTarget::Device)) \|\|
1139	(OldTarget == CUDAFunctionTarget::HostDevice &&
1140	!(getLangOpts().OffloadImplicitHostDeviceTemplates &&
1141	isImplicitHostDeviceFunction(D: OldFD) &&
1142	NewTarget == CUDAFunctionTarget::Device)) \|\|
1143	(NewTarget == CUDAFunctionTarget::Global) \|\|
1144	(OldTarget == CUDAFunctionTarget::Global)) {
1145	Diag(Loc: NewFD->getLocation(), DiagID: diag::err_cuda_ovl_target)
1146	<< NewTarget << NewFD->getDeclName() << OldTarget << OldFD;
1147	Diag(Loc: OldFD->getLocation(), DiagID: diag::note_previous_declaration);
1148	NewFD->setInvalidDecl();
1149	break;
1150	}
1151	if ((NewTarget == CUDAFunctionTarget::Host &&
1152	OldTarget == CUDAFunctionTarget::Device) \|\|
1153	(NewTarget == CUDAFunctionTarget::Device &&
1154	OldTarget == CUDAFunctionTarget::Host)) {
1155	Diag(Loc: NewFD->getLocation(), DiagID: diag::warn_offload_incompatible_redeclare)
1156	<< NewTarget << OldTarget;
1157	Diag(Loc: OldFD->getLocation(), DiagID: diag::note_previous_declaration);
1158	}
1159	}
1160	}
1161	}
1162
1163	template <typename AttrTy>
1164	static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
1165	const FunctionDecl &TemplateFD) {
1166	if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
1167	AttrTy *Clone = Attribute->clone(S.Context);
1168	Clone->setInherited(true);
1169	FD->addAttr(A: Clone);
1170	}
1171	}
1172
1173	void SemaCUDA::inheritTargetAttrs(FunctionDecl *FD,
1174	const FunctionTemplateDecl &TD) {
1175	const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
1176	copyAttrIfPresent<CUDAGlobalAttr>(S&: SemaRef, FD, TemplateFD);
1177	copyAttrIfPresent<CUDAHostAttr>(S&: SemaRef, FD, TemplateFD);
1178	copyAttrIfPresent<CUDADeviceAttr>(S&: SemaRef, FD, TemplateFD);
1179	}
1180
1181	std::string SemaCUDA::getConfigureFuncName() const {
1182	if (getLangOpts().OffloadViaLLVM)
1183	return "__llvmPushCallConfiguration";
1184
1185	if (getLangOpts().HIP)
1186	return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
1187	: "hipConfigureCall";
1188
1189	// New CUDA kernel launch sequence.
1190	if (CudaFeatureEnabled(getASTContext().getTargetInfo().getSDKVersion(),
1191	CudaFeature::CUDA_USES_NEW_LAUNCH))
1192	return "__cudaPushCallConfiguration";
1193
1194	// Legacy CUDA kernel configuration call
1195	return "cudaConfigureCall";
1196	}
1197
1198	std::string SemaCUDA::getGetParameterBufferFuncName() const {
1199	return "cudaGetParameterBuffer";
1200	}
1201
1202	std::string SemaCUDA::getLaunchDeviceFuncName() const {
1203	return "cudaLaunchDevice";
1204	}
1205
1206	// Record any local constexpr variables that are passed one way on the host
1207	// and another on the device.
1208	void SemaCUDA::recordPotentialODRUsedVariable(
1209	MultiExprArg Arguments, OverloadCandidateSet &Candidates) {
1210	sema::LambdaScopeInfo *LambdaInfo = SemaRef.getCurLambda();
1211	if (!LambdaInfo)
1212	return;
1213
1214	for (unsigned I = `0`; I < Arguments.size(); ++I) {
1215	auto *DeclRef = dyn_cast<DeclRefExpr>(Val: Arguments [I]);
1216	if (!DeclRef)
1217	continue;
1218	auto *Variable = dyn_cast<VarDecl>(Val: DeclRef->getDecl());
1219	if (!Variable \|\| !Variable->isLocalVarDecl() \|\| !Variable->isConstexpr())
1220	continue;
1221
1222	bool HostByValue = false, HostByRef = false;
1223	bool DeviceByValue = false, DeviceByRef = false;
1224
1225	for (OverloadCandidate &Candidate : Candidates) {
1226	FunctionDecl *Callee = Candidate.Function;
1227	if (!Callee \|\| I >= Callee->getNumParams())
1228	continue;
1229
1230	CUDAFunctionTarget Target = IdentifyTarget(D: Callee);
1231	if (Target == CUDAFunctionTarget::InvalidTarget \|\|
1232	Target == CUDAFunctionTarget::Global)
1233	continue;
1234
1235	bool CoversHost = (Target == CUDAFunctionTarget::Host \|\|
1236	Target == CUDAFunctionTarget::HostDevice);
1237	bool CoversDevice = (Target == CUDAFunctionTarget::Device \|\|
1238	Target == CUDAFunctionTarget::HostDevice);
1239
1240	bool IsRef = Callee->getParamDecl(i: I)->getType()->isReferenceType();
1241	HostByValue \|= CoversHost && !IsRef;
1242	HostByRef \|= CoversHost && IsRef;
1243	DeviceByValue \|= CoversDevice && !IsRef;
1244	DeviceByRef \|= CoversDevice && IsRef;
1245	}
1246
1247	if ((HostByValue && DeviceByRef) \|\| (HostByRef && DeviceByValue))
1248	LambdaInfo->CUDAPotentialODRUsedVars.insert(Ptr: Variable);
1249	}
1250	}
1251

Browse the source code of llvm_projects/clang/lib/Sema/SemaCUDA.cpp