//===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements semantic analysis for CUDA constructs.
///
//===----------------------------------------------------------------------===//

#include "clang/Sema/SemaCUDA.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/ExprCXX.h"
#include "clang/Basic/Cuda.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Overload.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/SmallVector.h"
#include <optional>
using namespace clang;

SemaCUDA::SemaCUDA(Sema &S) : SemaBase(S) {}

template <typename AttrT> static bool hasExplicitAttr(const VarDecl *D) {
  if (!D)
    return false;
  if (auto *A = D->getAttr<AttrT>())
    return !A->isImplicit();
  return false;
}

void SemaCUDA::PushForceHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  ForceHostDeviceDepth++;
}

bool SemaCUDA::PopForceHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (ForceHostDeviceDepth == 0)
    return false;
  ForceHostDeviceDepth--;
  return true;
}

ExprResult SemaCUDA::ActOnExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                         MultiExprArg ExecConfig,
                                         SourceLocation GGGLoc) {
  FunctionDecl *ConfigDecl = getASTContext().getcudaConfigureCallDecl();
  if (!ConfigDecl)
    return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
                     << getConfigureFuncName());
  QualType ConfigQTy = ConfigDecl->getType();

  DeclRefExpr *ConfigDR = new (getASTContext()) DeclRefExpr(
      getASTContext(), ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
  SemaRef.MarkFunctionReferenced(LLLLoc, ConfigDecl);

  return SemaRef.BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
                               /*IsExecConfig=*/true);
}
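
// Illustrative sketch (not part of the implementation): for a kernel launch
// written as
//   kernel<<<Grid, Block, SharedMem, Stream>>>(Args...);
// the expressions between '<<<' (LLLLoc) and '>>>' (GGGLoc) arrive here as
// ExecConfig and are turned into a call to the configuration function named
// by getConfigureFuncName() (e.g. cudaConfigureCall or
// __cudaPushCallConfiguration), which BuildCallExpr attaches to the kernel
// call as its execution-config expression.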

CUDAFunctionTarget SemaCUDA::IdentifyTarget(const ParsedAttributesView &Attrs) {
  bool HasHostAttr = false;
  bool HasDeviceAttr = false;
  bool HasGlobalAttr = false;
  bool HasInvalidTargetAttr = false;
  for (const ParsedAttr &AL : Attrs) {
    switch (AL.getKind()) {
    case ParsedAttr::AT_CUDAGlobal:
      HasGlobalAttr = true;
      break;
    case ParsedAttr::AT_CUDAHost:
      HasHostAttr = true;
      break;
    case ParsedAttr::AT_CUDADevice:
      HasDeviceAttr = true;
      break;
    case ParsedAttr::AT_CUDAInvalidTarget:
      HasInvalidTargetAttr = true;
      break;
    default:
      break;
    }
  }

  if (HasInvalidTargetAttr)
    return CUDAFunctionTarget::InvalidTarget;

  if (HasGlobalAttr)
    return CUDAFunctionTarget::Global;

  if (HasHostAttr && HasDeviceAttr)
    return CUDAFunctionTarget::HostDevice;

  if (HasDeviceAttr)
    return CUDAFunctionTarget::Device;

  return CUDAFunctionTarget::Host;
}

template <typename A>
static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) {
  return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
           return isa<A>(Attribute) &&
                  !(IgnoreImplicitAttr && Attribute->isImplicit());
         });
}

SemaCUDA::CUDATargetContextRAII::CUDATargetContextRAII(
    SemaCUDA &S_, SemaCUDA::CUDATargetContextKind K, Decl *D)
    : S(S_) {
  SavedCtx = S.CurCUDATargetCtx;
  assert(K == SemaCUDA::CTCK_InitGlobalVar);
  auto *VD = dyn_cast_or_null<VarDecl>(D);
  if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) {
    auto Target = CUDAFunctionTarget::Host;
    if ((hasAttr<CUDADeviceAttr>(VD, /*IgnoreImplicit=*/true) &&
         !hasAttr<CUDAHostAttr>(VD, /*IgnoreImplicit=*/true)) ||
        hasAttr<CUDASharedAttr>(VD, /*IgnoreImplicit=*/true) ||
        hasAttr<CUDAConstantAttr>(VD, /*IgnoreImplicit=*/true))
      Target = CUDAFunctionTarget::Device;
    S.CurCUDATargetCtx = {Target, K, VD};
  }
}

/// IdentifyTarget - Determine the CUDA compilation target for this function.
CUDAFunctionTarget SemaCUDA::IdentifyTarget(const FunctionDecl *D,
                                            bool IgnoreImplicitHDAttr) {
  // Code that lives outside a function gets the target from CurCUDATargetCtx.
  if (D == nullptr)
    return CurCUDATargetCtx.Target;

  if (D->hasAttr<CUDAInvalidTargetAttr>())
    return CUDAFunctionTarget::InvalidTarget;

  if (D->hasAttr<CUDAGlobalAttr>())
    return CUDAFunctionTarget::Global;

  if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
    if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
      return CUDAFunctionTarget::HostDevice;
    return CUDAFunctionTarget::Device;
  } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
    return CUDAFunctionTarget::Host;
  } else if ((D->isImplicit() || !D->isUserProvided()) &&
             !IgnoreImplicitHDAttr) {
    // Some implicit declarations (like intrinsic functions) are not marked.
    // Set the most lenient target on them for maximal flexibility.
    return CUDAFunctionTarget::HostDevice;
  }

  return CUDAFunctionTarget::Host;
}
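
// Illustrative examples of the mapping implemented above (not part of the
// implementation):
//   __global__ void K();            // CUDAFunctionTarget::Global
//   __device__ void D();            // CUDAFunctionTarget::Device
//   __host__ __device__ void HD();  // CUDAFunctionTarget::HostDevice
//   void H();                       // CUDAFunctionTarget::Host (no attributes)
// Implicit, non-user-provided declarations without attributes are treated as
// HostDevice unless IgnoreImplicitHDAttr is set.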

/// IdentifyTarget - Determine the CUDA compilation target for this variable.
SemaCUDA::CUDAVariableTarget SemaCUDA::IdentifyTarget(const VarDecl *Var) {
  if (Var->hasAttr<HIPManagedAttr>())
    return CVT_Unified;
  // Only constexpr and const variables with implicit constant attribute
  // are emitted on both sides. Such variables are promoted to device side
  // only if they have static constant initializers on device side.
  if ((Var->isConstexpr() || Var->getType().isConstQualified()) &&
      Var->hasAttr<CUDAConstantAttr>() &&
      !hasExplicitAttr<CUDAConstantAttr>(Var))
    return CVT_Both;
  if (Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>() ||
      Var->hasAttr<CUDASharedAttr>() ||
      Var->getType()->isCUDADeviceBuiltinSurfaceType() ||
      Var->getType()->isCUDADeviceBuiltinTextureType())
    return CVT_Device;
  // Function-scope static variables without an explicit device or constant
  // attribute are emitted
  //  - on both sides in host device functions
  //  - on device side in device or global functions
  if (auto *FD = dyn_cast<FunctionDecl>(Var->getDeclContext())) {
    switch (IdentifyTarget(FD)) {
    case CUDAFunctionTarget::HostDevice:
      return CVT_Both;
    case CUDAFunctionTarget::Device:
    case CUDAFunctionTarget::Global:
      return CVT_Device;
    default:
      return CVT_Host;
    }
  }
  return CVT_Host;
}

// * CUDA Call preference table
//
// F  - from,
// T  - to
// Ph - preference in host mode
// Pd - preference in device mode
// H  - handled in (x)
// Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
//
// | F  | T  | Ph  | Pd  |  H  |
// |----+----+-----+-----+-----+
// | d  | d  | N   | N   | (c) |
// | d  | g  | --  | --  | (a) |
// | d  | h  | --  | --  | (e) |
// | d  | hd | HD  | HD  | (b) |
// | g  | d  | N   | N   | (c) |
// | g  | g  | --  | --  | (a) |
// | g  | h  | --  | --  | (e) |
// | g  | hd | HD  | HD  | (b) |
// | h  | d  | --  | --  | (e) |
// | h  | g  | N   | N   | (c) |
// | h  | h  | N   | N   | (c) |
// | h  | hd | HD  | HD  | (b) |
// | hd | d  | WS  | SS  | (d) |
// | hd | g  | SS  | --  |(d/a)|
// | hd | h  | SS  | WS  | (d) |
// | hd | hd | HD  | HD  | (b) |

SemaCUDA::CUDAFunctionPreference
SemaCUDA::IdentifyPreference(const FunctionDecl *Caller,
                             const FunctionDecl *Callee) {
  assert(Callee && "Callee must be valid.");

  // Treat ctor/dtor as host device function in device var initializer to allow
  // trivial ctor/dtor without device attr to be used. Non-trivial ctor/dtor
  // will be diagnosed by checkAllowedInitializer.
  if (Caller == nullptr && CurCUDATargetCtx.Kind == CTCK_InitGlobalVar &&
      CurCUDATargetCtx.Target == CUDAFunctionTarget::Device &&
      (isa<CXXConstructorDecl>(Callee) || isa<CXXDestructorDecl>(Callee)))
    return CFP_HostDevice;

  CUDAFunctionTarget CallerTarget = IdentifyTarget(Caller);
  CUDAFunctionTarget CalleeTarget = IdentifyTarget(Callee);

  // If one of the targets is invalid, the check always fails, no matter what
  // the other target is.
  if (CallerTarget == CUDAFunctionTarget::InvalidTarget ||
      CalleeTarget == CUDAFunctionTarget::InvalidTarget)
    return CFP_Never;

  // (a) Can't call global from some contexts until we support CUDA's
  // dynamic parallelism.
  if (CalleeTarget == CUDAFunctionTarget::Global &&
      (CallerTarget == CUDAFunctionTarget::Global ||
       CallerTarget == CUDAFunctionTarget::Device))
    return CFP_Never;

  // (b) Calling HostDevice is OK for everyone.
  if (CalleeTarget == CUDAFunctionTarget::HostDevice)
    return CFP_HostDevice;

  // (c) Best case scenarios
  if (CalleeTarget == CallerTarget ||
      (CallerTarget == CUDAFunctionTarget::Host &&
       CalleeTarget == CUDAFunctionTarget::Global) ||
      (CallerTarget == CUDAFunctionTarget::Global &&
       CalleeTarget == CUDAFunctionTarget::Device))
    return CFP_Native;

  // HipStdPar mode is special: deciding whether a device-side call to a host
  // target is valid is deferred to a subsequent pass and cannot be
  // unambiguously adjudicated in the AST, hence we optimistically allow such
  // calls to pass here.
  if (getLangOpts().HIPStdPar &&
      (CallerTarget == CUDAFunctionTarget::Global ||
       CallerTarget == CUDAFunctionTarget::Device ||
       CallerTarget == CUDAFunctionTarget::HostDevice) &&
      CalleeTarget == CUDAFunctionTarget::Host)
    return CFP_HostDevice;

  // (d) HostDevice behavior depends on compilation mode.
  if (CallerTarget == CUDAFunctionTarget::HostDevice) {
    // It's OK to call a compilation-mode matching function from an HD one.
    if ((getLangOpts().CUDAIsDevice &&
         CalleeTarget == CUDAFunctionTarget::Device) ||
        (!getLangOpts().CUDAIsDevice &&
         (CalleeTarget == CUDAFunctionTarget::Host ||
          CalleeTarget == CUDAFunctionTarget::Global)))
      return CFP_SameSide;

    // Calls from HD to non-mode-matching functions (i.e., to host functions
    // when compiling in device mode or to device functions when compiling in
    // host mode) are allowed at the sema level, but eventually rejected if
    // they're ever codegened. TODO: Reject said calls earlier.
    return CFP_WrongSide;
  }

  // (e) Calling across device/host boundary is not something you should do.
  if ((CallerTarget == CUDAFunctionTarget::Host &&
       CalleeTarget == CUDAFunctionTarget::Device) ||
      (CallerTarget == CUDAFunctionTarget::Device &&
       CalleeTarget == CUDAFunctionTarget::Host) ||
      (CallerTarget == CUDAFunctionTarget::Global &&
       CalleeTarget == CUDAFunctionTarget::Host))
    return CFP_Never;

  llvm_unreachable("All cases should've been handled by now.");
}
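
// Illustrative sketch of how the preference table above plays out in user
// code (not part of the implementation):
//   __device__ void D();
//   void H();                        // host
//   __host__ __device__ void HD() {
//     D();  // CFP_SameSide when compiling for device, CFP_WrongSide for host
//     H();  // CFP_SameSide when compiling for host, CFP_WrongSide for device
//   }
// Wrong-side calls are accepted during Sema and only rejected later if the
// calling function is actually emitted for that side; see case (d) above and
// SemaCUDA::CheckCall below.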

template <typename AttrT> static bool hasImplicitAttr(const FunctionDecl *D) {
  if (!D)
    return false;
  if (auto *A = D->getAttr<AttrT>())
    return A->isImplicit();
  return D->isImplicit();
}

bool SemaCUDA::isImplicitHostDeviceFunction(const FunctionDecl *D) {
  bool IsImplicitDevAttr = hasImplicitAttr<CUDADeviceAttr>(D);
  bool IsImplicitHostAttr = hasImplicitAttr<CUDAHostAttr>(D);
  return IsImplicitDevAttr && IsImplicitHostAttr;
}

void SemaCUDA::EraseUnwantedMatches(
    const FunctionDecl *Caller,
    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
  if (Matches.size() <= 1)
    return;

  using Pair = std::pair<DeclAccessPair, FunctionDecl *>;

  // Gets the CUDA function preference for a call from Caller to Match.
  auto GetCFP = [&](const Pair &Match) {
    return IdentifyPreference(Caller, Match.second);
  };

  // Find the best call preference among the functions in Matches.
  CUDAFunctionPreference BestCFP =
      GetCFP(*llvm::max_element(Matches, [&](const Pair &M1, const Pair &M2) {
        return GetCFP(M1) < GetCFP(M2);
      }));

  // Erase all functions with lower priority.
  llvm::erase_if(Matches,
                 [&](const Pair &Match) { return GetCFP(Match) < BestCFP; });
}

/// When an implicitly-declared special member has to invoke more than one
/// base/field special member, conflicts may occur in the targets of these
/// members. For example, if one base's member is __host__ and another's is
/// __device__, it's a conflict.
/// This function figures out if the given targets \param Target1 and
/// \param Target2 conflict, and if they do not it fills in
/// \param ResolvedTarget with a target that resolves for both calls.
/// \return true if there's a conflict, false otherwise.
static bool
resolveCalleeCUDATargetConflict(CUDAFunctionTarget Target1,
                                CUDAFunctionTarget Target2,
                                CUDAFunctionTarget *ResolvedTarget) {
  // Only free functions and static member functions may be global.
  assert(Target1 != CUDAFunctionTarget::Global);
  assert(Target2 != CUDAFunctionTarget::Global);

  if (Target1 == CUDAFunctionTarget::HostDevice) {
    *ResolvedTarget = Target2;
  } else if (Target2 == CUDAFunctionTarget::HostDevice) {
    *ResolvedTarget = Target1;
  } else if (Target1 != Target2) {
    return true;
  } else {
    *ResolvedTarget = Target1;
  }

  return false;
}
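
// Illustrative example (not part of the implementation): when the implicit
// default constructor of
//   struct A { __host__ A(); };
//   struct B { __device__ B(); };
//   struct C : A, B {};
// is inferred by inferTargetForImplicitSpecialMember below, the Host and
// Device base-constructor targets conflict, so C's constructor is given
// CUDAInvalidTargetAttr. A HostDevice callee never conflicts; it simply
// adopts the other call's target.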

bool SemaCUDA::inferTargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                                   CXXSpecialMemberKind CSM,
                                                   CXXMethodDecl *MemberDecl,
                                                   bool ConstRHS,
                                                   bool Diagnose) {
  // If MemberDecl is a virtual destructor of an explicit template class
  // instantiation, it must be emitted, therefore it needs to be inferred
  // conservatively by ignoring implicit host/device attrs of member and parent
  // dtors called by it. Also, it needs to be checked by the deferred diag
  // visitor.
  bool IsExpVDtor = false;
  if (isa<CXXDestructorDecl>(MemberDecl) && MemberDecl->isVirtual()) {
    if (auto *Spec = dyn_cast<ClassTemplateSpecializationDecl>(ClassDecl)) {
      TemplateSpecializationKind TSK = Spec->getTemplateSpecializationKind();
      IsExpVDtor = TSK == TSK_ExplicitInstantiationDeclaration ||
                   TSK == TSK_ExplicitInstantiationDefinition;
    }
  }
  if (IsExpVDtor)
    SemaRef.DeclsToCheckForDeferredDiags.insert(MemberDecl);

  // If the defaulted special member is defined lexically outside of its
  // owning class, or the special member already has explicit device or host
  // attributes, do not infer.
  bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent();
  bool HasH = MemberDecl->hasAttr<CUDAHostAttr>();
  bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>();
  bool HasExplicitAttr =
      (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) ||
      (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit());
  if (!InClass || HasExplicitAttr)
    return false;

  std::optional<CUDAFunctionTarget> InferredTarget;

  // We're going to invoke special member lookup; mark that these special
  // members are called from this one, and not from its caller.
  Sema::ContextRAII MethodContext(SemaRef, MemberDecl);

  // Look for special members in base classes that should be invoked from here.
  // Infer the target of this member based on the ones it should call.
  // Skip direct and indirect virtual bases for abstract classes.
  llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
  for (const auto &B : ClassDecl->bases()) {
    if (!B.isVirtual()) {
      Bases.push_back(&B);
    }
  }

  if (!ClassDecl->isAbstract()) {
    llvm::append_range(Bases, llvm::make_pointer_range(ClassDecl->vbases()));
  }

  for (const auto *B : Bases) {
    const RecordType *BaseType = B->getType()->getAs<RecordType>();
    if (!BaseType) {
      continue;
    }

    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        SemaRef.LookupSpecialMember(BaseClassDecl, CSM,
                                    /* ConstArg */ ConstRHS,
                                    /* VolatileArg */ false,
                                    /* RValueThis */ false,
                                    /* ConstThis */ false,
                                    /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget BaseMethodTarget =
        IdentifyTarget(SMOR.getMethod(), IsExpVDtor);

    if (!InferredTarget) {
      InferredTarget = BaseMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          *InferredTarget, BaseMethodTarget, &*InferredTarget);
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << *InferredTarget << BaseMethodTarget;
        }
        MemberDecl->addAttr(
            CUDAInvalidTargetAttr::CreateImplicit(getASTContext()));
        return true;
      }
    }
  }

  // Same as for bases, but now for special members of fields.
  for (const auto *F : ClassDecl->fields()) {
    if (F->isInvalidDecl()) {
      continue;
    }

    const RecordType *FieldType =
        getASTContext().getBaseElementType(F->getType())->getAs<RecordType>();
    if (!FieldType) {
      continue;
    }

    CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        SemaRef.LookupSpecialMember(FieldRecDecl, CSM,
                                    /* ConstArg */ ConstRHS && !F->isMutable(),
                                    /* VolatileArg */ false,
                                    /* RValueThis */ false,
                                    /* ConstThis */ false,
                                    /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget FieldMethodTarget =
        IdentifyTarget(SMOR.getMethod(), IsExpVDtor);

    if (!InferredTarget) {
      InferredTarget = FieldMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          *InferredTarget, FieldMethodTarget, &*InferredTarget);
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << *InferredTarget << FieldMethodTarget;
        }
        MemberDecl->addAttr(
            CUDAInvalidTargetAttr::CreateImplicit(getASTContext()));
        return true;
      }
    }
  }

  // If no target was inferred, mark this member as __host__ __device__;
  // it's the least restrictive option that can be invoked from any target.
  bool NeedsH = true, NeedsD = true;
  if (InferredTarget) {
    if (*InferredTarget == CUDAFunctionTarget::Device)
      NeedsH = false;
    else if (*InferredTarget == CUDAFunctionTarget::Host)
      NeedsD = false;
  }

  // We are either setting the attributes for the first time, or the inferred
  // ones must match the previously set ones.
  if (NeedsD && !HasD)
    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
  if (NeedsH && !HasH)
    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));

  return false;
}

bool SemaCUDA::isEmptyConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
  if (!CD->isDefined() && CD->isTemplateInstantiation())
    SemaRef.InstantiateFunctionDefinition(Loc, CD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial constructor
  if (CD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The constructor function has been defined.
  // The constructor function has no parameters,
  // and the function body is an empty compound statement.
  if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
    return false;

  // Its class has no virtual functions and no virtual base classes.
  if (CD->getParent()->isDynamicClass())
    return false;

  // Union ctor does not call ctors of its data members.
  if (CD->getParent()->isUnion())
    return true;

  // The only form of initializer allowed is an empty constructor.
  // This will recursively check all base classes and member initializers.
  if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
        if (const CXXConstructExpr *CE =
                dyn_cast<CXXConstructExpr>(CI->getInit()))
          return isEmptyConstructor(Loc, CE->getConstructor());
        return false;
      }))
    return false;

  return true;
}
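
// Illustrative examples (not part of the implementation) of the "empty
// constructor" notion above:
//   struct Empty { Empty() {} };                       // empty: no params,
//                                                      // empty body, no inits
//   struct NonEmpty { int X; NonEmpty() : X(42) {} };  // not empty: X(42) is
//                                                      // not an empty ctor
// Only such "empty" constructors (and, below, destructors) are permitted for
// device-side global variables; see HasAllowedCUDADeviceStaticInitializer.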

bool SemaCUDA::isEmptyDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
  // No destructor -> no problem.
  if (!DD)
    return true;

  if (!DD->isDefined() && DD->isTemplateInstantiation())
    SemaRef.InstantiateFunctionDefinition(Loc, DD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial destructor
  if (DD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The destructor function has been defined.
  // and the function body is an empty compound statement.
  if (!DD->hasTrivialBody())
    return false;

  const CXXRecordDecl *ClassDecl = DD->getParent();

  // Its class has no virtual functions and no virtual base classes.
  if (ClassDecl->isDynamicClass())
    return false;

  // Union does not have base class and union dtor does not call dtors of its
  // data members.
  if (DD->getParent()->isUnion())
    return true;

  // Only empty destructors are allowed. This will recursively check
  // destructors for all base classes...
  if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
        if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
          return isEmptyDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  // ... and member fields.
  if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
        if (CXXRecordDecl *RD = Field->getType()
                                    ->getBaseElementTypeUnsafe()
                                    ->getAsCXXRecordDecl())
          return isEmptyDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  return true;
}

namespace {
enum CUDAInitializerCheckKind {
  CICK_DeviceOrConstant, // Check initializer for device/constant variable
  CICK_Shared,           // Check initializer for shared variable
};

bool IsDependentVar(VarDecl *VD) {
  if (VD->getType()->isDependentType())
    return true;
  if (const auto *Init = VD->getInit())
    return Init->isValueDependent();
  return false;
}

// Check whether a variable has an allowed initializer for a CUDA device side
// variable with global storage. \p VD may be a host variable to be checked for
// potential promotion to device side variable.
//
// CUDA/HIP allows only empty constructors as initializers for global
// variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all
// __shared__ variables whether they are local or not (they all are implicitly
// static in CUDA). One exception is that CUDA allows constant initializers
// for __constant__ and __device__ variables.
bool HasAllowedCUDADeviceStaticInitializer(SemaCUDA &S, VarDecl *VD,
                                           CUDAInitializerCheckKind CheckKind) {
  assert(!VD->isInvalidDecl() && VD->hasGlobalStorage());
  assert(!IsDependentVar(VD) && "do not check dependent var");
  const Expr *Init = VD->getInit();
  auto IsEmptyInit = [&](const Expr *Init) {
    if (!Init)
      return true;
    if (const auto *CE = dyn_cast<CXXConstructExpr>(Init)) {
      return S.isEmptyConstructor(VD->getLocation(), CE->getConstructor());
    }
    return false;
  };
  auto IsConstantInit = [&](const Expr *Init) {
    assert(Init);
    ASTContext::CUDAConstantEvalContextRAII EvalCtx(S.getASTContext(),
                                                    /*NoWrongSidedVars=*/true);
    return Init->isConstantInitializer(S.getASTContext(),
                                       VD->getType()->isReferenceType());
  };
  auto HasEmptyDtor = [&](VarDecl *VD) {
    if (const auto *RD = VD->getType()->getAsCXXRecordDecl())
      return S.isEmptyDestructor(VD->getLocation(), RD->getDestructor());
    return true;
  };
  if (CheckKind == CICK_Shared)
    return IsEmptyInit(Init) && HasEmptyDtor(VD);
  return S.getLangOpts().GPUAllowDeviceInit ||
         ((IsEmptyInit(Init) || IsConstantInit(Init)) && HasEmptyDtor(VD));
}
} // namespace
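
// Illustrative examples (not part of the implementation) for
// HasAllowedCUDADeviceStaticInitializer above:
//   __device__ int A;          // OK: no initializer
//   __device__ int B = 42;     // OK: constant initializer is allowed for
//                              //     __device__/__constant__ variables
//   __device__ int C = Host(); // rejected (dynamic initialization) unless
//                              //     LangOpts.GPUAllowDeviceInit is set;
//                              //     Host() is a hypothetical host function
//   __shared__ int D = 1;      // rejected: CICK_Shared requires an empty
//                              //     initializer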

void SemaCUDA::checkAllowedInitializer(VarDecl *VD) {
  // Return early if VD is inside a non-instantiated template function since
  // the implicit constructor is not defined yet.
  if (const FunctionDecl *FD =
          dyn_cast_or_null<FunctionDecl>(VD->getDeclContext());
      FD && FD->isDependentContext())
    return;

  bool IsSharedVar = VD->hasAttr<CUDASharedAttr>();
  bool IsDeviceOrConstantVar =
      !IsSharedVar &&
      (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>());
  if ((IsSharedVar || IsDeviceOrConstantVar) &&
      VD->getType().getQualifiers().getAddressSpace() != LangAS::Default) {
    Diag(VD->getLocation(), diag::err_cuda_address_space_gpuvar);
    VD->setInvalidDecl();
    return;
  }
  // Do not check dependent variables since the ctor/dtor/initializer are not
  // determined. Do it after instantiation.
  if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage() ||
      IsDependentVar(VD))
    return;
  const Expr *Init = VD->getInit();
  if (IsDeviceOrConstantVar || IsSharedVar) {
    if (HasAllowedCUDADeviceStaticInitializer(
            *this, VD, IsSharedVar ? CICK_Shared : CICK_DeviceOrConstant))
      return;
    Diag(VD->getLocation(),
         IsSharedVar ? diag::err_shared_var_init : diag::err_dynamic_var_init)
        << Init->getSourceRange();
    VD->setInvalidDecl();
  } else {
    // This is a host-side global variable. Check that the initializer is
    // callable from the host side.
    const FunctionDecl *InitFn = nullptr;
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) {
      InitFn = CE->getConstructor();
    } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) {
      InitFn = CE->getDirectCallee();
    }
    if (InitFn) {
      CUDAFunctionTarget InitFnTarget = IdentifyTarget(InitFn);
      if (InitFnTarget != CUDAFunctionTarget::Host &&
          InitFnTarget != CUDAFunctionTarget::HostDevice) {
        Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer)
            << InitFnTarget << InitFn;
        Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn;
        VD->setInvalidDecl();
      }
    }
  }
}

void SemaCUDA::RecordImplicitHostDeviceFuncUsedByDevice(
    const FunctionDecl *Callee) {
  FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  if (!Caller)
    return;

  if (!isImplicitHostDeviceFunction(Callee))
    return;

  CUDAFunctionTarget CallerTarget = IdentifyTarget(Caller);

  // Record whether an implicit host device function is used on device side.
  if (CallerTarget != CUDAFunctionTarget::Device &&
      CallerTarget != CUDAFunctionTarget::Global &&
      (CallerTarget != CUDAFunctionTarget::HostDevice ||
       (isImplicitHostDeviceFunction(Caller) &&
        !getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.count(Caller))))
    return;

  getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.insert(Callee);
}

// With -fcuda-host-device-constexpr, an unattributed constexpr function is
// treated as implicitly __host__ __device__, unless:
// * it is a variadic function (device-side variadic functions are not
//   allowed), or
// * a __device__ function with this signature was already declared, in which
//   case we output an error, unless the __device__ decl is in a system header,
//   in which case we leave the constexpr function unattributed.
//
// In addition, all function decls are treated as __host__ __device__ when
// ForceHostDeviceDepth > 0 (corresponding to code within a
//   #pragma clang force_cuda_host_device_begin/end
// pair).
void SemaCUDA::maybeAddHostDeviceAttrs(FunctionDecl *NewD,
                                       const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");

  if (ForceHostDeviceDepth > 0) {
    if (!NewD->hasAttr<CUDAHostAttr>())
      NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
    if (!NewD->hasAttr<CUDADeviceAttr>())
      NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
    return;
  }

  // If a template function has no host/device/global attributes,
  // make it an implicit host device function.
  if (getLangOpts().OffloadImplicitHostDeviceTemplates &&
      !NewD->hasAttr<CUDAHostAttr>() && !NewD->hasAttr<CUDADeviceAttr>() &&
      !NewD->hasAttr<CUDAGlobalAttr>() &&
      (NewD->getDescribedFunctionTemplate() ||
       NewD->isFunctionTemplateSpecialization())) {
    NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
    NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
    return;
  }

  if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
      NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
      NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
    return;

  // Is D a __device__ function with the same signature as NewD, ignoring CUDA
  // attributes?
  auto IsMatchingDeviceFn = [&](NamedDecl *D) {
    if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
      D = Using->getTargetDecl();
    FunctionDecl *OldD = D->getAsFunction();
    return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
           !OldD->hasAttr<CUDAHostAttr>() &&
           !SemaRef.IsOverload(NewD, OldD,
                               /* UseMemberUsingDeclRules = */ false,
                               /* ConsiderCudaAttrs = */ false);
  };
  auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
  if (It != Previous.end()) {
    // We found a __device__ function with the same name and signature as NewD
    // (ignoring CUDA attrs). This is an error unless that function is defined
    // in a system header, in which case we simply return without making NewD
    // host+device.
    NamedDecl *Match = *It;
    if (!SemaRef.getSourceManager().isInSystemHeader(Match->getLocation())) {
      Diag(NewD->getLocation(),
           diag::err_cuda_unattributed_constexpr_cannot_overload_device)
          << NewD;
      Diag(Match->getLocation(),
           diag::note_cuda_conflicting_device_function_declared_here);
    }
    return;
  }

  NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
  NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
}
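
// Illustrative sketch (not part of the implementation) of the rule above,
// with -fcuda-host-device-constexpr (the default):
//   __device__ int Twice(int X);                  // pre-existing __device__
//                                                 // declaration
//   constexpr int Twice(int X) { return 2 * X; }  // not promoted: a same-
//       // signature __device__ overload exists, so this is diagnosed unless
//       // that declaration came from a system header.
// An unattributed constexpr function with no such conflicting __device__
// declaration simply receives implicit __host__ __device__ attributes, and
// inside a force_cuda_host_device pragma region (see the comment above) every
// function declaration receives them regardless of constexpr-ness.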

// TODO: `__constant__` memory may be a limited resource for certain targets.
// A safeguard may be needed at the end of the compilation pipeline if
// `__constant__` memory usage goes beyond the limit.
void SemaCUDA::MaybeAddConstantAttr(VarDecl *VD) {
  // Do not promote dependent variables since the ctor/dtor/initializer are
  // not determined. Do it after instantiation.
  if (getLangOpts().CUDAIsDevice && !VD->hasAttr<CUDAConstantAttr>() &&
      !VD->hasAttr<CUDASharedAttr>() &&
      (VD->isFileVarDecl() || VD->isStaticDataMember()) &&
      !IsDependentVar(VD) &&
      ((VD->isConstexpr() || VD->getType().isConstQualified()) &&
       HasAllowedCUDADeviceStaticInitializer(*this, VD,
                                             CICK_DeviceOrConstant))) {
    VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext()));
  }
}

SemaBase::SemaDiagnosticBuilder SemaCUDA::DiagIfDeviceCode(SourceLocation Loc,
                                                           unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  FunctionDecl *CurFunContext =
      SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  SemaDiagnosticBuilder::Kind DiagKind = [&] {
    if (!CurFunContext)
      return SemaDiagnosticBuilder::K_Nop;
    switch (CurrentTarget()) {
    case CUDAFunctionTarget::Global:
    case CUDAFunctionTarget::Device:
      return SemaDiagnosticBuilder::K_Immediate;
    case CUDAFunctionTarget::HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in device
      // mode until the function is known-emitted.
      if (!getLangOpts().CUDAIsDevice)
        return SemaDiagnosticBuilder::K_Nop;
      if (SemaRef.IsLastErrorImmediate &&
          getDiagnostics().getDiagnosticIDs()->isNote(DiagID))
        return SemaDiagnosticBuilder::K_Immediate;
      return (SemaRef.getEmissionStatus(CurFunContext) ==
              Sema::FunctionEmissionStatus::Emitted)
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();
  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, SemaRef);
}

Sema::SemaDiagnosticBuilder SemaCUDA::DiagIfHostCode(SourceLocation Loc,
                                                     unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  FunctionDecl *CurFunContext =
      SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  SemaDiagnosticBuilder::Kind DiagKind = [&] {
    if (!CurFunContext)
      return SemaDiagnosticBuilder::K_Nop;
    switch (CurrentTarget()) {
    case CUDAFunctionTarget::Host:
      return SemaDiagnosticBuilder::K_Immediate;
    case CUDAFunctionTarget::HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in device
      // mode until the function is known-emitted.
      if (getLangOpts().CUDAIsDevice)
        return SemaDiagnosticBuilder::K_Nop;
      if (SemaRef.IsLastErrorImmediate &&
          getDiagnostics().getDiagnosticIDs()->isNote(DiagID))
        return SemaDiagnosticBuilder::K_Immediate;
      return (SemaRef.getEmissionStatus(CurFunContext) ==
              Sema::FunctionEmissionStatus::Emitted)
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();
  return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, SemaRef);
}

bool SemaCUDA::CheckCall(SourceLocation Loc, FunctionDecl *Callee) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  assert(Callee && "Callee may not be null.");

  const auto &ExprEvalCtx = SemaRef.currentEvaluationContext();
  if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
    return true;

  // FIXME: Is bailing out early correct here? Should we instead assume that
  // the caller is a global initializer?
  FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  if (!Caller)
    return true;

  // If the caller is known-emitted, mark the callee as known-emitted.
  // Otherwise, mark the call in our call graph so we can traverse it later.
  bool CallerKnownEmitted = SemaRef.getEmissionStatus(Caller) ==
                            Sema::FunctionEmissionStatus::Emitted;
  SemaDiagnosticBuilder::Kind DiagKind = [this, Caller, Callee,
                                          CallerKnownEmitted] {
    switch (IdentifyPreference(Caller, Callee)) {
    case CFP_Never:
    case CFP_WrongSide:
      assert(Caller && "Never/wrongSide calls require a non-null caller");
      // If we know the caller will be emitted, we know this wrong-side call
      // will be emitted, so it's an immediate error. Otherwise, defer the
      // error until we know the caller is emitted.
      return CallerKnownEmitted
                 ? SemaDiagnosticBuilder::K_ImmediateWithCallStack
                 : SemaDiagnosticBuilder::K_Deferred;
    default:
      return SemaDiagnosticBuilder::K_Nop;
    }
  }();

  if (DiagKind == SemaDiagnosticBuilder::K_Nop) {
    // For -fgpu-rdc, keep track of external kernels used by host functions.
    if (getLangOpts().CUDAIsDevice && getLangOpts().GPURelocatableDeviceCode &&
        Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined() &&
        (!Caller || (!Caller->getDescribedFunctionTemplate() &&
                     getASTContext().GetGVALinkageForFunction(Caller) ==
                         GVA_StrongExternal)))
      getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Callee);
    return true;
  }

  // Avoid emitting this error twice for the same location. Using a hashtable
  // like this is unfortunate, but because we must continue parsing as normal
  // after encountering a deferred error, it's otherwise very tricky for us to
  // ensure that we only emit this deferred error once.
  if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
    return true;

  SemaDiagnosticBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller,
                        SemaRef)
      << IdentifyTarget(Callee) << /*function*/ 0 << Callee
      << IdentifyTarget(Caller);
  if (!Callee->getBuiltinID())
    SemaDiagnosticBuilder(DiagKind, Callee->getLocation(),
                          diag::note_previous_decl, Caller, SemaRef)
        << Callee;
  return DiagKind != SemaDiagnosticBuilder::K_Immediate &&
         DiagKind != SemaDiagnosticBuilder::K_ImmediateWithCallStack;
}

// Check the wrong-sided reference capture of lambda for CUDA/HIP.
// A lambda function may capture a stack variable by reference when it is
// defined and use the capture by reference when the lambda is called. When
// the capture and use happen on different sides, the capture is invalid and
// should be diagnosed.
void SemaCUDA::CheckLambdaCapture(CXXMethodDecl *Callee,
                                  const sema::Capture &Capture) {
  // In host compilation we only need to check lambda functions emitted on the
  // host side. In such lambda functions, a reference capture is invalid only
  // if the lambda structure is populated by a device function or kernel and
  // then passed to and called by a host function. However, that is impossible:
  // a device function or kernel can only call a device function, and a kernel
  // cannot pass a lambda back to a host function, since we cannot define a
  // kernel argument type which can hold the lambda before the lambda itself
  // is defined.
  if (!getLangOpts().CUDAIsDevice)
    return;

  // File-scope lambdas can only do init captures for global variables, which
  // results in passing by value for these global variables.
  FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
  if (!Caller)
    return;

  // In device compilation, we only need to check lambda functions which are
  // emitted on the device side. For such lambdas, a reference capture is
  // invalid only if the lambda structure is populated by a host function and
  // then passed to and called in a device function or kernel.
  bool CalleeIsDevice = Callee->hasAttr<CUDADeviceAttr>();
  bool CallerIsHost =
      !Caller->hasAttr<CUDAGlobalAttr>() && !Caller->hasAttr<CUDADeviceAttr>();
  bool ShouldCheck = CalleeIsDevice && CallerIsHost;
  if (!ShouldCheck || !Capture.isReferenceCapture())
    return;
  auto DiagKind = SemaDiagnosticBuilder::K_Deferred;
  if (Capture.isVariableCapture() && !getLangOpts().HIPStdPar) {
    SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
                          diag::err_capture_bad_target, Callee, SemaRef)
        << Capture.getVariable();
  } else if (Capture.isThisCapture()) {
    // Capture of the this pointer is allowed, since the this pointer may be
    // pointing to managed memory which is accessible on both the device and
    // host sides. It only results in an invalid memory access if the this
    // pointer points to memory not accessible on the device side.
    SemaDiagnosticBuilder(DiagKind, Capture.getLocation(),
                          diag::warn_maybe_capture_bad_target_this_ptr, Callee,
                          SemaRef);
  }
}
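
// Illustrative sketch (not part of the implementation) of the capture being
// diagnosed above, assuming a hypothetical Kernel that invokes its functor
// argument on the device:
//   void HostFn() {
//     int N = 42;
//     auto L = [&N] __device__ () { return N; };  // reference capture of a
//         // host stack variable in a __device__ lambda: diagnosed with a
//         // deferred err_capture_bad_target if the lambda is emitted on the
//         // device side.
//     Kernel<<<1, 1>>>(L);
//   }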

void SemaCUDA::SetLambdaAttrs(CXXMethodDecl *Method) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
    return;
  Method->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
  Method->addAttr(CUDAHostAttr::CreateImplicit(getASTContext()));
}

void SemaCUDA::checkTargetOverload(FunctionDecl *NewFD,
                                   const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  CUDAFunctionTarget NewTarget = IdentifyTarget(NewFD);
  for (NamedDecl *OldND : Previous) {
    FunctionDecl *OldFD = OldND->getAsFunction();
    if (!OldFD)
      continue;

    CUDAFunctionTarget OldTarget = IdentifyTarget(OldFD);
    // Don't allow HD and global functions to overload other functions with the
    // same signature. We allow overloading based on CUDA attributes so that
    // functions can have different implementations on the host and device, but
    // HD/global functions "exist" in some sense on both the host and device,
    // so should have the same implementation on both sides.
    if (NewTarget != OldTarget &&
        !SemaRef.IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
                            /* ConsiderCudaAttrs = */ false)) {
      if ((NewTarget == CUDAFunctionTarget::HostDevice &&
           !(getLangOpts().OffloadImplicitHostDeviceTemplates &&
             isImplicitHostDeviceFunction(NewFD) &&
             OldTarget == CUDAFunctionTarget::Device)) ||
          (OldTarget == CUDAFunctionTarget::HostDevice &&
           !(getLangOpts().OffloadImplicitHostDeviceTemplates &&
             isImplicitHostDeviceFunction(OldFD) &&
             NewTarget == CUDAFunctionTarget::Device)) ||
          (NewTarget == CUDAFunctionTarget::Global) ||
          (OldTarget == CUDAFunctionTarget::Global)) {
        Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
            << NewTarget << NewFD->getDeclName() << OldTarget << OldFD;
        Diag(OldFD->getLocation(), diag::note_previous_declaration);
        NewFD->setInvalidDecl();
        break;
      }
      if ((NewTarget == CUDAFunctionTarget::Host &&
           OldTarget == CUDAFunctionTarget::Device) ||
          (NewTarget == CUDAFunctionTarget::Device &&
           OldTarget == CUDAFunctionTarget::Host)) {
        Diag(NewFD->getLocation(), diag::warn_offload_incompatible_redeclare)
            << NewTarget << OldTarget;
        Diag(OldFD->getLocation(), diag::note_previous_declaration);
      }
    }
  }
}

template <typename AttrTy>
static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
                              const FunctionDecl &TemplateFD) {
  if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
    AttrTy *Clone = Attribute->clone(S.Context);
    Clone->setInherited(true);
    FD->addAttr(Clone);
  }
}

void SemaCUDA::inheritTargetAttrs(FunctionDecl *FD,
                                  const FunctionTemplateDecl &TD) {
  const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
  copyAttrIfPresent<CUDAGlobalAttr>(SemaRef, FD, TemplateFD);
  copyAttrIfPresent<CUDAHostAttr>(SemaRef, FD, TemplateFD);
  copyAttrIfPresent<CUDADeviceAttr>(SemaRef, FD, TemplateFD);
}

std::string SemaCUDA::getConfigureFuncName() const {
  if (getLangOpts().OffloadViaLLVM)
    return "__llvmPushCallConfiguration";

  if (getLangOpts().HIP)
    return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration"
                                            : "hipConfigureCall";

  // New CUDA kernel launch sequence.
  if (CudaFeatureEnabled(getASTContext().getTargetInfo().getSDKVersion(),
                         CudaFeature::CUDA_USES_NEW_LAUNCH))
    return "__cudaPushCallConfiguration";

  // Legacy CUDA kernel configuration call.
  return "cudaConfigureCall";
}

// Record any local constexpr variables that are passed one way on the host
// and another on the device.
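// Illustrative sketch (not part of the implementation), assuming hypothetical
// overloads of F:
//   __host__ void F(int X);           // host overload takes X by value
//   __device__ void F(const int &X);  // device overload takes X by reference
//   void HostFn() {
//     constexpr int N = 8;
//     auto L = [=] __device__ () { F(N); };  // N is ODR-used only by the
//                                            // device overload, so it is
//                                            // recorded as potentially
//                                            // ODR-used for the lambda.
//   }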
void SemaCUDA::recordPotentialODRUsedVariable(
    MultiExprArg Arguments, OverloadCandidateSet &Candidates) {
  sema::LambdaScopeInfo *LambdaInfo = SemaRef.getCurLambda();
  if (!LambdaInfo)
    return;

  for (unsigned I = 0; I < Arguments.size(); ++I) {
    auto *DeclRef = dyn_cast<DeclRefExpr>(Arguments[I]);
    if (!DeclRef)
      continue;
    auto *Variable = dyn_cast<VarDecl>(DeclRef->getDecl());
    if (!Variable || !Variable->isLocalVarDecl() || !Variable->isConstexpr())
      continue;

    bool HostByValue = false, HostByRef = false;
    bool DeviceByValue = false, DeviceByRef = false;

    for (OverloadCandidate &Candidate : Candidates) {
      FunctionDecl *Callee = Candidate.Function;
      if (!Callee || I >= Callee->getNumParams())
        continue;

      CUDAFunctionTarget Target = IdentifyTarget(Callee);
      if (Target == CUDAFunctionTarget::InvalidTarget ||
          Target == CUDAFunctionTarget::Global)
        continue;

      bool CoversHost = (Target == CUDAFunctionTarget::Host ||
                         Target == CUDAFunctionTarget::HostDevice);
      bool CoversDevice = (Target == CUDAFunctionTarget::Device ||
                           Target == CUDAFunctionTarget::HostDevice);

      bool IsRef = Callee->getParamDecl(I)->getType()->isReferenceType();
      HostByValue |= CoversHost && !IsRef;
      HostByRef |= CoversHost && IsRef;
      DeviceByValue |= CoversDevice && !IsRef;
      DeviceByRef |= CoversDevice && IsRef;
    }

    if ((HostByValue && DeviceByRef) || (HostByRef && DeviceByValue))
      LambdaInfo->CUDAPotentialODRUsedVars.insert(Variable);
  }
}