1//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This provides a class for OpenMP runtime code generation.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGOpenMPRuntime.h"
14#include "ABIInfoImpl.h"
15#include "CGCXXABI.h"
16#include "CGCleanup.h"
17#include "CGDebugInfo.h"
18#include "CGRecordLayout.h"
19#include "CodeGenFunction.h"
20#include "TargetInfo.h"
21#include "clang/AST/APValue.h"
22#include "clang/AST/Attr.h"
23#include "clang/AST/Decl.h"
24#include "clang/AST/OpenMPClause.h"
25#include "clang/AST/StmtOpenMP.h"
26#include "clang/AST/StmtVisitor.h"
27#include "clang/Basic/DiagnosticFrontend.h"
28#include "clang/Basic/OpenMPKinds.h"
29#include "clang/Basic/SourceManager.h"
30#include "clang/CodeGen/ConstantInitBuilder.h"
31#include "llvm/ADT/ArrayRef.h"
32#include "llvm/ADT/SmallSet.h"
33#include "llvm/ADT/SmallVector.h"
34#include "llvm/ADT/StringExtras.h"
35#include "llvm/Bitcode/BitcodeReader.h"
36#include "llvm/IR/Constants.h"
37#include "llvm/IR/DerivedTypes.h"
38#include "llvm/IR/GlobalValue.h"
39#include "llvm/IR/InstrTypes.h"
40#include "llvm/IR/Value.h"
41#include "llvm/Support/AtomicOrdering.h"
42#include "llvm/Support/raw_ostream.h"
43#include <cassert>
44#include <cstdint>
45#include <numeric>
46#include <optional>
47
48using namespace clang;
49using namespace CodeGen;
50using namespace llvm::omp;
51
52namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Build region info backed by a captured statement (outlined regions).
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Build region info with no captured statement (inlined regions).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit the next scheduling point of an untied task; default is a no-op,
  /// overridden by task regions (and forwarded by inlined regions).
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Kind of region (outlined parallel/task, inlined, or target).
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// OpenMP directive this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// True if the region may be exited via an OpenMP 'cancel' construct.
  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
114
/// API for captured statement code generation in OpenMP constructs.
/// Handles regions outlined for 'parallel'-style directives; requires a
/// thread-id variable and records the name of the outlined helper.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Val: Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name of the outlined helper function (lifetime owned by the caller).
  StringRef HelperName;
};
147
/// API for captured statement code generation in OpenMP constructs.
/// Handles regions outlined for 'task' directives, including the switch-based
/// resumption machinery needed for untied tasks.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the scheduling points of an untied task.
  /// The task body is split at each scheduling point; a part-id variable
  /// records which part to resume, and a switch at function entry dispatches
  /// to the recorded part.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    /// Variable holding the current part id of the untied task.
    const VarDecl *PartIDVar;
    /// Code emitted at each scheduling point (e.g. re-enqueue of the task).
    const RegionCodeGenTy UntiedCodeGen;
    /// Entry switch dispatching on the part id; one case per emitted part.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        // Load the part id and build the dispatch switch. The default
        // destination falls through to the function return; case 0 jumps to
        // the first part of the task body.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            Ptr: CGF.GetAddrOfLocalVar(VD: PartIDVar),
            PtrTy: PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(lvalue: PartIdLVal, Loc: PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(name: ".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(V: Res, Dest: DoneBB);
        CGF.EmitBlock(BB: DoneBB);
        CGF.EmitBranchThroughCleanup(Dest: CGF.ReturnBlock);
        CGF.EmitBlock(BB: CGF.createBasicBlock(name: ".untied.jmp."));
        UntiedSwitch->addCase(OnVal: CGF.Builder.getInt32(C: 0),
                              Dest: CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one scheduling point: store the next part id, run the untied
    /// codegen hook, branch out to the return block, and register the
    /// continuation block as a new case of the entry switch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            Ptr: CGF.GetAddrOfLocalVar(VD: PartIDVar),
            PtrTy: PartIDVar->getType()->castAs<PointerType>());
        // The next case number doubles as the part id to resume at.
        CGF.EmitStoreOfScalar(value: CGF.Builder.getInt32(C: UntiedSwitch->getNumCases()),
                              lvalue: PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(Name: ".untied.next.");
        CGF.EmitBranch(Block: CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(BB: CGF.createBasicBlock(name: ".untied.jmp."));
        UntiedSwitch->addCase(OnVal: CGF.Builder.getInt32(C: UntiedSwitch->getNumCases()),
                              Dest: CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(Dest: CurPoint);
        CGF.EmitBlock(BB: CurPoint.getBlock());
      }
    }
    /// Number of parts the task body was split into (== switch cases).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Forward scheduling-point emission to the untied-task action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Val: Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
236
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries are forwarded to the enclosing (outer) region
/// info, if any, since an inlined region has no capture record of its own.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(Val: OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  /// Forward untied-task scheduling points to the outer region, if any.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// Captured-statement info that was active before this region was entered;
  /// restored by InlinedOpenMPRegionRAII on exit.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Val: Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to a CGOpenMPRegionInfo, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
319
320/// API for captured statement code generation in OpenMP target
321/// constructs. For this captures, implicit parameters are used instead of the
322/// captured fields. The name of the target region has to be unique in a given
323/// application so it is provided by the client, because only the client has
324/// the information to generate that.
325class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
326public:
327 CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
328 const RegionCodeGenTy &CodeGen, StringRef HelperName)
329 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
330 /*HasCancel=*/false),
331 HelperName(HelperName) {}
332
333 /// This is unused for target regions because each starts executing
334 /// with a single thread.
335 const VarDecl *getThreadIDVariable() const override { return nullptr; }
336
337 /// Get the name of the capture helper.
338 StringRef getHelperName() const override { return HelperName; }
339
340 static bool classof(const CGCapturedStmtInfo *Info) {
341 return CGOpenMPRegionInfo::classof(Info) &&
342 cast<CGOpenMPRegionInfo>(Val: Info)->getRegionKind() == TargetRegion;
343 }
344
345private:
346 StringRef HelperName;
347};
348
/// Placeholder codegen callback for regions that only wrap expression
/// emission (see CGOpenMPInnerExprInfo); it must never actually be invoked.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region. Privatizes any captured globals so they are emitted as locals
/// while the expression is generated; no statement body is ever emitted.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      // Only variable captures are privatized; 'this' and VLA captures are
      // skipped.
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already direct; only non-local (global)
      // variables need privatizing.
      if (VD->isLocalVarDeclOrParm())
        continue;

      // Build a synthetic reference to the variable so its current address
      // can be emitted and registered as the private copy.
      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(LocalVD: VD, Addr: CGF.EmitLValue(E: &DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
410
/// RAII for emitting code of OpenMP constructs.
/// Installs a CGOpenMPInlinedRegionInfo as the active CapturedStmtInfo for
/// the duration of the scope, optionally hiding lambda/block capture state,
/// and restores everything in the destructor.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Saved lambda capture map, swapped out when NoInheritance is set.
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  /// Saved lambda 'this' capture, cleared when NoInheritance is set.
  FieldDecl *LambdaThisCaptureField = nullptr;
  /// Saved block info, cleared when NoInheritance is set.
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  /// Whether lambda/block capture state is hidden inside the region.
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      // Stash lambda/block capture state so the inlined region does not see
      // captures from the enclosing lambda or block.
      std::swap(a&: CGF.LambdaCaptureFields, b&: LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(Val: CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      // Restore the lambda/block capture state stashed in the constructor.
      std::swap(a&: CGF.LambdaCaptureFields, b&: LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
453
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerator values are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as the generic
  /// implicit-barrier flag, mirroring kmp.h).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
482
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
523
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h). The monotonic/nonmonotonic modifiers are
/// high bits that are combined with one of the base schedule values.
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
555
556/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
557/// region.
558class CleanupTy final : public EHScopeStack::Cleanup {
559 PrePostActionTy *Action;
560
561public:
562 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
563 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
564 if (!CGF.HaveInsertPoint())
565 return;
566 Action->Exit(CGF);
567 }
568};
569
570} // anonymous namespace
571
572void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
573 CodeGenFunction::RunCleanupsScope Scope(CGF);
574 if (PrePostAction) {
575 CGF.EHStack.pushCleanup<CleanupTy>(Kind: NormalAndEHCleanup, A: PrePostAction);
576 Callback(CodeGen, CGF, *PrePostAction);
577 } else {
578 PrePostActionTy Action;
579 Callback(CodeGen, CGF, Action);
580 }
581}
582
583/// Check if the combiner is a call to UDR combiner and if it is so return the
584/// UDR decl used for reduction.
585static const OMPDeclareReductionDecl *
586getReductionInit(const Expr *ReductionOp) {
587 if (const auto *CE = dyn_cast<CallExpr>(Val: ReductionOp))
588 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(Val: CE->getCallee()))
589 if (const auto *DRE =
590 dyn_cast<DeclRefExpr>(Val: OVE->getSourceExpr()->IgnoreImpCasts()))
591 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(Val: DRE->getDecl()))
592 return DRD;
593 return nullptr;
594}
595
/// Initialize a reduction private copy.
/// If the declare-reduction decl has an explicit initializer, emit the UDR
/// initializer call with \p Private bound as the priv parameter and
/// \p Original as the orig parameter. Otherwise zero-initialize \p Private
/// from a private global holding the null constant for \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // Reduction.second is the UDR initializer function (see
    // getUserDefinedReduction).
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(D: DRD);
    const auto *CE = cast<CallExpr>(Val: InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(Val: CE->getCallee());
    // InitOp is a call of the form init(&priv, &orig); strip the
    // address-of operators to reach the underlying variable references.
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(Val: cast<UnaryOperator>(Val: LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(Val: cast<UnaryOperator>(Val: RHS)->getSubExpr());
    // Map the call's variables onto the actual private/original storage,
    // then emit the call with the opaque callee bound to the initializer fn.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(LocalVD: cast<VarDecl>(Val: LHSDRE->getDecl()), Addr: Private);
    PrivateScope.addPrivate(LocalVD: cast<VarDecl>(Val: RHSDRE->getDecl()), Addr: Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(V: Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(E: InitOp);
  } else {
    // No UDR initializer: default-initialize from a null-constant global.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(T: Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName(Parts: {"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(V: GV, T: Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(T: Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(V: LV, Loc: DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(C: CGF.EmitLoadOfComplex(src: LV, loc: DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      // Aggregates are copied through an lvalue mapping and we are done.
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(E: &OVE, Location: Private, Quals: Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    // Scalar/complex values are stored through a prvalue mapping.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(E: &OVE, Location: Private, Quals: Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
649
/// Emit initialization of arrays of complex types.
/// Emits an element-by-element initialization loop over \p DestAddr; when a
/// declare-reduction decl is given, each destination element is initialized
/// from the corresponding element of \p SrcAddr.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(arrayType: ArrayTy, baseType&: ElementTy, addr&: DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(ElemTy: DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(Ty: DestAddr.getElementType(), Ptr: DestBegin, IdxList: NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock(name: "omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock(name: "omp.arrayinit.done");
  // Zero-length arrays skip the loop entirely.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHS: DestBegin, RHS: DestEnd, Name: "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(Cond: IsEmpty, True: DoneBB, False: BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BB: BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(T: ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(Ty: SrcBegin->getType(), NumReservedValues: 2,
                                          Name: "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(V: SrcBegin, BB: EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      Ty: DestBegin->getType(), NumReservedValues: 2, Name: "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(V: DestBegin, BB: EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, InitOp: Init, Private: DestElementCurrent,
                                       Original: SrcElementCurrent, Ty: ElementTy);
    } else
      CGF.EmitAnyExprToMem(E: Init, Location: DestElementCurrent, Quals: ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE: the IR value name says "dest" but this advances the *source*
    // pointer; the name merely mirrors the destination GEP below.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        Ty: SrcAddr.getElementType(), Ptr: SrcElementPHI, /*Idx0=*/1,
        Name: "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(V: SrcElementNext, BB: CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      Ty: DestAddr.getElementType(), Ptr: DestElementPHI, /*Idx0=*/1,
      Name: "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHS: DestElementNext, RHS: DestEnd, Name: "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Cond: Done, True: DoneBB, False: BodyBB);
  DestElementPHI->addIncoming(V: DestElementNext, BB: CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(BB: DoneBB, /*IsFinished=*/true);
}
738
739LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
740 return CGF.EmitOMPSharedLValue(E);
741}
742
743LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
744 const Expr *E) {
745 if (const auto *OASE = dyn_cast<ArraySectionExpr>(Val: E))
746 return CGF.EmitArraySectionExpr(E: OASE, /*IsLowerBound=*/false);
747 return LValue();
748}
749
750void ReductionCodeGen::emitAggregateInitialization(
751 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
752 const OMPDeclareReductionDecl *DRD) {
753 // Emit VarDecl with copy init for arrays.
754 // Get the address of the original variable captured in current
755 // captured region.
756 const auto *PrivateVD =
757 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: ClausesData[N].Private)->getDecl());
758 bool EmitDeclareReductionInit =
759 DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
760 EmitOMPAggregateInit(CGF, DestAddr: PrivateAddr, Type: PrivateVD->getType(),
761 EmitDeclareReductionInit,
762 Init: EmitDeclareReductionInit ? ClausesData[N].ReductionOp
763 : PrivateVD->getInit(),
764 DRD, SrcAddr: SharedAddr);
765}
766
767ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
768 ArrayRef<const Expr *> Origs,
769 ArrayRef<const Expr *> Privates,
770 ArrayRef<const Expr *> ReductionOps) {
771 ClausesData.reserve(N: Shareds.size());
772 SharedAddresses.reserve(N: Shareds.size());
773 Sizes.reserve(N: Shareds.size());
774 BaseDecls.reserve(N: Shareds.size());
775 const auto *IOrig = Origs.begin();
776 const auto *IPriv = Privates.begin();
777 const auto *IRed = ReductionOps.begin();
778 for (const Expr *Ref : Shareds) {
779 ClausesData.emplace_back(Args&: Ref, Args: *IOrig, Args: *IPriv, Args: *IRed);
780 std::advance(i&: IOrig, n: 1);
781 std::advance(i&: IPriv, n: 1);
782 std::advance(i&: IRed, n: 1);
783 }
784}
785
786void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
787 assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
788 "Number of generated lvalues must be exactly N.");
789 LValue First = emitSharedLValue(CGF, E: ClausesData[N].Shared);
790 LValue Second = emitSharedLValueUB(CGF, E: ClausesData[N].Shared);
791 SharedAddresses.emplace_back(Args&: First, Args&: Second);
792 if (ClausesData[N].Shared == ClausesData[N].Ref) {
793 OrigAddresses.emplace_back(Args&: First, Args&: Second);
794 } else {
795 LValue First = emitSharedLValue(CGF, E: ClausesData[N].Ref);
796 LValue Second = emitSharedLValueUB(CGF, E: ClausesData[N].Ref);
797 OrigAddresses.emplace_back(Args&: First, Args&: Second);
798 }
799}
800
/// Compute and record the size (in chars and in elements) of reduction item
/// \p N. For non-VLA types only the constant byte size is recorded; for
/// variably-modified types the size is computed at runtime and also bound to
/// the VLA size expression so the private type can be emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(Val: ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-size item: store byte size only; element count stays null.
    Sizes.emplace_back(
        Args: CGF.getTypeSize(Ty: OrigAddresses[N].first.getType().getNonReferenceType()),
        Args: nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(Ty: ElemType);
  if (AsArraySection) {
    // Number of elements = (UB - LB) + 1, computed via pointer difference;
    // bytes = elements * sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(ElemTy: ElemType,
                                     LHS: OrigAddresses[N].second.getPointer(CGF),
                                     RHS: OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateZExtOrTrunc(V: Size, DestTy: ElemSizeOf->getType());
    Size = CGF.Builder.CreateNUWAdd(
        LHS: Size, RHS: llvm::ConstantInt::get(Ty: Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(LHS: Size, RHS: ElemSizeOf);
  } else {
    // Whole VLA: byte size is known from the type; derive element count.
    SizeInChars =
        CGF.getTypeSize(Ty: OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(LHS: SizeInChars, RHS: ElemSizeOf);
  }
  Sizes.emplace_back(Args&: SizeInChars, Args&: Size);
  // Bind the computed element count to the VLA's size expression so that
  // emitting the variably-modified private type picks it up.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          Val: CGF.getContext().getAsVariableArrayType(T: PrivateType)->getSizeExpr()),
      RValue::get(V: Size));
  CGF.EmitVariablyModifiedType(Ty: PrivateType);
}
835
// Variant of emitAggregateType used when the element count for a variably
// modified reduction item was computed elsewhere and is passed in as \p Size.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  // Non-VLA items carry no runtime size; a non-null Size here is a caller bug.
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Map the VLA size expression to the provided element count while emitting
  // the variably-modified type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          Val: CGF.getContext().getAsVariableArrayType(T: PrivateType)->getSizeExpr()),
      RValue::get(V: Size));
  CGF.EmitVariablyModifiedType(Ty: PrivateType);
}
852
// Emits the initializer for reduction item N's private copy, choosing between
// aggregate (array) initialization, a user-defined reduction initializer, or
// the private variable's own non-trivial initializer.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(Val: cast<DeclRefExpr>(Val: ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ReductionOp: ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(T: PrivateVD->getType())) {
    // Array type: initialize element-wise; when a UDR initializer exists the
    // default-init hook still runs first.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar item governed by a user-defined reduction initializer.
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, InitOp: ClausesData[N].ReductionOp,
                                     Private: PrivateAddr, Original: SharedAddr, Ty: SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(Init: PrivateVD->getInit())) {
    // Otherwise emit the private variable's own (non-trivial) initializer
    // into the private storage.
    CGF.EmitAnyExprToMem(E: PrivateVD->getInit(), Location: PrivateAddr,
                         Quals: PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
877
878bool ReductionCodeGen::needCleanups(unsigned N) {
879 QualType PrivateType = getPrivateType(N);
880 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
881 return DTorKind != QualType::DK_none;
882}
883
884void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
885 Address PrivateAddr) {
886 QualType PrivateType = getPrivateType(N);
887 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
888 if (needCleanups(N)) {
889 PrivateAddr =
890 PrivateAddr.withElementType(ElemTy: CGF.ConvertTypeForMem(T: PrivateType));
891 CGF.pushDestroy(dtorKind: DTorKind, addr: PrivateAddr, type: PrivateType);
892 }
893}
894
// Dereferences through pointers/references starting at \p BaseLV until the
// pointee type matches \p ElTy, then returns an lvalue over the final address
// re-typed with ElTy's memory representation.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(T1: BaseTy, T2: ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(Ptr: BaseLV.getAddress(), PtrTy);
    } else {
      // Reference type: load through the reference to reach the object.
      LValue RefLVal = CGF.MakeAddrLValue(Addr: BaseLV.getAddress(), T: BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // Keep the original base info and TBAA while switching the element type.
  return CGF.MakeAddrLValue(
      Addr: BaseLV.getAddress().withElementType(ElemTy: CGF.ConvertTypeForMem(T: ElTy)),
      T: BaseLV.getType(), BaseInfo: BaseLV.getBaseInfo(),
      TBAAInfo: CGF.CGM.getTBAAInfoForSubobject(Base: BaseLV, AccessType: BaseLV.getType()));
}
913
// Rebuilds the pointer/reference chain of \p BaseTy around the adjusted
// private pointer \p Addr: one temporary per indirection level, each outer
// temporary storing the address of the next inner one. Returns the outermost
// temporary, or the original base address re-pointed when there is no
// indirection at all.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  RawAddress Tmp = RawAddress::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(T1: BaseTy, T2: ElTy)) {
    // Allocate a temporary for this level and chain it to the previous one.
    Tmp = CGF.CreateMemTemp(T: BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Val: Tmp.getPointer(), Addr: TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    // Store the adjusted pointer into the innermost temporary and hand the
    // outermost one back to the caller.
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        V: Addr, DestTy: Tmp.getElementType());
    CGF.Builder.CreateStore(Val: Addr, Addr: Tmp);
    return MostTopTmp;
  }

  // No indirection levels: reuse the original base address with the new
  // pointer value.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      V: Addr, DestTy: OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(NewPointer: Addr, IsKnownNonNull: NotKnownNonNull);
}
942
943static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
944 const VarDecl *OrigVD = nullptr;
945 if (const auto *OASE = dyn_cast<ArraySectionExpr>(Val: Ref)) {
946 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
947 while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Val: Base))
948 Base = TempOASE->getBase()->IgnoreParenImpCasts();
949 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Val: Base))
950 Base = TempASE->getBase()->IgnoreParenImpCasts();
951 DE = cast<DeclRefExpr>(Val: Base);
952 OrigVD = cast<VarDecl>(Val: DE->getDecl());
953 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Val: Ref)) {
954 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
955 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Val: Base))
956 Base = TempASE->getBase()->IgnoreParenImpCasts();
957 DE = cast<DeclRefExpr>(Val: Base);
958 OrigVD = cast<VarDecl>(Val: DE->getDecl());
959 }
960 return OrigVD;
961}
962
// If reduction item N is an array section/subscript, offsets the private
// address by the element distance between the section start and its base
// variable, so the private copy can be addressed through the same base
// expression; plain variable references pass through unchanged.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(Ref: ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(Args&: OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(E: DE);
    // Dereference down to the element type of the shared address.
    LValue BaseLValue =
        loadToBegin(CGF, BaseTy: OrigVD->getType(), ElTy: SharedAddresses[N].first.getType(),
                    BaseLV: OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    // Element distance from the section start back to the base variable.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        ElemTy: SharedAddr.getElementType(), LHS: BaseLValue.getPointer(CGF),
        RHS: SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            V: PrivateAddr.emitRawPointer(CGF), DestTy: SharedAddr.getType());
    // Apply the same offset to the private pointer.
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        Ty: SharedAddr.getElementType(), Ptr: PrivatePointer, IdxList: Adjustment);
    return castToBase(CGF, BaseTy: OrigVD->getType(),
                      ElTy: SharedAddresses[N].first.getType(),
                      OriginalBaseAddress: OriginalBaseLValue.getAddress(), Addr: Ptr);
  }
  // Plain variable: record the declaration, no address adjustment required.
  BaseDecls.emplace_back(
      Args: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
989
990bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
991 const OMPDeclareReductionDecl *DRD =
992 getReductionInit(ReductionOp: ClausesData[N].ReductionOp);
993 return DRD && DRD->getInitializer();
994}
995
// Outlined parallel-style regions receive the thread id as a pointer
// parameter; load through it to produce the thread-id lvalue.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      Ptr: CGF.GetAddrOfLocalVar(VD: getThreadIDVariable()),
      PtrTy: getThreadIDVariable()->getType()->castAs<PointerType>());
}
1001
// Emits the body of an OpenMP region inside a terminate scope so that
// exceptions cannot escape the structured block.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1016
// In task regions the thread id is a local value rather than a pointer
// parameter, so no load-through is needed — just form an lvalue over it.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(Addr: CGF.GetAddrOfLocalVar(VD: getThreadIDVariable()),
                            T: getThreadIDVariable()->getType(),
                            Source: AlignmentSource::Decl);
}
1023
// Appends a public, unnamed, non-bitfield field of type \p FieldTy to record
// \p DC; used to build the implicit record types the OpenMP runtime expects.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, StartLoc: SourceLocation(), IdLoc: SourceLocation(), /*Id=*/nullptr, T: FieldTy,
      TInfo: C.getTrivialTypeSourceInfo(T: FieldTy, Loc: SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(D: Field);
  return Field;
}
1034
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  // Type of a kmp_critical_name lock: an array of 8 int32.
  KmpCriticalNameTy = llvm::ArrayType::get(ElementType: CGM.Int32Ty, /*NumElements*/ 8);
  // Configure the shared OpenMPIRBuilder from the language options before
  // initializing it.
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  Config.setDefaultTargetAS(
      CGM.getContext().getTargetInfo().getTargetAddressSpace(AS: LangAS::Default));
  Config.setRuntimeCC(CGM.getRuntimeCC());

  OMPBuilder.setConfig(Config);
  OMPBuilder.initialize();
  // When compiling for the device, seed offload-entry metadata from the host
  // IR file so host and device entry points line up.
  OMPBuilder.loadOffloadInfoMetadata(VFS&: *CGM.getFileSystem(),
                                     HostFilePath: CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}
1061
1062void CGOpenMPRuntime::clear() {
1063 InternalVars.clear();
1064 // Clean non-target variable declarations possibly used only in debug info.
1065 for (const auto &Data : EmittedNonTargetVariables) {
1066 if (!Data.getValue().pointsToAliveValue())
1067 continue;
1068 auto *GV = dyn_cast<llvm::GlobalVariable>(Val: Data.getValue());
1069 if (!GV)
1070 continue;
1071 if (!GV->isDeclaration() || GV->getNumUses() > 0)
1072 continue;
1073 GV->eraseFromParent();
1074 }
1075}
1076
// Joins \p Parts into a platform-specific mangled runtime name via the
// OpenMPIRBuilder's naming scheme.
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}
1080
// Emits the outlined helper for a user-defined reduction: an internal
// "void fn(Ty *out, Ty *in)" that maps the declared omp_out/omp_in (or
// omp_priv/omp_orig) variables onto the dereferenced parameters and then
// emits the combiner/initializer expression.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(T: Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(Elt: &OmpOutParm);
  Args.push_back(Elt: &OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(Info: FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      Parts: {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(Ty: FnTy, Linkage: llvm::GlobalValue::InternalLinkage,
                                    N: Name, M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: FnInfo);
  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
    Fn->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
  // These helpers should be inlined away when optimizing.
  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
    Fn->removeFnAttr(Kind: llvm::Attribute::NoInline);
    Fn->removeFnAttr(Kind: llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(Kind: llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo, Args, Loc: In->getLocation(),
                    StartLoc: Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(VD: &OmpInParm);
  Scope.addPrivate(
      LocalVD: In, Addr: CGF.EmitLoadOfPointerLValue(Ptr: AddrIn, PtrTy: PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(VD: &OmpOutParm);
  Scope.addPrivate(
      LocalVD: Out, Addr: CGF.EmitLoadOfPointerLValue(Ptr: AddrOut, PtrTy: PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  // For an initializer without a call expression, run omp_priv's own
  // (non-trivial) initializer into the out parameter's storage.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Init: Out->getInit())) {
    CGF.EmitAnyExprToMem(E: Out->getInit(), Location: CGF.GetAddrOfLocalVar(VD: Out),
                         Quals: Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(E: CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1137
// Emits (once per declaration) the combiner and optional initializer helper
// functions for a user-defined reduction, caching the pair in UDRMap.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(Val: D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, Ty: D->getType(), CombinerInitializer: D->getCombiner(),
      In: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D->getCombinerIn())->getDecl()),
      Out: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only a call-style initializer passes the expression through; otherwise
    // the priv variable's own initializer is emitted inside the helper.
    Initializer = emitCombinerOrInitializer(
        CGM, Ty: D->getType(),
        CombinerInitializer: D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        In: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D->getInitOrig())->getDecl()),
        Out: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(Key: D, Args&: Combiner, Args&: Initializer);
  // Track per-function emissions so functionFinished() can drop them.
  if (CGF)
    FunctionUDRMap[CGF->CurFn].push_back(Elt: D);
}
1161
1162std::pair<llvm::Function *, llvm::Function *>
1163CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1164 auto I = UDRMap.find(Val: D);
1165 if (I != UDRMap.end())
1166 return I->second;
1167 emitUserDefinedReduction(/*CGF=*/nullptr, D);
1168 return UDRMap.lookup(Val: D);
1169}
1170
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  // Pushes a finalization callback for region \p Kind on construction (only
  // when an OpenMPIRBuilder is in use); the destructor pops it again.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(Kind: OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
      return llvm::Error::success();
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(FI: std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Null when the OpenMPIRBuilder path is not in use; then both push and pop
  // are no-ops.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1216
// Shared implementation for outlining 'parallel' and 'teams' regions:
// determines whether the region may execute a 'cancel', sets up the region
// info, and generates the outlined function from the captured statement.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Each directive kind that can contain 'cancel' stores the flag on its own
  // class, hence the dyn_cast chain.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(Val: &D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(Val: &D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(Val: &D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(Val: &D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(Val: &D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(Val: &D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(Val: &D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(Val: &D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(S: *CS, D);
}
1253
// Builds the outlined helper name: "<parent name>" followed by the
// platform-specific "omp_outlined" suffix.
std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName(Parts: {"omp_outlined"});
  return (Name + Suffix).str();
}
1258
// Convenience overload: derives the helper name from the current function.
std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(Name: CGF.CurFn->getName());
}
1262
// Builds the reduction helper name: "<parent name>" followed by the
// platform-specific "omp.reduction.reduction_func" suffix.
std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName(Parts: {"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}
1267
// Outlines the 'parallel' region of directive \p D into a helper function
// taking the standard kmp_int32* thread-id parameter.
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(RegionKind: OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, OutlinedHelperName: getOutlinedHelperName(CGF),
      CodeGen);
}
1277
// Outlines the 'teams' region of directive \p D; identical machinery to the
// parallel case but driven by the teams captured statement.
llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(RegionKind: OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, OutlinedHelperName: getOutlinedHelperName(CGF),
      CodeGen);
}
1287
// Outlines a task/taskloop region. For untied tasks, installs an action that
// re-enqueues the task via __kmpc_omp_task at each scheduling point and
// reports the resulting number of task parts back through \p NumberOfParts.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Emits the "__kmpc_omp_task(loc, tid, task_t*)" call used to re-schedule
  // an untied task at a task scheduling point.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, Loc: D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc: D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(Ptr: CGF.GetAddrOfLocalVar(VD: TaskTVar),
                                    PtrTy: TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_task),
                        args: TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(DKind: D.getDirectiveKind()) ? OMPD_taskloop
                                                       : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(RegionKind: Region);
  // Each task-directive kind that can contain 'cancel' stores its own flag.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(Val: &D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(Val: &D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(Val: &D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(Val: &D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(S: *CS);
  // The part count is only meaningful for untied tasks, which may be split
  // into multiple parts at scheduling points.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1334
// Creates the "service" insertion point for this function: a no-op bitcast
// placeholder after which thread-id/location runtime calls are emitted.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  assert(!Elem.ServiceInsertPt && "Insert point is set already.");

  // The placeholder is a trivial int32->int32 bitcast of undef.
  llvm::Value *Undef = llvm::UndefValue::get(T: CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt",
                                                 CGF.Builder.GetInsertBlock());
  } else {
    // Default: place it right after the alloca insertion point so the
    // service calls land in the function's entry block.
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.ServiceInsertPt->insertAfter(InsertPos: CGF.AllocaInsertPt->getIterator());
  }
}
1349
1350void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1351 auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1352 if (Elem.ServiceInsertPt) {
1353 llvm::Instruction *Ptr = Elem.ServiceInsertPt;
1354 Elem.ServiceInsertPt = nullptr;
1355 Ptr->eraseFromParent();
1356 }
1357}
1358
// Builds the ";file;function;line;column;;" ident string describing \p Loc,
// written into \p Buffer; this is the textual location format used for
// ident_t entries.
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";";
  // With debug info enabled, honor any debug-prefix-map remapping of paths.
  if (auto *DbgInfo = CGF.getDebugInfo())
    OS << DbgInfo->remapDIPath(PLoc.getFilename());
  else
    OS << PLoc.getFilename();
  OS << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(Val: CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
1376
// Builds (or reuses) the ident_t * location argument for a runtime call at
// \p Loc with the given ident flags. A real source string is only emitted
// when the location is valid and either debug info is enabled or \p EmitLoc
// forces it; otherwise the default (empty) location string is used.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    std::string FileName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(Val: CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    // Honor any debug-prefix-map remapping of the file path.
    if (auto *DbgInfo = CGF.getDebugInfo())
      FileName = DbgInfo->remapDIPath(PLoc.getFilename());
    else
      FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, Flags: llvm::omp::IdentFlag(Flags), Reserve2Flags: Reserved2Flags);
}
1405
// Returns the OpenMP thread id for the current function, preferring (in
// order): the OpenMPIRBuilder's own mechanism, a per-function cached value,
// the thread-id parameter of an outlined region, and finally an emitted
// __kmpc_global_thread_num call at the service insertion point.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(Loc: CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        LocStr: getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        Ident: OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(Val: CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Loading the parameter is only safe when we can prove the load cannot
      // sit below a potentially-throwing point (entry block, or the same
      // block as the pointer's definition).
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(Val: LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(Val: LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(Val: LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(lvalue: LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock)
          OpenMPLocThreadIDMap[CGF.CurFn].ThreadID = ThreadID;
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (!Elem.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.ServiceInsertPt);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: Loc);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      Callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(),
                                            FnID: OMPRTL___kmpc_global_thread_num),
      Args: emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.ThreadID = Call;
  return Call;
}
1473
// Drops all per-function caches (thread id + service insert point, UDR/UDM
// emissions, lastprivate-conditional and untied-task bookkeeping) once code
// generation for \p CGF's function completes.
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(Val: CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(Val: CGF.CurFn);
  }
  // User-defined reductions/mappers emitted for this function are invalidated
  // so a later function re-emits them.
  if (auto I = FunctionUDRMap.find(Val: CGF.CurFn); I != FunctionUDRMap.end()) {
    for (const auto *D : I->second)
      UDRMap.erase(Val: D);
    FunctionUDRMap.erase(I);
  }
  if (auto I = FunctionUDMMap.find(Val: CGF.CurFn); I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(Val: D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(Val: CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(Val: CGF.CurFn);
}
1493
// Returns the ident_t * type used for location arguments of runtime calls.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1497
1498static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1499convertDeviceClause(const VarDecl *VD) {
1500 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1501 OMPDeclareTargetDeclAttr::getDeviceType(VD);
1502 if (!DevTy)
1503 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1504
1505 switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1506 case OMPDeclareTargetDeclAttr::DT_Host:
1507 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1508 break;
1509 case OMPDeclareTargetDeclAttr::DT_NoHost:
1510 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1511 break;
1512 case OMPDeclareTargetDeclAttr::DT_Any:
1513 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1514 break;
1515 default:
1516 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1517 break;
1518 }
1519}
1520
1521static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1522convertCaptureClause(const VarDecl *VD) {
1523 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1524 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1525 if (!MapType)
1526 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1527 switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1528 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1529 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1530 break;
1531 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1532 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Local:
1533 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1534 break;
1535 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1536 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1537 break;
1538 default:
1539 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1540 break;
1541 }
1542}
1543
/// Build a TargetRegionEntryInfo (file/line-based unique offload entry ID)
/// for the entity beginning at \p BeginLoc, optionally scoped under
/// \p ParentName.
static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
    CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
    SourceLocation BeginLoc, llvm::StringRef ParentName = "") {

  // Lazily resolve BeginLoc to a <filename, line> pair; only invoked when the
  // builder actually needs the location.
  auto FileInfoCallBack = [&]() {
    SourceManager &SM = CGM.getContext().getSourceManager();
    PresumedLoc PLoc = SM.getPresumedLoc(Loc: BeginLoc);

    // If a #line directive names a file that does not exist on disk, fall
    // back to the physical location so the unique ID remains usable.
    if (!CGM.getFileSystem()->exists(Path: PLoc.getFilename()))
      PLoc = SM.getPresumedLoc(Loc: BeginLoc, /*UseLineDirectives=*/false);

    return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
  };

  return OMPBuilder.getTargetEntryUniqueInfo(CallBack: FileInfoCallBack,
                                             VFS&: *CGM.getFileSystem(), ParentName);
}
1561
// Return the address of the device-side global (or link/ref pointer) created
// for the declare-target variable \p VD, or an invalid address when the
// OMPBuilder decides no such global is needed.
ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // Deferred callbacks so the global/linkage are only computed if the builder
  // actually materializes the variable.
  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(GD: VD); };

  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  std::vector<llvm::GlobalVariable *> GeneratedRefs;

  // The builder hands back a pointer to the variable, so convert the pointer
  // type (not the variable type) for the address element type below.
  llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
      T: CGM.getContext().getPointerType(T: VD->getType()));
  llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
      CaptureClause: convertCaptureClause(VD), DeviceClause: convertDeviceClause(VD),
      IsDeclaration: VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      IsExternallyVisible: VD->isExternallyVisible(),
      EntryInfo: getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  BeginLoc: VD->getCanonicalDecl()->getBeginLoc()),
      MangledName: CGM.getMangledName(GD: VD), GeneratedRefs, OpenMPSIMD: CGM.getLangOpts().OpenMPSimd,
      TargetTriple: CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, GlobalInitializer: AddrOfGlobal,
      VariableLinkage: LinkageForVariable);

  if (!addr)
    return ConstantAddress::invalid();
  return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(D: VD));
}
1587
// Return (creating on first use) the per-variable cache global that is passed
// to __kmpc_threadprivate_cached for \p VD. Only meaningful when native TLS
// is not being used.
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  // Caches must not be created when real TLS is in use.
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName(Parts: {"cache", ""});
  return OMPBuilder.getOrCreateInternalVariable(
      Ty: CGM.Int8PtrPtrTy, Name: Twine(CGM.getMangledName(GD: VD)).concat(Suffix).str());
}
1597
// Return the address of the current thread's copy of the threadprivate
// variable \p VD. With native TLS the original address is already
// thread-local; otherwise go through __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  // Args: loc, gtid, original variable address, size, per-variable cache.
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreatePointerCast(V: VDAddr.emitRawPointer(CGF), DestTy: CGM.Int8PtrTy),
      CGM.getSize(numChars: CGM.GetTargetTypeStoreSize(Ty: VarTy)),
      getOrCreateThreadPrivateCache(VD)};
  return Address(
      CGF.EmitRuntimeCall(
          callee: OMPBuilder.getOrCreateRuntimeFunction(
              M&: CGM.getModule(), FnID: OMPRTL___kmpc_threadprivate_cached),
          args: Args),
      CGF.Int8Ty, VDAddr.getAlignment());
}
1619
// Emit the runtime calls that register the constructor, copy-constructor and
// destructor of a threadprivate variable located at \p VDAddr. Null callback
// values are permitted by the runtime entry point.
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                          M&: CGM.getModule(), FnID: OMPRTL___kmpc_global_thread_num),
                      args: OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc,
      CGF.Builder.CreatePointerCast(V: VDAddr.emitRawPointer(CGF), DestTy: CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      callee: OMPBuilder.getOrCreateRuntimeFunction(
          M&: CGM.getModule(), FnID: OMPRTL___kmpc_threadprivate_register),
      args: Args);
}
1640
// Emit ctor/dtor support functions for the threadprivate variable \p VD when
// native TLS is not used, and register them with the runtime.
// \returns an initializer function that the caller must arrange to run (when
// no \p CGF is supplied, i.e. during global initialization), or nullptr when
// either nothing needs to be emitted or the registration was emitted inline
// into \p CGF.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With real TLS the variable needs no runtime registration.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(C&: CGM.getContext());
  // Emit only once per mangled name in this module.
  if (VD && ThreadPrivateWithDefinition.insert(key: CGM.getMangledName(GD: VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // The ctor takes the address of the thread's copy as a void* parameter.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(Elt: &Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          resultType: CGM.getContext().VoidPtrTy, args: Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(Info: FI);
      std::string Name = getName(Parts: {"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(ty: FTy, name: Name, FI, Loc);
      CtorCGF.StartFunction(GD: GlobalDecl(), RetTy: CGM.getContext().VoidPtrTy, Fn, FnInfo: FI,
                            Args, Loc, StartLoc: Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          Addr: CtorCGF.GetAddrOfLocalVar(VD: &Dst), /*Volatile=*/false,
          Ty: CGM.getContext().VoidPtrTy, Loc: Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(T: ASTTy),
                  VDAddr.getAlignment());
      CtorCGF.EmitAnyExprToMem(E: Init, Location: Arg, Quals: Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Return the incoming pointer, reloaded after initialization.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          Addr: CtorCGF.GetAddrOfLocalVar(VD: &Dst), /*Volatile=*/false,
          Ty: CGM.getContext().VoidPtrTy, Loc: Dst.getLocation());
      CtorCGF.Builder.CreateStore(Val: ArgVal, Addr: CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // The dtor takes the address of the thread's copy as a void* parameter.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(Elt: &Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          resultType: CGM.getContext().VoidTy, args: Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(Info: FI);
      std::string Name = getName(Parts: {"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(ty: FTy, name: Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CGF&: DtorCGF);
      DtorCGF.StartFunction(GD: GlobalDecl(), RetTy: CGM.getContext().VoidTy, Fn, FnInfo: FI, Args,
                            Loc, StartLoc: Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(CGF&: DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          Addr: DtorCGF.GetAddrOfLocalVar(VD: &Dst),
          /*Volatile=*/false, Ty: CGM.getContext().VoidPtrTy, Loc: Dst.getLocation());
      DtorCGF.emitDestroy(
          addr: Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), type: ASTTy,
          destroyer: DtorCGF.getDestroyer(destructionKind: ASTTy.isDestructedType()),
          useEHCleanupForArray: DtorCGF.needsEHCleanup(kind: ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(Ty: CGM.DefaultPtrTy);
    if (Ctor == nullptr) {
      Ctor = llvm::Constant::getNullValue(Ty: CGM.DefaultPtrTy);
    }
    if (Dtor == nullptr) {
      Dtor = llvm::Constant::getNullValue(Ty: CGM.DefaultPtrTy);
    }
    if (!CGF) {
      // No enclosing function: wrap the registration in a dedicated init
      // function and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(Result: CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName(Parts: {"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          ty: InitFunctionTy, name: Name, FI: CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GD: GlobalDecl(), RetTy: CGM.getContext().VoidTy, Fn: InitFunction,
                            FnInfo: CGM.getTypes().arrangeNullaryFunction(), Args: ArgList,
                            Loc, StartLoc: Loc);
      emitThreadPrivateVarInit(CGF&: InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration inline into the current function.
    emitThreadPrivateVarInit(CGF&: *CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1750
// For an active 'declare target indirect' function \p FD (already emitted as
// \p GV), emit the device-side pointer global and register the corresponding
// offload entry so the runtime can resolve indirect calls.
void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
                                                llvm::GlobalValue *GV) {
  std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
      OMPDeclareTargetDeclAttr::getActiveAttr(VD: FD);

  // We only need to handle active 'indirect' declare target functions.
  if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
    return;

  // Get a mangled name to store the new device global in.
  llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
      CGM, OMPBuilder, BeginLoc: FD->getCanonicalDecl()->getBeginLoc(), ParentName: FD->getName());
  SmallString<128> Name;
  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);

  // We need to generate a new global to hold the address of the indirectly
  // called device function. Doing this allows us to keep the visibility and
  // linkage of the associated function unchanged while allowing the runtime to
  // access its value.
  llvm::GlobalValue *Addr = GV;
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    // On the device, emit a constant function-pointer global (in the program
    // address space) initialized with GV; protected visibility keeps it
    // non-preemptible while still accessible to the runtime.
    llvm::PointerType *FnPtrTy = llvm::PointerType::get(
        C&: CGM.getLLVMContext(),
        AddressSpace: CGM.getModule().getDataLayout().getProgramAddressSpace());
    Addr = new llvm::GlobalVariable(
        CGM.getModule(), FnPtrTy,
        /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
        nullptr, llvm::GlobalValue::NotThreadLocal,
        CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
    Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
  }

  // Register the pointer global as an 'indirect' device global-var entry.
  // The recorded size is that of the pointer symbol itself (VoidPtrTy).
  OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName: Name, Addr, VarSize: CGM.GetTargetTypeStoreSize(Ty: CGM.VoidPtrTy).getQuantity(),
      Flags: llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
      Linkage: llvm::GlobalValue::WeakODRLinkage);
}
1792
1793void CGOpenMPRuntime::registerVTableOffloadEntry(llvm::GlobalVariable *VTable,
1794 const VarDecl *VD) {
1795 // TODO: add logic to avoid duplicate vtable registrations per
1796 // translation unit; though for external linkage, this should no
1797 // longer be an issue - or at least we can avoid the issue by
1798 // checking for an existing offloading entry. But, perhaps the
1799 // better approach is to defer emission of the vtables and offload
1800 // entries until later (by tracking a list of items that need to be
1801 // emitted).
1802
1803 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1804
1805 // Generate a new externally visible global to point to the
1806 // internally visible vtable. Doing this allows us to keep the
1807 // visibility and linkage of the associated vtable unchanged while
1808 // allowing the runtime to access its value. The externally
1809 // visible global var needs to be emitted with a unique mangled
1810 // name that won't conflict with similarly named (internal)
1811 // vtables in other translation units.
1812
1813 // Register vtable with source location of dynamic object in map
1814 // clause.
1815 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1816 CGM, OMPBuilder, BeginLoc: VD->getCanonicalDecl()->getBeginLoc(),
1817 ParentName: VTable->getName());
1818
1819 llvm::GlobalVariable *Addr = VTable;
1820 SmallString<128> AddrName;
1821 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name&: AddrName, EntryInfo);
1822 AddrName.append(RHS: "addr");
1823
1824 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1825 Addr = new llvm::GlobalVariable(
1826 CGM.getModule(), VTable->getType(),
1827 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, VTable,
1828 AddrName,
1829 /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
1830 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1831 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1832 }
1833 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1834 VarName: AddrName, Addr: VTable,
1835 VarSize: CGM.getDataLayout().getTypeAllocSize(Ty: VTable->getInitializer()->getType()),
1836 Flags: llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable,
1837 Linkage: llvm::GlobalValue::WeakODRLinkage);
1838}
1839
1840void CGOpenMPRuntime::emitAndRegisterVTable(CodeGenModule &CGM,
1841 CXXRecordDecl *CXXRecord,
1842 const VarDecl *VD) {
1843 // Register C++ VTable to OpenMP Offload Entry if it's a new
1844 // CXXRecordDecl.
1845 if (CXXRecord && CXXRecord->isDynamicClass() &&
1846 !CGM.getOpenMPRuntime().VTableDeclMap.contains(Val: CXXRecord)) {
1847 auto Res = CGM.getOpenMPRuntime().VTableDeclMap.try_emplace(Key: CXXRecord, Args&: VD);
1848 if (Res.second) {
1849 CGM.EmitVTable(Class: CXXRecord);
1850 CodeGenVTables VTables = CGM.getVTables();
1851 llvm::GlobalVariable *VTablesAddr = VTables.GetAddrOfVTable(RD: CXXRecord);
1852 assert(VTablesAddr && "Expected non-null VTable address");
1853 CGM.getOpenMPRuntime().registerVTableOffloadEntry(VTable: VTablesAddr, VD);
1854 // Emit VTable for all the fields containing dynamic CXXRecord
1855 for (const FieldDecl *Field : CXXRecord->fields()) {
1856 if (CXXRecordDecl *RecordDecl = Field->getType()->getAsCXXRecordDecl())
1857 emitAndRegisterVTable(CGM, CXXRecord: RecordDecl, VD);
1858 }
1859 // Emit VTable for all dynamic parent class
1860 for (CXXBaseSpecifier &Base : CXXRecord->bases()) {
1861 if (CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl())
1862 emitAndRegisterVTable(CGM, CXXRecord: BaseDecl, VD);
1863 }
1864 }
1865 }
1866}
1867
// Scan the map clauses of the target directive \p D and register vtables for
// every mapped variable whose (pointee) type is a dynamic C++ class.
void CGOpenMPRuntime::registerVTable(const OMPExecutableDirective &D) {
  // Register VTable by scanning through the map clause of OpenMP target region.
  // Get CXXRecordDecl and VarDecl from Expr.
  auto GetVTableDecl = [](const Expr *E) {
    QualType VDTy = E->getType();
    CXXRecordDecl *CXXRecord = nullptr;
    // Look through an lvalue reference, then through one level of pointer.
    if (const auto *RefType = VDTy->getAs<LValueReferenceType>())
      VDTy = RefType->getPointeeType();
    if (VDTy->isPointerType())
      CXXRecord = VDTy->getPointeeType()->getAsCXXRecordDecl();
    else
      CXXRecord = VDTy->getAsCXXRecordDecl();

    // Resolve the mapped variable: either a direct reference, or the base
    // variable of a member access (for 's.field' this yields 's').
    const VarDecl *VD = nullptr;
    if (auto *DRE = dyn_cast<DeclRefExpr>(Val: E)) {
      VD = cast<VarDecl>(Val: DRE->getDecl());
    } else if (auto *MRE = dyn_cast<MemberExpr>(Val: E)) {
      if (auto *BaseDRE = dyn_cast<DeclRefExpr>(Val: MRE->getBase())) {
        if (auto *BaseVD = dyn_cast<VarDecl>(Val: BaseDRE->getDecl()))
          VD = BaseVD;
      }
    }
    return std::pair<CXXRecordDecl *, const VarDecl *>(CXXRecord, VD);
  };
  // Collect VTable from OpenMP map clause.
  for (const auto *C : D.getClausesOfKind<OMPMapClause>()) {
    for (const auto *E : C->varlist()) {
      auto DeclPair = GetVTableDecl(E);
      // Ensure VD is not null
      if (DeclPair.second)
        emitAndRegisterVTable(CGM, CXXRecord: DeclPair.first, VD: DeclPair.second);
    }
  }
}
1902
// Return the address of an "artificial" threadprivate variable identified by
// \p Name — an internal codegen temporary that must have one instance per
// thread. Uses native TLS when available, otherwise the
// __kmpc_threadprivate_cached runtime mechanism.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName(Parts: {"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(T: VarType);
  llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
      Ty: VarLVType, Name: Twine(Name).concat(Suffix).str());
  // Prefer real TLS when the target supports it.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(T: VarType));
  }
  // Fallback: loc, gtid, shared address, size, dedicated cache variable.
  std::string CacheSuffix = getName(Parts: {"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc: SourceLocation()),
      getThreadID(CGF, Loc: SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(V: GAddr, DestTy: CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(V: CGF.getTypeSize(Ty: VarType), DestTy: CGM.SizeTy,
                                /*isSigned=*/false),
      OMPBuilder.getOrCreateInternalVariable(
          Ty: CGM.VoidPtrPtrTy,
          Name: Twine(Name).concat(Suffix).concat(Suffix: CacheSuffix).str())};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          V: CGF.EmitRuntimeCall(
              callee: OMPBuilder.getOrCreateRuntimeFunction(
                  M&: CGM.getModule(), FnID: OMPRTL___kmpc_threadprivate_cached),
              args: Args),
          DestTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(T: VarType));
}
1935
1936void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1937 const RegionCodeGenTy &ThenGen,
1938 const RegionCodeGenTy &ElseGen) {
1939 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1940
1941 // If the condition constant folds and can be elided, try to avoid emitting
1942 // the condition and the dead arm of the if/else.
1943 bool CondConstant;
1944 if (CGF.ConstantFoldsToSimpleInteger(Cond, Result&: CondConstant)) {
1945 if (CondConstant)
1946 ThenGen(CGF);
1947 else
1948 ElseGen(CGF);
1949 return;
1950 }
1951
1952 // Otherwise, the condition did not fold, or we couldn't elide it. Just
1953 // emit the conditional branch.
1954 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock(name: "omp_if.then");
1955 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock(name: "omp_if.else");
1956 llvm::BasicBlock *ContBlock = CGF.createBasicBlock(name: "omp_if.end");
1957 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock: ThenBlock, FalseBlock: ElseBlock, /*TrueCount=*/0);
1958
1959 // Emit the 'then' code.
1960 CGF.EmitBlock(BB: ThenBlock);
1961 ThenGen(CGF);
1962 CGF.EmitBranch(Block: ContBlock);
1963 // Emit the 'else' code if present.
1964 // There is no need to emit line number for unconditional branch.
1965 (void)ApplyDebugLocation::CreateEmpty(CGF);
1966 CGF.EmitBlock(BB: ElseBlock);
1967 ElseGen(CGF);
1968 // There is no need to emit line number for unconditional branch.
1969 (void)ApplyDebugLocation::CreateEmpty(CGF);
1970 CGF.EmitBranch(Block: ContBlock);
1971 // Emit the continuation block for code after the if.
1972 CGF.EmitBlock(BB: ContBlock, /*IsFinished=*/true);
1973}
1974
// Emit a 'parallel' region call: __kmpc_fork_call(OutlinedFn, ...) when the
// 'if' clause is absent or true, and a serialized execution of OutlinedFn
// bracketed by __kmpc_(end_)serialized_parallel otherwise.
// NOTE(review): NumThreads, NumThreadsModifier, Severity and Message are not
// used in this body — presumably consumed elsewhere or by overrides; confirm.
void CGOpenMPRuntime::emitParallelCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
    ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
    llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
    OpenMPSeverityClauseKind Severity, const Expr *Message) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Parallel path: fork the outlined function through the runtime.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(C: CapturedVars.size()), // Number of captured vars
        OutlinedFn};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(in_start: std::begin(arr&: Args), in_end: std::end(arr&: Args));
    RealArgs.append(in_start: CapturedVars.begin(), in_end: CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, FnID: OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(callee: RTLFn, args: RealArgs);
  };
  // Serialized path: run the outlined function on the current thread.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                            M, FnID: OMPRTL___kmpc_serialized_parallel),
                        args: Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    RawAddress ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(Ty: CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(Val: CGF.Builder.getInt32(/*C*/ 0), Addr: ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(Elt: ThreadIDAddr.emitRawPointer(CGF));
    OutlinedFnArgs.push_back(Elt: ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(in_start: CapturedVars.begin(), in_end: CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    // handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(Kind: llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(Kind: llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, Args: OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                            M, FnID: OMPRTL___kmpc_end_serialized_parallel),
                        args: EndArgs);
  };
  // With an 'if' clause choose between both paths at runtime (or at compile
  // time if the condition folds); otherwise always fork.
  if (IfCond) {
    emitIfClause(CGF, Cond: IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2045
2046// If we're inside an (outlined) parallel region, use the region info's
2047// thread-ID variable (it is passed in a first argument of the outlined function
2048// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2049// regular serial code region, get thread ID by calling kmp_int32
2050// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2051// return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  // Inside an outlined region the gtid is already available via the captured
  // thread-ID variable.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();

  // Otherwise ask the runtime for the gtid and spill it into a temporary so
  // that an address can be returned.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(T: Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(value: ThreadID,
                        lvalue: CGF.MakeAddrLValue(Addr: ThreadIDTemp, T: Int32Ty));

  return ThreadIDTemp;
}
2068
2069llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2070 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2071 std::string Name = getName(Parts: {Prefix, "var"});
2072 return OMPBuilder.getOrCreateInternalVariable(Ty: KmpCriticalNameTy, Name);
2073}
2074
namespace {
/// Common pre(post)-action for different OpenMP constructs.
/// Emits EnterCallee(EnterArgs) on region entry and ExitCallee(ExitArgs) on
/// region exit. With Conditional=true the enter call's result guards the
/// region: the body only runs when the runtime call returns non-zero, and the
/// caller must invoke Done() after emitting the region to close the branch.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  // Continuation block; set by Enter() only in the Conditional case.
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(callee: EnterCallee, args: EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(Arg: EnterRes);
      auto *ThenBlock = CGF.createBasicBlock(name: "omp_if.then");
      ContBlock = CGF.createBasicBlock(name: "omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(Cond: CallBool, True: ThenBlock, False: ContBlock);
      CGF.EmitBlock(BB: ThenBlock);
    }
  }
  // Must only be called after Enter() ran with Conditional=true (otherwise
  // ContBlock is still null).
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(Block: ContBlock);
    CGF.EmitBlock(BB: ContBlock, IsFinished: true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(callee: ExitCallee, args: ExitArgs);
  }
};
} // anonymous namespace
2113
// Emit a 'critical' region: the body is bracketed by
// __kmpc_critical[_with_hint] / __kmpc_end_critical on a named module-level
// lock variable.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::FunctionCallee RuntimeFcn = OMPBuilder.getOrCreateRuntimeFunction(
      M&: CGM.getModule(),
      FnID: Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical);
  llvm::Value *LockVar = getCriticalRegionLock(CriticalName);
  // Cast the lock to the runtime function's expected address space when the
  // global's address space differs from the parameter's.
  unsigned LockVarArgIdx = 2;
  if (cast<llvm::GlobalVariable>(Val: LockVar)->getAddressSpace() !=
      RuntimeFcn.getFunctionType()
          ->getParamType(i: LockVarArgIdx)
          ->getPointerAddressSpace())
    LockVar = CGF.Builder.CreateAddrSpaceCast(
        V: LockVar, DestTy: RuntimeFcn.getFunctionType()->getParamType(i: LockVarArgIdx));
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         LockVar};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(arr&: Args),
                                                std::end(arr&: Args));
  // The hint (if any) is an extra trailing argument on the enter call only.
  if (Hint) {
    EnterArgs.push_back(Elt: CGF.Builder.CreateIntCast(
        V: CGF.EmitScalarExpr(E: Hint), DestTy: CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(RuntimeFcn, EnterArgs,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_critical),
                        Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, InnermostKind: OMPD_critical, CodeGen: CriticalOpGen);
}
2150
// Emit a 'master' region: the body runs only when __kmpc_master returns
// non-zero (conditional action), followed by __kmpc_end_master.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, InnermostKind: OMPD_master, CodeGen: MasterOpGen);
  // Close the conditional branch opened by the action's Enter().
  Action.Done(CGF);
}
2173
// Emit a 'masked' region: like 'master' but with a thread filter value
// (defaults to 0 when no 'filter' clause is present).
void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(iden_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(E: Filter, IgnoreResultAssign: CGF.Int32Ty)
                               : llvm::ConstantInt::get(Ty: CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  // __kmpc_end_masked takes no filter argument.
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, InnermostKind: OMPD_masked, CodeGen: MaskedOpGen);
  // Close the conditional branch opened by the action's Enter().
  Action.Done(CGF);
}
2202
// Emit a 'taskyield' point, via the OpenMPIRBuilder when it is enabled,
// otherwise by calling __kmpc_omp_taskyield directly.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(Loc: CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(Ty: CGM.IntTy, /*V=*/0, /*isSigned=*/IsSigned: true)};
    CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_taskyield),
                        args: Args);
  }

  // Inside an OpenMP region, also emit the untied-task switch point.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
2222
// Emit a 'taskgroup' region: the body is bracketed by __kmpc_taskgroup /
// __kmpc_end_taskgroup (unconditional action).
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, InnermostKind: OMPD_taskgroup, CodeGen: TaskgroupOpGen);
}
2242
2243/// Given an array of pointers to variables, project the address of a
2244/// given variable.
2245static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2246 unsigned Index, const VarDecl *Var) {
2247 // Pull out the pointer to the variable.
2248 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Addr: Array, Index);
2249 llvm::Value *Ptr = CGF.Builder.CreateLoad(Addr: PtrAddr);
2250
2251 llvm::Type *ElemTy = CGF.ConvertTypeForMem(T: Var->getType());
2252 return Address(Ptr, ElemTy, CGF.getContext().getDeclAlign(D: Var));
2253}
2254
/// Emit the internal helper function passed to __kmpc_copyprivate:
///   void copy_func(void *LHSArg, void *RHSArg);
/// Both arguments are arrays of \p ArgsElemType holding pointers to the
/// per-thread copyprivate variables; for each variable the corresponding
/// assignment expression from \p AssignmentOps is emitted to copy element I of
/// the RHS array into element I of the LHS array.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(Elt: &LHSArg);
  Args.push_back(Elt: &RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
  // Internal linkage: the helper is only referenced from the
  // __kmpc_copyprivate call emitted in this translation unit.
  std::string Name =
      CGM.getOpenMPRuntime().getName(Parts: {"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(Ty: CGM.getTypes().GetFunctionType(Info: CGFI),
                                    Linkage: llvm::GlobalValue::InternalLinkage, N: Name,
                                    M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: CGFI);
  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
    Fn->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo: CGFI, Args, Loc, StartLoc: Loc);
  // Reinterpret the two opaque arguments as arrays of variable pointers:
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  V: CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(VD: &LHSArg)),
                  DestTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  V: CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(VD: &RHSArg)),
                  DestTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
              ArgsElemType, CGF.getPointerAlign());
  // Emit one assignment per copyprivate variable:
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(Val: cast<DeclRefExpr>(Val: DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, Array: LHS, Index: I, Var: DestVar);

    const auto *SrcVar =
        cast<VarDecl>(Val: cast<DeclRefExpr>(Val: SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, Array: RHS, Index: I, Var: SrcVar);

    // The copy is emitted in terms of the original variable's type so that
    // user-defined copy assignment (via AssignmentOps[I]) is honored.
    const auto *VD = cast<DeclRefExpr>(Val: CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(OriginalType: Type, DestAddr, SrcAddr, DestVD: DestVar, SrcVD: SrcVar, Copy: AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2312
/// Emit a 'single' region:
///   int32 did_it = 0;
///   if (__kmpc_single(loc, gtid)) {
///     SingleOpGen();
///     __kmpc_end_single(loc, gtid);
///     did_it = 1;
///   }
///   __kmpc_copyprivate(loc, gtid, buf_size, list, copy_func, did_it);
/// The copyprivate part is emitted only when CopyprivateVars is non-empty.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // All four clause arrays describe the same set of copyprivate variables and
  // must therefore have matching sizes.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // 'did_it' flags the thread that actually executed the single region; it is
  // only needed when there are copyprivate variables to broadcast.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(T: KmpInt32Ty, Name: ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(Val: CGF.Builder.getInt32(C: 0), Addr: DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Conditional action: the region body runs only when __kmpc_single returns
  // non-zero, and __kmpc_end_single closes the region on that path.
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, InnermostKind: OMPD_single, CodeGen: SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1; (still inside the conditional region, i.e. only on the
    // thread that executed the 'single' body)
    CGF.Builder.CreateStore(Val: CGF.Builder.getInt32(C: 1), Addr: DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        EltTy: C.VoidPtrTy, ArySize: ArraySize, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(T: CopyprivateArrayTy, Name: ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(Addr: CopyprivateList, Index: I);
      CGF.Builder.CreateStore(
          Val: CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              V: CGF.EmitLValue(E: CopyprivateVars[I]).getPointer(CGF),
              DestTy: CGF.VoidPtrTy),
          Addr: Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): SrcExprs is passed as the helper's DestExprs parameter and
    // DstExprs as its SrcExprs. This looks swapped but matches the helper's
    // LHS/RHS convention as invoked by the runtime — confirm before changing.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, ArgsElemType: CGF.ConvertTypeForMem(T: CopyprivateArrayTy), CopyprivateVars,
        DestExprs: SrcExprs, SrcExprs: DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(Ty: CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr: CopyprivateList, Ty: CGF.VoidPtrTy, ElementTy: CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(Addr: DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc), // i32 <gtid>
        BufSize, // size_t <buf_size>
        CL.emitRawPointer(CGF), // void *<copyprivate list>
        CpyFn, // void (*) (void *, void *) <copy_func>
        DidItVal // i32 did_it
    };
    CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_copyprivate),
                        args: Args);
  }
}
2399
2400void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2401 const RegionCodeGenTy &OrderedOpGen,
2402 SourceLocation Loc, bool IsThreads) {
2403 if (!CGF.HaveInsertPoint())
2404 return;
2405 // __kmpc_ordered(ident_t *, gtid);
2406 // OrderedOpGen();
2407 // __kmpc_end_ordered(ident_t *, gtid);
2408 // Prepare arguments and build a call to __kmpc_ordered
2409 if (IsThreads) {
2410 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2411 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2412 M&: CGM.getModule(), FnID: OMPRTL___kmpc_ordered),
2413 Args,
2414 OMPBuilder.getOrCreateRuntimeFunction(
2415 M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_ordered),
2416 Args);
2417 OrderedOpGen.setAction(Action);
2418 emitInlinedDirective(CGF, InnermostKind: OMPD_ordered, CodeGen: OrderedOpGen);
2419 return;
2420 }
2421 emitInlinedDirective(CGF, InnermostKind: OMPD_ordered, CodeGen: OrderedOpGen);
2422}
2423
2424unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2425 unsigned Flags;
2426 if (Kind == OMPD_for)
2427 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2428 else if (Kind == OMPD_sections)
2429 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2430 else if (Kind == OMPD_single)
2431 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2432 else if (Kind == OMPD_barrier)
2433 Flags = OMP_IDENT_BARRIER_EXPL;
2434 else
2435 Flags = OMP_IDENT_BARRIER_IMPL;
2436 return Flags;
2437}
2438
2439void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2440 CodeGenFunction &CGF, const OMPLoopDirective &S,
2441 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2442 // Check if the loop directive is actually a doacross loop directive. In this
2443 // case choose static, 1 schedule.
2444 if (llvm::any_of(
2445 Range: S.getClausesOfKind<OMPOrderedClause>(),
2446 P: [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2447 ScheduleKind = OMPC_SCHEDULE_static;
2448 // Chunk size is 1 in this case.
2449 llvm::APInt ChunkSize(32, 1);
2450 ChunkExpr = IntegerLiteral::Create(
2451 C: CGF.getContext(), V: ChunkSize,
2452 type: CGF.getContext().getIntTypeForBitwidth(DestWidth: 32, /*Signed=*/0),
2453 l: SourceLocation());
2454 }
2455}
2456
/// Emit an explicit or implicit barrier. Inside a cancellable region the
/// cancellation-aware __kmpc_cancel_barrier is used and, when EmitChecks is
/// set, its result is tested to branch to the construct's cancel exit.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
        cantFail(ValOrErr: OMPBuilder.createBarrier(Loc: CGF.Builder, Kind, ForceSimpleCall,
                                         CheckCancelFlag: EmitChecks));
    CGF.Builder.restoreIP(IP: AfterIP);
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(),
                                                FnID: OMPRTL___kmpc_cancel_barrier),
          args: Args);
      if (EmitChecks) {
        // A non-zero result means cancellation was requested:
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: ".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(name: ".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Arg: Result);
        CGF.Builder.CreateCondBr(Cond: Cmp, True: ExitBB, False: ContBB);
        CGF.EmitBlock(BB: ExitBB);
        // exit from construct; (through cleanups, to the region's cancel
        // destination)
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(Kind: OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(Dest: CancelDestination);
        CGF.EmitBlock(BB: ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Non-cancellable (or forced-simple) case: plain __kmpc_barrier.
  CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                          M&: CGM.getModule(), FnID: OMPRTL___kmpc_barrier),
                      args: Args);
}
2508
2509void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2510 Expr *ME, bool IsFatal) {
2511 llvm::Value *MVL = ME ? CGF.EmitScalarExpr(E: ME)
2512 : llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
2513 // Build call void __kmpc_error(ident_t *loc, int severity, const char
2514 // *message)
2515 llvm::Value *Args[] = {
2516 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/EmitLoc: true),
2517 llvm::ConstantInt::get(Ty: CGM.Int32Ty, V: IsFatal ? 2 : 1),
2518 CGF.Builder.CreatePointerCast(V: MVL, DestTy: CGM.Int8PtrTy)};
2519 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
2520 M&: CGM.getModule(), FnID: OMPRTL___kmpc_error),
2521 args: Args);
2522}
2523
2524/// Map the OpenMP loop schedule to the runtime enumeration.
2525static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2526 bool Chunked, bool Ordered) {
2527 switch (ScheduleKind) {
2528 case OMPC_SCHEDULE_static:
2529 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2530 : (Ordered ? OMP_ord_static : OMP_sch_static);
2531 case OMPC_SCHEDULE_dynamic:
2532 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2533 case OMPC_SCHEDULE_guided:
2534 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2535 case OMPC_SCHEDULE_runtime:
2536 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2537 case OMPC_SCHEDULE_auto:
2538 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2539 case OMPC_SCHEDULE_unknown:
2540 assert(!Chunked && "chunk was specified but schedule kind not known");
2541 return Ordered ? OMP_ord_static : OMP_sch_static;
2542 }
2543 llvm_unreachable("Unexpected runtime schedule");
2544}
2545
2546/// Map the OpenMP distribute schedule to the runtime enumeration.
2547static OpenMPSchedType
2548getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2549 // only static is allowed for dist_schedule
2550 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2551}
2552
2553bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2554 bool Chunked) const {
2555 OpenMPSchedType Schedule =
2556 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2557 return Schedule == OMP_sch_static;
2558}
2559
2560bool CGOpenMPRuntime::isStaticNonchunked(
2561 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2562 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2563 return Schedule == OMP_dist_sch_static;
2564}
2565
2566bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2567 bool Chunked) const {
2568 OpenMPSchedType Schedule =
2569 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2570 return Schedule == OMP_sch_static_chunked;
2571}
2572
2573bool CGOpenMPRuntime::isStaticChunked(
2574 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2575 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2576 return Schedule == OMP_dist_sch_static_chunked;
2577}
2578
2579bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2580 OpenMPSchedType Schedule =
2581 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2582 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2583 return Schedule != OMP_sch_static;
2584}
2585
2586static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2587 OpenMPScheduleClauseModifier M1,
2588 OpenMPScheduleClauseModifier M2) {
2589 int Modifier = 0;
2590 switch (M1) {
2591 case OMPC_SCHEDULE_MODIFIER_monotonic:
2592 Modifier = OMP_sch_modifier_monotonic;
2593 break;
2594 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2595 Modifier = OMP_sch_modifier_nonmonotonic;
2596 break;
2597 case OMPC_SCHEDULE_MODIFIER_simd:
2598 if (Schedule == OMP_sch_static_chunked)
2599 Schedule = OMP_sch_static_balanced_chunked;
2600 break;
2601 case OMPC_SCHEDULE_MODIFIER_last:
2602 case OMPC_SCHEDULE_MODIFIER_unknown:
2603 break;
2604 }
2605 switch (M2) {
2606 case OMPC_SCHEDULE_MODIFIER_monotonic:
2607 Modifier = OMP_sch_modifier_monotonic;
2608 break;
2609 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2610 Modifier = OMP_sch_modifier_nonmonotonic;
2611 break;
2612 case OMPC_SCHEDULE_MODIFIER_simd:
2613 if (Schedule == OMP_sch_static_chunked)
2614 Schedule = OMP_sch_static_balanced_chunked;
2615 break;
2616 case OMPC_SCHEDULE_MODIFIER_last:
2617 case OMPC_SCHEDULE_MODIFIER_unknown:
2618 break;
2619 }
2620 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2621 // If the static schedule kind is specified or if the ordered clause is
2622 // specified, and if the nonmonotonic modifier is not specified, the effect is
2623 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2624 // modifier is specified, the effect is as if the nonmonotonic modifier is
2625 // specified.
2626 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2627 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2628 Schedule == OMP_sch_static_balanced_chunked ||
2629 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2630 Schedule == OMP_dist_sch_static_chunked ||
2631 Schedule == OMP_dist_sch_static))
2632 Modifier = OMP_sch_modifier_nonmonotonic;
2633 }
2634 return Schedule | Modifier;
2635}
2636
/// Emit the __kmpc_dispatch_init_* call that starts a dynamically scheduled
/// worksharing loop.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind: ScheduleKind.Schedule, Chunked: DispatchValues.Chunk != nullptr, Ordered);
  // Unordered static schedules must not reach the dispatch interface; they are
  // handled by the __kmpc_for_static_init path instead.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(N: IVSize, C: 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      // The schedule argument encodes both the schedule and the
      // monotonic/nonmonotonic modifier bits.
      CGF.Builder.getInt32(C: addMonoNonMonoModifier(
          CGM, Schedule, M1: ScheduleKind.M1, M2: ScheduleKind.M2)), // Schedule type
      DispatchValues.LB, // Lower
      DispatchValues.UB, // Upper
      CGF.Builder.getIntN(N: IVSize, C: 1), // Stride
      Chunk // Chunk
  };
  CGF.EmitRuntimeCall(callee: OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
                      args: Args);
}
2670
2671void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
2672 SourceLocation Loc) {
2673 if (!CGF.HaveInsertPoint())
2674 return;
2675 // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
2676 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2677 CGF.EmitRuntimeCall(callee: OMPBuilder.createDispatchDeinitFunction(), args: Args);
2678}
2679
/// Shared helper emitting the __kmpc_for_static_init call for both
/// worksharing loops and distribute directives. Expects a static schedule and
/// no 'ordered' clause (asserted below).
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    // A missing chunk is only valid for the non-chunked schedule variants.
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(N: Values.IVSize, C: 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      // The schedule argument encodes both the schedule and the
      // monotonic/nonmonotonic modifier bits.
      CGF.Builder.getInt32(C: addMonoNonMonoModifier(CGM&: CGF.CGM, Schedule, M1,
                                                M2)), // Schedule type
      Values.IL.emitRawPointer(CGF), // &isLastIter
      Values.LB.emitRawPointer(CGF), // &LB
      Values.UB.emitRawPointer(CGF), // &UB
      Values.ST.emitRawPointer(CGF), // &Stride
      CGF.Builder.getIntN(N: Values.IVSize, C: 1), // Incr
      Chunk // Chunk
  };
  CGF.EmitRuntimeCall(callee: ForStaticInitFunction, args: Args);
}
2728
/// Emit the static-init runtime call for a worksharing (loop- or
/// sections-based) directive.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind: ScheduleKind.Schedule, Chunked: Values.Chunk != nullptr, Ordered: Values.Ordered);
  assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
         "Expected loop-based or sections-based directive.");
  // The ident_t flags distinguish loop worksharing from sections.
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                                    Flags: isOpenMPLoopDirective(DKind)
                                                        ? OMP_IDENT_WORK_LOOP
                                                        : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  // Plain worksharing never uses the GPU-distribute entry point.
  llvm::FunctionCallee StaticInitFunction =
      OMPBuilder.createForStaticInitFunction(IVSize: Values.IVSize, IVSigned: Values.IVSigned,
                                             IsGPUDistribute: false);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: Loc);
  emitForStaticInitCall(CGF, UpdateLocation: UpdatedLocation, ThreadId, ForStaticInitFunction: StaticInitFunction,
                        Schedule: ScheduleNum, M1: ScheduleKind.M1, M2: ScheduleKind.M2, Values);
}
2750
2751void CGOpenMPRuntime::emitDistributeStaticInit(
2752 CodeGenFunction &CGF, SourceLocation Loc,
2753 OpenMPDistScheduleClauseKind SchedKind,
2754 const CGOpenMPRuntime::StaticRTInput &Values) {
2755 OpenMPSchedType ScheduleNum =
2756 getRuntimeSchedule(ScheduleKind: SchedKind, Chunked: Values.Chunk != nullptr);
2757 llvm::Value *UpdatedLocation =
2758 emitUpdateLocation(CGF, Loc, Flags: OMP_IDENT_WORK_DISTRIBUTE);
2759 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2760 llvm::FunctionCallee StaticInitFunction;
2761 bool isGPUDistribute =
2762 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU();
2763 StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2764 IVSize: Values.IVSize, IVSigned: Values.IVSigned, IsGPUDistribute: isGPUDistribute);
2765
2766 emitForStaticInitCall(CGF, UpdateLocation: UpdatedLocation, ThreadId, ForStaticInitFunction: StaticInitFunction,
2767 Schedule: ScheduleNum, M1: OMPC_SCHEDULE_MODIFIER_unknown,
2768 M2: OMPC_SCHEDULE_MODIFIER_unknown, Values);
2769}
2770
2771void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2772 SourceLocation Loc,
2773 OpenMPDirectiveKind DKind) {
2774 assert((DKind == OMPD_distribute || DKind == OMPD_for ||
2775 DKind == OMPD_sections) &&
2776 "Expected distribute, for, or sections directive kind");
2777 if (!CGF.HaveInsertPoint())
2778 return;
2779 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2780 llvm::Value *Args[] = {
2781 emitUpdateLocation(CGF, Loc,
2782 Flags: isOpenMPDistributeDirective(DKind) ||
2783 (DKind == OMPD_target_teams_loop)
2784 ? OMP_IDENT_WORK_DISTRIBUTE
2785 : isOpenMPLoopDirective(DKind)
2786 ? OMP_IDENT_WORK_LOOP
2787 : OMP_IDENT_WORK_SECTIONS),
2788 getThreadID(CGF, Loc)};
2789 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: Loc);
2790 if (isOpenMPDistributeDirective(DKind) &&
2791 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU())
2792 CGF.EmitRuntimeCall(
2793 callee: OMPBuilder.getOrCreateRuntimeFunction(
2794 M&: CGM.getModule(), FnID: OMPRTL___kmpc_distribute_static_fini),
2795 args: Args);
2796 else
2797 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
2798 M&: CGM.getModule(), FnID: OMPRTL___kmpc_for_static_fini),
2799 args: Args);
2800}
2801
2802void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2803 SourceLocation Loc,
2804 unsigned IVSize,
2805 bool IVSigned) {
2806 if (!CGF.HaveInsertPoint())
2807 return;
2808 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2809 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2810 CGF.EmitRuntimeCall(callee: OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2811 args: Args);
2812}
2813
2814llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2815 SourceLocation Loc, unsigned IVSize,
2816 bool IVSigned, Address IL,
2817 Address LB, Address UB,
2818 Address ST) {
2819 // Call __kmpc_dispatch_next(
2820 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2821 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2822 // kmp_int[32|64] *p_stride);
2823 llvm::Value *Args[] = {
2824 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2825 IL.emitRawPointer(CGF), // &isLastIter
2826 LB.emitRawPointer(CGF), // &Lower
2827 UB.emitRawPointer(CGF), // &Upper
2828 ST.emitRawPointer(CGF) // &Stride
2829 };
2830 llvm::Value *Call = CGF.EmitRuntimeCall(
2831 callee: OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), args: Args);
2832 return CGF.EmitScalarConversion(
2833 Src: Call, SrcTy: CGF.getContext().getIntTypeForBitwidth(DestWidth: 32, /*Signed=*/1),
2834 DstTy: CGF.getContext().BoolTy, Loc);
2835}
2836
2837llvm::Value *CGOpenMPRuntime::emitMessageClause(CodeGenFunction &CGF,
2838 const Expr *Message,
2839 SourceLocation Loc) {
2840 if (!Message)
2841 return llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
2842 return CGF.EmitScalarExpr(E: Message);
2843}
2844
2845llvm::Value *
2846CGOpenMPRuntime::emitSeverityClause(OpenMPSeverityClauseKind Severity,
2847 SourceLocation Loc) {
2848 // OpenMP 6.0, 10.4: "If no severity clause is specified then the effect is
2849 // as if sev-level is fatal."
2850 return llvm::ConstantInt::get(Ty: CGM.Int32Ty,
2851 V: Severity == OMPC_SEVERITY_warning ? 1 : 2);
2852}
2853
2854void CGOpenMPRuntime::emitNumThreadsClause(
2855 CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
2856 OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity,
2857 SourceLocation SeverityLoc, const Expr *Message,
2858 SourceLocation MessageLoc) {
2859 if (!CGF.HaveInsertPoint())
2860 return;
2861 llvm::SmallVector<llvm::Value *, 4> Args(
2862 {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2863 CGF.Builder.CreateIntCast(V: NumThreads, DestTy: CGF.Int32Ty, /*isSigned*/ true)});
2864 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2865 // or __kmpc_push_num_threads_strict(&loc, global_tid, num_threads, severity,
2866 // messsage) if strict modifier is used.
2867 RuntimeFunction FnID = OMPRTL___kmpc_push_num_threads;
2868 if (Modifier == OMPC_NUMTHREADS_strict) {
2869 FnID = OMPRTL___kmpc_push_num_threads_strict;
2870 Args.push_back(Elt: emitSeverityClause(Severity, Loc: SeverityLoc));
2871 Args.push_back(Elt: emitMessageClause(CGF, Message, Loc: MessageLoc));
2872 }
2873 CGF.EmitRuntimeCall(
2874 callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(), FnID), args: Args);
2875}
2876
2877void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2878 ProcBindKind ProcBind,
2879 SourceLocation Loc) {
2880 if (!CGF.HaveInsertPoint())
2881 return;
2882 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2883 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2884 llvm::Value *Args[] = {
2885 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2886 llvm::ConstantInt::get(Ty: CGM.IntTy, V: unsigned(ProcBind), /*isSigned=*/IsSigned: true)};
2887 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
2888 M&: CGM.getModule(), FnID: OMPRTL___kmpc_push_proc_bind),
2889 args: Args);
2890}
2891
2892void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2893 SourceLocation Loc, llvm::AtomicOrdering AO) {
2894 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2895 OMPBuilder.createFlush(Loc: CGF.Builder);
2896 } else {
2897 if (!CGF.HaveInsertPoint())
2898 return;
2899 // Build call void __kmpc_flush(ident_t *loc)
2900 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
2901 M&: CGM.getModule(), FnID: OMPRTL___kmpc_flush),
2902 args: emitUpdateLocation(CGF, Loc));
2903 }
2904}
2905
namespace {
/// Indexes of fields for type kmp_task_t.
/// The enumerator order defines the field indices used when emitting accesses
/// into the task descriptor, so it must stay in sync with the record layout
/// built for kmp_task_t elsewhere in this file.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  /// (The name 'Data1' presumably mirrors the runtime's generic slot name —
  /// confirm against the OpenMP runtime's kmp_task_t definition.)
  Data1,
  /// Task priority.
  /// (Generic slot name 'Data2', see note on Data1.)
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2931
/// Emit the offloading entries and associated metadata via the
/// OpenMPIRBuilder, translating any builder-reported problems into clang
/// diagnostics.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
    return;

  // Error callback: map the builder's (DeviceID, FileID, Line) entry info back
  // to a clang SourceLocation (when possible) and report the matching
  // diagnostic.
  llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
      [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
             const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
    SourceLocation Loc;
    if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
      // Search the source manager's known files for the one whose unique ID
      // matches the entry's device/file IDs.
      for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                E = CGM.getContext().getSourceManager().fileinfo_end();
           I != E; ++I) {
        if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
            I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
          Loc = CGM.getContext().getSourceManager().translateFileLineCol(
              SourceFile: I->getFirst(), Line: EntryInfo.Line, Col: 1);
          break;
        }
      }
    }
    switch (Kind) {
    case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
      CGM.getDiags().Report(Loc,
                            DiagID: diag::err_target_region_offloading_entry_incorrect)
          << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
      CGM.getDiags().Report(
          Loc, DiagID: diag::err_target_var_offloading_entry_incorrect_with_parent)
          << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
      CGM.getDiags().Report(DiagID: diag::err_target_var_offloading_entry_incorrect);
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR: {
      // No dedicated diagnostic exists for this case; build a custom one.
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          L: DiagnosticsEngine::Error, FormatString: "Offloading entry for indirect declare "
                                    "target variable is incorrect: the "
                                    "address is invalid.");
      CGM.getDiags().Report(DiagID);
    } break;
    }
  };

  OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFunction&: ErrorReportFn);
}
2980
2981void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2982 if (!KmpRoutineEntryPtrTy) {
2983 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2984 ASTContext &C = CGM.getContext();
2985 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2986 FunctionProtoType::ExtProtoInfo EPI;
2987 KmpRoutineEntryPtrQTy = C.getPointerType(
2988 T: C.getFunctionType(ResultTy: KmpInt32Ty, Args: KmpRoutineEntryTyArgs, EPI));
2989 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(T: KmpRoutineEntryPtrQTy);
2990 }
2991}
2992
namespace {
/// Descriptor for a single privatized variable in a task-based region:
/// the original declaration, the private copy, and the helper used to
/// initialize each element of the copy (for firstprivate arrays).
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  // Constructor for task-local privates: only the original declaration is
  // recorded; all other members remain null (see isLocalPrivate()).
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;        // Reference expr to the original.
  const VarDecl *Original = nullptr;        // Original (shared) declaration.
  const VarDecl *PrivateCopy = nullptr;     // The private copy declaration.
  const VarDecl *PrivateElemInit = nullptr; // Per-element init helper.
  /// True for privates built via the single-VarDecl constructor, i.e. local
  /// variables privatized without an explicit data-sharing clause.
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
/// Required alignment paired with the private-variable descriptor.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace
3010
3011static bool isAllocatableDecl(const VarDecl *VD) {
3012 const VarDecl *CVD = VD->getCanonicalDecl();
3013 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3014 return false;
3015 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3016 // Use the default allocation.
3017 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3018 !AA->getAllocator());
3019}
3020
3021static RecordDecl *
3022createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3023 if (!Privates.empty()) {
3024 ASTContext &C = CGM.getContext();
3025 // Build struct .kmp_privates_t. {
3026 // /* private vars */
3027 // };
3028 RecordDecl *RD = C.buildImplicitRecord(Name: ".kmp_privates.t");
3029 RD->startDefinition();
3030 for (const auto &Pair : Privates) {
3031 const VarDecl *VD = Pair.second.Original;
3032 QualType Type = VD->getType().getNonReferenceType();
3033 // If the private variable is a local variable with lvalue ref type,
3034 // allocate the pointer instead of the pointee type.
3035 if (Pair.second.isLocalPrivate()) {
3036 if (VD->getType()->isLValueReferenceType())
3037 Type = C.getPointerType(T: Type);
3038 if (isAllocatableDecl(VD))
3039 Type = C.getPointerType(T: Type);
3040 }
3041 FieldDecl *FD = addFieldToRecordDecl(C, DC: RD, FieldTy: Type);
3042 if (VD->hasAttrs()) {
3043 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3044 E(VD->getAttrs().end());
3045 I != E; ++I)
3046 FD->addAttr(A: *I);
3047 }
3048 }
3049 RD->completeDefinition();
3050 return RD;
3051 }
3052 return nullptr;
3053}
3054
3055static RecordDecl *
3056createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3057 QualType KmpInt32Ty,
3058 QualType KmpRoutineEntryPointerQTy) {
3059 ASTContext &C = CGM.getContext();
3060 // Build struct kmp_task_t {
3061 // void * shareds;
3062 // kmp_routine_entry_t routine;
3063 // kmp_int32 part_id;
3064 // kmp_cmplrdata_t data1;
3065 // kmp_cmplrdata_t data2;
3066 // For taskloops additional fields:
3067 // kmp_uint64 lb;
3068 // kmp_uint64 ub;
3069 // kmp_int64 st;
3070 // kmp_int32 liter;
3071 // void * reductions;
3072 // };
3073 RecordDecl *UD = C.buildImplicitRecord(Name: "kmp_cmplrdata_t", TK: TagTypeKind::Union);
3074 UD->startDefinition();
3075 addFieldToRecordDecl(C, DC: UD, FieldTy: KmpInt32Ty);
3076 addFieldToRecordDecl(C, DC: UD, FieldTy: KmpRoutineEntryPointerQTy);
3077 UD->completeDefinition();
3078 CanQualType KmpCmplrdataTy = C.getCanonicalTagType(TD: UD);
3079 RecordDecl *RD = C.buildImplicitRecord(Name: "kmp_task_t");
3080 RD->startDefinition();
3081 addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
3082 addFieldToRecordDecl(C, DC: RD, FieldTy: KmpRoutineEntryPointerQTy);
3083 addFieldToRecordDecl(C, DC: RD, FieldTy: KmpInt32Ty);
3084 addFieldToRecordDecl(C, DC: RD, FieldTy: KmpCmplrdataTy);
3085 addFieldToRecordDecl(C, DC: RD, FieldTy: KmpCmplrdataTy);
3086 if (isOpenMPTaskLoopDirective(DKind: Kind)) {
3087 QualType KmpUInt64Ty =
3088 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3089 QualType KmpInt64Ty =
3090 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3091 addFieldToRecordDecl(C, DC: RD, FieldTy: KmpUInt64Ty);
3092 addFieldToRecordDecl(C, DC: RD, FieldTy: KmpUInt64Ty);
3093 addFieldToRecordDecl(C, DC: RD, FieldTy: KmpInt64Ty);
3094 addFieldToRecordDecl(C, DC: RD, FieldTy: KmpInt32Ty);
3095 addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
3096 }
3097 RD->completeDefinition();
3098 return RD;
3099}
3100
3101static RecordDecl *
3102createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3103 ArrayRef<PrivateDataTy> Privates) {
3104 ASTContext &C = CGM.getContext();
3105 // Build struct kmp_task_t_with_privates {
3106 // kmp_task_t task_data;
3107 // .kmp_privates_t. privates;
3108 // };
3109 RecordDecl *RD = C.buildImplicitRecord(Name: "kmp_task_t_with_privates");
3110 RD->startDefinition();
3111 addFieldToRecordDecl(C, DC: RD, FieldTy: KmpTaskTQTy);
3112 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3113 addFieldToRecordDecl(C, DC: RD, FieldTy: C.getCanonicalTagType(TD: PrivateRD));
3114 RD->completeDefinition();
3115 return RD;
3116}
3117
3118/// Emit a proxy function which accepts kmp_task_t as the second
3119/// argument.
3120/// \code
3121/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3122/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3123/// For taskloops:
3124/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3125/// tt->reductions, tt->shareds);
3126/// return 0;
3127/// }
3128/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // The proxy has the kmp_routine_entry_t signature:
  //   kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt)
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(Elt: &GtidArg);
  Args.push_back(Elt: &TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: KmpInt32Ty, args: Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(Info: TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(Parts: {"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      Ty: TaskEntryTy, Linkage: llvm::GlobalValue::InternalLinkage, N: Name, M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: TaskEntry, FI: TaskEntryFnInfo);
  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
    TaskEntry->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: KmpInt32Ty, Fn: TaskEntry, FnInfo: TaskEntryFnInfo, Args,
                    Loc, StartLoc: Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      Addr: CGF.GetAddrOfLocalVar(VD: &GtidArg), /*Volatile=*/false, Ty: KmpInt32Ty, Loc);
  // Load the kmp_task_t_with_privates object the runtime handed us.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      Ptr: CGF.GetAddrOfLocalVar(VD: &TaskTypeArg),
      PtrTy: KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      KmpTaskTWithPrivatesQTy->castAsRecordDecl();
  // Field 0 is the embedded kmp_task_t descriptor.
  LValue Base =
      CGF.EmitLValueForField(Base: TDBase, Field: *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
  // part_id is passed by address so the callee can update it.
  auto PartIdFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, Field: *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, Field: *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      V: CGF.EmitLoadOfScalar(lvalue: SharedsLVal, Loc),
      DestTy: CGF.ConvertTypeForMem(T: SharedsPtrTy));

  // Pass &tt->privates when the record has a privates field, null otherwise.
  auto PrivatesFI = std::next(x: KmpTaskTWithPrivatesQTyRD->field_begin(), n: 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(Base: TDBase, Field: *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        V: PrivatesLVal.getPointer(CGF), DestTy: CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(Addr: TDBase.getAddress(),
                                               Ty: CGF.VoidPtrTy, ElementTy: CGF.Int8Ty)
          .emitRawPointer(CGF)};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(arr&: CommonArgs),
                                          std::end(arr&: CommonArgs));
  // Taskloop entries additionally receive bounds, stride, last-iteration flag
  // and reduction data loaded from the task descriptor.
  if (isOpenMPTaskLoopDirective(DKind: Kind)) {
    auto LBFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, Field: *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(lvalue: LBLVal, Loc);
    auto UBFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, Field: *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(lvalue: UBLVal, Loc);
    auto StFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, Field: *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(lvalue: StLVal, Loc);
    auto LIFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, Field: *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(lvalue: LILVal, Loc);
    auto RFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, Field: *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(lvalue: RLVal, Loc);
    CallArgs.push_back(Elt: LBParam);
    CallArgs.push_back(Elt: UBParam);
    CallArgs.push_back(Elt: StParam);
    CallArgs.push_back(Elt: LIParam);
    CallArgs.push_back(Elt: RParam);
  }
  CallArgs.push_back(Elt: SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, OutlinedFn: TaskFunction,
                                                  Args: CallArgs);
  // The proxy always reports success (0) back to the runtime.
  CGF.EmitStoreThroughLValue(Src: RValue::get(V: CGF.Builder.getInt32(/*C=*/0)),
                             Dst: CGF.MakeAddrLValue(Addr: CGF.ReturnValue, T: KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3234
/// Emit a task destructor thunk that runs the destructors of all private
/// copies stored in the privates record of kmp_task_t_with_privates.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // The thunk has the kmp_routine_entry_t signature:
  //   kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt)
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(Elt: &GtidArg);
  Args.push_back(Elt: &TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: KmpInt32Ty, args: Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(Info: DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName(Parts: {"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(Ty: DestructorFnTy, Linkage: llvm::GlobalValue::InternalLinkage,
                             N: Name, M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: DestructorFn,
                                    FI: DestructorFnInfo);
  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
    DestructorFn->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: KmpInt32Ty, Fn: DestructorFn, FnInfo: DestructorFnInfo,
                    Args, Loc, StartLoc: Loc);

  // tt points at the kmp_task_t_with_privates object; the field after the
  // task descriptor is the privates record whose members may need cleanup.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      Ptr: CGF.GetAddrOfLocalVar(VD: &TaskTypeArg),
      PtrTy: KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      KmpTaskTWithPrivatesQTy->castAsRecordDecl();
  auto FI = std::next(x: KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, Field: *FI);
  // Push a destructor cleanup for every private field whose type has a
  // non-trivial destruction kind.
  for (const auto *Field : FI->getType()->castAsRecordDecl()->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(dtorKind: DtorKind, addr: FieldLValue.getAddress(), type: Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3284
3285/// Emit a privates mapping function for correct handling of private and
3286/// firstprivate variables.
3287/// \code
3288/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3289/// **noalias priv1,..., <tyn> **noalias privn) {
3290/// *priv1 = &.privates.priv1;
3291/// ...;
3292/// *privn = &.privates.privn;
3293/// }
3294/// \endcode
3295static llvm::Value *
3296emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3297 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3298 ArrayRef<PrivateDataTy> Privates) {
3299 ASTContext &C = CGM.getContext();
3300 FunctionArgList Args;
3301 ImplicitParamDecl TaskPrivatesArg(
3302 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3303 C.getPointerType(T: PrivatesQTy).withConst().withRestrict(),
3304 ImplicitParamKind::Other);
3305 Args.push_back(Elt: &TaskPrivatesArg);
3306 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3307 unsigned Counter = 1;
3308 for (const Expr *E : Data.PrivateVars) {
3309 Args.push_back(Elt: ImplicitParamDecl::Create(
3310 C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
3311 T: C.getPointerType(T: C.getPointerType(T: E->getType()))
3312 .withConst()
3313 .withRestrict(),
3314 ParamKind: ImplicitParamKind::Other));
3315 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
3316 PrivateVarsPos[VD] = Counter;
3317 ++Counter;
3318 }
3319 for (const Expr *E : Data.FirstprivateVars) {
3320 Args.push_back(Elt: ImplicitParamDecl::Create(
3321 C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
3322 T: C.getPointerType(T: C.getPointerType(T: E->getType()))
3323 .withConst()
3324 .withRestrict(),
3325 ParamKind: ImplicitParamKind::Other));
3326 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
3327 PrivateVarsPos[VD] = Counter;
3328 ++Counter;
3329 }
3330 for (const Expr *E : Data.LastprivateVars) {
3331 Args.push_back(Elt: ImplicitParamDecl::Create(
3332 C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
3333 T: C.getPointerType(T: C.getPointerType(T: E->getType()))
3334 .withConst()
3335 .withRestrict(),
3336 ParamKind: ImplicitParamKind::Other));
3337 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
3338 PrivateVarsPos[VD] = Counter;
3339 ++Counter;
3340 }
3341 for (const VarDecl *VD : Data.PrivateLocals) {
3342 QualType Ty = VD->getType().getNonReferenceType();
3343 if (VD->getType()->isLValueReferenceType())
3344 Ty = C.getPointerType(T: Ty);
3345 if (isAllocatableDecl(VD))
3346 Ty = C.getPointerType(T: Ty);
3347 Args.push_back(Elt: ImplicitParamDecl::Create(
3348 C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
3349 T: C.getPointerType(T: C.getPointerType(T: Ty)).withConst().withRestrict(),
3350 ParamKind: ImplicitParamKind::Other));
3351 PrivateVarsPos[VD] = Counter;
3352 ++Counter;
3353 }
3354 const auto &TaskPrivatesMapFnInfo =
3355 CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
3356 llvm::FunctionType *TaskPrivatesMapTy =
3357 CGM.getTypes().GetFunctionType(Info: TaskPrivatesMapFnInfo);
3358 std::string Name =
3359 CGM.getOpenMPRuntime().getName(Parts: {"omp_task_privates_map", ""});
3360 auto *TaskPrivatesMap = llvm::Function::Create(
3361 Ty: TaskPrivatesMapTy, Linkage: llvm::GlobalValue::InternalLinkage, N: Name,
3362 M: &CGM.getModule());
3363 CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: TaskPrivatesMap,
3364 FI: TaskPrivatesMapFnInfo);
3365 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
3366 TaskPrivatesMap->addFnAttr(Kind: "sample-profile-suffix-elision-policy",
3367 Val: "selected");
3368 if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
3369 TaskPrivatesMap->removeFnAttr(Kind: llvm::Attribute::NoInline);
3370 TaskPrivatesMap->removeFnAttr(Kind: llvm::Attribute::OptimizeNone);
3371 TaskPrivatesMap->addFnAttr(Kind: llvm::Attribute::AlwaysInline);
3372 }
3373 CodeGenFunction CGF(CGM);
3374 CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn: TaskPrivatesMap,
3375 FnInfo: TaskPrivatesMapFnInfo, Args, Loc, StartLoc: Loc);
3376
3377 // *privi = &.privates.privi;
3378 LValue Base = CGF.EmitLoadOfPointerLValue(
3379 Ptr: CGF.GetAddrOfLocalVar(VD: &TaskPrivatesArg),
3380 PtrTy: TaskPrivatesArg.getType()->castAs<PointerType>());
3381 const auto *PrivatesQTyRD = PrivatesQTy->castAsRecordDecl();
3382 Counter = 0;
3383 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3384 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3385 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3386 LValue RefLVal =
3387 CGF.MakeAddrLValue(Addr: CGF.GetAddrOfLocalVar(VD), T: VD->getType());
3388 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3389 Ptr: RefLVal.getAddress(), PtrTy: RefLVal.getType()->castAs<PointerType>());
3390 CGF.EmitStoreOfScalar(value: FieldLVal.getPointer(CGF), lvalue: RefLoadLVal);
3391 ++Counter;
3392 }
3393 CGF.FinishFunction();
3394 return TaskPrivatesMap;
3395}
3396
3397/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The field after the task descriptor is the privates record.
  auto FI = std::next(x: KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(Base: TDBase, Field: *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(DKind: D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(RegionKind: Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(DKind: D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(DKind: D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        Addr: CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr: KmpTaskSharedsPtr, Ty: CGF.ConvertTypeForMem(T: SharedsPtrTy),
            ElementTy: CGF.ConvertTypeForMem(T: SharedsTy)),
        T: SharedsTy);
  }
  // Walk the privates-record fields in lockstep with the Privates list.
  FI = FI->getType()->castAsRecordDecl()->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // When duplicating (ForDup, i.e. from the task_dup thunk), only
    // non-trivial constructor initializers need to be re-run.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Val: Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(Base: PrivatesBase, Field: *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(VD: OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(Addr: CGF.GetAddrOfLocalVar(VD: OriginalVD), T: Type);
        } else if (ForDup) {
          // Read the source value from the original task's shareds block,
          // realigned to the declaration's natural alignment.
          SharedRefLValue = CGF.EmitLValueForField(Base: SrcBase, Field: SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Addr: SharedRefLValue.getAddress().withAlignment(
                  NewAlignment: C.getDeclAlign(D: OriginalVD)),
              T: SharedRefLValue.getType(), BaseInfo: LValueBaseInfo(AlignmentSource::Decl),
              TBAAInfo: SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Val: Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(Val: CGF.CurCodeDecl)) {
          // Lambda/block captures can be evaluated directly.
          SharedRefLValue = CGF.EmitLValue(E: Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(E: Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Val: Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(Dest: PrivateLValue, Src: SharedRefLValue, EltTy: Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                DestAddr: PrivateLValue.getAddress(), SrcAddr: SharedRefLValue.getAddress(), OriginalType: Type,
                CopyGen: [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(LocalVD: Elem, Addr: SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(E: Init, Location: DestElement,
                                       Quals: Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array firstprivate: bind the shared element and run the
          // initializer expression.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(LocalVD: Elem, Addr: SharedRefLValue.getAddress());
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(init: Init, D: VD, lvalue: PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private: just run the default initializer.
        CGF.EmitExprAsInit(init: Init, D: VD, lvalue: PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3515
3516/// Check if duplication function is required for taskloops.
3517static bool checkInitIsRequired(CodeGenFunction &CGF,
3518 ArrayRef<PrivateDataTy> Privates) {
3519 bool InitRequired = false;
3520 for (const PrivateDataTy &Pair : Privates) {
3521 if (Pair.second.isLocalPrivate())
3522 continue;
3523 const VarDecl *VD = Pair.second.PrivateCopy;
3524 const Expr *Init = VD->getAnyInitializer();
3525 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Val: Init) &&
3526 !CGF.isTrivialInitializer(Init));
3527 if (InitRequired)
3528 break;
3529 }
3530 return InitRequired;
3531}
3532
3533
3534/// Emit task_dup function (for initialization of
3535/// private/firstprivate/lastprivate vars and last_iter flag)
3536/// \code
3537/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3538/// lastpriv) {
3539/// // setup lastprivate flag
3540/// task_dst->last = lastpriv;
3541/// // could be constructor calls here...
3542/// }
3543/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src,
  //                       int lastpriv)
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamKind::Other);
  Args.push_back(Elt: &DstArg);
  Args.push_back(Elt: &SrcArg);
  Args.push_back(Elt: &LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(Info: TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(Parts: {"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      Ty: TaskDupTy, Linkage: llvm::GlobalValue::InternalLinkage, N: Name, M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: TaskDup, FI: TaskDupFnInfo);
  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
    TaskDup->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn: TaskDup, FnInfo: TaskDupFnInfo, Args, Loc,
                    StartLoc: Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      Ptr: CGF.GetAddrOfLocalVar(VD: &DstArg),
      PtrTy: KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        Base: TDBase, Field: *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, Field: *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        Addr: CGF.GetAddrOfLocalVar(VD: &LastprivArg), /*Volatile=*/false, Ty: C.IntTy, Loc);
    CGF.EmitStoreOfScalar(value: Lastpriv, lvalue: LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  // Firstprivates are copied out of the *source* task's shareds block, so
  // load its address from task_src first.
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        Ptr: CGF.GetAddrOfLocalVar(VD: &SrcArg),
        PtrTy: KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        Base: TDBase, Field: *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(lvalue: CGF.EmitLValueForField(
                                 Base, Field: *std::next(x: KmpTaskTQTyRD->field_begin(),
                                                   n: KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(T: SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
3614
3615/// Checks if destructor function is required to be generated.
3616/// \return true if cleanups are required, false otherwise.
3617static bool
3618checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3619 ArrayRef<PrivateDataTy> Privates) {
3620 for (const PrivateDataTy &P : Privates) {
3621 if (P.second.isLocalPrivate())
3622 continue;
3623 QualType Ty = P.second.Original->getType().getNonReferenceType();
3624 if (Ty.isDestructedType())
3625 return true;
3626 }
3627 return false;
3628}
3629
namespace {
/// Loop generator for OpenMP iterator expression.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // One continue/exit jump destination per iterator; filled in the
  // constructor and consumed in reverse order by the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Opens the loop nest: privatizes each iterator variable and its counter,
  /// zero-initializes the counters, and emits the `cont`/`body` blocks so the
  /// code generated while this scope is alive becomes the loop body.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // Evaluate all upper bounds before privatizing.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(Elt: CGF.EmitScalarExpr(E: E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(Val: E->getIteratorDecl(I));
      addPrivate(LocalVD: VD, Addr: CGF.CreateMemTemp(T: VD->getType(), Name: VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          LocalVD: HelperData.CounterVD,
          Addr: CGF.CreateMemTemp(T: HelperData.CounterVD->getType(), Name: "counter.addr"));
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(Addr: CGF.GetAddrOfLocalVar(VD: HelperData.CounterVD),
                             T: HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          value: llvm::ConstantInt::get(Ty: CLVal.getAddress().getElementType(), V: 0),
          lvalue: CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(Args: CGF.getJumpDestInCurrentScope(Name: "iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(Args: CGF.getJumpDestInCurrentScope(Name: "iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(BB: ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(lvalue: CLVal, Loc: HelperData.CounterVD->getLocation());
      // Use a signed or unsigned compare matching the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(LHS: CVal, RHS: N)
              : CGF.Builder.CreateICmpULT(LHS: CVal, RHS: N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock(name: "iter.body");
      CGF.Builder.CreateCondBr(Cond: Cmp, True: BodyBB, False: ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BB: BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(E: HelperData.Update);
    }
  }
  /// Closes the loop nest opened by the constructor: emits the latches
  /// (counter increments + back-branches) and exit blocks innermost-first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I: I - 1);
      CGF.EmitIgnoredExpr(E: HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(Dest: ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(BB: ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
3705
/// Returns the (address, size) pair describing the storage designated by
/// expression \p E, used to fill affinity/dependence runtime entries.
/// - For an OpenMP array-shaping expression (([a][b])base) the address is the
///   evaluated base pointer and the size is sizeof(pointee) multiplied by
///   every dimension extent.
/// - For an array section the size is derived from the distance between the
///   section's lower bound address and one past its upper bound.
/// - Otherwise the size is simply sizeof(E's type).
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(Val: E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(E: Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    // Size = sizeof(pointee) * dim0 * dim1 * ...; each extent is converted to
    // size_t before the (no-unsigned-wrap) multiply.
    SizeVal = CGF.getTypeSize(Ty: OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(E: SE);
      Sz = CGF.EmitScalarConversion(
          Src: Sz, SrcTy: SE->getType(), DstTy: CGF.getContext().getSizeType(), Loc: SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(LHS: SizeVal, RHS: Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<ArraySectionExpr>(Val: E->IgnoreParenImpCasts())) {
    // Size = &upper_bound[1] - &lower_bound (pointer difference).
    LValue UpAddrLVal = CGF.EmitArraySectionExpr(E: ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress();
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        Ty: UpAddrAddress.getElementType(), Ptr: UpAddrAddress.emitRawPointer(CGF),
        /*Idx0=*/1);
    SizeVal = CGF.Builder.CreatePtrDiff(LHS: UpAddr, RHS: Addr, Name: "", /*IsNUW=*/true);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(x&: Addr, y&: SizeVal);
}
3739
/// Builds the kmp_task_affinity_info_t type, if it is not built yet, and
/// builds the flags type.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(DestWidth: 32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    // Lazily build an implicit record mirroring the runtime layout:
    //   struct kmp_task_affinity_info_t {
    //     intptr_t base_addr; size_t len; <uint32> flags;
    //   };
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord(Name: "kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, DC: KmpAffinityInfoRD, FieldTy: C.getIntPtrType());
    addFieldToRecordDecl(C, DC: KmpAffinityInfoRD, FieldTy: C.getSizeType());
    addFieldToRecordDecl(C, DC: KmpAffinityInfoRD, FieldTy: FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getCanonicalTagType(TD: KmpAffinityInfoRD);
  }
}
3754
/// Allocates and initializes a kmp_task_t object for the task-generating
/// directive \p D (task, taskloop, or a target-based task):
///  - aggregates private/firstprivate/lastprivate copies and sorts them by
///    declared alignment,
///  - builds the kmp_task_t-with-privates record, the privates mapping
///    helper, and the proxy task entry function,
///  - computes the allocation flags and calls __kmpc_omp_task_alloc (or
///    __kmpc_omp_target_task_alloc when a 'nowait' clause is present),
///  - copies shareds into the new task, emits privates initialization, the
///    destructors thunk and priority when needed, and processes 'detach' and
///    'affinity' clauses.
/// Returns a TaskResultTy carrying the new task pointer, the task entry, the
/// typed task-base lvalue and, for taskloops, the task-dup function.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
    Privates.emplace_back(
        Args: C.getDeclAlign(D: VD),
        Args: PrivateHelpersTy(E, VD, cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *I)->getDecl()),
                              /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally record the decl of the initializing
  // expression.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
    Privates.emplace_back(
        Args: C.getDeclAlign(D: VD),
        Args: PrivateHelpersTy(
            E, VD, cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *I)->getDecl()),
            cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
    Privates.emplace_back(
        Args: C.getDeclAlign(D: VD),
        Args: PrivateHelpersTy(E, VD, cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *I)->getDecl()),
                              /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(Args: CGM.getPointerAlign(), Args: PrivateHelpersTy(VD));
    else
      Privates.emplace_back(Args: C.getDeclAlign(D: VD), Args: PrivateHelpersTy(VD));
  }
  // Sort descending by alignment so higher-aligned privates come first in the
  // generated record (stable to keep declaration order within one alignment).
  llvm::stable_sort(Range&: Privates,
                    C: [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use a
  // different record than plain task/target directives, so each variant is
  // cached separately.
  if (isOpenMPTaskLoopDirective(DKind: D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getCanonicalTagType(TD: createKmpTaskTRecordDecl(
          CGM, Kind: D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPointerQTy: KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getCanonicalTagType(TD: createKmpTaskTRecordDecl(
          CGM, Kind: D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPointerQTy: KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  CanQualType KmpTaskTWithPrivatesQTy =
      C.getCanonicalTagType(TD: KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(T: KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy = CGF.Builder.getPtrTy(AddrSpace: 0);
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(Ty: KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(T: SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map parameter is the fourth argument of the outlined task
  // function.
  llvm::Type *TaskPrivatesMapTy =
      std::next(x: TaskFunction->arg_begin(), n: 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(x: KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, PrivatesQTy: FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        V: TaskPrivatesMap, DestTy: TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        T: cast<llvm::PointerType>(Val: TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, Kind: D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
    FreeAgentFlag = 0x80,
    TransparentFlag = 0x100,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (const auto *Clause = D.getSingleClause<OMPThreadsetClause>()) {
    OpenMPThreadsetKind Kind = Clause->getThreadsetKind();
    if (Kind == OMPC_THREADSET_omp_pool)
      Flags = Flags | FreeAgentFlag;
  }
  if (D.getSingleClause<OMPTransparentClause>())
    Flags |= TransparentFlag;

  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The 'final' clause may be a runtime value (select) or a compile-time
  // constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(C: Data.Final.getPointer(),
                                     True: CGF.Builder.getInt32(C: FinalFlag),
                                     False: CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(C: Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(LHS: TaskFlags, RHS: CGF.Builder.getInt32(C: Flags));
  llvm::Value *SharedsSize = CGM.getSize(numChars: C.getTypeSizeInChars(T: SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          V: TaskEntry, DestTy: KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: Device),
                                           DestTy: CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(C: OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(Elt: DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        callee: OMPBuilder.getOrCreateRuntimeFunction(
            M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_target_task_alloc),
        args: AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                                M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_task_alloc),
                            args: AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(E: Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, Loc: DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, Loc: DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(V: Tid, DestTy: CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        callee: OMPBuilder.getOrCreateRuntimeFunction(
            M&: CGM.getModule(), FnID: OMPRTL___kmpc_task_allow_completion_event),
        args: {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(Src: EvtVal, SrcTy: C.VoidPtrTy, DstTy: Evt->getType(),
                                      Loc: Evt->getExprLoc());
    CGF.EmitStoreOfScalar(value: EvtVal, lvalue: EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // NumOfElements is the runtime part (iterator modifiers), NumAffinities
    // the compile-time part (plain variable lists).
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Val: Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(E: IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(V: Sz, DestTy: CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(LHS: NumOfElements, RHS: Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(C&: CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime count: emit a variable-length array of affinity entries.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          LHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: NumAffinities), RHS: NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(DestWidth: C.getTypeSize(T: C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(V: NumOfElements));
      KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
          EltTy: KmpTaskAffinityInfoTy, NumElts: OVE, ASM: ArraySizeModifier::Normal,
          /*IndexTypeQuals=*/0);
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, T: KmpTaskAffinityInfoArrayTy,
                                           ParamKind: ImplicitParamKind::Other);
      CGF.EmitVarDecl(D: *PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(VD: PD);
      NumOfElements = CGF.Builder.CreateIntCast(V: NumOfElements, DestTy: CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time count: a constant-sized array suffices.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          EltTy: KmpTaskAffinityInfoTy,
          ArySize: llvm::APInt(C.getTypeSize(T: C.getSizeType()), NumAffinities), SizeExpr: nullptr,
          ASM: ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(T: KmpTaskAffinityInfoArrayTy, Name: ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(Addr: AffinitiesArray, Index: 0);
      NumOfElements = llvm::ConstantInt::get(Ty: CGM.Int32Ty, V: NumAffinities,
                                             /*isSigned=*/IsSigned: false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlist()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(args&: Addr, args&: Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(Addr: CGF.Builder.CreateConstGEP(Addr: AffinitiesArray, Index: Pos),
                               T: KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, Field: *std::next(x: KmpAffinityInfoRD->field_begin(), n: BaseAddr));
        CGF.EmitStoreOfScalar(value: CGF.Builder.CreatePtrToInt(V: Addr, DestTy: CGF.IntPtrTy),
                              lvalue: BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, Field: *std::next(x: KmpAffinityInfoRD->field_begin(), n: Len));
        CGF.EmitStoreOfScalar(value: Size, lvalue: LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      // A runtime counter continues where the compile-time entries left off.
      PosLVal = CGF.MakeAddrLValue(
          Addr: CGF.CreateMemTemp(T: C.getSizeType(), Name: "affs.counter.addr"),
          T: C.getSizeType());
      CGF.EmitStoreOfScalar(value: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: Pos), lvalue: PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Val: Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlist()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(args&: Addr, args&: Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(lvalue: PosLVal, Loc: E->getExprLoc());
        LValue Base =
            CGF.MakeAddrLValue(Addr: CGF.Builder.CreateGEP(CGF, Addr: AffinitiesArray, Index: Idx),
                               T: KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, Field: *std::next(x: KmpAffinityInfoRD->field_begin(), n: BaseAddr));
        CGF.EmitStoreOfScalar(value: CGF.Builder.CreatePtrToInt(V: Addr, DestTy: CGF.IntPtrTy),
                              lvalue: BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, Field: *std::next(x: KmpAffinityInfoRD->field_begin(), n: Len));
        CGF.EmitStoreOfScalar(value: Size, lvalue: LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            LHS: Idx, RHS: llvm::ConstantInt::get(Ty: Idx->getType(), V: 1));
        CGF.EmitStoreOfScalar(value: Idx, lvalue: PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        V: AffinitiesArray.emitRawPointer(CGF), DestTy: CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        callee: OMPBuilder.getOrCreateRuntimeFunction(
            M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_reg_task_with_affinity),
        args: {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          V: NewTask, DestTy: KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignRawAddrLValue(V: NewTaskNewTaskTTy,
                                                  T: KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, Field: *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->castAsRecordDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            lvalue: CGF.EmitLValueForField(
                Base: TDBase,
                Field: *std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(T: SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(Addr: KmpTaskSharedsPtr, T: SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Addr: Shareds, T: SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, EltTy: SharedsTy, MayOverlap: AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase: Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops may additionally need a task-dup helper to initialize the
    // privates of each generated chunk task.
    if (isOpenMPTaskLoopDirective(DKind: D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(x: KmpTaskTQTyRD->field_begin(), n: Data1);
  const auto *KmpCmplrdataUD = (*FI)->getType()->castAsRecordDecl();
  assert(KmpCmplrdataUD->isUnion());
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(Base: TDBase, Field: *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Base: Data1LV, Field: *std::next(x: KmpCmplrdataUD->field_begin(), n: Destructors));
    CGF.EmitStoreOfScalar(value: CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              V: DestructorFn, DestTy: KmpRoutineEntryPtrTy),
                          lvalue: DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        Base: TDBase, Field: *std::next(x: KmpTaskTQTyRD->field_begin(), n: Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Base: Data2LV, Field: *std::next(x: KmpCmplrdataUD->field_begin(), n: Priority));
    CGF.EmitStoreOfScalar(value: Data.Priority.getPointer(), lvalue: PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4152
4153/// Translates internal dependency kind into the runtime kind.
4154static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4155 RTLDependenceKindTy DepKind;
4156 switch (K) {
4157 case OMPC_DEPEND_in:
4158 DepKind = RTLDependenceKindTy::DepIn;
4159 break;
4160 // Out and InOut dependencies must use the same code.
4161 case OMPC_DEPEND_out:
4162 case OMPC_DEPEND_inout:
4163 DepKind = RTLDependenceKindTy::DepInOut;
4164 break;
4165 case OMPC_DEPEND_mutexinoutset:
4166 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4167 break;
4168 case OMPC_DEPEND_inoutset:
4169 DepKind = RTLDependenceKindTy::DepInOutSet;
4170 break;
4171 case OMPC_DEPEND_outallmemory:
4172 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4173 break;
4174 case OMPC_DEPEND_source:
4175 case OMPC_DEPEND_sink:
4176 case OMPC_DEPEND_depobj:
4177 case OMPC_DEPEND_inoutallmemory:
4178 case OMPC_DEPEND_unknown:
4179 llvm_unreachable("Unknown task dependence type");
4180 }
4181 return DepKind;
4182}
4183
4184/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4185static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4186 QualType &FlagsTy) {
4187 FlagsTy = C.getIntTypeForBitwidth(DestWidth: C.getTypeSize(T: C.BoolTy), /*Signed=*/false);
4188 if (KmpDependInfoTy.isNull()) {
4189 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord(Name: "kmp_depend_info");
4190 KmpDependInfoRD->startDefinition();
4191 addFieldToRecordDecl(C, DC: KmpDependInfoRD, FieldTy: C.getIntPtrType());
4192 addFieldToRecordDecl(C, DC: KmpDependInfoRD, FieldTy: C.getSizeType());
4193 addFieldToRecordDecl(C, DC: KmpDependInfoRD, FieldTy: FlagsTy);
4194 KmpDependInfoRD->completeDefinition();
4195 KmpDependInfoTy = C.getCanonicalTagType(TD: KmpDependInfoRD);
4196 }
4197}
4198
/// Returns the number of dependence entries stored in a depobj object and an
/// lvalue for the start of its kmp_depend_info array. The element count is
/// read from the base_addr field of the entry located immediately before the
/// array (index -1), where the runtime stashes it.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
  QualType KmpDependInfoPtrTy = C.getPointerType(T: KmpDependInfoTy);
  // Load the kmp_depend_info* stored in the depobj variable.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      Ptr: DepobjLVal.getAddress().withElementType(
          ElemTy: CGF.ConvertTypeForMem(T: KmpDependInfoPtrTy)),
      PtrTy: KmpDependInfoPtrTy->castAs<PointerType>());
  // Step back one element to the header entry that records the count.
  Address DepObjAddr = CGF.Builder.CreateGEP(
      CGF, Addr: Base.getAddress(),
      Index: llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: -1, /*isSigned=*/IsSigned: true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Addr: DepObjAddr, T: KmpDependInfoTy, BaseInfo: Base.getBaseInfo(), TBAAInfo: Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base: NumDepsBase,
      Field: *std::next(x: KmpDependInfoRD->field_begin(),
                   n: static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(lvalue: BaseAddrLVal, Loc);
  return std::make_pair(x&: NumDeps, y&: Base);
}
4224
/// Emits one kmp_depend_info entry into \p DependenciesArray for every
/// dependence expression of clause data \p Data, filling the base_addr, len
/// and flags fields. \p Pos is either a compile-time slot index (unsigned*)
/// advanced in place, or an lvalue holding a runtime index (used when the
/// clause carries an iterator modifier). A null dependence expression denotes
/// 'omp_all_memory' and is encoded as {0, 0, flags}.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(T: FlagsTy);

  // Open the iterator loop nest if the clause has an iterator modifier (a
  // null expression makes this scope a no-op); the body below is emitted
  // inside the generated loops.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Val: Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                  : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;

    // The expression will be a nullptr in the 'omp_all_memory' case.
    if (E) {
      std::tie(args&: Addr, args&: Size) = getPointerAndSize(CGF, E);
      Addr = CGF.Builder.CreatePtrToInt(V: Addr, DestTy: CGF.IntPtrTy);
    } else {
      Addr = llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: 0);
      Size = llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 0);
    }
    LValue Base;
    if (unsigned *P = dyn_cast<unsigned *>(Val&: Pos)) {
      Base = CGF.MakeAddrLValue(
          Addr: CGF.Builder.CreateConstGEP(Addr: DependenciesArray, Index: *P), T: KmpDependInfoTy);
    } else {
      assert(E && "Expected a non-null expression");
      LValue &PosLVal = *cast<LValue *>(Val&: Pos);
      llvm::Value *Idx = CGF.EmitLoadOfScalar(lvalue: PosLVal, Loc: E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Addr: CGF.Builder.CreateGEP(CGF, Addr: DependenciesArray, Index: Idx), T: KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base,
        Field: *std::next(x: KmpDependInfoRD->field_begin(),
                     n: static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
    CGF.EmitStoreOfScalar(value: Addr, lvalue: BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, Field: *std::next(x: KmpDependInfoRD->field_begin(),
                           n: static_cast<unsigned int>(RTLDependInfoFields::Len)));
    CGF.EmitStoreOfScalar(value: Size, lvalue: LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(K: Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base,
        Field: *std::next(x: KmpDependInfoRD->field_begin(),
                     n: static_cast<unsigned int>(RTLDependInfoFields::Flags)));
    CGF.EmitStoreOfScalar(
        value: llvm::ConstantInt::get(Ty: LLVMFlagsTy, V: static_cast<unsigned int>(DepKind)),
        lvalue: FlagsLVal);
    // Advance the slot: bump the static counter, or the runtime index stored
    // in PosLVal.
    if (unsigned *P = dyn_cast<unsigned *>(Val&: Pos)) {
      ++(*P);
    } else {
      LValue &PosLVal = *cast<LValue *>(Val&: Pos);
      llvm::Value *Idx = CGF.EmitLoadOfScalar(lvalue: PosLVal, Loc: E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(LHS: Idx,
                                     RHS: llvm::ConstantInt::get(Ty: Idx->getType(), V: 1));
      CGF.EmitStoreOfScalar(value: Idx, lvalue: PosLVal);
    }
  }
}
4294
/// Returns, for each depobj expression in \p Data, a value holding the number
/// of dependence entries that depobj contains (read via getDepobjElements
/// inside the clause's iterator loop nest, if any). Must only be called for
/// OMPC_DEPEND_depobj clause data.
SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
    CodeGenFunction &CGF, QualType &KmpDependInfoTy,
    const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  {
    // Scope so the iterator loop nest is closed before the sizes are read
    // back below.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Val: Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                    : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E: E->IgnoreParenImpCasts());
      std::tie(args&: NumDeps, args&: Base) =
          getDepobjElements(CGF, DepobjLVal, Loc: E->getExprLoc());
      // Accumulate the count into a memory temporary so it survives the
      // iterator scope.
      LValue NumLVal = CGF.MakeAddrLValue(
          Addr: CGF.CreateMemTemp(T: C.getUIntPtrType(), Name: "depobj.size.addr"),
          T: C.getUIntPtrType());
      CGF.Builder.CreateStore(Val: llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: 0),
                              Addr: NumLVal.getAddress());
      // NOTE(review): the temp is stored 0 immediately above, so PrevVal is
      // always zero here and Add equals NumDeps; kept as-is.
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(lvalue: NumLVal, Loc: E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(LHS: PrevVal, RHS: NumDeps);
      CGF.EmitStoreOfScalar(value: Add, lvalue: NumLVal);
      SizeLVals.push_back(Elt: NumLVal);
    }
  }
  // Read the accumulated sizes back after the iterator loops finished.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(lvalue: SizeLVals[I], Loc: Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Elt: Size);
  }
  return Sizes;
}
4332
/// Copies the dependence entries of every depobj in \p Data into
/// \p DependenciesArray at the runtime position stored in \p PosLVal,
/// advancing the position past the copied entries. Must only be called for
/// OMPC_DEPEND_depobj clause data.
void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
                                         QualType &KmpDependInfoTy,
                                         LValue PosLVal,
                                         const OMPTaskDataTy::DependData &Data,
                                         Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  llvm::Value *ElSize = CGF.getTypeSize(Ty: KmpDependInfoTy);
  {
    // Iterator loop nest (no-op when the clause has no iterator modifier).
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Val: Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                    : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E: E->IgnoreParenImpCasts());
      std::tie(args&: NumDeps, args&: Base) =
          getDepobjElements(CGF, DepobjLVal, Loc: E->getExprLoc());

      // memcopy dependency data: Size = sizeof(kmp_depend_info) * NumDeps.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          LHS: ElSize,
          RHS: CGF.Builder.CreateIntCast(V: NumDeps, DestTy: CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(lvalue: PosLVal, Loc: E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(CGF, Addr: DependenciesArray, Index: Pos);
      CGF.Builder.CreateMemCpy(Dest: DepAddr, Src: Base.getAddress(), Size);

      // Increase pos.
      // pos += size;
      // (the position counts elements, so it is advanced by NumDeps).
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(LHS: Pos, RHS: NumDeps);
      CGF.EmitStoreOfScalar(value: Add, lvalue: PosLVal);
    }
  }
}
4368
/// Lower the set of 'depend' clauses attached to a task into a runtime
/// kmp_depend_info array.
/// \returns a pair {number of dependence records (i32), void*-cast address of
/// the first record}, or {nullptr, invalid address} when every dependence
/// list is empty.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Range&: Dependencies, P: [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(x: nullptr, y: Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Count only the records whose number is known at compile time: plain
  // dependences that are neither depobj nor iterator-driven.
  unsigned NumDependencies = std::accumulate(
      first: Dependencies.begin(), last: Dependencies.end(), init: 0,
      binary_op: [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      // Depobj record counts are read from the depobj objects at runtime.
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, Data: D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(LHS: NumOfDepobjElements, RHS: Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(Val: D.IteratorExpr)) {
      // The clause's iteration space is the product of all iterator upper
      // bounds; record count = space * number of dependence expressions.
      llvm::Value *ClauseIteratorSpace =
          llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: 1);
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(E: IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(V: Sz, DestTy: CGF.IntPtrTy, /*isSigned=*/false);
        ClauseIteratorSpace = CGF.Builder.CreateNUWMul(LHS: Sz, RHS: ClauseIteratorSpace);
      }
      llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
          LHS: ClauseIteratorSpace,
          RHS: llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: D.DepExprs.size()));
      NumOfRegularWithIterators =
          CGF.Builder.CreateNUWAdd(LHS: NumOfRegularWithIterators, RHS: NumClauseDeps);
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total element count is only known at runtime: emit a VLA whose size
    // expression is an opaque value bound to the computed count.
    NumOfElements = llvm::ConstantInt::get(Ty: CGM.IntPtrTy, V: NumDependencies,
                                           /*isSigned=*/IsSigned: false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(LHS: NumOfDepobjElements, RHS: NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(LHS: NumOfRegularWithIterators, RHS: NumOfElements);
    }
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(V: NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(EltTy: KmpDependInfoTy, NumElts: OVE, ASM: ArraySizeModifier::Normal,
                               /*IndexTypeQuals=*/0);
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, T: KmpDependInfoArrayTy,
                                         ParamKind: ImplicitParamKind::Other);
    CGF.EmitVarDecl(D: *PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(VD: PD);
    NumOfElements = CGF.Builder.CreateIntCast(V: NumOfElements, DestTy: CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Fully static count: a constant-sized stack temporary suffices.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        EltTy: KmpDependInfoTy, ArySize: llvm::APInt(/*numBits=*/64, NumDependencies), SizeExpr: nullptr,
        ASM: ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(T: KmpDependInfoArrayTy, Name: ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(Addr: DependenciesArray, Index: 0);
    NumOfElements = llvm::ConstantInt::get(Ty: CGM.Int32Ty, V: NumDependencies,
                                           /*isSigned=*/IsSigned: false);
  }
  // First fill in the plain (statically counted) dependencies, tracking the
  // write position with a compile-time counter.
  unsigned Pos = 0;
  for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
    if (Dep.DepKind == OMPC_DEPEND_depobj || Dep.IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, Pos: &Pos, Data: Dep, DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  // From here on the write position must live in memory, since the number of
  // records emitted per clause is a runtime value.
  LValue PosLVal = CGF.MakeAddrLValue(
      Addr: CGF.CreateMemTemp(T: C.getSizeType(), Name: "dep.counter.addr"), T: C.getSizeType());
  CGF.EmitStoreOfScalar(value: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: Pos), lvalue: PosLVal);
  for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
    if (Dep.DepKind == OMPC_DEPEND_depobj || !Dep.IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, Pos: &PosLVal, Data: Dep, DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
      if (Dep.DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Data: Dep, DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr: DependenciesArray, Ty: CGF.VoidPtrTy, ElementTy: CGF.Int8Ty);
  return std::make_pair(x&: NumOfElements, y&: DependenciesArray);
}
4492
/// Allocate and initialize the kmp_depend_info array backing an OpenMP
/// 'depobj' object. The array is heap-allocated via __kmpc_alloc with one
/// extra leading element that stores the number of dependence records.
/// \returns the address of the first dependence record (one element past the
/// stored count), cast to void*, or an invalid address when the dependence
/// list is empty.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(T: KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Val: Dependencies.IteratorExpr)) {
    // Iterator case: record count is the runtime product of all iterator
    // upper bounds.
    NumDepsVal = llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(E: IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(V: Sz, DestTy: CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(LHS: NumDepsVal, RHS: Sz);
    }
    // +1 below accounts for the leading size element.
    Size = CGF.Builder.CreateNUWAdd(LHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1),
                                    RHS: NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(T: KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(numChars: SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(LHS: Size, RHS: RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(V: NumDepsVal, DestTy: CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Static case: NumDependencies records plus the leading size element.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        EltTy: KmpDependInfoTy, ArySize: llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        SizeExpr: nullptr, ASM: ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(T: KmpDependInfoArrayTy);
    Size = CGM.getSize(numChars: Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                              M&: CGM.getModule(), FnID: OMPRTL___kmpc_alloc),
                          args: Args, name: ".dep.arr.addr");
  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(T: KmpDependInfoTy);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      V: Addr, DestTy: CGF.Builder.getPtrTy(AddrSpace: 0));
  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(Addr: DependenciesArray, T: KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  // The count is smuggled through the base_addr field of record 0.
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base,
      Field: *std::next(x: KmpDependInfoRD->field_begin(),
                   n: static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  CGF.EmitStoreOfScalar(value: NumDepsVal, lvalue: BaseAddrLVal);
  // Fill the records starting at index 1 (index 0 holds the count). With an
  // iterator clause the write position must live in memory.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        Addr: CGF.CreateMemTemp(T: C.getSizeType(), Name: "iterator.counter.addr"),
        T: C.getSizeType());
    CGF.EmitStoreOfScalar(value: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: Idx), lvalue: PosLVal,
                          /*IsInit=*/isInit: true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Data: Dependencies, DependenciesArray);
  // Return the address of the first real record, skipping the size element.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr: CGF.Builder.CreateConstGEP(Addr: DependenciesArray, Index: 1), Ty: CGF.VoidPtrTy,
      ElementTy: CGF.Int8Ty);
  return DependenciesArray;
}
4578
/// Emit the 'destroy' clause of a depobj directive: release the depobj array
/// via __kmpc_free. The pointer stored in the depobj references the first
/// dependence record, which is one element past the start of the allocation
/// (the size element), so the code steps back by one record before freeing.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(Ptr: DepobjLVal.getAddress(),
                                            PtrTy: C.VoidPtrTy.castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(T: KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr: Base.getAddress(), Ty: CGF.ConvertTypeForMem(T: KmpDependInfoPtrTy),
      ElementTy: CGF.ConvertTypeForMem(T: KmpDependInfoTy));
  // GEP by -1 record to recover the true allocation start.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Ty: Addr.getElementType(), Ptr: Addr.emitRawPointer(CGF),
      IdxList: llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: -1, /*isSigned=*/IsSigned: true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(V: DepObjAddr,
                                                               DestTy: CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                                M&: CGM.getModule(), FnID: OMPRTL___kmpc_free),
                            args: Args);
}
4605
/// Emit the 'update' clause of a depobj directive: walk every dependence
/// record stored in the depobj array and overwrite its 'flags' field with
/// the runtime encoding of \p NewDepKind.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(T: FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(args&: NumDeps, args&: Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Ty: Begin.getElementType(),
                                           Ptr: Begin.emitRawPointer(CGF), IdxList: NumDeps);
  // The basic structure here is a while-do loop.
  // Note: the loop body is entered unconditionally, so a depobj is assumed to
  // hold at least one record here.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock(name: "omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock(name: "omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BB: BodyBB);
  // PHI over the current record pointer: Begin on entry, ElementNext on the
  // back edge (added below once the increment is emitted).
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Ty: Begin.getType(), NumReservedValues: 2, Name: "omp.elementPast");
  ElementPHI->addIncoming(V: Begin.emitRawPointer(CGF), BB: EntryBB);
  Begin = Begin.withPointer(NewPointer: ElementPHI, IsKnownNonNull: KnownNonNull);
  Base = CGF.MakeAddrLValue(Addr: Begin, T: KmpDependInfoTy, BaseInfo: Base.getBaseInfo(),
                            TBAAInfo: Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(K: NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, Field: *std::next(x: KmpDependInfoRD->field_begin(),
                         n: static_cast<unsigned int>(RTLDependInfoFields::Flags)));
  CGF.EmitStoreOfScalar(
      value: llvm::ConstantInt::get(Ty: LLVMFlagsTy, V: static_cast<unsigned int>(DepKind)),
      lvalue: FlagsLVal);

  // Shift the address forward by one element.
  llvm::Value *ElementNext =
      CGF.Builder.CreateConstGEP(Addr: Begin, /*Index=*/1, Name: "omp.elementNext")
          .emitRawPointer(CGF);
  ElementPHI->addIncoming(V: ElementNext, BB: CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHS: ElementNext, RHS: End, Name: "omp.isempty");
  CGF.Builder.CreateCondBr(Cond: IsEmpty, True: DoneBB, False: BodyBB);
  // Done.
  CGF.EmitBlock(BB: DoneBB, /*IsFinished=*/true);
}
4653
/// Emit code for an '#pragma omp task' directive: initialize the task object
/// via emitTaskInit, then either enqueue it through
/// __kmpc_omp_task(_with_deps) (the 'then' path), or — when an 'if' clause
/// evaluates to false — run it as an undeferred task (the 'else' path: wait
/// on dependences, then call the task entry bracketed by
/// __kmpc_omp_task_begin_if0 / __kmpc_omp_task_complete_if0).
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(args&: NumOfElements, args&: DependenciesArray) =
      emitDependClause(CGF, Dependencies: Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
    DepTaskArgs[5] = CGF.Builder.getInt32(C: 0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
  }
  // 'then' path: enqueue the task with the runtime (deferred execution).
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start at part id 0.
      auto PartIdFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(Base: TDBase, Field: *PartIdFI);
      CGF.EmitStoreOfScalar(value: CGF.Builder.getInt32(C: 0), lvalue: PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          callee: OMPBuilder.getOrCreateRuntimeFunction(
              M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_task_with_deps),
          args: DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                              M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_task),
                          args: TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(C: 0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
    DepWaitTaskArgs[6] =
        llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: Data.HasNowaitClause);
  }
  auto &M = CGM.getModule();
  // 'else' path: if(false) — execute the task body immediately and inline.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                              M, FnID: OMPRTL___kmpc_omp_taskwait_deps_51),
                          args: DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, OutlinedFn: TaskEntry,
                                                          Args: OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, FnID: OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, FnID: OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, Cond: IfCond, ThenGen: ThenCodeGen, ElseGen: ElseCodeGen);
  } else {
    // No 'if' clause: always take the enqueue path.
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
4773
/// Emit a call to __kmpc_taskloop (or __kmpc_taskloop_5 when a grainsize/
/// num_tasks modifier is present) for an OpenMP taskloop directive:
/// initialize the task object, store lower/upper bounds, stride and the
/// reductions pointer into it, then invoke the runtime entry with the
/// 'if' and scheduling arguments.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(V: CGF.EvaluateExprAsBool(E: IfCond), DestTy: CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    // No 'if' clause: behave as if(true).
    IfVal = llvm::ConstantInt::getSigned(Ty: CGF.IntTy, /*V=*/1);
  }

  // Initialize the task's lower bound field from the loop's LB variable.
  LValue LBLVal = CGF.EmitLValueForField(
      Base: Result.TDBase,
      Field: *std::next(x: Result.KmpTaskTQTyRD->field_begin(), n: KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(E: LBVar->getInit(), Location: LBLVal.getAddress(), Quals: LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the task's upper bound field.
  LValue UBLVal = CGF.EmitLValueForField(
      Base: Result.TDBase,
      Field: *std::next(x: Result.KmpTaskTQTyRD->field_begin(), n: KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(E: UBVar->getInit(), Location: UBLVal.getAddress(), Quals: UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the task's stride field.
  LValue StLVal = CGF.EmitLValueForField(
      Base: Result.TDBase,
      Field: *std::next(x: Result.KmpTaskTQTyRD->field_begin(), n: KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(E: StVar->getInit(), Location: StLVal.getAddress(), Quals: StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Base: Result.TDBase,
      Field: *std::next(x: Result.KmpTaskTQTyRD->field_begin(), n: KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(value: Data.Reductions, lvalue: RedLVal);
  } else {
    CGF.EmitNullInitialization(DestPtr: RedLVal.getAddress(),
                               Ty: CGF.getContext().VoidPtrTy);
  }
  // Values of the 'sched' argument expected by the runtime.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::SmallVector<llvm::Value *, 12> TaskArgs{
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(lvalue: StLVal, Loc),
      llvm::ConstantInt::getSigned(
          Ty: CGF.IntTy, V: 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          Ty: CGF.IntTy, V: Data.Schedule.getPointer()
                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
                          : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(V: Data.Schedule.getPointer(), DestTy: CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(Ty: CGF.Int64Ty, /*V=*/0)};
  if (Data.HasModifier)
    TaskArgs.push_back(Elt: llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: 1));

  TaskArgs.push_back(Elt: Result.TaskDupFn
                         ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                               V: Result.TaskDupFn, DestTy: CGF.VoidPtrTy)
                         : llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy));
  // The _5 entry takes the extra 'modifier' argument pushed above.
  CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                          M&: CGM.getModule(), FnID: Data.HasModifier
                                                ? OMPRTL___kmpc_taskloop_5
                                                : OMPRTL___kmpc_taskloop),
                      args: TaskArgs);
}
4862
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional expressions forwarded verbatim to
/// \p RedOpGen (used by callers that need atomic-update style combiners).
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(VD: LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(VD: RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(arrayType: ArrayTy, baseType&: ElementTy, addr&: LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
  llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(Ty: LHSAddr.getElementType(), Ptr: LHSBegin, IdxList: NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock(name: "omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock(name: "omp.arraycpy.done");
  // Skip the whole loop when the array has zero elements.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHS: LHSBegin, RHS: LHSEnd, Name: "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(Cond: IsEmpty, True: DoneBB, False: BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BB: BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(T: ElementTy);

  // Two PHIs track the current source/destination element; back-edge values
  // are added after the increment is emitted below.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      Ty: RHSBegin->getType(), NumReservedValues: 2, Name: "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(V: RHSBegin, BB: EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      Ty: LHSBegin->getType(), NumReservedValues: 2, Name: "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(V: LHSBegin, BB: EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize));

  // Emit copy.
  // Rebind LHSVar/RHSVar to the current elements so RedOpGen's expressions
  // operate element-wise.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LocalVD: LHSVar, Addr: LHSElementCurrent);
  Scope.addPrivate(LocalVD: RHSVar, Addr: RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      Ty: LHSAddr.getElementType(), Ptr: LHSElementPHI, /*Idx0=*/1,
      Name: "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      Ty: RHSAddr.getElementType(), Ptr: RHSElementPHI, /*Idx0=*/1,
      Name: "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHS: LHSElementNext, RHS: LHSEnd, Name: "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Cond: Done, True: DoneBB, False: BodyBB);
  LHSElementPHI->addIncoming(V: LHSElementNext, BB: CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(V: RHSElementNext, BB: CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(BB: DoneBB, /*IsFinished=*/true);
}
4945
4946/// Emit reduction combiner. If the combiner is a simple expression emit it as
4947/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4948/// UDR combiner function.
4949static void emitReductionCombiner(CodeGenFunction &CGF,
4950 const Expr *ReductionOp) {
4951 if (const auto *CE = dyn_cast<CallExpr>(Val: ReductionOp))
4952 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(Val: CE->getCallee()))
4953 if (const auto *DRE =
4954 dyn_cast<DeclRefExpr>(Val: OVE->getSourceExpr()->IgnoreImpCasts()))
4955 if (const auto *DRD =
4956 dyn_cast<OMPDeclareReductionDecl>(Val: DRE->getDecl())) {
4957 std::pair<llvm::Function *, llvm::Function *> Reduction =
4958 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(D: DRD);
4959 RValue Func = RValue::get(V: Reduction.first);
4960 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4961 CGF.EmitIgnoredExpr(E: ReductionOp);
4962 return;
4963 }
4964 CGF.EmitIgnoredExpr(E: ReductionOp);
4965}
4966
/// Emit the outlined reduction function
///   void <ReducerName>(void *LHSArg, void *RHSArg)
/// where each argument points to an array of pointers to the reduction
/// variables; for every reduction i it privatizes LHSExprs[i]/RHSExprs[i]
/// to the corresponding array slots and emits
///   lhs[i] = ReductionOps[i](lhs[i], rhs[i]).
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(Elt: &LHSArg);
  Args.push_back(Elt: &RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
  std::string Name = getReductionFuncName(Name: ReducerName);
  auto *Fn = llvm::Function::Create(Ty: CGM.getTypes().GetFunctionType(Info: CGFI),
                                    Linkage: llvm::GlobalValue::InternalLinkage, N: Name,
                                    M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: CGFI);
  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
    Fn->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo: CGFI, Args, Loc, StartLoc: Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  V: CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(VD: &LHSArg)),
                  DestTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  V: CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(VD: &RHSArg)),
                  DestTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
              ArgsElemType, CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSExprs[I])->getDecl());
    Scope.addPrivate(LocalVD: RHSVar, Addr: emitAddrOfVarFromArray(CGF, Array: RHS, Index: Idx, Var: RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSExprs[I])->getDecl());
    Scope.addPrivate(LocalVD: LHSVar, Addr: emitAddrOfVarFromArray(CGF, Array: LHS, Index: Idx, Var: LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // VLA privates occupy an extra array slot holding the size; bind the
      // VLA's opaque size expression to that value before emitting the type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(Addr: LHS, Index: Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Addr: Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(T: PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(Val: VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(V: CGF.Builder.CreatePtrToInt(V: Ptr, DestTy: CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(Ty: PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, Type: (*IPriv)->getType(), LHSVar, RHSVar,
          RedOpGen: [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, ReductionOp: E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, ReductionOp: E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5058
5059void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5060 const Expr *ReductionOp,
5061 const Expr *PrivateRef,
5062 const DeclRefExpr *LHS,
5063 const DeclRefExpr *RHS) {
5064 if (PrivateRef->getType()->isArrayType()) {
5065 // Emit reduction for array section.
5066 const auto *LHSVar = cast<VarDecl>(Val: LHS->getDecl());
5067 const auto *RHSVar = cast<VarDecl>(Val: RHS->getDecl());
5068 EmitOMPAggregateReduction(
5069 CGF, Type: PrivateRef->getType(), LHSVar, RHSVar,
5070 RedOpGen: [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5071 emitReductionCombiner(CGF, ReductionOp);
5072 });
5073 } else {
5074 // Emit reduction for array subscript or single variable.
5075 emitReductionCombiner(CGF, ReductionOp);
5076 }
5077}
5078
// Forward declaration (internal linkage, so defined elsewhere in this TU);
// declared here so emitPrivateReduction below can build a unique name for its
// internal shared variable.
static std::string generateUniqueName(CodeGenModule &CGM,
                                      llvm::StringRef Prefix, const Expr *Ref);
5081
/// Emits a reduction of one thread-private list item through an internal
/// shared global: thread 0 initializes the global, every thread combines its
/// private copy into it inside a critical section, the combined value is then
/// copied back to each thread's copy, and finally merged with the original
/// list item. __kmpc_barrier calls separate each phase (see the step-by-step
/// comment below).
void CGOpenMPRuntime::emitPrivateReduction(
    CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates,
    const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps) {

  // Create a shared global variable (__shared_reduction_var) to accumulate the
  // final result.
  //
  // Call __kmpc_barrier to synchronize threads before initialization.
  //
  // The master thread (thread_id == 0) initializes __shared_reduction_var
  // with the identity value or initializer.
  //
  // Call __kmpc_barrier to synchronize before combining.
  // For each i:
  // - Thread enters critical section.
  // - Reads its private value from LHSExprs[i].
  // - Updates __shared_reduction_var[i] = RedOp_i(__shared_reduction_var[i],
  // Privates[i]).
  // - Exits critical section.
  //
  // Call __kmpc_barrier after combining.
  //
  // Each thread copies __shared_reduction_var[i] back to RHSExprs[i].
  //
  // Final __kmpc_barrier to synchronize after broadcasting
  QualType PrivateType = Privates->getType();
  llvm::Type *LLVMType = CGF.ConvertTypeForMem(T: PrivateType);

  // A user-defined reduction (declare reduction) changes both initialization
  // and the combine step below.
  const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOp: ReductionOps);
  std::string ReductionVarNameStr;
  if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: Privates->IgnoreParenCasts()))
    ReductionVarNameStr =
        generateUniqueName(CGM, Prefix: DRE->getDecl()->getNameAsString(), Ref: Privates);
  else
    ReductionVarNameStr = "unnamed_priv_var";

  // Create an internal shared variable
  // NOTE(review): "internal_pivate_" spelling (sic) is preserved here; it only
  // affects the mangled name of the internal global, not runtime behavior.
  std::string SharedName =
      CGM.getOpenMPRuntime().getName(Parts: {"internal_pivate_", ReductionVarNameStr});
  llvm::GlobalVariable *SharedVar = OMPBuilder.getOrCreateInternalVariable(
      Ty: LLVMType, Name: ".omp.reduction." + SharedName);

  // Alignment of the shared global is taken from the AST type (bits -> bytes).
  SharedVar->setAlignment(
      llvm::MaybeAlign(CGF.getContext().getTypeAlign(T: PrivateType) / 8));

  Address SharedResult =
      CGF.MakeNaturalAlignRawAddrLValue(V: SharedVar, T: PrivateType).getAddress();

  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, Flags: OMP_ATOMIC_REDUCE);
  llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};

  llvm::BasicBlock *InitBB = CGF.createBasicBlock(name: "init");
  llvm::BasicBlock *InitEndBB = CGF.createBasicBlock(name: "init.end");

  // Despite the name, "IsWorker" selects thread 0 (the master thread), which
  // is the one that runs the initialization block.
  llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
      LHS: ThreadId, RHS: llvm::ConstantInt::get(Ty: ThreadId->getType(), V: 0));
  CGF.Builder.CreateCondBr(Cond: IsWorker, True: InitBB, False: InitEndBB);

  CGF.EmitBlock(BB: InitBB);

  // Initialization priority: UDR initializer (if any) > the private decl's
  // own initializer > null/default initialization.
  auto EmitSharedInit = [&]() {
    if (UDR) { // Check if it's a User-Defined Reduction
      if (const Expr *UDRInitExpr = UDR->getInitializer()) {
        std::pair<llvm::Function *, llvm::Function *> FnPair =
            getUserDefinedReduction(D: UDR);
        llvm::Function *InitializerFn = FnPair.second;
        if (InitializerFn) {
          if (const auto *CE =
                  dyn_cast<CallExpr>(Val: UDRInitExpr->IgnoreParenImpCasts())) {
            // The initializer is a call; remap its "omp_priv" argument to the
            // shared global so the result lands directly in SharedResult.
            const auto *OutDRE = cast<DeclRefExpr>(
                Val: cast<UnaryOperator>(Val: CE->getArg(Arg: 0)->IgnoreParenImpCasts())
                    ->getSubExpr());
            const VarDecl *OutVD = cast<VarDecl>(Val: OutDRE->getDecl());

            CodeGenFunction::OMPPrivateScope LocalScope(CGF);
            LocalScope.addPrivate(LocalVD: OutVD, Addr: SharedResult);

            (void)LocalScope.Privatize();
            if (const auto *OVE = dyn_cast<OpaqueValueExpr>(
                    Val: CE->getCallee()->IgnoreParenImpCasts())) {
              // Bind the opaque callee to the UDR initializer function and
              // emit the call for its side effects.
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  CGF, OVE, RValue::get(V: InitializerFn));
              CGF.EmitIgnoredExpr(E: CE);
            } else {
              CGF.EmitAnyExprToMem(E: UDRInitExpr, Location: SharedResult,
                                   Quals: PrivateType.getQualifiers(),
                                   /*IsInitializer=*/true);
            }
          } else {
            CGF.EmitAnyExprToMem(E: UDRInitExpr, Location: SharedResult,
                                 Quals: PrivateType.getQualifiers(),
                                 /*IsInitializer=*/true);
          }
        } else {
          CGF.EmitAnyExprToMem(E: UDRInitExpr, Location: SharedResult,
                               Quals: PrivateType.getQualifiers(),
                               /*IsInitializer=*/true);
        }
      } else {
        // EmitNullInitialization handles default construction for C++ classes
        // and zeroing for scalars, which is a reasonable default.
        CGF.EmitNullInitialization(DestPtr: SharedResult, Ty: PrivateType);
      }
      return; // UDR initialization handled
    }
    if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: Privates)) {
      if (const auto *VD = dyn_cast<VarDecl>(Val: DRE->getDecl())) {
        if (const Expr *InitExpr = VD->getInit()) {
          CGF.EmitAnyExprToMem(E: InitExpr, Location: SharedResult,
                               Quals: PrivateType.getQualifiers(), IsInitializer: true);
          return;
        }
      }
    }
    CGF.EmitNullInitialization(DestPtr: SharedResult, Ty: PrivateType);
  };
  EmitSharedInit();
  CGF.Builder.CreateBr(Dest: InitEndBB);
  CGF.EmitBlock(BB: InitEndBB);

  // Barrier: all threads wait until the shared variable is initialized.
  CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                          M&: CGM.getModule(), FnID: OMPRTL___kmpc_barrier),
                      args: BarrierArgs);

  const Expr *ReductionOp = ReductionOps;
  const OMPDeclareReductionDecl *CurrentUDR = getReductionInit(ReductionOp);
  LValue SharedLV = CGF.MakeAddrLValue(Addr: SharedResult, T: PrivateType);
  LValue LHSLV = CGF.EmitLValue(E: Privates);

  // Helper: run the given generator inside a named critical region so the
  // combine into the shared variable is serialized across threads.
  auto EmitCriticalReduction = [&](auto ReductionGen) {
    std::string CriticalName = getName(Parts: {"reduction_critical"});
    emitCriticalRegion(CGF, CriticalName, CriticalOpGen: ReductionGen, Loc);
  };

  if (CurrentUDR) {
    // Handle user-defined reduction.
    auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      std::pair<llvm::Function *, llvm::Function *> FnPair =
          getUserDefinedReduction(D: CurrentUDR);
      if (FnPair.first) {
        if (const auto *CE = dyn_cast<CallExpr>(Val: ReductionOp)) {
          // Remap omp_out -> shared accumulator, omp_in -> private copy.
          const auto *OutDRE = cast<DeclRefExpr>(
              Val: cast<UnaryOperator>(Val: CE->getArg(Arg: 0)->IgnoreParenImpCasts())
                  ->getSubExpr());
          const auto *InDRE = cast<DeclRefExpr>(
              Val: cast<UnaryOperator>(Val: CE->getArg(Arg: 1)->IgnoreParenImpCasts())
                  ->getSubExpr());
          CodeGenFunction::OMPPrivateScope LocalScope(CGF);
          LocalScope.addPrivate(LocalVD: cast<VarDecl>(Val: OutDRE->getDecl()),
                                Addr: SharedLV.getAddress());
          LocalScope.addPrivate(LocalVD: cast<VarDecl>(Val: InDRE->getDecl()),
                                Addr: LHSLV.getAddress());
          (void)LocalScope.Privatize();
          emitReductionCombiner(CGF, ReductionOp);
        }
      }
    };
    EmitCriticalReduction(ReductionGen);
  } else {
    // Handle built-in reduction operations.
#ifndef NDEBUG
    // Debug-only sanity check: the reduction op must be an assignment
    // (either a BinaryOperator '=' or an overloaded operator=).
    const Expr *ReductionClauseExpr = ReductionOp->IgnoreParenCasts();
    if (const auto *Cleanup = dyn_cast<ExprWithCleanups>(ReductionClauseExpr))
      ReductionClauseExpr = Cleanup->getSubExpr()->IgnoreParenCasts();

    const Expr *AssignRHS = nullptr;
    if (const auto *BinOp = dyn_cast<BinaryOperator>(ReductionClauseExpr)) {
      if (BinOp->getOpcode() == BO_Assign)
        AssignRHS = BinOp->getRHS();
    } else if (const auto *OpCall =
                   dyn_cast<CXXOperatorCallExpr>(ReductionClauseExpr)) {
      if (OpCall->getOperator() == OO_Equal)
        AssignRHS = OpCall->getArg(1);
    }

    assert(AssignRHS &&
           "Private Variable Reduction : Invalid ReductionOp expression");
#endif

    auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      const auto *OmpOutDRE =
          dyn_cast<DeclRefExpr>(Val: LHSExprs->IgnoreParenImpCasts());
      const auto *OmpInDRE =
          dyn_cast<DeclRefExpr>(Val: RHSExprs->IgnoreParenImpCasts());
      assert(
          OmpOutDRE && OmpInDRE &&
          "Private Variable Reduction : LHSExpr/RHSExpr must be DeclRefExprs");
      const VarDecl *OmpOutVD = cast<VarDecl>(Val: OmpOutDRE->getDecl());
      const VarDecl *OmpInVD = cast<VarDecl>(Val: OmpInDRE->getDecl());
      CodeGenFunction::OMPPrivateScope LocalScope(CGF);
      LocalScope.addPrivate(LocalVD: OmpOutVD, Addr: SharedLV.getAddress());
      LocalScope.addPrivate(LocalVD: OmpInVD, Addr: LHSLV.getAddress());
      (void)LocalScope.Privatize();
      // Emit the actual reduction operation
      CGF.EmitIgnoredExpr(E: ReductionOp);
    };
    EmitCriticalReduction(ReductionGen);
  }

  // Barrier: wait for all threads to finish combining into the shared var.
  CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                          M&: CGM.getModule(), FnID: OMPRTL___kmpc_barrier),
                      args: BarrierArgs);

  // Broadcast final result
  bool IsAggregate = PrivateType->isAggregateType();
  LValue SharedLV1 = CGF.MakeAddrLValue(Addr: SharedResult, T: PrivateType);
  llvm::Value *FinalResultVal = nullptr;
  Address FinalResultAddr = Address::invalid();

  if (IsAggregate)
    FinalResultAddr = SharedResult;
  else
    FinalResultVal = CGF.EmitLoadOfScalar(lvalue: SharedLV1, Loc);

  LValue TargetLHSLV = CGF.EmitLValue(E: RHSExprs);
  if (IsAggregate) {
    CGF.EmitAggregateCopy(Dest: TargetLHSLV,
                          Src: CGF.MakeAddrLValue(Addr: FinalResultAddr, T: PrivateType),
                          EltTy: PrivateType, MayOverlap: AggValueSlot::DoesNotOverlap, isVolatile: false);
  } else {
    CGF.EmitStoreOfScalar(value: FinalResultVal, lvalue: TargetLHSLV);
  }
  // Final synchronization barrier
  CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                          M&: CGM.getModule(), FnID: OMPRTL___kmpc_barrier),
                      args: BarrierArgs);

  // Combiner with original list item
  auto OriginalListCombiner = [&](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitSingleReductionCombiner(CGF, ReductionOp: ReductionOps, PrivateRef: Privates,
                                LHS: cast<DeclRefExpr>(Val: LHSExprs),
                                RHS: cast<DeclRefExpr>(Val: RHSExprs));
  };
  EmitCriticalReduction(OriginalListCombiner);
}
5322
/// Emits code for the 'reduction' clause. For SimpleReduction the combiners
/// are emitted inline. Otherwise the full __kmpc_reduce{_nowait} protocol is
/// generated (tree-reduce in case 1, atomic/critical fallback in case 2),
/// followed by separate private-variable reductions for any entries flagged
/// in Options.IsPrivateVarReduction. The Org* arrays are parallel: entry I of
/// each describes the same reduction item.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> OrgPrivates,
                                    ArrayRef<const Expr *> OrgLHSExprs,
                                    ArrayRef<const Expr *> OrgRHSExprs,
                                    ArrayRef<const Expr *> OrgReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // Inline path: emit each combiner directly, no runtime calls.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = OrgPrivates.begin();
    const auto *ILHS = OrgLHSExprs.begin();
    const auto *IRHS = OrgRHSExprs.begin();
    for (const Expr *E : OrgReductionOps) {
      emitSingleReductionCombiner(CGF, ReductionOp: E, PrivateRef: *IPriv, LHS: cast<DeclRefExpr>(Val: *ILHS),
                                  RHS: cast<DeclRefExpr>(Val: *IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // Filter out shared reduction variables based on IsPrivateVarReduction flag.
  // Only keep entries where the corresponding variable is not private.
  // Private entries are handled by emitPrivateReduction at the end of this
  // function instead.
  SmallVector<const Expr *> FilteredPrivates, FilteredLHSExprs,
      FilteredRHSExprs, FilteredReductionOps;
  for (unsigned I : llvm::seq<unsigned>(
           Size: std::min(a: OrgReductionOps.size(), b: OrgLHSExprs.size()))) {
    if (!Options.IsPrivateVarReduction[I]) {
      FilteredPrivates.emplace_back(Args: OrgPrivates[I]);
      FilteredLHSExprs.emplace_back(Args: OrgLHSExprs[I]);
      FilteredRHSExprs.emplace_back(Args: OrgRHSExprs[I]);
      FilteredReductionOps.emplace_back(Args: OrgReductionOps[I]);
    }
  }
  // Wrap filtered vectors in ArrayRef for downstream shared reduction
  // processing.
  ArrayRef<const Expr *> Privates = FilteredPrivates;
  ArrayRef<const Expr *> LHSExprs = FilteredLHSExprs;
  ArrayRef<const Expr *> RHSExprs = FilteredRHSExprs;
  ArrayRef<const Expr *> ReductionOps = FilteredReductionOps;

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy = C.getConstantArrayType(
      EltTy: C.VoidPtrTy, ArySize: ArraySize, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal,
      /*IndexTypeQuals=*/0);
  RawAddress ReductionList =
      CGF.CreateMemTemp(T: ReductionArrayTy, Name: ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(Addr: ReductionList, Index: Idx);
    CGF.Builder.CreateStore(
        Val: CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            V: CGF.EmitLValue(E: RHSExprs[I]).getPointer(CGF), DestTy: CGF.VoidPtrTy),
        Addr: Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(Addr: ReductionList, Index: Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          V: CGF.getVLASize(
                 vla: CGF.getContext().getAsVariableArrayType(T: (*IPriv)->getType()))
              .NumElts,
          DestTy: CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(Val: CGF.Builder.CreateIntToPtr(V: Size, DestTy: CGF.VoidPtrTy),
                              Addr: Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      ReducerName: CGF.CurFn->getName(), Loc, ArgsElemType: CGF.ConvertTypeForMem(T: ReductionArrayTy),
      Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName(Parts: {"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(CriticalName: Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, Flags: OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(Ty: ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      V: ReductionList.getPointer(), DestTy: CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(C: RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      callee: OMPBuilder.getOrCreateRuntimeFunction(
          M&: CGM.getModule(),
          FnID: WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      args: Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(name: ".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(V: Res, Dest: DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(name: ".omp.reduction.case1");
  SwInst->addCase(OnVal: CGF.Builder.getInt32(C: 1), Dest: Case1BB);
  CGF.EmitBlock(BB: Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, ReductionOp: E, PrivateRef: *IPriv, LHS: cast<DeclRefExpr>(Val: *ILHS),
                                     RHS: cast<DeclRefExpr>(Val: *IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, {},
      OMPBuilder.getOrCreateRuntimeFunction(
          M&: CGM.getModule(), FnID: WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                        : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(Block: DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(name: ".omp.reduction.case2");
  SwInst->addCase(OnVal: CGF.Builder.getInt32(C: 2), Dest: Case2BB);
  CGF.EmitBlock(BB: Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      // NOTE(review): the inner 'BO' declared in the dyn_cast below shadows
      // this BinaryOperatorKind; the outer BO is only assigned later from
      // BORHS->getOpcode().
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(Val: E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                Val: RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(Val: RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(E: XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(E: EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              AO: llvm::AtomicOrdering::Monotonic, Loc,
              CommonGen: [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Re-evaluate the update expression against a temporary that
                // holds the atomically loaded old value of the LHS.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(T: VD->getType());
                CGF.emitOMPSimpleStore(
                    LVal: CGF.MakeAddrLValue(Addr: LHSTemp, T: VD->getType()), RVal: XRValue,
                    RValTy: VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(LocalVD: VD, Addr: LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(E: UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, Type: (*IPriv)->getType(), LHSVar: VD, RHSVar,
                                    RedOpGen: AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName(Parts: {"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, CriticalName: Name,
              CriticalOpGen: [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, ReductionOp: E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, Type: (*IPriv)->getType(), LHSVar, RHSVar,
                                    RedOpGen: CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, {},
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(Block: DefaultBB);
  CGF.EmitBlock(BB: DefaultBB, /*IsFinished=*/true);
  // Entries flagged as private-variable reductions were excluded from the
  // runtime protocol above; handle them one by one now.
  assert(OrgLHSExprs.size() == OrgPrivates.size() &&
         "PrivateVarReduction: Privates size mismatch");
  assert(OrgLHSExprs.size() == OrgReductionOps.size() &&
         "PrivateVarReduction: ReductionOps size mismatch");
  for (unsigned I : llvm::seq<unsigned>(
           Size: std::min(a: OrgReductionOps.size(), b: OrgLHSExprs.size()))) {
    if (Options.IsPrivateVarReduction[I])
      emitPrivateReduction(CGF, Loc, Privates: OrgPrivates[I], LHSExprs: OrgLHSExprs[I],
                           RHSExprs: OrgRHSExprs[I], ReductionOps: OrgReductionOps[I]);
  }
}
5656
5657/// Generates unique name for artificial threadprivate variables.
5658/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5659static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5660 const Expr *Ref) {
5661 SmallString<256> Buffer;
5662 llvm::raw_svector_ostream Out(Buffer);
5663 const clang::DeclRefExpr *DE;
5664 const VarDecl *D = ::getBaseDecl(Ref, DE);
5665 if (!D)
5666 D = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: Ref)->getDecl());
5667 D = D->getCanonicalDecl();
5668 std::string Name = CGM.getOpenMPRuntime().getName(
5669 Parts: {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(GD: D)});
5670 Out << Prefix << Name << "_"
5671 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5672 return std::string(Out.str());
5673}
5674
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are restrict-qualified void*: %arg is the private copy to
  // initialize, %orig points at the original reduction item.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamKind::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamKind::Other);
  Args.emplace_back(Args: &Param);
  Args.emplace_back(Args: &ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(Info: FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(Parts: {"red_init", ""});
  auto *Fn = llvm::Function::Create(Ty: FnTy, Linkage: llvm::GlobalValue::InternalLinkage,
                                    N: Name, M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: FnInfo);
  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
    Fn->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo, Args, Loc, StartLoc: Loc);
  QualType PrivateType = RCG.getPrivateType(N);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      Ptr: CGF.GetAddrOfLocalVar(VD: &Param).withElementType(ElemTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
      PtrTy: C.getPointerType(T: PrivateType)->castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, VarType: CGM.getContext().getSizeType(),
        Name: generateUniqueName(CGM, Prefix: "reduction_size", Ref: RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(Addr: SizeAddr, /*Volatile=*/false,
                                Ty: CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(VD: &ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        Ptr: SharedAddr,
        PtrTy: CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedAddr: OrigAddr,
                         DefaultInit: [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5741
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  // LHS is the in/out accumulator, RHS the incoming value; both arrive as
  // void* and are remapped to the real variables below.
  const auto *LHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  Args.emplace_back(Args: &ParamInOut);
  Args.emplace_back(Args: &ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(Info: FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(Parts: {"red_comb", ""});
  auto *Fn = llvm::Function::Create(Ty: FnTy, Linkage: llvm::GlobalValue::InternalLinkage,
                                    N: Name, M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: FnInfo);
  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
    Fn->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo, Args, Loc, StartLoc: Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, VarType: CGM.getContext().getSizeType(),
        Name: generateUniqueName(CGM, Prefix: "reduction_size", Ref: RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(Addr: SizeAddr, /*Volatile=*/false,
                                Ty: CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(
      LocalVD: LHSVD,
      // Pull out the pointer to the variable.
      Addr: CGF.EmitLoadOfPointer(
          Ptr: CGF.GetAddrOfLocalVar(VD: &ParamInOut)
              .withElementType(ElemTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
          PtrTy: C.getPointerType(T: LHSVD->getType())->castAs<PointerType>()));
  PrivateScope.addPrivate(
      LocalVD: RHSVD,
      // Pull out the pointer to the variable.
      Addr: CGF.EmitLoadOfPointer(
          Ptr: CGF.GetAddrOfLocalVar(VD: &ParamIn).withElementType(
              ElemTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
          PtrTy: C.getPointerType(T: RHSVD->getType())->castAs<PointerType>()));
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, LHS: cast<DeclRefExpr>(Val: LHS),
      RHS: cast<DeclRefExpr>(Val: RHS));
  CGF.FinishFunction();
  return Fn;
}
5819
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer needed when the reduction item has no cleanups (e.g. a
  // trivially destructible type); the caller handles the nullptr.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamKind::Other);
  Args.emplace_back(Args: &Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(Info: FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(Parts: {"red_fini", ""});
  auto *Fn = llvm::Function::Create(Ty: FnTy, Linkage: llvm::GlobalValue::InternalLinkage,
                                    N: Name, M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: FnInfo);
  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
    Fn->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo, Args, Loc, StartLoc: Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      Ptr: CGF.GetAddrOfLocalVar(VD: &Param), PtrTy: C.VoidPtrTy.castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, VarType: CGM.getContext().getSizeType(),
        Name: generateUniqueName(CGM, Prefix: "reduction_size", Ref: RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(Addr: SizeAddr, /*Volatile=*/false,
                                Ty: CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(EndLoc: Loc);
  return Fn;
}
5869
/// Emits the task reduction initialization call. Builds a stack array of
/// kmp_taskred_input_t records (one per reduction item) describing the shared
/// item, its size, and the init/fini/comb callbacks, then calls either
/// __kmpc_taskred_modifier_init (reduction with a task modifier) or
/// __kmpc_taskred_init. Returns the taskgroup descriptor produced by the
/// runtime, or nullptr when there is no insert point or no reduction vars.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord(Name: "kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, DC: RD, FieldTy: C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  CanQualType RDType = C.getCanonicalTagType(TD: RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType =
      C.getConstantArrayType(EltTy: RDType, ArySize: ArraySize, SizeExpr: nullptr,
                             ASM: ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  RawAddress TaskRedInput = CGF.CreateMemTemp(T: ArrayRDType, Name: ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one kmp_taskred_input_t record per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(Ty: CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(Ty: CGM.SizeTy, V: Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        ElemTy: TaskRedInput.getElementType(), Ptr: TaskRedInput.getPointer(), IdxList: Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        Name: ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(V: GEP, T: RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: SharedFD);
    RCG.emitSharedOrigLValue(CGF, N: Cnt);
    llvm::Value *Shared = RCG.getSharedLValue(N: Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(value: Shared, lvalue: SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: OrigFD);
    llvm::Value *Orig = RCG.getOrigLValue(N: Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(value: Orig, lvalue: OrigLVal);
    RCG.emitAggregateType(CGF, N: Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(args&: SizeValInChars, args&: SizeVal) = RCG.getSizes(N: Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(V: SizeValInChars, DestTy: CGM.SizeTy,
                                               /*isSigned=*/false);
    // ElemLVal.reduce_size = size-in-chars;
    LValue SizeLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: SizeFD);
    CGF.EmitStoreOfScalar(value: SizeValInChars, lvalue: SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: InitFD);
    llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, N: Cnt);
    CGF.EmitStoreOfScalar(value: InitAddr, lvalue: InitLVal);
    // ElemLVal.reduce_fini = fini; null if the item needs no cleanups.
    LValue FiniLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, N: Cnt);
    llvm::Value *FiniAddr =
        Fini ? Fini : llvm::ConstantPointerNull::get(T: CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(value: FiniAddr, lvalue: FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: CombFD);
    llvm::Value *CombAddr = emitReduceCombFunction(
        CGM, Loc, RCG, N: Cnt, ReductionOp: Data.ReductionOps[Cnt], LHS: LHSExprs[Cnt],
        RHS: RHSExprs[Cnt], PrivateRef: Data.ReductionCopies[Cnt]);
    CGF.EmitStoreOfScalar(value: CombAddr, lvalue: CombLVal);
    // ElemLVal.flags = 0; or 1 to request delayed creation for runtime-sized
    // items (see the comment on DelayedCreation above).
    LValue FlagsLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          value: llvm::ConstantInt::get(Ty: CGM.Int32Ty, /*V=*/1, /*isSigned=*/IsSigned: true),
          lvalue: FlagsLVal);
    } else
      CGF.EmitNullInitialization(DestPtr: FlagsLVal.getAddress(), Ty: FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(V: getThreadID(CGF, Loc),
                                                  DestTy: CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(Ty: CGM.IntTy, V: Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/IsSigned: true),
        llvm::ConstantInt::get(Ty: CGM.IntTy, V: Size, /*isSigned=*/IsSigned: true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            V: TaskRedInput.getPointer(), DestTy: CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        callee: OMPBuilder.getOrCreateRuntimeFunction(
            M&: CGM.getModule(), FnID: OMPRTL___kmpc_taskred_modifier_init),
        args: Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(V: getThreadID(CGF, Loc), DestTy: CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(Ty: CGM.IntTy, V: Size, /*isSigned=*/IsSigned: true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(V: TaskRedInput.getPointer(),
                                                      DestTy: CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                                 M&: CGM.getModule(), FnID: OMPRTL___kmpc_taskred_init),
                             args: Args);
}
5994
5995void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5996 SourceLocation Loc,
5997 bool IsWorksharingReduction) {
5998 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5999 // is_ws, int num, void *data);
6000 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6001 llvm::Value *GTid = CGF.Builder.CreateIntCast(V: getThreadID(CGF, Loc),
6002 DestTy: CGM.IntTy, /*isSigned=*/true);
6003 llvm::Value *Args[] = {IdentTLoc, GTid,
6004 llvm::ConstantInt::get(Ty: CGM.IntTy,
6005 V: IsWorksharingReduction ? 1 : 0,
6006 /*isSigned=*/IsSigned: true)};
6007 (void)CGF.EmitRuntimeCall(
6008 callee: OMPBuilder.getOrCreateRuntimeFunction(
6009 M&: CGM.getModule(), FnID: OMPRTL___kmpc_task_reduction_modifier_fini),
6010 args: Args);
6011}
6012
6013void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6014 SourceLocation Loc,
6015 ReductionCodeGen &RCG,
6016 unsigned N) {
6017 auto Sizes = RCG.getSizes(N);
6018 // Emit threadprivate global variable if the type is non-constant
6019 // (Sizes.second = nullptr).
6020 if (Sizes.second) {
6021 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(V: Sizes.second, DestTy: CGM.SizeTy,
6022 /*isSigned=*/false);
6023 Address SizeAddr = getAddrOfArtificialThreadPrivate(
6024 CGF, VarType: CGM.getContext().getSizeType(),
6025 Name: generateUniqueName(CGM, Prefix: "reduction_size", Ref: RCG.getRefExpr(N)));
6026 CGF.Builder.CreateStore(Val: SizeVal, Addr: SizeAddr, /*IsVolatile=*/false);
6027 }
6028}
6029
6030Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6031 SourceLocation Loc,
6032 llvm::Value *ReductionsPtr,
6033 LValue SharedLVal) {
6034 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6035 // *d);
6036 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(V: getThreadID(CGF, Loc),
6037 DestTy: CGM.IntTy,
6038 /*isSigned=*/true),
6039 ReductionsPtr,
6040 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6041 V: SharedLVal.getPointer(CGF), DestTy: CGM.VoidPtrTy)};
6042 return Address(
6043 CGF.EmitRuntimeCall(
6044 callee: OMPBuilder.getOrCreateRuntimeFunction(
6045 M&: CGM.getModule(), FnID: OMPRTL___kmpc_task_reduction_get_th_data),
6046 args: Args),
6047 CGF.Int8Ty, SharedLVal.getAlignment());
6048}
6049
/// Emits code for a 'taskwait' directive. Without dependences this lowers to
/// __kmpc_omp_taskwait (or the OpenMPIRBuilder equivalent); with dependences
/// it lowers to __kmpc_omp_taskwait_deps_51 over the emitted depend array.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(Loc: CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    // Materialize the depend clause items (if any) into a runtime array.
    std::tie(args&: NumOfElements, args&: DependenciesArray) =
        emitDependClause(CGF, Dependencies: Data.Dependences, Loc);
    if (!Data.Dependences.empty()) {
      llvm::Value *DepWaitTaskArgs[7];
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(C: 0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
      DepWaitTaskArgs[6] =
          llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: Data.HasNowaitClause);

      // Scope any cleanups from materializing the depend array to this call.
      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
      // kmp_int32 has_no_wait); if dependence info is specified.
      CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                              M, FnID: OMPRTL___kmpc_omp_taskwait_deps_51),
                          args: DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          callee: OMPBuilder.getOrCreateRuntimeFunction(M, FnID: OMPRTL___kmpc_omp_taskwait),
          args: Args);
    }
  }

  // Inside an untied-task region, mark a resume point after the taskwait.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
6102
6103void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6104 OpenMPDirectiveKind InnerKind,
6105 const RegionCodeGenTy &CodeGen,
6106 bool HasCancel) {
6107 if (!CGF.HaveInsertPoint())
6108 return;
6109 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6110 InnerKind != OMPD_critical &&
6111 InnerKind != OMPD_master &&
6112 InnerKind != OMPD_masked);
6113 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6114}
6115
namespace {
/// Cancellation-kind codes passed as the 'cncl_kind' argument to
/// __kmpc_cancel / __kmpc_cancellationpoint (see getCancellationKind below).
enum RTCancelKind {
  CancelNoreq = 0,      // no cancellation requested
  CancelParallel = 1,   // cancel 'parallel' region
  CancelLoop = 2,       // cancel worksharing loop ('for')
  CancelSections = 3,   // cancel 'sections' region
  CancelTaskgroup = 4   // cancel 'taskgroup' region
};
} // anonymous namespace
6125
6126static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6127 RTCancelKind CancelKind = CancelNoreq;
6128 if (CancelRegion == OMPD_parallel)
6129 CancelKind = CancelParallel;
6130 else if (CancelRegion == OMPD_for)
6131 CancelKind = CancelLoop;
6132 else if (CancelRegion == OMPD_sections)
6133 CancelKind = CancelSections;
6134 else {
6135 assert(CancelRegion == OMPD_taskgroup);
6136 CancelKind = CancelTaskgroup;
6137 }
6138 return CancelKind;
6139}
6140
/// Emits code for a 'cancellation point' directive: calls
/// __kmpc_cancellationpoint and, if it returns non-zero, branches out of the
/// cancelled construct (emitting a cancel barrier first for 'parallel').
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(C: getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          callee: OMPBuilder.getOrCreateRuntimeFunction(
              M&: CGM.getModule(), FnID: OMPRTL___kmpc_cancellationpoint),
          args: Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: ".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(name: ".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Arg: Result);
      CGF.Builder.CreateCondBr(Cond: Cmp, True: ExitBB, False: ContBB);
      CGF.EmitBlock(BB: ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, Kind: OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct; runs any pending cleanups on the way out.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(Kind: OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(Dest: CancelDest);
      CGF.EmitBlock(BB: ContBB, /*IsFinished=*/true);
    }
  }
}
6180
/// Emits code for a 'cancel' directive: calls __kmpc_cancel (guarded by the
/// 'if' clause condition when present) and branches out of the cancelled
/// construct when the runtime reports that cancellation was activated.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo)) {
    // The 'then' branch of the (optional) if-clause: actually perform the
    // cancel. Captured by reference/value so it can run inside emitIfClause.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(C: getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          callee: OMPBuilder.getOrCreateRuntimeFunction(M, FnID: OMPRTL___kmpc_cancel), args: Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: ".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(name: ".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Arg: Result);
      CGF.Builder.CreateCondBr(Cond: Cmp, True: ExitBB, False: ContBB);
      CGF.EmitBlock(BB: ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, Kind: OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct; runs any pending cleanups on the way out.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(Kind: OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(Dest: CancelDest);
      CGF.EmitBlock(BB: ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // Conditional cancel: only the 'then' arm does anything.
      emitIfClause(CGF, Cond: IfCond, ThenGen,
                   ElseGen: [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6226
6227namespace {
6228/// Cleanup action for uses_allocators support.
6229class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6230 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6231
6232public:
6233 OMPUsesAllocatorsActionTy(
6234 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6235 : Allocators(Allocators) {}
6236 void Enter(CodeGenFunction &CGF) override {
6237 if (!CGF.HaveInsertPoint())
6238 return;
6239 for (const auto &AllocatorData : Allocators) {
6240 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6241 CGF, Allocator: AllocatorData.first, AllocatorTraits: AllocatorData.second);
6242 }
6243 }
6244 void Exit(CodeGenFunction &CGF) override {
6245 if (!CGF.HaveInsertPoint())
6246 return;
6247 for (const auto &AllocatorData : Allocators) {
6248 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6249 Allocator: AllocatorData.first);
6250 }
6251 }
6252};
6253} // namespace
6254
6255void CGOpenMPRuntime::emitTargetOutlinedFunction(
6256 const OMPExecutableDirective &D, StringRef ParentName,
6257 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6258 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6259 assert(!ParentName.empty() && "Invalid target entry parent name!");
6260 HasEmittedTargetRegion = true;
6261 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6262 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6263 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6264 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6265 if (!D.AllocatorTraits)
6266 continue;
6267 Allocators.emplace_back(Args: D.Allocator, Args: D.AllocatorTraits);
6268 }
6269 }
6270 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6271 CodeGen.setAction(UsesAllocatorAction);
6272 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6273 IsOffloadEntry, CodeGen);
6274}
6275
/// Emits initialization of a user-defined allocator from a 'uses_allocators'
/// clause: calls __kmpc_init_allocator with the traits array and stores the
/// resulting handle into the allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Loc: Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(V: ThreadId, DestTy: CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
  // Number of traits = the constant array bound of the traits expression.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      Ty: CGF.IntTy, V: cast<ConstantArrayType>(
                       Val: AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                       ->getSize()
                       .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(E: AllocatorTraits);
  // Reinterpret the traits array address as 'void **' for the runtime call.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr: AllocatorTraitsLVal.getAddress(), Ty: CGF.VoidPtrPtrTy, ElementTy: CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, T: CGF.getContext().VoidPtrTy,
                                           BaseInfo: AllocatorTraitsLVal.getBaseInfo(),
                                           TBAAInfo: AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits = Addr.emitRawPointer(CGF);

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                              M&: CGM.getModule(), FnID: OMPRTL___kmpc_init_allocator),
                          args: {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator: emit the local allocator variable first, then store
  // the (converted) runtime handle into it.
  CGF.EmitAutoVarAlloca(var: *cast<VarDecl>(
      Val: cast<DeclRefExpr>(Val: Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(E: Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(Src: AllocatorVal, SrcTy: CGF.getContext().VoidPtrTy,
                               DstTy: Allocator->getType(), Loc: Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(value: AllocatorVal, lvalue: AllocatorLVal);
}
6309
6310void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6311 const Expr *Allocator) {
6312 llvm::Value *ThreadId = getThreadID(CGF, Loc: Allocator->getExprLoc());
6313 ThreadId = CGF.Builder.CreateIntCast(V: ThreadId, DestTy: CGF.IntTy, /*isSigned=*/true);
6314 LValue AllocatorLVal = CGF.EmitLValue(E: Allocator->IgnoreParenImpCasts());
6315 llvm::Value *AllocatorVal =
6316 CGF.EmitLoadOfScalar(lvalue: AllocatorLVal, Loc: Allocator->getExprLoc());
6317 AllocatorVal = CGF.EmitScalarConversion(Src: AllocatorVal, SrcTy: Allocator->getType(),
6318 DstTy: CGF.getContext().VoidPtrTy,
6319 Loc: Allocator->getExprLoc());
6320 (void)CGF.EmitRuntimeCall(
6321 callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(),
6322 FnID: OMPRTL___kmpc_destroy_allocator),
6323 args: {ThreadId, AllocatorVal});
6324}
6325
/// Computes the default min/max teams and threads for a target kernel from
/// the directive's clauses, then tightens those bounds with any
/// launch-bounds-style attributes attached via 'ompx_attribute' clauses.
/// A max value of -1 means "no upper bound determined".
void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
    const OMPExecutableDirective &D, CodeGenFunction &CGF,
    llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
  assert(Attrs.MaxTeams.size() == 1 && Attrs.MaxThreads.size() == 1 &&
         "invalid default attrs structure");
  int32_t &MaxTeamsVal = Attrs.MaxTeams.front();
  int32_t &MaxThreadsVal = Attrs.MaxThreads.front();

  // Seed the bounds from num_teams/thread_limit-style clause analysis.
  getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal&: Attrs.MinTeams, MaxTeamsVal);
  getNumThreadsExprForTargetDirective(CGF, D, UpperBound&: MaxThreadsVal,
                                      /*UpperBoundOnly=*/true);

  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
      int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
      // Only CUDA launch bounds and AMDGPU flat workgroup size attributes
      // contribute; anything else is ignored.
      if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(Val: A))
        CGM.handleCUDALaunchBoundsAttr(F: nullptr, A: Attr, MaxThreadsVal: &AttrMaxThreadsVal,
                                       MinBlocksVal: &AttrMinBlocksVal, MaxClusterRankVal: &AttrMaxBlocksVal);
      else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(Val: A))
        CGM.handleAMDGPUFlatWorkGroupSizeAttr(
            F: nullptr, A: Attr, /*ReqdWGS=*/nullptr, MinThreadsVal: &AttrMinThreadsVal,
            MaxThreadsVal: &AttrMaxThreadsVal);
      else
        continue;

      // Minimums only ever grow; maximums only ever shrink (when both the
      // current and attribute-provided values are positive).
      Attrs.MinThreads = std::max(a: Attrs.MinThreads, b: AttrMinThreadsVal);
      if (AttrMaxThreadsVal > 0)
        MaxThreadsVal = MaxThreadsVal > 0
                            ? std::min(a: MaxThreadsVal, b: AttrMaxThreadsVal)
                            : AttrMaxThreadsVal;
      Attrs.MinTeams = std::max(a: Attrs.MinTeams, b: AttrMinBlocksVal);
      if (AttrMaxBlocksVal > 0)
        MaxTeamsVal = MaxTeamsVal > 0 ? std::min(a: MaxTeamsVal, b: AttrMaxBlocksVal)
                                      : AttrMaxBlocksVal;
    }
  }
}
6364
/// Shared helper that actually emits the target region's outlined function
/// through the OpenMPIRBuilder, then applies target-specific attributes to it.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {

  // The entry info (file/line-based unique ID) keys the offload entry table.
  llvm::TargetRegionEntryInfo EntryInfo =
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder, BeginLoc: D.getBeginLoc(), ParentName);

  CodeGenFunction CGF(CGM, true);
  // Callback invoked by the IRBuilder with the final entry-function name;
  // generates the captured-statement body under that name.
  llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
      [&CGF, &D, &CodeGen](StringRef EntryFnName) {
        const CapturedStmt &CS = *D.getCapturedStmt(RegionKind: OMPD_target);

        CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        return CGF.GenerateOpenMPCapturedStmtFunction(S: CS, D);
      };

  cantFail(Err: OMPBuilder.emitTargetRegionFunction(
      EntryInfo, GenerateFunctionCallback&: GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
      OutlinedFnID));

  // The builder may decide not to emit a function (e.g. host-side pass for a
  // device-only entry); nothing further to do then.
  if (!OutlinedFn)
    return;

  CGM.getTargetCodeGenInfo().setTargetAttributes(D: nullptr, GV: OutlinedFn, M&: CGM);

  // Propagate AMDGPU waves-per-EU hints from 'ompx_attribute' clauses.
  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(Val: A))
        CGM.handleAMDGPUWavesPerEUAttr(F: OutlinedFn, A: Attr);
    }
  }
  registerVTable(D);
}
6400
6401/// Checks if the expression is constant or does not have non-trivial function
6402/// calls.
6403static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6404 // We can skip constant expressions.
6405 // We can skip expressions with trivial calls or simple expressions.
6406 return (E->isEvaluatable(Ctx, AllowSideEffects: Expr::SE_AllowUndefinedBehavior) ||
6407 !E->hasNonTrivialCall(Ctx)) &&
6408 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6409}
6410
/// Peels compound statements off \p Body and returns its single "meaningful"
/// child statement, or nullptr if there is more than one. Trivial
/// expressions, no-op statements (asm/null/flush/barrier/taskyield) and
/// inert declarations are ignored when counting children.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  // Keep descending as long as the current candidate is itself a compound
  // statement with exactly one meaningful child.
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Val: Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(Val: S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(Val: S) || isa<NullStmt>(Val: S) || isa<OMPFlushDirective>(Val: S) ||
          isa<OMPBarrierDirective>(Val: S) || isa<OMPTaskyieldDirective>(Val: S))
        continue;
      // Analyze declarations: a DeclStmt is ignorable if every declaration in
      // it is inert (type/pragma/using/OpenMP metadata decls, globals, or
      // unused variables).
      if (const auto *DS = dyn_cast<DeclStmt>(Val: S)) {
        if (llvm::all_of(Range: DS->decls(), P: [](const Decl *D) {
              if (isa<EmptyDecl>(Val: D) || isa<DeclContext>(Val: D) ||
                  isa<TypeDecl>(Val: D) || isa<PragmaCommentDecl>(Val: D) ||
                  isa<PragmaDetectMismatchDecl>(Val: D) || isa<UsingDecl>(Val: D) ||
                  isa<UsingDirectiveDecl>(Val: D) ||
                  isa<OMPDeclareReductionDecl>(Val: D) ||
                  isa<OMPThreadPrivateDecl>(Val: D) || isa<OMPAllocateDecl>(Val: D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(Val: D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6452
/// Determines the number of teams for a target-based directive.
/// Returns the num_teams clause expression if one applies (in which case
/// Min/MaxTeamsVal are set to its constant value when it is a constant), or
/// nullptr with Min/MaxTeamsVal encoding the answer directly:
///   1  -> exactly one team, 0 -> runtime default, -1 -> no teams region.
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    // A bare 'target' may wrap a nested teams construct; inspect the single
    // meaningful child to find it.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(Ctx&: CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(Val: ChildStmt)) {
      if (isOpenMPTeamsDirective(DKind: NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()
                                     ->getNumTeams()
                                     .front();
          if (NumTeams->isIntegerConstantExpr(Ctx: CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(Ctx: CGF.getContext()))
              MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
          return NumTeams;
        }
        // Nested teams without num_teams: runtime picks the count.
        MinTeamsVal = MaxTeamsVal = 0;
        return nullptr;
      }
      // Nested non-teams construct: a single implicit team.
      MinTeamsVal = MaxTeamsVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region
    MinTeamsVal = MaxTeamsVal = -1;
    return nullptr;
  }
  case OMPD_target_teams_loop:
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams: num_teams (if any) sits on the directive itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams().front();
      if (NumTeams->isIntegerConstantExpr(Ctx: CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(Ctx: CGF.getContext()))
          MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
      return NumTeams;
    }
    MinTeamsVal = MaxTeamsVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_parallel_loop:
  case OMPD_target_simd:
    // Parallel/simd-only target constructs always run with one team.
    MinTeamsVal = MaxTeamsVal = 1;
    return nullptr;
  // All remaining directive kinds are not target-execution directives and
  // violate the assertion above; fall through to llvm_unreachable.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_loop:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6580
/// Emits the number-of-teams value for a target directive as an i32. If a
/// num_teams expression applies, evaluates it (inside the appropriate capture
/// scope); otherwise returns the constant computed by
/// getNumTeamsExprForTargetDirective. Host-side codegen only.
llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t MinNT = -1, MaxNT = -1;
  const Expr *NumTeams =
      getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal&: MinNT, MaxTeamsVal&: MaxNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      // The expression comes from a nested teams construct; evaluate it in
      // the context of the inner captured statement.
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(E: NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(V: NumTeamsVal, DestTy: CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      // Combined constructs: evaluate the clause expression directly, with a
      // cleanup scope for any temporaries it creates.
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(E: NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(V: NumTeamsVal, DestTy: CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    default:
      break;
    }
  }

  // No expression: the helper produced a single constant answer.
  assert(MinNT == MaxNT && "Num threads ranges require handling here.");
  return llvm::ConstantInt::getSigned(Ty: CGF.Int32Ty, V: MinNT);
}
6622
6623/// Check for a num threads constant value (stored in \p DefaultVal), or
6624/// expression (stored in \p E). If the value is conditional (via an if-clause),
6625/// store the condition in \p CondVal. If \p E, and \p CondVal respectively, are
6626/// nullptr, no expression evaluation is perfomed.
6627static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6628 const Expr **E, int32_t &UpperBound,
6629 bool UpperBoundOnly, llvm::Value **CondVal) {
6630 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6631 Ctx&: CGF.getContext(), Body: CS->getCapturedStmt());
6632 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child);
6633 if (!Dir)
6634 return;
6635
6636 if (isOpenMPParallelDirective(DKind: Dir->getDirectiveKind())) {
6637 // Handle if clause. If if clause present, the number of threads is
6638 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6639 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6640 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6641 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6642 const OMPIfClause *IfClause = nullptr;
6643 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6644 if (C->getNameModifier() == OMPD_unknown ||
6645 C->getNameModifier() == OMPD_parallel) {
6646 IfClause = C;
6647 break;
6648 }
6649 }
6650 if (IfClause) {
6651 const Expr *CondExpr = IfClause->getCondition();
6652 bool Result;
6653 if (CondExpr->EvaluateAsBooleanCondition(Result, Ctx: CGF.getContext())) {
6654 if (!Result) {
6655 UpperBound = 1;
6656 return;
6657 }
6658 } else {
6659 CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
6660 if (const auto *PreInit =
6661 cast_or_null<DeclStmt>(Val: IfClause->getPreInitStmt())) {
6662 for (const auto *I : PreInit->decls()) {
6663 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6664 CGF.EmitVarDecl(D: cast<VarDecl>(Val: *I));
6665 } else {
6666 CodeGenFunction::AutoVarEmission Emission =
6667 CGF.EmitAutoVarAlloca(var: cast<VarDecl>(Val: *I));
6668 CGF.EmitAutoVarCleanups(emission: Emission);
6669 }
6670 }
6671 *CondVal = CGF.EvaluateExprAsBool(E: CondExpr);
6672 }
6673 }
6674 }
6675 }
6676 // Check the value of num_threads clause iff if clause was not specified
6677 // or is not evaluated to false.
6678 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6679 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6680 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6681 const auto *NumThreadsClause =
6682 Dir->getSingleClause<OMPNumThreadsClause>();
6683 const Expr *NTExpr = NumThreadsClause->getNumThreads();
6684 if (NTExpr->isIntegerConstantExpr(Ctx: CGF.getContext()))
6685 if (auto Constant = NTExpr->getIntegerConstantExpr(Ctx: CGF.getContext()))
6686 UpperBound =
6687 UpperBound
6688 ? Constant->getZExtValue()
6689 : std::min(a: UpperBound,
6690 b: static_cast<int32_t>(Constant->getZExtValue()));
6691 // If we haven't found a upper bound, remember we saw a thread limiting
6692 // clause.
6693 if (UpperBound == -1)
6694 UpperBound = 0;
6695 if (!E)
6696 return;
6697 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6698 if (const auto *PreInit =
6699 cast_or_null<DeclStmt>(Val: NumThreadsClause->getPreInitStmt())) {
6700 for (const auto *I : PreInit->decls()) {
6701 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6702 CGF.EmitVarDecl(D: cast<VarDecl>(Val: *I));
6703 } else {
6704 CodeGenFunction::AutoVarEmission Emission =
6705 CGF.EmitAutoVarAlloca(var: cast<VarDecl>(Val: *I));
6706 CGF.EmitAutoVarCleanups(emission: Emission);
6707 }
6708 }
6709 }
6710 *E = NTExpr;
6711 }
6712 return;
6713 }
6714 if (isOpenMPSimdDirective(DKind: Dir->getDirectiveKind()))
6715 UpperBound = 1;
6716}
6717
6718const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6719 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6720 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6721 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6722 "Clauses associated with the teams directive expected to be emitted "
6723 "only for the host!");
6724 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6725 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6726 "Expected target-based executable directive.");
6727
6728 const Expr *NT = nullptr;
6729 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6730
6731 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6732 if (E->isIntegerConstantExpr(Ctx: CGF.getContext())) {
6733 if (auto Constant = E->getIntegerConstantExpr(Ctx: CGF.getContext()))
6734 UpperBound = UpperBound ? Constant->getZExtValue()
6735 : std::min(a: UpperBound,
6736 b: int32_t(Constant->getZExtValue()));
6737 }
6738 // If we haven't found a upper bound, remember we saw a thread limiting
6739 // clause.
6740 if (UpperBound == -1)
6741 UpperBound = 0;
6742 if (EPtr)
6743 *EPtr = E;
6744 };
6745
6746 auto ReturnSequential = [&]() {
6747 UpperBound = 1;
6748 return NT;
6749 };
6750
6751 switch (DirectiveKind) {
6752 case OMPD_target: {
6753 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6754 getNumThreads(CGF, CS, E: NTPtr, UpperBound, UpperBoundOnly, CondVal);
6755 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6756 Ctx&: CGF.getContext(), Body: CS->getCapturedStmt());
6757 // TODO: The standard is not clear how to resolve two thread limit clauses,
6758 // let's pick the teams one if it's present, otherwise the target one.
6759 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6760 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child)) {
6761 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6762 ThreadLimitClause = TLC;
6763 if (ThreadLimitExpr) {
6764 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6765 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6766 CodeGenFunction::LexicalScope Scope(
6767 CGF,
6768 ThreadLimitClause->getThreadLimit().front()->getSourceRange());
6769 if (const auto *PreInit =
6770 cast_or_null<DeclStmt>(Val: ThreadLimitClause->getPreInitStmt())) {
6771 for (const auto *I : PreInit->decls()) {
6772 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6773 CGF.EmitVarDecl(D: cast<VarDecl>(Val: *I));
6774 } else {
6775 CodeGenFunction::AutoVarEmission Emission =
6776 CGF.EmitAutoVarAlloca(var: cast<VarDecl>(Val: *I));
6777 CGF.EmitAutoVarCleanups(emission: Emission);
6778 }
6779 }
6780 }
6781 }
6782 }
6783 }
6784 if (ThreadLimitClause)
6785 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6786 ThreadLimitExpr);
6787 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child)) {
6788 if (isOpenMPTeamsDirective(DKind: Dir->getDirectiveKind()) &&
6789 !isOpenMPDistributeDirective(DKind: Dir->getDirectiveKind())) {
6790 CS = Dir->getInnermostCapturedStmt();
6791 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6792 Ctx&: CGF.getContext(), Body: CS->getCapturedStmt());
6793 Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child);
6794 }
6795 if (Dir && isOpenMPParallelDirective(DKind: Dir->getDirectiveKind())) {
6796 CS = Dir->getInnermostCapturedStmt();
6797 getNumThreads(CGF, CS, E: NTPtr, UpperBound, UpperBoundOnly, CondVal);
6798 } else if (Dir && isOpenMPSimdDirective(DKind: Dir->getDirectiveKind()))
6799 return ReturnSequential();
6800 }
6801 return NT;
6802 }
6803 case OMPD_target_teams: {
6804 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6805 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6806 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6807 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6808 ThreadLimitExpr);
6809 }
6810 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6811 getNumThreads(CGF, CS, E: NTPtr, UpperBound, UpperBoundOnly, CondVal);
6812 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6813 Ctx&: CGF.getContext(), Body: CS->getCapturedStmt());
6814 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child)) {
6815 if (Dir->getDirectiveKind() == OMPD_distribute) {
6816 CS = Dir->getInnermostCapturedStmt();
6817 getNumThreads(CGF, CS, E: NTPtr, UpperBound, UpperBoundOnly, CondVal);
6818 }
6819 }
6820 return NT;
6821 }
6822 case OMPD_target_teams_distribute:
6823 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6824 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6825 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6826 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6827 ThreadLimitExpr);
6828 }
6829 getNumThreads(CGF, CS: D.getInnermostCapturedStmt(), E: NTPtr, UpperBound,
6830 UpperBoundOnly, CondVal);
6831 return NT;
6832 case OMPD_target_teams_loop:
6833 case OMPD_target_parallel_loop:
6834 case OMPD_target_parallel:
6835 case OMPD_target_parallel_for:
6836 case OMPD_target_parallel_for_simd:
6837 case OMPD_target_teams_distribute_parallel_for:
6838 case OMPD_target_teams_distribute_parallel_for_simd: {
6839 if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6840 const OMPIfClause *IfClause = nullptr;
6841 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6842 if (C->getNameModifier() == OMPD_unknown ||
6843 C->getNameModifier() == OMPD_parallel) {
6844 IfClause = C;
6845 break;
6846 }
6847 }
6848 if (IfClause) {
6849 const Expr *Cond = IfClause->getCondition();
6850 bool Result;
6851 if (Cond->EvaluateAsBooleanCondition(Result, Ctx: CGF.getContext())) {
6852 if (!Result)
6853 return ReturnSequential();
6854 } else {
6855 CodeGenFunction::RunCleanupsScope Scope(CGF);
6856 *CondVal = CGF.EvaluateExprAsBool(E: Cond);
6857 }
6858 }
6859 }
6860 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6861 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6862 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6863 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6864 ThreadLimitExpr);
6865 }
6866 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6867 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6868 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6869 CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6870 return NumThreadsClause->getNumThreads();
6871 }
6872 return NT;
6873 }
6874 case OMPD_target_teams_distribute_simd:
6875 case OMPD_target_simd:
6876 return ReturnSequential();
6877 default:
6878 break;
6879 }
6880 llvm_unreachable("Unsupported directive kind.");
6881}
6882
/// Emit the runtime num-threads value for target directive \p D, combining
/// the num_threads expression, the thread_limit expression, the constant
/// bound, and the if-clause condition gathered by
/// getNumThreadsExprForTargetDirective().
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  llvm::Value *NumThreadsVal = nullptr;
  llvm::Value *CondVal = nullptr;
  llvm::Value *ThreadLimitVal = nullptr;
  const Expr *ThreadLimitExpr = nullptr;
  int32_t UpperBound = -1;

  const Expr *NT = getNumThreadsExprForTargetDirective(
      CGF, D, UpperBound, /* UpperBoundOnly */ false, CondVal: &CondVal,
      ThreadLimitExpr: &ThreadLimitExpr);

  // Thread limit expressions are used below, emit them.
  if (ThreadLimitExpr) {
    ThreadLimitVal =
        CGF.EmitScalarExpr(E: ThreadLimitExpr, /*IgnoreResultAssign=*/true);
    ThreadLimitVal = CGF.Builder.CreateIntCast(V: ThreadLimitVal, DestTy: CGF.Int32Ty,
                                               /*isSigned=*/false);
  }

  // Generate the num threads expression (note: an upper bound of exactly 1
  // forces sequential execution and wins over any expression).
  if (UpperBound == 1) {
    NumThreadsVal = CGF.Builder.getInt32(C: UpperBound);
  } else if (NT) {
    NumThreadsVal = CGF.EmitScalarExpr(E: NT, /*IgnoreResultAssign=*/true);
    NumThreadsVal = CGF.Builder.CreateIntCast(V: NumThreadsVal, DestTy: CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else if (ThreadLimitVal) {
    // If we do not have a num threads value but a thread limit, replace the
    // former with the latter. We already handled the thread limit expression
    // above, so clear it to skip the min computation below.
    NumThreadsVal = ThreadLimitVal;
    ThreadLimitVal = nullptr;
  } else {
    // Default to "0" which means runtime choice.
    assert(!ThreadLimitVal && "Default not applicable with thread limit value");
    NumThreadsVal = CGF.Builder.getInt32(C: 0);
  }

  // Handle if clause. If if clause present, the number of threads is
  // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
  if (CondVal) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    NumThreadsVal = CGF.Builder.CreateSelect(C: CondVal, True: NumThreadsVal,
                                             False: CGF.Builder.getInt32(C: 1));
  }

  // If the thread limit and num threads expressions were both present, take
  // the (unsigned) minimum of the two.
  if (ThreadLimitVal) {
    NumThreadsVal = CGF.Builder.CreateSelect(
        C: CGF.Builder.CreateICmpULT(LHS: ThreadLimitVal, RHS: NumThreadsVal),
        True: ThreadLimitVal, False: NumThreadsVal);
  }

  return NumThreadsVal;
}
6939
6940namespace {
6941LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6942
6943// Utility to handle information from clauses associated with a given
6944// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6945// It provides a convenient interface to obtain the information and generate
6946// code for that information.
6947class MappableExprsHandler {
6948public:
  /// Custom comparator for attach-pointer expressions that compares them by
  /// complexity (i.e. their component-depth) first, then by the order in which
  /// they were computed by collectAttachPtrExprInfo(), if they are semantically
  /// different.
  struct AttachPtrExprComparator {
    const MappableExprsHandler &Handler;
    // Cache of previous equality comparison results; mutable so the const
    // comparison operator can memoize.
    mutable llvm::DenseMap<std::pair<const Expr *, const Expr *>, bool>
        CachedEqualityComparisons;

    AttachPtrExprComparator(const MappableExprsHandler &H) : Handler(H) {}
    AttachPtrExprComparator() = delete;

    // Return true iff LHS is "less than" RHS. This is a strict weak ordering:
    // shallower component-depth orders first; at equal depth, semantically
    // equal expressions compare equivalent, otherwise computation order
    // breaks the tie.
    bool operator()(const Expr *LHS, const Expr *RHS) const {
      if (LHS == RHS)
        return false;

      // First, compare by complexity (depth)
      const auto ItLHS = Handler.AttachPtrComponentDepthMap.find(Val: LHS);
      const auto ItRHS = Handler.AttachPtrComponentDepthMap.find(Val: RHS);

      std::optional<size_t> DepthLHS =
          (ItLHS != Handler.AttachPtrComponentDepthMap.end()) ? ItLHS->second
                                                              : std::nullopt;
      std::optional<size_t> DepthRHS =
          (ItRHS != Handler.AttachPtrComponentDepthMap.end()) ? ItRHS->second
                                                              : std::nullopt;

      // std::nullopt (no attach pointer) has lowest complexity
      if (!DepthLHS.has_value() && !DepthRHS.has_value()) {
        // Both have same complexity, now check semantic equality
        if (areEqual(LHS, RHS))
          return false;
        // Different semantically, compare by computation order
        return wasComputedBefore(LHS, RHS);
      }
      if (!DepthLHS.has_value())
        return true; // LHS has lower complexity
      if (!DepthRHS.has_value())
        return false; // RHS has lower complexity

      // Both have values, compare by depth (lower depth = lower complexity)
      if (DepthLHS.value() != DepthRHS.value())
        return DepthLHS.value() < DepthRHS.value();

      // Same complexity, now check semantic equality
      if (areEqual(LHS, RHS))
        return false;
      // Different semantically, compare by computation order
      return wasComputedBefore(LHS, RHS);
    }

  public:
    /// Return true if \p LHS and \p RHS are semantically equal. Uses pre-cached
    /// results, if available, otherwise does a recursive semantic comparison.
    bool areEqual(const Expr *LHS, const Expr *RHS) const {
      // Check cache first for faster lookup
      const auto CachedResultIt = CachedEqualityComparisons.find(Val: {LHS, RHS});
      if (CachedResultIt != CachedEqualityComparisons.end())
        return CachedResultIt->second;

      bool ComparisonResult = areSemanticallyEqual(LHS, RHS);

      // Cache the result for future lookups (both orders since semantic
      // equality is commutative)
      CachedEqualityComparisons[{LHS, RHS}] = ComparisonResult;
      CachedEqualityComparisons[{RHS, LHS}] = ComparisonResult;
      return ComparisonResult;
    }

    /// Compare the two attach-ptr expressions by their computation order.
    /// Returns true iff LHS was computed before RHS by
    /// collectAttachPtrExprInfo(). Note: uses DenseMap::at(), so both
    /// expressions must already be present in the computation-order map.
    bool wasComputedBefore(const Expr *LHS, const Expr *RHS) const {
      const size_t &OrderLHS = Handler.AttachPtrComputationOrderMap.at(Val: LHS);
      const size_t &OrderRHS = Handler.AttachPtrComputationOrderMap.at(Val: RHS);

      return OrderLHS < OrderRHS;
    }

  private:
    /// Helper function to compare attach-pointer expressions semantically.
    /// This function handles various expression types that can be part of an
    /// attach-pointer.
    /// TODO: Not urgent, but we should ideally return true when comparing
    /// `p[10]`, `*(p + 10)`, `*(p + 5 + 5)`, `p[10:1]` etc.
    bool areSemanticallyEqual(const Expr *LHS, const Expr *RHS) const {
      if (LHS == RHS)
        return true;

      // If only one is null, they aren't equal
      if (!LHS || !RHS)
        return false;

      ASTContext &Ctx = Handler.CGF.getContext();
      // Strip away parentheses and no-op casts to get to the core expression
      LHS = LHS->IgnoreParenNoopCasts(Ctx);
      RHS = RHS->IgnoreParenNoopCasts(Ctx);

      // Direct pointer comparison of the underlying expressions
      if (LHS == RHS)
        return true;

      // Check if the expression classes match
      if (LHS->getStmtClass() != RHS->getStmtClass())
        return false;

      // Handle DeclRefExpr (variable references)
      if (const auto *LD = dyn_cast<DeclRefExpr>(Val: LHS)) {
        const auto *RD = dyn_cast<DeclRefExpr>(Val: RHS);
        if (!RD)
          return false;
        return LD->getDecl()->getCanonicalDecl() ==
               RD->getDecl()->getCanonicalDecl();
      }

      // Handle ArraySubscriptExpr (array indexing like a[i])
      if (const auto *LA = dyn_cast<ArraySubscriptExpr>(Val: LHS)) {
        const auto *RA = dyn_cast<ArraySubscriptExpr>(Val: RHS);
        if (!RA)
          return false;
        return areSemanticallyEqual(LHS: LA->getBase(), RHS: RA->getBase()) &&
               areSemanticallyEqual(LHS: LA->getIdx(), RHS: RA->getIdx());
      }

      // Handle MemberExpr (member access like s.m or p->m)
      if (const auto *LM = dyn_cast<MemberExpr>(Val: LHS)) {
        const auto *RM = dyn_cast<MemberExpr>(Val: RHS);
        if (!RM)
          return false;
        if (LM->getMemberDecl()->getCanonicalDecl() !=
            RM->getMemberDecl()->getCanonicalDecl())
          return false;
        return areSemanticallyEqual(LHS: LM->getBase(), RHS: RM->getBase());
      }

      // Handle UnaryOperator (unary operations like *p, &x, etc.)
      if (const auto *LU = dyn_cast<UnaryOperator>(Val: LHS)) {
        const auto *RU = dyn_cast<UnaryOperator>(Val: RHS);
        if (!RU)
          return false;
        if (LU->getOpcode() != RU->getOpcode())
          return false;
        return areSemanticallyEqual(LHS: LU->getSubExpr(), RHS: RU->getSubExpr());
      }

      // Handle BinaryOperator (binary operations like p + offset)
      if (const auto *LB = dyn_cast<BinaryOperator>(Val: LHS)) {
        const auto *RB = dyn_cast<BinaryOperator>(Val: RHS);
        if (!RB)
          return false;
        if (LB->getOpcode() != RB->getOpcode())
          return false;
        return areSemanticallyEqual(LHS: LB->getLHS(), RHS: RB->getLHS()) &&
               areSemanticallyEqual(LHS: LB->getRHS(), RHS: RB->getRHS());
      }

      // Handle ArraySectionExpr (array sections like a[0:1])
      // Attach pointers should not contain array-sections, but currently we
      // don't emit an error.
      if (const auto *LAS = dyn_cast<ArraySectionExpr>(Val: LHS)) {
        const auto *RAS = dyn_cast<ArraySectionExpr>(Val: RHS);
        if (!RAS)
          return false;
        return areSemanticallyEqual(LHS: LAS->getBase(), RHS: RAS->getBase()) &&
               areSemanticallyEqual(LHS: LAS->getLowerBound(),
                                    RHS: RAS->getLowerBound()) &&
               areSemanticallyEqual(LHS: LAS->getLength(), RHS: RAS->getLength());
      }

      // Handle CastExpr (explicit casts)
      if (const auto *LC = dyn_cast<CastExpr>(Val: LHS)) {
        const auto *RC = dyn_cast<CastExpr>(Val: RHS);
        if (!RC)
          return false;
        if (LC->getCastKind() != RC->getCastKind())
          return false;
        return areSemanticallyEqual(LHS: LC->getSubExpr(), RHS: RC->getSubExpr());
      }

      // Handle CXXThisExpr (this pointer)
      if (isa<CXXThisExpr>(Val: LHS) && isa<CXXThisExpr>(Val: RHS))
        return true;

      // Handle IntegerLiteral (integer constants)
      if (const auto *LI = dyn_cast<IntegerLiteral>(Val: LHS)) {
        const auto *RI = dyn_cast<IntegerLiteral>(Val: RHS);
        if (!RI)
          return false;
        return LI->getValue() == RI->getValue();
      }

      // Handle CharacterLiteral (character constants)
      if (const auto *LC = dyn_cast<CharacterLiteral>(Val: LHS)) {
        const auto *RC = dyn_cast<CharacterLiteral>(Val: RHS);
        if (!RC)
          return false;
        return LC->getValue() == RC->getValue();
      }

      // Handle FloatingLiteral (floating point constants)
      if (const auto *LF = dyn_cast<FloatingLiteral>(Val: LHS)) {
        const auto *RF = dyn_cast<FloatingLiteral>(Val: RHS);
        if (!RF)
          return false;
        // Use bitwise comparison for floating point literals
        return LF->getValue().bitwiseIsEqual(RHS: RF->getValue());
      }

      // Handle StringLiteral (string constants)
      if (const auto *LS = dyn_cast<StringLiteral>(Val: LHS)) {
        const auto *RS = dyn_cast<StringLiteral>(Val: RHS);
        if (!RS)
          return false;
        return LS->getString() == RS->getString();
      }

      // Handle CXXNullPtrLiteralExpr (nullptr)
      if (isa<CXXNullPtrLiteralExpr>(Val: LHS) && isa<CXXNullPtrLiteralExpr>(Val: RHS))
        return true;

      // Handle CXXBoolLiteralExpr (true/false)
      if (const auto *LB = dyn_cast<CXXBoolLiteralExpr>(Val: LHS)) {
        const auto *RB = dyn_cast<CXXBoolLiteralExpr>(Val: RHS);
        if (!RB)
          return false;
        return LB->getValue() == RB->getValue();
      }

      // Fallback for other forms - use the existing comparison method
      return Expr::isSameComparisonOperand(E1: LHS, E2: RHS);
    }
  };
7183
7184 /// Get the offset of the OMP_MAP_MEMBER_OF field.
7185 static unsigned getFlagMemberOffset() {
7186 unsigned Offset = 0;
7187 for (uint64_t Remain =
7188 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
7189 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
7190 !(Remain & 1); Remain = Remain >> 1)
7191 Offset++;
7192 return Offset;
7193 }
7194
7195 /// Class that holds debugging information for a data mapping to be passed to
7196 /// the runtime library.
7197 class MappingExprInfo {
7198 /// The variable declaration used for the data mapping.
7199 const ValueDecl *MapDecl = nullptr;
7200 /// The original expression used in the map clause, or null if there is
7201 /// none.
7202 const Expr *MapExpr = nullptr;
7203
7204 public:
7205 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7206 : MapDecl(MapDecl), MapExpr(MapExpr) {}
7207
7208 const ValueDecl *getMapDecl() const { return MapDecl; }
7209 const Expr *getMapExpr() const { return MapExpr; }
7210 };
7211
7212 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
7213 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
7214 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
7215 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
7216 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
7217 using MapNonContiguousArrayTy =
7218 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
7219 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
7220 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
7221 using MapData =
7222 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
7223 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>,
7224 bool /*IsImplicit*/, const ValueDecl *, const Expr *>;
7225 using MapDataArrayTy = SmallVector<MapData, 4>;
7226
  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types, user-defined
  /// mappers, and non-contiguous information. The extra arrays here are kept
  /// parallel to the base MapInfosTy arrays.
  struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
    // Per-entry map-clause expression info, used for debug output.
    MapExprsArrayTy Exprs;
    // Per-entry user-defined mapper declarations (entries may be null).
    MapValueDeclsArrayTy Mappers;
    // Per-entry device-pointer declarations (entries may be null).
    MapValueDeclsArrayTy DevicePtrDecls;

    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(in_start: CurInfo.Exprs.begin(), in_end: CurInfo.Exprs.end());
      DevicePtrDecls.append(in_start: CurInfo.DevicePtrDecls.begin(),
                            in_end: CurInfo.DevicePtrDecls.end());
      Mappers.append(in_start: CurInfo.Mappers.begin(), in_end: CurInfo.Mappers.end());
      // Also append the base-class arrays so all arrays stay in sync.
      llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
    }
  };
7244
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    // Map entries collected for this struct ahead of the combined entry.
    MapCombinedInfoTy PreliminaryMapData;
    // Field index and address of the lowest mapped element.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    // Field index and address of the highest mapped element.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    // Base address of the struct.
    Address Base = Address::invalid();
    // Lower-bound address for the struct entry.
    Address LB = Address::invalid();
    // Whether an array section was encountered among the mapped elements.
    bool IsArraySection = false;
    // Whether the complete record has been mapped.
    bool HasCompleteRecord = false;
  };
7260
  /// A struct to store the attach pointer and pointee information, to be used
  /// when emitting an attach entry.
  struct AttachInfoTy {
    // Address of the attach pointer itself.
    Address AttachPtrAddr = Address::invalid();
    // Address of the pointee that the pointer is attached to.
    Address AttachPteeAddr = Address::invalid();
    // Declaration of the attach pointer, if known.
    const ValueDecl *AttachPtrDecl = nullptr;
    // Map-clause expression this attach entry originates from.
    const Expr *AttachMapExpr = nullptr;

    // An attach entry can only be emitted once both addresses are known.
    bool isValid() const {
      return AttachPtrAddr.isValid() && AttachPteeAddr.isValid();
    }
  };
7273
7274 /// Check if there's any component list where the attach pointer expression
7275 /// matches the given captured variable.
7276 bool hasAttachEntryForCapturedVar(const ValueDecl *VD) const {
7277 for (const auto &AttachEntry : AttachPtrExprMap) {
7278 if (AttachEntry.second) {
7279 // Check if the attach pointer expression is a DeclRefExpr that
7280 // references the captured variable
7281 if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: AttachEntry.second))
7282 if (DRE->getDecl() == VD)
7283 return true;
7284 }
7285 }
7286 return false;
7287 }
7288
7289 /// Get the previously-cached attach pointer for a component list, if-any.
7290 const Expr *getAttachPtrExpr(
7291 OMPClauseMappableExprCommon::MappableExprComponentListRef Components)
7292 const {
7293 const auto It = AttachPtrExprMap.find(Val: Components);
7294 if (It != AttachPtrExprMap.end())
7295 return It->second;
7296
7297 return nullptr;
7298 }
7299
7300private:
  /// Aggregated information from one map-like clause for a single
  /// mappable-expression component list. (The previous comment, "Kind that
  /// defines how a device pointer has to be returned", described only the
  /// ReturnDevicePointer field, not the struct.)
  struct MapInfo {
    // Components of the mappable expression this entry describes.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    // Map type (to/from/tofrom/...); unknown when not from a map clause.
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    // Modifiers from the map clause (always, close, present, ...).
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    // Modifiers from to/from motion clauses.
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    // Whether the device pointer has to be returned to the caller.
    bool ReturnDevicePointer = false;
    // Whether the mapping is implicit rather than user-written.
    bool IsImplicit = false;
    // User-defined mapper attached to this mapping, if any.
    const ValueDecl *Mapper = nullptr;
    // Original clause expression, used for debug info.
    const Expr *VarRef = nullptr;
    // Distinguishes has_device_addr-style entries from use_device_ptr ones.
    bool ForDeviceAddr = false;
    // NOTE(review): presumably "use_device_ptr fallback nullify" — confirm
    // against the clause handling that sets this flag.
    bool HasUdpFbNullify = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false, bool HasUdpFbNullify = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr),
          HasUdpFbNullify(HasUdpFbNullify) {}
  };
7329
7330 /// The target directive from where the mappable clauses were extracted. It
7331 /// is either a executable directive or a user-defined mapper directive.
7332 llvm::PointerUnion<const OMPExecutableDirective *,
7333 const OMPDeclareMapperDecl *>
7334 CurDir;
7335
7336 /// Function the directive is being generated for.
7337 CodeGenFunction &CGF;
7338
7339 /// Set of all first private variables in the current directive.
7340 /// bool data is set to true if the variable is implicitly marked as
7341 /// firstprivate, false otherwise.
  // NOTE(review): the doc comment for this member is above this chunk;
  // presumably it maps each firstprivate-captured declaration to a boolean
  // property of the capture — confirm at the declaration's comment.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Set of defaultmap clause kinds that use firstprivate behavior.
  llvm::SmallSet<OpenMPDefaultmapClauseKind, 4> DefaultmapFirstprivateKinds;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between device addr declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      HasDevAddrsMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;

  /// Map from component lists to their attach pointer expressions.
  llvm::DenseMap<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                 const Expr *>
      AttachPtrExprMap;

  /// Map from attach pointer expressions to their component depth.
  /// nullptr key has std::nullopt depth. This can be used to order attach-ptr
  /// expressions with increasing/decreasing depth.
  /// The component-depth of `nullptr` (i.e. no attach-ptr) is `std::nullopt`.
  /// TODO: Not urgent, but we should ideally use the number of pointer
  /// dereferences in an expr as an indicator of its complexity, instead of the
  /// component-depth. That would be needed for us to treat `p[1]`, `*(p + 10)`,
  /// `*(p + 5 + 5)` together.
  llvm::DenseMap<const Expr *, std::optional<size_t>>
      AttachPtrComponentDepthMap = {{nullptr, std::nullopt}};

  /// Map from attach pointer expressions to the order they were computed in, in
  /// collectAttachPtrExprInfo().
  /// The nullptr key (no attach-ptr) is pre-seeded with order 0.
  llvm::DenseMap<const Expr *, size_t> AttachPtrComputationOrderMap = {
      {nullptr, 0}};

  /// An instance of attach-ptr-expr comparator that can be used throughout the
  /// lifetime of this handler.
  AttachPtrExprComparator AttachPtrComparator;
7388
  /// Compute the number of bytes covered by expression \p E when mapped,
  /// returned as a size_t-typed llvm::Value. Array shaping expressions and
  /// array sections are handled specially; every other expression uses the
  /// static size of its (non-reference) canonical type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression:
    // element size multiplied by the product of all shaped dimensions.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(Val: E)) {
      llvm::Value *Size =
          CGF.getTypeSize(Ty: OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(E: SE);
        // Dimension expressions may have any integer type; normalize each
        // one to size_t before multiplying.
        Sz = CGF.EmitScalarConversion(Src: Sz, SrcTy: SE->getType(),
                                      DstTy: CGF.getContext().getSizeType(),
                                      Loc: SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(LHS: Size, RHS: Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<ArraySectionExpr>(Val: E)) {
      QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
                            Base: OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(Ty: BaseTy);

      // Element size comes from the pointee type (pointer base) or the array
      // element type (array base).
      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(Ty: PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(Val: BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(Ty: ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: size = length * element size.
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(E: LenExpr);
        LengthVal = CGF.EmitScalarConversion(Src: LengthVal, SrcTy: LenExpr->getType(),
                                             DstTy: CGF.getContext().getSizeType(),
                                             Loc: LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LHS: LengthVal, RHS: ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(Ty: BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(E: OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(Src: LBVal, SrcTy: OAE->getLowerBound()->getType(),
                                       DstTy: CGF.getContext().getSizeType(),
                                       Loc: OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LHS: LBVal, RHS: ElemSize);
      // Select 0 instead of letting the unsigned subtraction wrap when the
      // scaled lower bound exceeds the total base size.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LHS: LengthVal, RHS: LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LHS: LengthVal, RHS: LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          C: Cmp, True: TrueVal, False: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 0));
      return LengthVal;
    }
    return CGF.getTypeSize(Ty: ExprTy);
  }
7463
7464 /// Return the corresponding bits for a given map clause modifier. Add
7465 /// a flag marking the map as a pointer if requested. Add a flag marking the
7466 /// map as the first one of a series of maps that relate to the same map
7467 /// expression.
7468 OpenMPOffloadMappingFlags getMapTypeBits(
7469 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7470 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7471 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7472 OpenMPOffloadMappingFlags Bits =
7473 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
7474 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7475 switch (MapType) {
7476 case OMPC_MAP_alloc:
7477 case OMPC_MAP_release:
7478 // alloc and release is the default behavior in the runtime library, i.e.
7479 // if we don't pass any bits alloc/release that is what the runtime is
7480 // going to do. Therefore, we don't need to signal anything for these two
7481 // type modifiers.
7482 break;
7483 case OMPC_MAP_to:
7484 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
7485 break;
7486 case OMPC_MAP_from:
7487 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7488 break;
7489 case OMPC_MAP_tofrom:
7490 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
7491 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7492 break;
7493 case OMPC_MAP_delete:
7494 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
7495 break;
7496 case OMPC_MAP_unknown:
7497 llvm_unreachable("Unexpected map type!");
7498 }
7499 if (AddPtrFlag)
7500 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7501 if (AddIsTargetParamFlag)
7502 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
7503 if (llvm::is_contained(Range&: MapModifiers, Element: OMPC_MAP_MODIFIER_always))
7504 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
7505 if (llvm::is_contained(Range&: MapModifiers, Element: OMPC_MAP_MODIFIER_close))
7506 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
7507 if (llvm::is_contained(Range&: MapModifiers, Element: OMPC_MAP_MODIFIER_present) ||
7508 llvm::is_contained(Range&: MotionModifiers, Element: OMPC_MOTION_MODIFIER_present))
7509 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
7510 if (llvm::is_contained(Range&: MapModifiers, Element: OMPC_MAP_MODIFIER_ompx_hold))
7511 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
7512 if (IsNonContiguous)
7513 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
7514 return Bits;
7515 }
7516
7517 /// Return true if the provided expression is a final array section. A
7518 /// final array section, is one whose length can't be proved to be one.
7519 bool isFinalArraySectionExpression(const Expr *E) const {
7520 const auto *OASE = dyn_cast<ArraySectionExpr>(Val: E);
7521
7522 // It is not an array section and therefore not a unity-size one.
7523 if (!OASE)
7524 return false;
7525
7526 // An array section with no colon always refer to a single element.
7527 if (OASE->getColonLocFirst().isInvalid())
7528 return false;
7529
7530 const Expr *Length = OASE->getLength();
7531
7532 // If we don't have a length we have to check if the array has size 1
7533 // for this dimension. Also, we should always expect a length if the
7534 // base type is pointer.
7535 if (!Length) {
7536 QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
7537 Base: OASE->getBase()->IgnoreParenImpCasts())
7538 .getCanonicalType();
7539 if (const auto *ATy = dyn_cast<ConstantArrayType>(Val: BaseQTy.getTypePtr()))
7540 return ATy->getSExtSize() != 1;
7541 // If we don't have a constant dimension length, we have to consider
7542 // the current section as having any size, so it is not necessarily
7543 // unitary. If it happen to be unity size, that's user fault.
7544 return true;
7545 }
7546
7547 // Check if the length evaluates to 1.
7548 Expr::EvalResult Result;
7549 if (!Length->EvaluateAsInt(Result, Ctx: CGF.getContext()))
7550 return true; // Can have more that size 1.
7551
7552 llvm::APSInt ConstLength = Result.Val.getInt();
7553 return ConstLength.getSExtValue() != 1;
7554 }
7555
7556 /// Emit an attach entry into \p CombinedInfo, using the information from \p
7557 /// AttachInfo. For example, for a map of form `int *p; ... map(p[1:10])`,
7558 /// an attach entry has the following form:
7559 /// &p, &p[1], sizeof(void*), ATTACH
7560 void emitAttachEntry(CodeGenFunction &CGF, MapCombinedInfoTy &CombinedInfo,
7561 const AttachInfoTy &AttachInfo) const {
7562 assert(AttachInfo.isValid() &&
7563 "Expected valid attach pointer/pointee information!");
7564
7565 // Size is the size of the pointer itself - use pointer size, not BaseDecl
7566 // size
7567 llvm::Value *PointerSize = CGF.Builder.CreateIntCast(
7568 V: llvm::ConstantInt::get(
7569 Ty: CGF.CGM.SizeTy, V: CGF.getContext()
7570 .getTypeSizeInChars(T: CGF.getContext().VoidPtrTy)
7571 .getQuantity()),
7572 DestTy: CGF.Int64Ty, /*isSigned=*/true);
7573
7574 CombinedInfo.Exprs.emplace_back(Args: AttachInfo.AttachPtrDecl,
7575 Args: AttachInfo.AttachMapExpr);
7576 CombinedInfo.BasePointers.push_back(
7577 Elt: AttachInfo.AttachPtrAddr.emitRawPointer(CGF));
7578 CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
7579 CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
7580 CombinedInfo.Pointers.push_back(
7581 Elt: AttachInfo.AttachPteeAddr.emitRawPointer(CGF));
7582 CombinedInfo.Sizes.push_back(Elt: PointerSize);
7583 CombinedInfo.Types.push_back(Elt: OpenMPOffloadMappingFlags::OMP_MAP_ATTACH);
7584 CombinedInfo.Mappers.push_back(Elt: nullptr);
7585 CombinedInfo.NonContigInfo.Dims.push_back(Elt: 1);
7586 }
7587
  /// A helper class to copy structures with overlapped elements, i.e. those
  /// which have mappings of both "s" and "s.mem". Consecutive elements that
  /// are not explicitly copied have mapping nodes synthesized for them,
  /// taking care to avoid generating zero-sized copies.
  ///
  /// Usage: call processField() once per explicitly-mapped field, in order of
  /// increasing element address, then copyUntilEnd() to cover the tail.
  class CopyOverlappedEntryGaps {
    CodeGenFunction &CGF;
    MapCombinedInfoTy &CombinedInfo;
    // Map-type flags stamped onto every synthesized gap entry.
    OpenMPOffloadMappingFlags Flags = OpenMPOffloadMappingFlags::OMP_MAP_NONE;
    const ValueDecl *MapDecl = nullptr;
    const Expr *MapExpr = nullptr;
    // Base pointer shared by all synthesized entries.
    Address BP = Address::invalid();
    bool IsNonContiguous = false;
    uint64_t DimSize = 0;
    // These elements track the position as the struct is iterated over
    // (in order of increasing element address).
    const RecordDecl *LastParent = nullptr;
    // Bit offset just past the end of the last processed field.
    uint64_t Cursor = 0;
    // Field index of the last processed field. Initialized to -1u so that the
    // first field (index 0) compares equal to LastIndex + 1 and no gap copy is
    // emitted before it.
    unsigned LastIndex = -1u;
    // Address from which the next synthesized gap copy would start.
    Address LB = Address::invalid();

  public:
    CopyOverlappedEntryGaps(CodeGenFunction &CGF,
                            MapCombinedInfoTy &CombinedInfo,
                            OpenMPOffloadMappingFlags Flags,
                            const ValueDecl *MapDecl, const Expr *MapExpr,
                            Address BP, Address LB, bool IsNonContiguous,
                            uint64_t DimSize)
        : CGF(CGF), CombinedInfo(CombinedInfo), Flags(Flags), MapDecl(MapDecl),
          MapExpr(MapExpr), BP(BP), IsNonContiguous(IsNonContiguous),
          DimSize(DimSize), LB(LB) {}

    /// Process one explicitly-mapped field \p FD. Emits a gap copy for any
    /// unmapped bytes between the previous field and this one, then advances
    /// the cursor past \p FD.
    void processField(
        const OMPClauseMappableExprCommon::MappableComponent &MC,
        const FieldDecl *FD,
        llvm::function_ref<LValue(CodeGenFunction &, const MemberExpr *)>
            EmitMemberExprBase) {
      const RecordDecl *RD = FD->getParent();
      const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(D: RD);
      uint64_t FieldOffset = RL.getFieldOffset(FieldNo: FD->getFieldIndex());
      uint64_t FieldSize =
          CGF.getContext().getTypeSize(T: FD->getType().getCanonicalType());
      Address ComponentLB = Address::invalid();

      // For lvalue-reference members, take the address via the member's base
      // lvalue rather than through the (dereferenced) shared lvalue.
      if (FD->getType()->isLValueReferenceType()) {
        const auto *ME = cast<MemberExpr>(Val: MC.getAssociatedExpression());
        LValue BaseLVal = EmitMemberExprBase(CGF, ME);
        ComponentLB =
            CGF.EmitLValueForFieldInitialization(Base: BaseLVal, Field: FD).getAddress();
      } else {
        ComponentLB =
            CGF.EmitOMPSharedLValue(E: MC.getAssociatedExpression()).getAddress();
      }

      if (!LastParent)
        LastParent = RD;
      if (FD->getParent() == LastParent) {
        // Same record as before: a gap exists iff fields were skipped.
        if (FD->getFieldIndex() != LastIndex + 1)
          copyUntilField(FD, ComponentLB);
      } else {
        // Crossed into a different record; compare bit offsets instead of
        // field indices to decide whether a gap precedes this field.
        LastParent = FD->getParent();
        if (((int64_t)FieldOffset - (int64_t)Cursor) > 0)
          copyUntilField(FD, ComponentLB);
      }
      Cursor = FieldOffset + FieldSize;
      LastIndex = FD->getFieldIndex();
      // Next gap (if any) would start right after this field's storage.
      LB = CGF.Builder.CreateConstGEP(Addr: ComponentLB, Index: 1);
    }

    /// Synthesize a copy of the bytes in [LB, ComponentLB).
    /// NOTE(review): \p FD is currently unused here.
    void copyUntilField(const FieldDecl *FD, Address ComponentLB) {
      llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
      llvm::Value *LBPtr = LB.emitRawPointer(CGF);
      llvm::Value *Size = CGF.Builder.CreatePtrDiff(LHS: ComponentLBPtr, RHS: LBPtr);
      copySizedChunk(Base: LBPtr, Size);
    }

    /// Synthesize a copy of the bytes from LB up to one past \p HB, unless
    /// the cursor already reached the end of the last record.
    void copyUntilEnd(Address HB) {
      if (LastParent) {
        const ASTRecordLayout &RL =
            CGF.getContext().getASTRecordLayout(D: LastParent);
        // Nothing left to copy: the cursor is at or past the record's end.
        if ((uint64_t)CGF.getContext().toBits(CharSize: RL.getSize()) <= Cursor)
          return;
      }
      llvm::Value *LBPtr = LB.emitRawPointer(CGF);
      llvm::Value *Size = CGF.Builder.CreatePtrDiff(
          LHS: CGF.Builder.CreateConstGEP(Addr: HB, Index: 1).emitRawPointer(CGF), RHS: LBPtr);
      copySizedChunk(Base: LBPtr, Size);
    }

    /// Append one synthesized map entry of \p Size bytes starting at \p Base
    /// to CombinedInfo, using the flags captured at construction.
    void copySizedChunk(llvm::Value *Base, llvm::Value *Size) {
      CombinedInfo.Exprs.emplace_back(Args&: MapDecl, Args&: MapExpr);
      CombinedInfo.BasePointers.push_back(Elt: BP.emitRawPointer(CGF));
      CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
      CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(Elt: Base);
      CombinedInfo.Sizes.push_back(
          Elt: CGF.Builder.CreateIntCast(V: Size, DestTy: CGF.Int64Ty, /*isSigned=*/false));
      CombinedInfo.Types.push_back(Elt: Flags);
      CombinedInfo.Mappers.push_back(Elt: nullptr);
      CombinedInfo.NonContigInfo.Dims.push_back(Elt: IsNonContiguous ? DimSize : 1);
    }
  };
7689
7690 /// Generate the base pointers, section pointers, sizes, map type bits, and
7691 /// user-defined mappers (all included in \a CombinedInfo) for the provided
7692 /// map type, map or motion modifiers, and expression components.
7693 /// \a IsFirstComponent should be set to true if the provided set of
7694 /// components is the first associated with a capture.
7695 void generateInfoForComponentList(
7696 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7697 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7698 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7699 MapCombinedInfoTy &CombinedInfo,
7700 MapCombinedInfoTy &StructBaseCombinedInfo,
7701 StructRangeInfoTy &PartialStruct, AttachInfoTy &AttachInfo,
7702 bool IsFirstComponentList, bool IsImplicit,
7703 bool GenerateAllInfoForClauses, const ValueDecl *Mapper = nullptr,
7704 bool ForDeviceAddr = false, const ValueDecl *BaseDecl = nullptr,
7705 const Expr *MapExpr = nullptr,
7706 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7707 OverlappedElements = {}) const {
7708
7709 // The following summarizes what has to be generated for each map and the
7710 // types below. The generated information is expressed in this order:
7711 // base pointer, section pointer, size, flags
7712 // (to add to the ones that come from the map type and modifier).
7713 // Entries annotated with (+) are only generated for "target" constructs,
7714 // and only if the variable at the beginning of the expression is used in
7715 // the region.
7716 //
7717 // double d;
7718 // int i[100];
7719 // float *p;
7720 // int **a = &i;
7721 //
7722 // struct S1 {
7723 // int i;
7724 // float f[50];
7725 // }
7726 // struct S2 {
7727 // int i;
7728 // float f[50];
7729 // S1 s;
7730 // double *p;
7731 // double *&pref;
7732 // struct S2 *ps;
7733 // int &ref;
7734 // }
7735 // S2 s;
7736 // S2 *ps;
7737 //
7738 // map(d)
7739 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7740 //
7741 // map(i)
7742 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7743 //
7744 // map(i[1:23])
7745 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7746 //
7747 // map(p)
7748 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7749 //
7750 // map(p[1:24])
7751 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM // map pointee
7752 // &p, &p[1], sizeof(void*), ATTACH // attach pointer/pointee, if both
7753 // // are present, and either is new
7754 //
7755 // map(([22])p)
7756 // p, p, 22*sizeof(float), TARGET_PARAM | TO | FROM
7757 // &p, p, sizeof(void*), ATTACH
7758 //
7759 // map((*a)[0:3])
7760 // a, a, 0, TARGET_PARAM | IMPLICIT // (+)
7761 // (*a)[0], &(*a)[0], 3 * sizeof(int), TO | FROM
7762 // &(*a), &(*a)[0], sizeof(void*), ATTACH
7763 // (+) Only on target, if a is used in the region
7764 // Note: Since the attach base-pointer is `*a`, which is not a scalar
7765 // variable, it doesn't determine the clause on `a`. `a` is mapped using
7766 // a zero-length-array-section map by generateDefaultMapInfo, if it is
7767 // referenced in the target region, because it is a pointer.
7768 //
7769 // map(**a)
7770 // a, a, 0, TARGET_PARAM | IMPLICIT // (+)
7771 // &(*a)[0], &(*a)[0], sizeof(int), TO | FROM
7772 // &(*a), &(*a)[0], sizeof(void*), ATTACH
7773 // (+) Only on target, if a is used in the region
7774 //
7775 // map(s)
7776 // FIXME: This needs to also imply map(ref_ptr_ptee: s.ref), since the
7777 // effect is supposed to be same as if the user had a map for every element
7778 // of the struct. We currently do a shallow-map of s.
7779 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7780 //
7781 // map(s.i)
7782 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7783 //
7784 // map(s.s.f)
7785 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7786 //
7787 // map(s.p)
7788 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7789 //
7790 // map(to: s.p[:22])
7791 // &s, &(s.p), sizeof(double*), TARGET_PARAM | IMPLICIT // (+)
7792 // &(s.p[0]), &(s.p[0]), 22 * sizeof(double*), TO | FROM
7793 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7794 //
7795 // map(to: s.ref)
7796 // &s, &(ptr(s.ref)), sizeof(int*), TARGET_PARAM (*)
7797 // &s, &(ptee(s.ref)), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7798 // (*) alloc space for struct members, only this is a target parameter.
7799 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7800 // optimizes this entry out, same in the examples below)
7801 // (***) map the pointee (map: to)
7802 // Note: ptr(s.ref) represents the referring pointer of s.ref
7803 // ptee(s.ref) represents the referenced pointee of s.ref
7804 //
7805 // map(to: s.pref)
7806 // &s, &(ptr(s.pref)), sizeof(double**), TARGET_PARAM
7807 // &s, &(ptee(s.pref)), sizeof(double*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7808 //
7809 // map(to: s.pref[:22])
7810 // &s, &(ptr(s.pref)), sizeof(double**), TARGET_PARAM | IMPLICIT // (+)
7811 // &s, &(ptee(s.pref)), sizeof(double*), MEMBER_OF(1) | PTR_AND_OBJ | TO |
7812 // FROM | IMPLICIT // (+)
7813 // &(ptee(s.pref)[0]), &(ptee(s.pref)[0]), 22 * sizeof(double), TO
7814 // &(ptee(s.pref)), &(ptee(s.pref)[0]), sizeof(void*), ATTACH
7815 //
7816 // map(s.ps)
7817 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7818 //
7819 // map(from: s.ps->s.i)
7820 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7821 // &(s.ps[0]), &(s.ps->s.i), sizeof(int), FROM
7822 // &(s.ps), &(s.ps->s.i), sizeof(void*), ATTACH
7823 //
7824 // map(to: s.ps->ps)
7825 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7826 // &(s.ps[0]), &(s.ps->ps), sizeof(S2*), TO
7827 // &(s.ps), &(s.ps->ps), sizeof(void*), ATTACH
7828 //
7829 // map(s.ps->ps->ps)
7830 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7831 // &(s.ps->ps[0]), &(s.ps->ps->ps), sizeof(S2*), TO
7832 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(void*), ATTACH
7833 //
7834 // map(to: s.ps->ps->s.f[:22])
7835 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7836 // &(s.ps->ps[0]), &(s.ps->ps->s.f[0]), 22*sizeof(float), TO
7837 // &(s.ps->ps), &(s.ps->ps->s.f[0]), sizeof(void*), ATTACH
7838 //
7839 // map(ps)
7840 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7841 //
7842 // map(ps->i)
7843 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7844 // &ps, &(ps->i), sizeof(void*), ATTACH
7845 //
7846 // map(ps->s.f)
7847 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7848 // &ps, &(ps->s.f[0]), sizeof(ps), ATTACH
7849 //
7850 // map(from: ps->p)
7851 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7852 // &ps, &(ps->p), sizeof(ps), ATTACH
7853 //
7854 // map(to: ps->p[:22])
7855 // ps, &(ps[0]), 0, TARGET_PARAM | IMPLICIT // (+)
7856 // &(ps->p[0]), &(ps->p[0]), 22*sizeof(double), TO
7857 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7858 //
7859 // map(ps->ps)
7860 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7861 // &ps, &(ps->ps), sizeof(ps), ATTACH
7862 //
7863 // map(from: ps->ps->s.i)
7864 // ps, &(ps[0]), 0, TARGET_PARAM | IMPLICIT // (+)
7865 // &(ps->ps[0]), &(ps->ps->s.i), sizeof(int), FROM
7866 // &(ps->ps), &(ps->ps->s.i), sizeof(void*), ATTACH
7867 //
7868 // map(from: ps->ps->ps)
7869 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7870 // &(ps->ps[0]), &(ps->ps->ps), sizeof(S2*), FROM
7871 // &(ps->ps), &(ps->ps->ps), sizeof(void*), ATTACH
7872 //
7873 // map(ps->ps->ps->ps)
7874 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7875 // &(ps->ps->ps[0]), &(ps->ps->ps->ps), sizeof(S2*), FROM
7876 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(void*), ATTACH
7877 //
7878 // map(to: ps->ps->ps->s.f[:22])
7879 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7880 // &(ps->ps->ps[0]), &(ps->ps->ps->s.f[0]), 22*sizeof(float), TO
7881 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), sizeof(void*), ATTACH
7882 //
7883 // map(to: s.f[:22]) map(from: s.p[:33])
7884 // On target, and if s is used in the region:
7885 //
7886 // &s, &(s.f[0]), 50*sizeof(float) +
7887 // sizeof(struct S1) +
7888 // sizeof(double*) (**), TARGET_PARAM
7889 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7890 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) | TO |
7891 // FROM | IMPLICIT
7892 // &(s.p[0]), &(s.p[0]), 33*sizeof(double), FROM
7893 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7894 // (**) allocate contiguous space needed to fit all mapped members even if
7895 // we allocate space for members not mapped (in this example,
7896 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7897 // them as well because they fall between &s.f[0] and &s.p)
7898 //
7899 // On other constructs, and, if s is not used in the region, on target:
7900 // &s, &(s.f[0]), 22*sizeof(float), TO
7901 // &(s.p[0]), &(s.p[0]), 33*sizeof(double), FROM
7902 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7903 //
7904 // map(from: s.f[:22]) map(to: ps->p[:33])
7905 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7906 // &ps[0], &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7907 // &(ps->p[0]), &(ps->p[0]), 33*sizeof(double), TO
7908 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7909 //
7910 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7911 // &s, &(s.f[0]), 50*sizeof(float) +
7912 // sizeof(struct S1), TARGET_PARAM
7913 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7914 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7915 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7916 // &(ps->p[0]), &(ps->p[0]), 33*sizeof(double), TO
7917 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7918 //
7919 // map(p[:100], p)
7920 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7921 // p, &p[0], 100*sizeof(float), TO | FROM
7922 // &p, &p[0], sizeof(float*), ATTACH
7923
7924 // Track if the map information being generated is the first for a capture.
7925 bool IsCaptureFirstInfo = IsFirstComponentList;
7926 // When the variable is on a declare target link or in a to clause with
7927 // unified memory, a reference is needed to hold the host/device address
7928 // of the variable.
7929 bool RequiresReference = false;
7930
7931 // Scan the components from the base to the complete expression.
7932 auto CI = Components.rbegin();
7933 auto CE = Components.rend();
7934 auto I = CI;
7935
7936 // Track if the map information being generated is the first for a list of
7937 // components.
7938 bool IsExpressionFirstInfo = true;
7939 bool FirstPointerInComplexData = false;
7940 Address BP = Address::invalid();
7941 Address FinalLowestElem = Address::invalid();
7942 const Expr *AssocExpr = I->getAssociatedExpression();
7943 const auto *AE = dyn_cast<ArraySubscriptExpr>(Val: AssocExpr);
7944 const auto *OASE = dyn_cast<ArraySectionExpr>(Val: AssocExpr);
7945 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(Val: AssocExpr);
7946
7947 // Get the pointer-attachment base-pointer for the given list, if any.
7948 const Expr *AttachPtrExpr = getAttachPtrExpr(Components);
7949 auto [AttachPtrAddr, AttachPteeBaseAddr] =
7950 getAttachPtrAddrAndPteeBaseAddr(AttachPtrExpr, CGF);
7951
7952 bool HasAttachPtr = AttachPtrExpr != nullptr;
7953 bool FirstComponentIsForAttachPtr = AssocExpr == AttachPtrExpr;
7954 bool SeenAttachPtr = FirstComponentIsForAttachPtr;
7955
7956 if (FirstComponentIsForAttachPtr) {
7957 // No need to process AttachPtr here. It will be processed at the end
7958 // after we have computed the pointee's address.
7959 ++I;
7960 } else if (isa<MemberExpr>(Val: AssocExpr)) {
7961 // The base is the 'this' pointer. The content of the pointer is going
7962 // to be the base of the field being mapped.
7963 BP = CGF.LoadCXXThisAddress();
7964 } else if ((AE && isa<CXXThisExpr>(Val: AE->getBase()->IgnoreParenImpCasts())) ||
7965 (OASE &&
7966 isa<CXXThisExpr>(Val: OASE->getBase()->IgnoreParenImpCasts()))) {
7967 BP = CGF.EmitOMPSharedLValue(E: AssocExpr).getAddress();
7968 } else if (OAShE &&
7969 isa<CXXThisExpr>(Val: OAShE->getBase()->IgnoreParenCasts())) {
7970 BP = Address(
7971 CGF.EmitScalarExpr(E: OAShE->getBase()),
7972 CGF.ConvertTypeForMem(T: OAShE->getBase()->getType()->getPointeeType()),
7973 CGF.getContext().getTypeAlignInChars(T: OAShE->getBase()->getType()));
7974 } else {
7975 // The base is the reference to the variable.
7976 // BP = &Var.
7977 BP = CGF.EmitOMPSharedLValue(E: AssocExpr).getAddress();
7978 if (const auto *VD =
7979 dyn_cast_or_null<VarDecl>(Val: I->getAssociatedDeclaration())) {
7980 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7981 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7982 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7983 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7984 *Res == OMPDeclareTargetDeclAttr::MT_Enter ||
7985 *Res == OMPDeclareTargetDeclAttr::MT_Local) &&
7986 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7987 RequiresReference = true;
7988 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7989 }
7990 }
7991 }
7992
7993 // If the variable is a pointer and is being dereferenced (i.e. is not
7994 // the last component), the base has to be the pointer itself, not its
7995 // reference. References are ignored for mapping purposes.
7996 QualType Ty =
7997 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7998 if (Ty->isAnyPointerType() && std::next(x: I) != CE) {
7999 // No need to generate individual map information for the pointer, it
8000 // can be associated with the combined storage if shared memory mode is
8001 // active or the base declaration is not global variable.
8002 const auto *VD = dyn_cast<VarDecl>(Val: I->getAssociatedDeclaration());
8003 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8004 !VD || VD->hasLocalStorage() || HasAttachPtr)
8005 BP = CGF.EmitLoadOfPointer(Ptr: BP, PtrTy: Ty->castAs<PointerType>());
8006 else
8007 FirstPointerInComplexData = true;
8008 ++I;
8009 }
8010 }
8011
8012 // Track whether a component of the list should be marked as MEMBER_OF some
8013 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
8014 // in a component list should be marked as MEMBER_OF, all subsequent entries
8015 // do not belong to the base struct. E.g.
8016 // struct S2 s;
8017 // s.ps->ps->ps->f[:]
8018 // (1) (2) (3) (4)
8019 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
8020 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
8021 // is the pointee of ps(2) which is not member of struct s, so it should not
8022 // be marked as such (it is still PTR_AND_OBJ).
8023 // The variable is initialized to false so that PTR_AND_OBJ entries which
8024 // are not struct members are not considered (e.g. array of pointers to
8025 // data).
8026 bool ShouldBeMemberOf = false;
8027
8028 // Variable keeping track of whether or not we have encountered a component
8029 // in the component list which is a member expression. Useful when we have a
8030 // pointer or a final array section, in which case it is the previous
8031 // component in the list which tells us whether we have a member expression.
8032 // E.g. X.f[:]
8033 // While processing the final array section "[:]" it is "f" which tells us
8034 // whether we are dealing with a member of a declared struct.
8035 const MemberExpr *EncounteredME = nullptr;
8036
8037 // Track for the total number of dimension. Start from one for the dummy
8038 // dimension.
8039 uint64_t DimSize = 1;
8040
8041 // Detects non-contiguous updates due to strided accesses.
8042 // Sets the 'IsNonContiguous' flag so that the 'MapType' bits are set
8043 // correctly when generating information to be passed to the runtime. The
8044 // flag is set to true if any array section has a stride not equal to 1, or
8045 // if the stride is not a constant expression (conservatively assumed
8046 // non-contiguous).
8047 bool IsNonContiguous =
8048 CombinedInfo.NonContigInfo.IsNonContiguous ||
8049 any_of(Range&: Components, P: [&](const auto &Component) {
8050 const auto *OASE =
8051 dyn_cast<ArraySectionExpr>(Component.getAssociatedExpression());
8052 if (!OASE)
8053 return false;
8054
8055 const Expr *StrideExpr = OASE->getStride();
8056 if (!StrideExpr)
8057 return false;
8058
8059 assert(StrideExpr->getType()->isIntegerType() &&
8060 "Stride expression must be of integer type");
8061
8062 // If stride is not evaluatable as a constant, treat as
8063 // non-contiguous.
8064 const auto Constant =
8065 StrideExpr->getIntegerConstantExpr(Ctx: CGF.getContext());
8066 if (!Constant)
8067 return true;
8068
8069 // Treat non-unitary strides as non-contiguous.
8070 return !Constant->isOne();
8071 });
8072
8073 bool IsPrevMemberReference = false;
8074
8075 bool IsPartialMapped =
8076 !PartialStruct.PreliminaryMapData.BasePointers.empty();
8077
8078 // We need to check if we will be encountering any MEs. If we do not
8079 // encounter any ME expression it means we will be mapping the whole struct.
8080 // In that case we need to skip adding an entry for the struct to the
8081 // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
8082 // list only when generating all info for clauses.
8083 bool IsMappingWholeStruct = true;
8084 if (!GenerateAllInfoForClauses) {
8085 IsMappingWholeStruct = false;
8086 } else {
8087 for (auto TempI = I; TempI != CE; ++TempI) {
8088 const MemberExpr *PossibleME =
8089 dyn_cast<MemberExpr>(Val: TempI->getAssociatedExpression());
8090 if (PossibleME) {
8091 IsMappingWholeStruct = false;
8092 break;
8093 }
8094 }
8095 }
8096
8097 bool SeenFirstNonBinOpExprAfterAttachPtr = false;
8098 for (; I != CE; ++I) {
8099 // If we have a valid attach-ptr, we skip processing all components until
8100 // after the attach-ptr.
8101 if (HasAttachPtr && !SeenAttachPtr) {
8102 SeenAttachPtr = I->getAssociatedExpression() == AttachPtrExpr;
8103 continue;
8104 }
8105
8106 // After finding the attach pointer, skip binary-ops, to skip past
8107 // expressions like (p + 10), for a map like map(*(p + 10)), where p is
8108 // the attach-ptr.
8109 if (HasAttachPtr && !SeenFirstNonBinOpExprAfterAttachPtr) {
8110 const auto *BO = dyn_cast<BinaryOperator>(Val: I->getAssociatedExpression());
8111 if (BO)
8112 continue;
8113
8114 // Found the first non-binary-operator component after attach
8115 SeenFirstNonBinOpExprAfterAttachPtr = true;
8116 BP = AttachPteeBaseAddr;
8117 }
8118
8119 // If the current component is member of a struct (parent struct) mark it.
8120 if (!EncounteredME) {
8121 EncounteredME = dyn_cast<MemberExpr>(Val: I->getAssociatedExpression());
8122 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
8123 // as MEMBER_OF the parent struct.
8124 if (EncounteredME) {
8125 ShouldBeMemberOf = true;
8126 // Do not emit as complex pointer if this is actually not array-like
8127 // expression.
8128 if (FirstPointerInComplexData) {
8129 QualType Ty = std::prev(x: I)
8130 ->getAssociatedDeclaration()
8131 ->getType()
8132 .getNonReferenceType();
8133 BP = CGF.EmitLoadOfPointer(Ptr: BP, PtrTy: Ty->castAs<PointerType>());
8134 FirstPointerInComplexData = false;
8135 }
8136 }
8137 }
8138
8139 auto Next = std::next(x: I);
8140
8141 // We need to generate the addresses and sizes if this is the last
8142 // component, if the component is a pointer or if it is an array section
8143 // whose length can't be proved to be one. If this is a pointer, it
8144 // becomes the base address for the following components.
8145
8146 // A final array section, is one whose length can't be proved to be one.
8147 // If the map item is non-contiguous then we don't treat any array section
8148 // as final array section.
8149 bool IsFinalArraySection =
8150 !IsNonContiguous &&
8151 isFinalArraySectionExpression(E: I->getAssociatedExpression());
8152
8153 // If we have a declaration for the mapping use that, otherwise use
8154 // the base declaration of the map clause.
8155 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
8156 ? I->getAssociatedDeclaration()
8157 : BaseDecl;
8158 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
8159 : MapExpr;
8160
8161 // Get information on whether the element is a pointer. Have to do a
8162 // special treatment for array sections given that they are built-in
8163 // types.
8164 const auto *OASE =
8165 dyn_cast<ArraySectionExpr>(Val: I->getAssociatedExpression());
8166 const auto *OAShE =
8167 dyn_cast<OMPArrayShapingExpr>(Val: I->getAssociatedExpression());
8168 const auto *UO = dyn_cast<UnaryOperator>(Val: I->getAssociatedExpression());
8169 const auto *BO = dyn_cast<BinaryOperator>(Val: I->getAssociatedExpression());
8170 bool IsPointer =
8171 OAShE ||
8172 (OASE && ArraySectionExpr::getBaseOriginalType(Base: OASE)
8173 .getCanonicalType()
8174 ->isAnyPointerType()) ||
8175 I->getAssociatedExpression()->getType()->isAnyPointerType();
8176 bool IsMemberReference = isa<MemberExpr>(Val: I->getAssociatedExpression()) &&
8177 MapDecl &&
8178 MapDecl->getType()->isLValueReferenceType();
8179 bool IsNonDerefPointer = IsPointer &&
8180 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
8181 !IsNonContiguous;
8182
8183 if (OASE)
8184 ++DimSize;
8185
8186 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
8187 IsFinalArraySection) {
8188 // If this is not the last component, we expect the pointer to be
8189 // associated with an array expression or member expression.
8190 assert((Next == CE ||
8191 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8192 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8193 isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
8194 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
8195 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8196 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8197 "Unexpected expression");
8198
8199 Address LB = Address::invalid();
8200 Address LowestElem = Address::invalid();
8201 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8202 const MemberExpr *E) {
8203 const Expr *BaseExpr = E->getBase();
8204 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
8205 // scalar.
8206 LValue BaseLV;
8207 if (E->isArrow()) {
8208 LValueBaseInfo BaseInfo;
8209 TBAAAccessInfo TBAAInfo;
8210 Address Addr =
8211 CGF.EmitPointerWithAlignment(Addr: BaseExpr, BaseInfo: &BaseInfo, TBAAInfo: &TBAAInfo);
8212 QualType PtrTy = BaseExpr->getType()->getPointeeType();
8213 BaseLV = CGF.MakeAddrLValue(Addr, T: PtrTy, BaseInfo, TBAAInfo);
8214 } else {
8215 BaseLV = CGF.EmitOMPSharedLValue(E: BaseExpr);
8216 }
8217 return BaseLV;
8218 };
8219 if (OAShE) {
8220 LowestElem = LB =
8221 Address(CGF.EmitScalarExpr(E: OAShE->getBase()),
8222 CGF.ConvertTypeForMem(
8223 T: OAShE->getBase()->getType()->getPointeeType()),
8224 CGF.getContext().getTypeAlignInChars(
8225 T: OAShE->getBase()->getType()));
8226 } else if (IsMemberReference) {
8227 const auto *ME = cast<MemberExpr>(Val: I->getAssociatedExpression());
8228 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8229 LowestElem = CGF.EmitLValueForFieldInitialization(
8230 Base: BaseLVal, Field: cast<FieldDecl>(Val: MapDecl))
8231 .getAddress();
8232 LB = CGF.EmitLoadOfReferenceLValue(RefAddr: LowestElem, RefTy: MapDecl->getType())
8233 .getAddress();
8234 } else {
8235 LowestElem = LB =
8236 CGF.EmitOMPSharedLValue(E: I->getAssociatedExpression())
8237 .getAddress();
8238 }
8239
8240 // Save the final LowestElem, to use it as the pointee in attach maps,
8241 // if emitted.
8242 if (Next == CE)
8243 FinalLowestElem = LowestElem;
8244
8245 // If this component is a pointer inside the base struct then we don't
8246 // need to create any entry for it - it will be combined with the object
8247 // it is pointing to into a single PTR_AND_OBJ entry.
8248 bool IsMemberPointerOrAddr =
8249 EncounteredME &&
8250 (((IsPointer || ForDeviceAddr) &&
8251 I->getAssociatedExpression() == EncounteredME) ||
8252 (IsPrevMemberReference && !IsPointer) ||
8253 (IsMemberReference && Next != CE &&
8254 !Next->getAssociatedExpression()->getType()->isPointerType()));
8255 if (!OverlappedElements.empty() && Next == CE) {
8256 // Handle base element with the info for overlapped elements.
8257 assert(!PartialStruct.Base.isValid() && "The base element is set.");
8258 assert(!IsPointer &&
8259 "Unexpected base element with the pointer type.");
8260 // Mark the whole struct as the struct that requires allocation on the
8261 // device.
8262 PartialStruct.LowestElem = {0, LowestElem};
8263 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8264 T: I->getAssociatedExpression()->getType());
8265 Address HB = CGF.Builder.CreateConstGEP(
8266 Addr: CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8267 Addr: LowestElem, Ty: CGF.VoidPtrTy, ElementTy: CGF.Int8Ty),
8268 Index: TypeSize.getQuantity() - 1);
8269 PartialStruct.HighestElem = {
8270 std::numeric_limits<decltype(
8271 PartialStruct.HighestElem.first)>::max(),
8272 HB};
8273 PartialStruct.Base = BP;
8274 PartialStruct.LB = LB;
8275 assert(
8276 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8277 "Overlapped elements must be used only once for the variable.");
8278 std::swap(a&: PartialStruct.PreliminaryMapData, b&: CombinedInfo);
8279 // Emit data for non-overlapped data.
8280 OpenMPOffloadMappingFlags Flags =
8281 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8282 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8283 /*AddPtrFlag=*/false,
8284 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8285 CopyOverlappedEntryGaps CopyGaps(CGF, CombinedInfo, Flags, MapDecl,
8286 MapExpr, BP, LB, IsNonContiguous,
8287 DimSize);
8288 // Do bitcopy of all non-overlapped structure elements.
8289 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8290 Component : OverlappedElements) {
8291 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8292 Component) {
8293 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8294 if (const auto *FD = dyn_cast<FieldDecl>(Val: VD)) {
8295 CopyGaps.processField(MC, FD, EmitMemberExprBase);
8296 }
8297 }
8298 }
8299 }
8300 CopyGaps.copyUntilEnd(HB);
8301 break;
8302 }
8303 llvm::Value *Size = getExprTypeSize(E: I->getAssociatedExpression());
8304 // Skip adding an entry in the CurInfo of this combined entry if the
8305 // whole struct is currently being mapped. The struct needs to be added
8306 // in the first position before any data internal to the struct is being
8307 // mapped.
8308 // Skip adding an entry in the CurInfo of this combined entry if the
8309 // PartialStruct.PreliminaryMapData.BasePointers has been mapped.
8310 if ((!IsMemberPointerOrAddr && !IsPartialMapped) ||
8311 (Next == CE && MapType != OMPC_MAP_unknown)) {
8312 if (!IsMappingWholeStruct) {
8313 CombinedInfo.Exprs.emplace_back(Args&: MapDecl, Args&: MapExpr);
8314 CombinedInfo.BasePointers.push_back(Elt: BP.emitRawPointer(CGF));
8315 CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
8316 CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
8317 CombinedInfo.Pointers.push_back(Elt: LB.emitRawPointer(CGF));
8318 CombinedInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
8319 V: Size, DestTy: CGF.Int64Ty, /*isSigned=*/true));
8320 CombinedInfo.NonContigInfo.Dims.push_back(Elt: IsNonContiguous ? DimSize
8321 : 1);
8322 } else {
8323 StructBaseCombinedInfo.Exprs.emplace_back(Args&: MapDecl, Args&: MapExpr);
8324 StructBaseCombinedInfo.BasePointers.push_back(
8325 Elt: BP.emitRawPointer(CGF));
8326 StructBaseCombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
8327 StructBaseCombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
8328 StructBaseCombinedInfo.Pointers.push_back(Elt: LB.emitRawPointer(CGF));
8329 StructBaseCombinedInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
8330 V: Size, DestTy: CGF.Int64Ty, /*isSigned=*/true));
8331 StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
8332 Elt: IsNonContiguous ? DimSize : 1);
8333 }
8334
8335 // If Mapper is valid, the last component inherits the mapper.
8336 bool HasMapper = Mapper && Next == CE;
8337 if (!IsMappingWholeStruct)
8338 CombinedInfo.Mappers.push_back(Elt: HasMapper ? Mapper : nullptr);
8339 else
8340 StructBaseCombinedInfo.Mappers.push_back(Elt: HasMapper ? Mapper
8341 : nullptr);
8342
8343 // We need to add a pointer flag for each map that comes from the
8344 // same expression except for the first one. We also need to signal
8345 // this map is the first one that relates with the current capture
8346 // (there is a set of entries for each capture).
8347 OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8348 MapType, MapModifiers, MotionModifiers, IsImplicit,
8349 AddPtrFlag: !IsExpressionFirstInfo || RequiresReference ||
8350 FirstPointerInComplexData || IsMemberReference,
8351 AddIsTargetParamFlag: IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8352
8353 if (!IsExpressionFirstInfo || IsMemberReference) {
8354 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8355 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8356 if (IsPointer || (IsMemberReference && Next != CE))
8357 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8358 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
8359 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
8360 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
8361 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
8362
8363 if (ShouldBeMemberOf) {
8364 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8365 // should be later updated with the correct value of MEMBER_OF.
8366 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
8367 // From now on, all subsequent PTR_AND_OBJ entries should not be
8368 // marked as MEMBER_OF.
8369 ShouldBeMemberOf = false;
8370 }
8371 }
8372
8373 if (!IsMappingWholeStruct)
8374 CombinedInfo.Types.push_back(Elt: Flags);
8375 else
8376 StructBaseCombinedInfo.Types.push_back(Elt: Flags);
8377 }
8378
8379 // If we have encountered a member expression so far, keep track of the
8380 // mapped member. If the parent is "*this", then the value declaration
8381 // is nullptr.
8382 if (EncounteredME) {
8383 const auto *FD = cast<FieldDecl>(Val: EncounteredME->getMemberDecl());
8384 unsigned FieldIndex = FD->getFieldIndex();
8385
8386 // Update info about the lowest and highest elements for this struct
8387 if (!PartialStruct.Base.isValid()) {
8388 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8389 if (IsFinalArraySection && OASE) {
8390 Address HB =
8391 CGF.EmitArraySectionExpr(E: OASE, /*IsLowerBound=*/false)
8392 .getAddress();
8393 PartialStruct.HighestElem = {FieldIndex, HB};
8394 } else {
8395 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8396 }
8397 PartialStruct.Base = BP;
8398 PartialStruct.LB = BP;
8399 } else if (FieldIndex < PartialStruct.LowestElem.first) {
8400 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8401 } else if (FieldIndex > PartialStruct.HighestElem.first) {
8402 if (IsFinalArraySection && OASE) {
8403 Address HB =
8404 CGF.EmitArraySectionExpr(E: OASE, /*IsLowerBound=*/false)
8405 .getAddress();
8406 PartialStruct.HighestElem = {FieldIndex, HB};
8407 } else {
8408 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8409 }
8410 }
8411 }
8412
8413 // Need to emit combined struct for array sections.
8414 if (IsFinalArraySection || IsNonContiguous)
8415 PartialStruct.IsArraySection = true;
8416
8417 // If we have a final array section, we are done with this expression.
8418 if (IsFinalArraySection)
8419 break;
8420
8421 // The pointer becomes the base for the next element.
8422 if (Next != CE)
8423 BP = IsMemberReference ? LowestElem : LB;
8424 if (!IsPartialMapped)
8425 IsExpressionFirstInfo = false;
8426 IsCaptureFirstInfo = false;
8427 FirstPointerInComplexData = false;
8428 IsPrevMemberReference = IsMemberReference;
8429 } else if (FirstPointerInComplexData) {
8430 QualType Ty = Components.rbegin()
8431 ->getAssociatedDeclaration()
8432 ->getType()
8433 .getNonReferenceType();
8434 BP = CGF.EmitLoadOfPointer(Ptr: BP, PtrTy: Ty->castAs<PointerType>());
8435 FirstPointerInComplexData = false;
8436 }
8437 }
8438 // If ran into the whole component - allocate the space for the whole
8439 // record.
8440 if (!EncounteredME)
8441 PartialStruct.HasCompleteRecord = true;
8442
8443 // Populate ATTACH information for later processing by emitAttachEntry.
8444 if (shouldEmitAttachEntry(PointerExpr: AttachPtrExpr, MapBaseDecl: BaseDecl, CGF, CurDir)) {
8445 AttachInfo.AttachPtrAddr = AttachPtrAddr;
8446 AttachInfo.AttachPteeAddr = FinalLowestElem;
8447 AttachInfo.AttachPtrDecl = BaseDecl;
8448 AttachInfo.AttachMapExpr = MapExpr;
8449 }
8450
8451 if (!IsNonContiguous)
8452 return;
8453
8454 const ASTContext &Context = CGF.getContext();
8455
8456 // For supporting stride in array section, we need to initialize the first
8457 // dimension size as 1, first offset as 0, and first count as 1
8458 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(Ty: CGF.CGM.Int64Ty, V: 0)};
8459 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(Ty: CGF.CGM.Int64Ty, V: 1)};
8460 MapValuesArrayTy CurStrides;
8461 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(Ty: CGF.CGM.Int64Ty, V: 1)};
8462 uint64_t ElementTypeSize;
8463
8464 // Collect Size information for each dimension and get the element size as
8465 // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
8467 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8468 Components) {
8469 const Expr *AssocExpr = Component.getAssociatedExpression();
8470 const auto *OASE = dyn_cast<ArraySectionExpr>(Val: AssocExpr);
8471
8472 if (!OASE)
8473 continue;
8474
8475 QualType Ty = ArraySectionExpr::getBaseOriginalType(Base: OASE->getBase());
8476 auto *CAT = Context.getAsConstantArrayType(T: Ty);
8477 auto *VAT = Context.getAsVariableArrayType(T: Ty);
8478
8479 // We need all the dimension size except for the last dimension.
8480 assert((VAT || CAT || &Component == &*Components.begin()) &&
8481 "Should be either ConstantArray or VariableArray if not the "
8482 "first Component");
8483
8484 // Get element size if CurStrides is empty.
8485 if (CurStrides.empty()) {
8486 const Type *ElementType = nullptr;
8487 if (CAT)
8488 ElementType = CAT->getElementType().getTypePtr();
8489 else if (VAT)
8490 ElementType = VAT->getElementType().getTypePtr();
8491 else if (&Component == &*Components.begin()) {
8492 // If the base is a raw pointer (e.g. T *data with data[a:b:c]),
8493 // there was no earlier CAT/VAT/array handling to establish
8494 // ElementType. Capture the pointee type now so that subsequent
8495 // components (offset/length/stride) have a concrete element type to
8496 // work with. This makes pointer-backed sections behave consistently
8497 // with CAT/VAT/array bases.
8498 if (const auto *PtrType = Ty->getAs<PointerType>())
8499 ElementType = PtrType->getPointeeType().getTypePtr();
8500 } else {
          // Any component after the first should never have a raw pointer
          // type; by this point ElementType must already be known (set above
          // or in prior CAT / VAT / array handling).
8504 assert(!Ty->isPointerType() &&
8505 "Non-first components should not be raw pointers");
8506 }
8507
8508 // At this stage, if ElementType was a base pointer and we are in the
8509 // first iteration, it has been computed.
8510 if (ElementType) {
8511 // For the case that having pointer as base, we need to remove one
8512 // level of indirection.
8513 if (&Component != &*Components.begin())
8514 ElementType = ElementType->getPointeeOrArrayElementType();
8515 ElementTypeSize =
8516 Context.getTypeSizeInChars(T: ElementType).getQuantity();
8517 CurStrides.push_back(
8518 Elt: llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: ElementTypeSize));
8519 }
8520 }
8521 // Get dimension value except for the last dimension since we don't need
8522 // it.
8523 if (DimSizes.size() < Components.size() - 1) {
8524 if (CAT)
8525 DimSizes.push_back(
8526 Elt: llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: CAT->getZExtSize()));
8527 else if (VAT)
8528 DimSizes.push_back(Elt: CGF.Builder.CreateIntCast(
8529 V: CGF.EmitScalarExpr(E: VAT->getSizeExpr()), DestTy: CGF.Int64Ty,
8530 /*IsSigned=*/isSigned: false));
8531 }
8532 }
8533
    // Skip the dummy dimension since we already have its information.
8535 auto *DI = DimSizes.begin() + 1;
8536 // Product of dimension.
8537 llvm::Value *DimProd =
8538 llvm::ConstantInt::get(Ty: CGF.CGM.Int64Ty, V: ElementTypeSize);
8539
8540 // Collect info for non-contiguous. Notice that offset, count, and stride
8541 // are only meaningful for array-section, so we insert a null for anything
8542 // other than array-section.
8543 // Also, the size of offset, count, and stride are not the same as
8544 // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8545 // count, and stride are the same as the number of non-contiguous
8546 // declaration in target update to/from clause.
8547 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8548 Components) {
8549 const Expr *AssocExpr = Component.getAssociatedExpression();
8550
8551 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(Val: AssocExpr)) {
8552 llvm::Value *Offset = CGF.Builder.CreateIntCast(
8553 V: CGF.EmitScalarExpr(E: AE->getIdx()), DestTy: CGF.Int64Ty,
8554 /*isSigned=*/false);
8555 CurOffsets.push_back(Elt: Offset);
8556 CurCounts.push_back(Elt: llvm::ConstantInt::get(Ty: CGF.Int64Ty, /*V=*/1));
8557 CurStrides.push_back(Elt: CurStrides.back());
8558 continue;
8559 }
8560
8561 const auto *OASE = dyn_cast<ArraySectionExpr>(Val: AssocExpr);
8562
8563 if (!OASE)
8564 continue;
8565
8566 // Offset
8567 const Expr *OffsetExpr = OASE->getLowerBound();
8568 llvm::Value *Offset = nullptr;
8569 if (!OffsetExpr) {
8570 // If offset is absent, then we just set it to zero.
8571 Offset = llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: 0);
8572 } else {
8573 Offset = CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: OffsetExpr),
8574 DestTy: CGF.Int64Ty,
8575 /*isSigned=*/false);
8576 }
8577 CurOffsets.push_back(Elt: Offset);
8578
8579 // Count
8580 const Expr *CountExpr = OASE->getLength();
8581 llvm::Value *Count = nullptr;
8582 if (!CountExpr) {
8583 // In Clang, once a high dimension is an array section, we construct all
8584 // the lower dimension as array section, however, for case like
8585 // arr[0:2][2], Clang construct the inner dimension as an array section
8586 // but it actually is not in an array section form according to spec.
8587 if (!OASE->getColonLocFirst().isValid() &&
8588 !OASE->getColonLocSecond().isValid()) {
8589 Count = llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: 1);
8590 } else {
8591 // OpenMP 5.0, 2.1.5 Array Sections, Description.
8592 // When the length is absent it defaults to ⌈(size −
8593 // lower-bound)/stride⌉, where size is the size of the array
8594 // dimension.
8595 const Expr *StrideExpr = OASE->getStride();
8596 llvm::Value *Stride =
8597 StrideExpr
8598 ? CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: StrideExpr),
8599 DestTy: CGF.Int64Ty, /*isSigned=*/false)
8600 : nullptr;
8601 if (Stride)
8602 Count = CGF.Builder.CreateUDiv(
8603 LHS: CGF.Builder.CreateNUWSub(LHS: *DI, RHS: Offset), RHS: Stride);
8604 else
8605 Count = CGF.Builder.CreateNUWSub(LHS: *DI, RHS: Offset);
8606 }
8607 } else {
8608 Count = CGF.EmitScalarExpr(E: CountExpr);
8609 }
8610 Count = CGF.Builder.CreateIntCast(V: Count, DestTy: CGF.Int64Ty, /*isSigned=*/false);
8611 CurCounts.push_back(Elt: Count);
8612
8613 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8614 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8615 // Offset Count Stride
8616 // D0 0 1 4 (int) <- dummy dimension
8617 // D1 0 2 8 (2 * (1) * 4)
8618 // D2 1 2 20 (1 * (1 * 5) * 4)
8619 // D3 0 2 200 (2 * (1 * 5 * 4) * 4)
8620 const Expr *StrideExpr = OASE->getStride();
8621 llvm::Value *Stride =
8622 StrideExpr
8623 ? CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: StrideExpr),
8624 DestTy: CGF.Int64Ty, /*isSigned=*/false)
8625 : nullptr;
8626 DimProd = CGF.Builder.CreateNUWMul(LHS: DimProd, RHS: *(DI - 1));
8627 if (Stride)
8628 CurStrides.push_back(Elt: CGF.Builder.CreateNUWMul(LHS: DimProd, RHS: Stride));
8629 else
8630 CurStrides.push_back(Elt: DimProd);
8631 if (DI != DimSizes.end())
8632 ++DI;
8633 }
8634
8635 CombinedInfo.NonContigInfo.Offsets.push_back(Elt: CurOffsets);
8636 CombinedInfo.NonContigInfo.Counts.push_back(Elt: CurCounts);
8637 CombinedInfo.NonContigInfo.Strides.push_back(Elt: CurStrides);
8638 }
8639
8640 /// Return the adjusted map modifiers if the declaration a capture refers to
8641 /// appears in a first-private clause. This is expected to be used only with
8642 /// directives that start with 'target'.
8643 OpenMPOffloadMappingFlags
8644 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8645 assert(Cap.capturesVariable() && "Expected capture by reference only!");
8646
8647 // A first private variable captured by reference will use only the
8648 // 'private ptr' and 'map to' flag. Return the right flags if the captured
8649 // declaration is known as first-private in this handler.
8650 if (FirstPrivateDecls.count(Val: Cap.getCapturedVar())) {
8651 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8652 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8653 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
8654 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
8655 OpenMPOffloadMappingFlags::OMP_MAP_TO;
8656 }
8657 auto I = LambdasMap.find(Val: Cap.getCapturedVar()->getCanonicalDecl());
8658 if (I != LambdasMap.end())
8659 // for map(to: lambda): using user specified map type.
8660 return getMapTypeBits(
8661 MapType: I->getSecond()->getMapType(), MapModifiers: I->getSecond()->getMapTypeModifiers(),
8662 /*MotionModifiers=*/{}, IsImplicit: I->getSecond()->isImplicit(),
8663 /*AddPtrFlag=*/false,
8664 /*AddIsTargetParamFlag=*/false,
8665 /*isNonContiguous=*/IsNonContiguous: false);
8666 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8667 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
8668 }
8669
8670 void getPlainLayout(const CXXRecordDecl *RD,
8671 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8672 bool AsBase) const {
8673 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8674
8675 llvm::StructType *St =
8676 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8677
8678 unsigned NumElements = St->getNumElements();
8679 llvm::SmallVector<
8680 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8681 RecordLayout(NumElements);
8682
8683 // Fill bases.
8684 for (const auto &I : RD->bases()) {
8685 if (I.isVirtual())
8686 continue;
8687
8688 QualType BaseTy = I.getType();
8689 const auto *Base = BaseTy->getAsCXXRecordDecl();
8690 // Ignore empty bases.
8691 if (isEmptyRecordForLayout(Context: CGF.getContext(), T: BaseTy) ||
8692 CGF.getContext()
8693 .getASTRecordLayout(D: Base)
8694 .getNonVirtualSize()
8695 .isZero())
8696 continue;
8697
8698 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(RD: Base);
8699 RecordLayout[FieldIndex] = Base;
8700 }
8701 // Fill in virtual bases.
8702 for (const auto &I : RD->vbases()) {
8703 QualType BaseTy = I.getType();
8704 // Ignore empty bases.
8705 if (isEmptyRecordForLayout(Context: CGF.getContext(), T: BaseTy))
8706 continue;
8707
8708 const auto *Base = BaseTy->getAsCXXRecordDecl();
8709 unsigned FieldIndex = RL.getVirtualBaseIndex(base: Base);
8710 if (RecordLayout[FieldIndex])
8711 continue;
8712 RecordLayout[FieldIndex] = Base;
8713 }
8714 // Fill in all the fields.
8715 assert(!RD->isUnion() && "Unexpected union.");
8716 for (const auto *Field : RD->fields()) {
8717 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8718 // will fill in later.)
8719 if (!Field->isBitField() &&
8720 !isEmptyFieldForLayout(Context: CGF.getContext(), FD: Field)) {
8721 unsigned FieldIndex = RL.getLLVMFieldNo(FD: Field);
8722 RecordLayout[FieldIndex] = Field;
8723 }
8724 }
8725 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8726 &Data : RecordLayout) {
8727 if (Data.isNull())
8728 continue;
8729 if (const auto *Base = dyn_cast<const CXXRecordDecl *>(Val: Data))
8730 getPlainLayout(RD: Base, Layout, /*AsBase=*/true);
8731 else
8732 Layout.push_back(Elt: cast<const FieldDecl *>(Val: Data));
8733 }
8734 }
8735
8736 /// Returns the address corresponding to \p PointerExpr.
8737 static Address getAttachPtrAddr(const Expr *PointerExpr,
8738 CodeGenFunction &CGF) {
8739 assert(PointerExpr && "Cannot get addr from null attach-ptr expr");
8740 Address AttachPtrAddr = Address::invalid();
8741
8742 if (auto *DRE = dyn_cast<DeclRefExpr>(Val: PointerExpr)) {
8743 // If the pointer is a variable, we can use its address directly.
8744 AttachPtrAddr = CGF.EmitLValue(E: DRE).getAddress();
8745 } else if (auto *OASE = dyn_cast<ArraySectionExpr>(Val: PointerExpr)) {
8746 AttachPtrAddr =
8747 CGF.EmitArraySectionExpr(E: OASE, /*IsLowerBound=*/true).getAddress();
8748 } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(Val: PointerExpr)) {
8749 AttachPtrAddr = CGF.EmitLValue(E: ASE).getAddress();
8750 } else if (auto *ME = dyn_cast<MemberExpr>(Val: PointerExpr)) {
8751 AttachPtrAddr = CGF.EmitMemberExpr(E: ME).getAddress();
8752 } else if (auto *UO = dyn_cast<UnaryOperator>(Val: PointerExpr)) {
8753 assert(UO->getOpcode() == UO_Deref &&
8754 "Unexpected unary-operator on attach-ptr-expr");
8755 AttachPtrAddr = CGF.EmitLValue(E: UO).getAddress();
8756 }
8757 assert(AttachPtrAddr.isValid() &&
8758 "Failed to get address for attach pointer expression");
8759 return AttachPtrAddr;
8760 }
8761
8762 /// Get the address of the attach pointer, and a load from it, to get the
8763 /// pointee base address.
8764 /// \return A pair containing AttachPtrAddr and AttachPteeBaseAddr. The pair
8765 /// contains invalid addresses if \p AttachPtrExpr is null.
8766 static std::pair<Address, Address>
8767 getAttachPtrAddrAndPteeBaseAddr(const Expr *AttachPtrExpr,
8768 CodeGenFunction &CGF) {
8769
8770 if (!AttachPtrExpr)
8771 return {Address::invalid(), Address::invalid()};
8772
8773 Address AttachPtrAddr = getAttachPtrAddr(PointerExpr: AttachPtrExpr, CGF);
8774 assert(AttachPtrAddr.isValid() && "Invalid attach pointer addr");
8775
8776 QualType AttachPtrType =
8777 OMPClauseMappableExprCommon::getComponentExprElementType(Exp: AttachPtrExpr)
8778 .getCanonicalType();
8779
8780 Address AttachPteeBaseAddr = CGF.EmitLoadOfPointer(
8781 Ptr: AttachPtrAddr, PtrTy: AttachPtrType->castAs<PointerType>());
8782 assert(AttachPteeBaseAddr.isValid() && "Invalid attach pointee base addr");
8783
8784 return {AttachPtrAddr, AttachPteeBaseAddr};
8785 }
8786
8787 /// Returns whether an attach entry should be emitted for a map on
8788 /// \p MapBaseDecl on the directive \p CurDir.
8789 static bool
8790 shouldEmitAttachEntry(const Expr *PointerExpr, const ValueDecl *MapBaseDecl,
8791 CodeGenFunction &CGF,
8792 llvm::PointerUnion<const OMPExecutableDirective *,
8793 const OMPDeclareMapperDecl *>
8794 CurDir) {
8795 if (!PointerExpr)
8796 return false;
8797
8798 // Pointer attachment is needed at map-entering time or for declare
8799 // mappers.
8800 return isa<const OMPDeclareMapperDecl *>(Val: CurDir) ||
8801 isOpenMPTargetMapEnteringDirective(
8802 DKind: cast<const OMPExecutableDirective *>(Val&: CurDir)
8803 ->getDirectiveKind());
8804 }
8805
8806 /// Computes the attach-ptr expr for \p Components, and updates various maps
8807 /// with the information.
8808 /// It internally calls OMPClauseMappableExprCommon::findAttachPtrExpr()
8809 /// with the OpenMPDirectiveKind extracted from \p CurDir.
8810 /// It updates AttachPtrComputationOrderMap, AttachPtrComponentDepthMap, and
8811 /// AttachPtrExprMap.
8812 void collectAttachPtrExprInfo(
8813 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
8814 llvm::PointerUnion<const OMPExecutableDirective *,
8815 const OMPDeclareMapperDecl *>
8816 CurDir) {
8817
8818 OpenMPDirectiveKind CurDirectiveID =
8819 isa<const OMPDeclareMapperDecl *>(Val: CurDir)
8820 ? OMPD_declare_mapper
8821 : cast<const OMPExecutableDirective *>(Val&: CurDir)->getDirectiveKind();
8822
8823 const auto &[AttachPtrExpr, Depth] =
8824 OMPClauseMappableExprCommon::findAttachPtrExpr(Components,
8825 CurDirKind: CurDirectiveID);
8826
8827 AttachPtrComputationOrderMap.try_emplace(
8828 Key: AttachPtrExpr, Args: AttachPtrComputationOrderMap.size());
8829 AttachPtrComponentDepthMap.try_emplace(Key: AttachPtrExpr, Args: Depth);
8830 AttachPtrExprMap.try_emplace(Key: Components, Args: AttachPtrExpr);
8831 }
8832
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  ///
  /// \param Clauses All clauses of the directive; only map/to/from and
  ///        use_device_ptr/use_device_addr clauses are inspected here.
  /// \param CombinedInfo [out] Receives the generated map entries.
  /// \param OMPBuilder Used for computing MEMBER_OF flags of combined entries.
  /// \param SkipVarSet Declarations for which no info should be generated.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    // 'Total' is not a real kind: it is the number of buckets allocated per
    // declaration below.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(V: D))
            return;
          // One bucket per MapKind for each declaration (hence 'Total').
          auto It = Info.try_emplace(Key: D, Args: Total).first;
          It->second[Kind].emplace_back(
              Args&: L, Args&: MapType, Args&: MapModifiers, Args&: MotionModifiers, Args&: ReturnDevicePointer,
              Args&: IsImplicit, Args&: Mapper, Args&: VarRef, Args&: ForDeviceAddr);
        };

    // Collect info from 'map' clauses, classifying each component list as
    // Present / Allocs / Other based on modifiers and map type.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Val: Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(Range: C->getMapTypeModifiers(),
                             Element: OMPC_MAP_MODIFIER_present))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        // Only pass along the variable-reference expression when the clause
        // has a valid source location.
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(t: L), Kind, std::get<1>(t: L), C->getMapType(),
                C->getMapTypeModifiers(), {},
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(t: L),
                E);
        ++EI;
      }
    }
    // Collect info from 'to' clauses (treated as map-type 'to'); also emit
    // the iterator variable if an 'iterator' motion modifier is present.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Val: Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(Range: C->getMotionModifiers(),
                             Element: OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      if (llvm::is_contained(Range: C->getMotionModifiers(),
                             Element: OMPC_MOTION_MODIFIER_iterator)) {
        if (auto *IteratorExpr = dyn_cast<OMPIteratorExpr>(
                Val: C->getIteratorModifier()->IgnoreParenImpCasts())) {
          const auto *VD = cast<VarDecl>(Val: IteratorExpr->getIteratorDecl(I: 0));
          CGF.EmitVarDecl(D: *VD);
        }
      }

      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(t: L), Kind, std::get<1>(t: L), OMPC_MAP_to, {},
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(t: L), *EI);
        ++EI;
      }
    }
    // Collect info from 'from' clauses (treated as map-type 'from'); mirrors
    // the handling of 'to' clauses above.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Val: Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(Range: C->getMotionModifiers(),
                             Element: OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      if (llvm::is_contained(Range: C->getMotionModifiers(),
                             Element: OMPC_MOTION_MODIFIER_iterator)) {
        if (auto *IteratorExpr = dyn_cast<OMPIteratorExpr>(
                Val: C->getIteratorModifier()->IgnoreParenImpCasts())) {
          const auto *VD = cast<VarDecl>(Val: IteratorExpr->getIteratorDecl(I: 0));
          CGF.EmitVarDecl(D: *VD);
        }
      }

      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(t: L), Kind, std::get<1>(t: L), OMPC_MAP_from, {},
                C->getMotionModifiers(),
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(t: L),
                *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr and use_device_addr clauses information and
    // mark the existing map entries as such. If there is no map information for
    // an entry in the use_device_ptr and use_device_addr list, we create one
    // with map type 'return_param' and zero size section. It is the user's
    // fault if that was not mapped before. If there is no map information, then
    // we defer the emission of that entry until all the maps for the same VD
    // have been handled.
    MapCombinedInfoTy UseDeviceDataCombinedInfo;

    // Emits a standalone RETURN_PARAM entry (zero-size section) for a
    // use_device_ptr/use_device_addr operand that had no matching map.
    auto &&UseDeviceDataCombinedInfoGen =
        [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
                                     CodeGenFunction &CGF, bool IsDevAddr,
                                     bool HasUdpFbNullify = false) {
          UseDeviceDataCombinedInfo.Exprs.push_back(Elt: VD);
          UseDeviceDataCombinedInfo.BasePointers.emplace_back(Args&: Ptr);
          UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(Args&: VD);
          UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
              Args: IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          // FIXME: For use_device_addr on array-sections, this should
          // be the starting address of the section.
          // e.g. int *p;
          //      ... use_device_addr(p[3])
          // &p[0], &p[3], /*size=*/0, RETURN_PARAM
          UseDeviceDataCombinedInfo.Pointers.push_back(Elt: Ptr);
          UseDeviceDataCombinedInfo.Sizes.push_back(
              Elt: llvm::Constant::getNullValue(Ty: CGF.Int64Ty));
          OpenMPOffloadMappingFlags Flags =
              OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
          if (HasUdpFbNullify)
            Flags |= OpenMPOffloadMappingFlags::OMP_MAP_FB_NULLIFY;
          UseDeviceDataCombinedInfo.Types.push_back(Elt: Flags);
          UseDeviceDataCombinedInfo.Mappers.push_back(Elt: nullptr);
        };

    // Computes the pointer value for an unmatched use_device_ptr/addr operand
    // and forwards it to UseDeviceDataCombinedInfoGen.
    auto &&MapInfoGen =
        [&UseDeviceDataCombinedInfoGen](
            CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
            OMPClauseMappableExprCommon::MappableExprComponentListRef
                Components,
            bool IsDevAddr, bool IEIsAttachPtrForDevAddr = false,
            bool HasUdpFbNullify = false) {
          // We didn't find any match in our map information - generate a zero
          // size array section.
          llvm::Value *Ptr;
          if (IsDevAddr && !IEIsAttachPtrForDevAddr) {
            // use_device_addr: take the address of the expression itself.
            if (IE->isGLValue())
              Ptr = CGF.EmitLValue(E: IE).getPointer(CGF);
            else
              Ptr = CGF.EmitScalarExpr(E: IE);
          } else {
            // use_device_ptr (or dev-addr via attach-ptr): load the pointer
            // value.
            Ptr = CGF.EmitLoadOfScalar(lvalue: CGF.EmitLValue(E: IE), Loc: IE->getExprLoc());
          }
          bool TreatDevAddrAsDevPtr = IEIsAttachPtrForDevAddr;
          // For the purpose of address-translation, treat something like the
          // following:
          //   int *p;
          //   ... use_device_addr(p[1])
          // equivalent to
          //   ... use_device_ptr(p)
          UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, /*IsDevAddr=*/IsDevAddr &&
                                                         !TreatDevAddrAsDevPtr,
                                       HasUdpFbNullify);
        };

    // Returns true (and marks the matching MapInfo as a return-device-pointer
    // entry) if existing map info covers the use_device_ptr/addr operand.
    auto &&IsMapInfoExist =
        [&Info, this](CodeGenFunction &CGF, const ValueDecl *VD, const Expr *IE,
                      const Expr *DesiredAttachPtrExpr, bool IsDevAddr,
                      bool HasUdpFbNullify = false) -> bool {
      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it. If found,
      // return true.
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(Key: isa<MemberExpr>(Val: IE) ? nullptr : VD);
      if (It != Info.end()) {
        bool Found = false;
        for (auto &Data : It->second) {
          MapInfo *CI = nullptr;
          // We potentially have multiple maps for the same decl. We need to
          // only consider those for which the attach-ptr matches the desired
          // attach-ptr. (Note: this 'It' intentionally shadows the outer one.)
          auto *It = llvm::find_if(Range&: Data, P: [&](const MapInfo &MI) {
            if (MI.Components.back().getAssociatedDeclaration() != VD)
              return false;

            const Expr *MapAttachPtr = getAttachPtrExpr(Components: MI.Components);
            bool Match = AttachPtrComparator.areEqual(LHS: MapAttachPtr,
                                                      RHS: DesiredAttachPtrExpr);
            return Match;
          });

          if (It != Data.end())
            CI = &*It;

          if (CI) {
            if (IsDevAddr) {
              CI->ForDeviceAddr = true;
              CI->ReturnDevicePointer = true;
              CI->HasUdpFbNullify = HasUdpFbNullify;
              Found = true;
              break;
            } else {
              // For use_device_ptr, only reuse the map entry in the cases
              // enumerated below (unified shared memory, member expressions,
              // non-pointer types, etc.); otherwise keep looking.
              auto PrevCI = std::next(x: CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(Val: VD);
              const Expr *AttachPtrExpr = getAttachPtrExpr(Components: CI->Components);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(Val: IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(Val: PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage() ||
                  (isa_and_nonnull<DeclRefExpr>(Val: AttachPtrExpr) &&
                   VD == cast<DeclRefExpr>(Val: AttachPtrExpr)->getDecl())) {
                CI->ForDeviceAddr = IsDevAddr;
                CI->ReturnDevicePointer = true;
                CI->HasUdpFbNullify = HasUdpFbNullify;
                Found = true;
                break;
              }
            }
          }
        }
        return Found;
      }
      return false;
    };

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Val: Cl);
      if (!C)
        continue;
      bool HasUdpFbNullify =
          C->getFallbackModifier() == OMPC_USE_DEVICE_PTR_FALLBACK_fb_nullify;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(t: L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(Val: VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        // For use_device_ptr, we match an existing map clause if its attach-ptr
        // is same as the use_device_ptr operand. e.g.
        // map expr | use_device_ptr expr | current behavior
        // ---------|---------------------|-----------------
        // p[1]     | p                   | match
        // ps->a    | ps                  | match
        // p        | p                   | no match
        const Expr *UDPOperandExpr =
            Components.front().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE,
                           /*DesiredAttachPtrExpr=*/UDPOperandExpr,
                           /*IsDevAddr=*/false, HasUdpFbNullify))
          continue;
        MapInfoGen(CGF, IE, VD, Components, /*IsDevAddr=*/false,
                   /*IEIsAttachPtrForDevAddr=*/false, HasUdpFbNullify);
      }
    }

    // Same treatment for use_device_addr clauses; 'Processed' ensures each
    // declaration is handled at most once across all such clauses.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Val: Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(t: L);
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(t: L).back().getAssociatedDeclaration();
        if (!Processed.insert(V: VD).second)
          continue;
        VD = cast<ValueDecl>(Val: VD->getCanonicalDecl());
        // For use_device_addr, we match an existing map clause if the
        // use_device_addr operand's attach-ptr matches the map operand's
        // attach-ptr.
        // We could also restrict to only match cases when there is a full
        // match between the map/use_device_addr clause exprs, but that may be
        // unnecessary.
        //
        // map expr | use_device_addr expr | current   | possible restrictive/
        //          |                      | behavior  | safer behavior
        // ---------|----------------------|-----------|-----------------------
        // p        | p                    | match     | match
        // p[0]     | p[0]                 | match     | match
        // p[0:1]   | p[0]                 | match     | no match
        // p[0:1]   | p[2:1]               | match     | no match
        // p[1]     | p[0]                 | match     | no match
        // ps->a    | ps->b                | match     | no match
        // p        | p[0]                 | no match  | no match
        // pp       | pp[0][0]             | no match  | no match
        const Expr *UDAAttachPtrExpr = getAttachPtrExpr(Components);
        const Expr *IE = std::get<1>(t: L).back().getAssociatedExpression();
        assert((!UDAAttachPtrExpr || UDAAttachPtrExpr == IE) &&
               "use_device_addr operand has an attach-ptr, but does not match "
               "last component's expr.");
        if (IsMapInfoExist(CGF, VD, IE,
                           /*DesiredAttachPtrExpr=*/UDAAttachPtrExpr,
                           /*IsDevAddr=*/true))
          continue;
        MapInfoGen(CGF, IE, VD, Components,
                   /*IsDevAddr=*/true,
                   /*IEIsAttachPtrForDevAddr=*/UDAAttachPtrExpr != nullptr);
      }
    }

    // Finally, emit the collected map info, one declaration at a time.
    for (const auto &Data : Info) {
      MapCombinedInfoTy CurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(Val: D);
      // Group component lists by their AttachPtrExpr and process them in order
      // of increasing complexity (nullptr first, then simple expressions like
      // p, then more complex ones like p[0], etc.)
      //
      // This is similar to how generateInfoForCaptureFromClauseInfo handles
      // grouping for target constructs.
      SmallVector<std::pair<const Expr *, MapInfo>, 16> AttachPtrMapInfoPairs;

      // First, collect all MapData entries with their attach-ptr exprs.
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          const Expr *AttachPtrExpr = getAttachPtrExpr(Components: L.Components);
          AttachPtrMapInfoPairs.emplace_back(Args&: AttachPtrExpr, Args: L);
        }
      }

      // Next, sort by increasing order of their complexity.
      llvm::stable_sort(Range&: AttachPtrMapInfoPairs,
                        C: [this](const auto &LHS, const auto &RHS) {
                          return AttachPtrComparator(LHS.first, RHS.first);
                        });

      // And finally, process them all in order, grouping those with
      // equivalent attach-ptr exprs together.
      auto *It = AttachPtrMapInfoPairs.begin();
      while (It != AttachPtrMapInfoPairs.end()) {
        const Expr *AttachPtrExpr = It->first;

        // Gather the run of entries whose attach-ptr is equivalent to
        // AttachPtrExpr (pointer-equal or semantically equal).
        SmallVector<MapInfo, 8> GroupLists;
        while (It != AttachPtrMapInfoPairs.end() &&
               (It->first == AttachPtrExpr ||
                AttachPtrComparator.areEqual(LHS: It->first, RHS: AttachPtrExpr))) {
          GroupLists.push_back(Elt: It->second);
          ++It;
        }
        assert(!GroupLists.empty() && "GroupLists should not be empty");

        StructRangeInfoTy PartialStruct;
        AttachInfoTy AttachInfo;
        MapCombinedInfoTy GroupCurInfo;
        // Current group's struct base information:
        MapCombinedInfoTy GroupStructBaseCurInfo;
        for (const MapInfo &L : GroupLists) {
          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = GroupCurInfo.BasePointers.size();
          unsigned StructBasePointersIdx =
              GroupStructBaseCurInfo.BasePointers.size();

          GroupCurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              MapType: L.MapType, MapModifiers: L.MapModifiers, MotionModifiers: L.MotionModifiers, Components: L.Components,
              CombinedInfo&: GroupCurInfo, StructBaseCombinedInfo&: GroupStructBaseCurInfo, PartialStruct, AttachInfo,
              /*IsFirstComponentList=*/false, IsImplicit: L.IsImplicit,
              /*GenerateAllInfoForClauses*/ true, Mapper: L.Mapper, ForDeviceAddr: L.ForDeviceAddr, BaseDecl: VD,
              MapExpr: L.VarRef, /*OverlappedElements*/ {});

          // If this entry relates to a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            // Check whether a value was added to either GroupCurInfo or
            // GroupStructBaseCurInfo and error if no value was added to either
            // of them:
            assert((CurrentBasePointersIdx < GroupCurInfo.BasePointers.size() ||
                    StructBasePointersIdx <
                        GroupStructBaseCurInfo.BasePointers.size()) &&
                   "Unexpected number of mapped base pointers.");

            // Choose a base pointer index which is always valid:
            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            // If GroupStructBaseCurInfo has been updated this iteration then
            // work on the first new entry added to it i.e. make sure that when
            // multiple values are added to any of the lists, the first value
            // added is being modified by the assignments below (not the last
            // value added).
            auto SetDevicePointerInfo = [&](MapCombinedInfoTy &Info,
                                            unsigned Idx) {
              Info.DevicePtrDecls[Idx] = RelevantVD;
              Info.DevicePointers[Idx] = L.ForDeviceAddr
                                             ? DeviceInfoTy::Address
                                             : DeviceInfoTy::Pointer;
              Info.Types[Idx] |=
                  OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
              if (L.HasUdpFbNullify)
                Info.Types[Idx] |=
                    OpenMPOffloadMappingFlags::OMP_MAP_FB_NULLIFY;
            };

            if (StructBasePointersIdx <
                GroupStructBaseCurInfo.BasePointers.size())
              SetDevicePointerInfo(GroupStructBaseCurInfo,
                                   StructBasePointersIdx);
            else
              SetDevicePointerInfo(GroupCurInfo, CurrentBasePointersIdx);
          }
        }

        // Unify entries in one list making sure the struct mapping precedes the
        // individual fields:
        MapCombinedInfoTy GroupUnionCurInfo;
        GroupUnionCurInfo.append(CurInfo&: GroupStructBaseCurInfo);
        GroupUnionCurInfo.append(CurInfo&: GroupCurInfo);

        // If there is an entry in PartialStruct it means we have a struct with
        // individual members mapped. Emit an extra combined entry.
        if (PartialStruct.Base.isValid()) {
          // Prepend a synthetic dimension of length 1 to represent the
          // aggregated struct object. Using 1 (not 0, as 0 produced an
          // incorrect non-contiguous descriptor (DimSize==1), causing the
          // non-contiguous motion clause path to be skipped.) is important:
          // * It preserves the correct rank so targetDataUpdate() computes
          //   DimSize == 2 for cases like strided array sections originating
          //   from user-defined mappers (e.g. test with s.data[0:8:2]).
          GroupUnionCurInfo.NonContigInfo.Dims.insert(
              I: GroupUnionCurInfo.NonContigInfo.Dims.begin(), Elt: 1);
          emitCombinedEntry(
              CombinedInfo&: CurInfo, CurTypes&: GroupUnionCurInfo.Types, PartialStruct, AttachInfo,
              /*IsMapThis=*/!VD, OMPBuilder, VD,
              /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size(),
              /*NotTargetParams=*/true);
        }

        // Append this group's results to the overall CurInfo in the correct
        // order: combined-entry -> original-field-entries -> attach-entry
        CurInfo.append(CurInfo&: GroupUnionCurInfo);
        if (AttachInfo.isValid())
          emitAttachEntry(CGF, CombinedInfo&: CurInfo, AttachInfo);
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr/addr clauses.
    CombinedInfo.append(CurInfo&: UseDeviceDataCombinedInfo);
  }
9301
9302public:
  /// Constructor for executable directives. Pre-extracts per-clause
  /// information (firstprivates, defaultmap, is_device_ptr/has_device_addr,
  /// lambda maps) and populates the attach-pointer maps for all map-related
  /// clauses of \p Dir.
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlist())
        FirstPrivateDecls.try_emplace(
            Key: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D)->getDecl()), Args: C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        // Prefer the allocator traits variable when present; otherwise fall
        // back to the allocator expression's declaration.
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(Val: D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(Key: cast<VarDecl>(Val: DRE->getDecl()),
                                        /*Implicit=*/Args: true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     Val: cast<DeclRefExpr>(Val: D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(Key: VD, /*Implicit=*/Args: true);
      }
    }
    // Extract defaultmap clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPDefaultmapClause>())
      if (C->getDefaultmapModifier() == OMPC_DEFAULTMAP_MODIFIER_firstprivate)
        DefaultmapFirstprivateKinds.insert(V: C->getDefaultmapKind());
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(t&: L)].push_back(Elt: std::get<1>(t&: L));
    // Extract device addr clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
      for (auto L : C->component_lists())
        HasDevAddrsMap[std::get<0>(t&: L)].push_back(Elt: std::get<1>(t&: L));
    // Extract map information: record lambdas mapped with map-type 'to' so
    // their captures can be handled specially later.
    for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
      if (C->getMapType() != OMPC_MAP_to)
        continue;
      for (auto L : C->component_lists()) {
        const ValueDecl *VD = std::get<0>(t&: L);
        const auto *RD = VD ? VD->getType()
                                  .getCanonicalType()
                                  .getNonReferenceType()
                                  ->getAsCXXRecordDecl()
                            : nullptr;
        if (RD && RD->isLambda())
          LambdasMap.try_emplace(Key: std::get<0>(t&: L), Args&: C);
      }
    }

    // Records attach-pointer info for every non-empty component list of a
    // clause.
    auto CollectAttachPtrExprsForClauseComponents = [this](const auto *C) {
      for (auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        if (!Components.empty())
          collectAttachPtrExprInfo(Components, CurDir);
      }
    };

    // Populate the AttachPtrExprMap for all component lists from map-related
    // clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPMapClause>())
      CollectAttachPtrExprsForClauseComponents(C);
    for (const auto *C : Dir.getClausesOfKind<OMPToClause>())
      CollectAttachPtrExprsForClauseComponents(C);
    for (const auto *C : Dir.getClausesOfKind<OMPFromClause>())
      CollectAttachPtrExprsForClauseComponents(C);
    for (const auto *C : Dir.getClausesOfKind<OMPUseDevicePtrClause>())
      CollectAttachPtrExprsForClauseComponents(C);
    for (const auto *C : Dir.getClausesOfKind<OMPUseDeviceAddrClause>())
      CollectAttachPtrExprsForClauseComponents(C);
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      CollectAttachPtrExprsForClauseComponents(C);
    for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
      CollectAttachPtrExprsForClauseComponents(C);
  }
9377
  /// Constructor for the declare mapper directive. Unlike the
  /// executable-directive constructor, no per-clause pre-extraction is done
  /// here; clauses are processed on demand via generateAllInfoForMapper().
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {}
9381
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  /// If a valid \p AttachInfo exists, its pointee addr will be updated to point
  /// to the combined-entry's begin address, if emitted.
  /// \p PartialStruct contains attach base-pointer information.
  /// If a combined entry is emitted, it is appended to \p CombinedInfo and the
  /// existing entries in \p CurTypes are marked MEMBER_OF it.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         AttachInfoTy &AttachInfo, bool IsMapThis,
                         llvm::OpenMPIRBuilder &OMPBuilder, const ValueDecl *VD,
                         unsigned OffsetForMemberOfFlag,
                         bool NotTargetParams) const {
    // A single non-MEMBER_OF, non-array-section entry does not need a
    // combined entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
         OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // For a complete record, both bounds collapse to the record's base.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(Elt: VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(Elt: PartialStruct.Base.emitRawPointer(CGF));
    CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
    CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.emitRawPointer(CGF);
    const CXXMethodDecl *MD =
        CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(Val: CGF.CurFuncDecl) : nullptr;
    const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
    bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
    // There should not be a mapper for a combined entry.
    if (HasBaseClass) {
      // OpenMP 5.2 148:21:
      // If the target construct is within a class non-static member function,
      // and a variable is an accessible data member of the object for which the
      // non-static data member function is invoked, the variable is treated as
      // if the this[:1] expression had appeared in a map clause with a map-type
      // of tofrom.
      // Emit this[:1]
      CombinedInfo.Pointers.push_back(Elt: PartialStruct.Base.emitRawPointer(CGF));
      QualType Ty = MD->getFunctionObjectParameterType();
      llvm::Value *Size =
          CGF.Builder.CreateIntCast(V: CGF.getTypeSize(Ty), DestTy: CGF.Int64Ty,
                                    /*isSigned=*/true);
      CombinedInfo.Sizes.push_back(Elt: Size);
    } else {
      CombinedInfo.Pointers.push_back(Elt: LB);
      // Size is (addr of {highest+1} element) - (addr of lowest element)
      llvm::Value *HB = HBAddr.emitRawPointer(CGF);
      llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
          Ty: HBAddr.getElementType(), Ptr: HB, /*Idx0=*/1);
      llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(V: LB, DestTy: CGF.VoidPtrTy);
      llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(V: HAddr, DestTy: CGF.VoidPtrTy);
      llvm::Value *Diff = CGF.Builder.CreatePtrDiff(LHS: CHAddr, RHS: CLAddr);
      llvm::Value *Size = CGF.Builder.CreateIntCast(V: Diff, DestTy: CGF.Int64Ty,
                                                    /*isSigned=*/false);
      CombinedInfo.Sizes.push_back(Elt: Size);
    }
    CombinedInfo.Mappers.push_back(Elt: nullptr);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(
        Elt: NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
        : !PartialStruct.PreliminaryMapData.BasePointers.empty()
            ? OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ
            : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(Range&: CurTypes, P: [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
        }))
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement. Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(Range&: CurTypes, P: [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
        })) {
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field, or ATTACH entries since they are expected
    // to be handled by themselves, after all other maps).
    OpenMPOffloadMappingFlags MemberOfFlag = OMPBuilder.getMemberOfFlag(
        Position: OffsetForMemberOfFlag + CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      OMPBuilder.setCorrectMemberOfFlag(Flags&: M, MemberOfFlag);

    // When we are emitting a combined entry. If there were any pending
    // attachments to be done, we do them to the begin address of the combined
    // entry. Note that this means only one attachment per combined-entry will
    // be done. So, for instance, if we have:
    //   S *ps;
    //   ... map(ps->a, ps->b)
    // When we are emitting a combined entry. If AttachInfo is valid,
    // update the pointee address to point to the begin address of the combined
    // entry. This ensures that if we have multiple maps like:
    // `map(ps->a, ps->b)`, we still get a single ATTACH entry, like:
    //
    //   &ps[0], &ps->a, sizeof(ps->a to ps->b), ALLOC // combined-entry
    //   &ps[0], &ps->a, sizeof(ps->a), TO | FROM
    //   &ps[0], &ps->b, sizeof(ps->b), TO | FROM
    //   &ps, &ps->a, sizeof(void*), ATTACH // Use combined-entry's LB
    if (AttachInfo.isValid())
      AttachInfo.AttachPteeAddr = LBAddr;
  }
9507
9508 /// Generate all the base pointers, section pointers, sizes, map types, and
9509 /// mappers for the extracted mappable expressions (all included in \a
9510 /// CombinedInfo). Also, for each item that relates with a device pointer, a
9511 /// pair of the relevant declaration and index where it occurs is appended to
9512 /// the device pointers info array.
9513 void generateAllInfo(
9514 MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
9515 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9516 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9517 assert(isa<const OMPExecutableDirective *>(CurDir) &&
9518 "Expect a executable directive");
9519 const auto *CurExecDir = cast<const OMPExecutableDirective *>(Val: CurDir);
9520 generateAllInfoForClauses(Clauses: CurExecDir->clauses(), CombinedInfo, OMPBuilder,
9521 SkipVarSet);
9522 }
9523
9524 /// Generate all the base pointers, section pointers, sizes, map types, and
9525 /// mappers for the extracted map clauses of user-defined mapper (all included
9526 /// in \a CombinedInfo).
9527 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
9528 llvm::OpenMPIRBuilder &OMPBuilder) const {
9529 assert(isa<const OMPDeclareMapperDecl *>(CurDir) &&
9530 "Expect a declare mapper directive");
9531 const auto *CurMapperDir = cast<const OMPDeclareMapperDecl *>(Val: CurDir);
9532 generateAllInfoForClauses(Clauses: CurMapperDir->clauses(), CombinedInfo,
9533 OMPBuilder);
9534 }
9535
9536 /// Emit capture info for lambdas for variables captured by reference.
9537 void generateInfoForLambdaCaptures(
9538 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9539 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
9540 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
9541 const auto *RD = VDType->getAsCXXRecordDecl();
9542 if (!RD || !RD->isLambda())
9543 return;
9544 Address VDAddr(Arg, CGF.ConvertTypeForMem(T: VDType),
9545 CGF.getContext().getDeclAlign(D: VD));
9546 LValue VDLVal = CGF.MakeAddrLValue(Addr: VDAddr, T: VDType);
9547 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
9548 FieldDecl *ThisCapture = nullptr;
9549 RD->getCaptureFields(Captures, ThisCapture);
9550 if (ThisCapture) {
9551 LValue ThisLVal =
9552 CGF.EmitLValueForFieldInitialization(Base: VDLVal, Field: ThisCapture);
9553 LValue ThisLValVal = CGF.EmitLValueForField(Base: VDLVal, Field: ThisCapture);
9554 LambdaPointers.try_emplace(Key: ThisLVal.getPointer(CGF),
9555 Args: VDLVal.getPointer(CGF));
9556 CombinedInfo.Exprs.push_back(Elt: VD);
9557 CombinedInfo.BasePointers.push_back(Elt: ThisLVal.getPointer(CGF));
9558 CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
9559 CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
9560 CombinedInfo.Pointers.push_back(Elt: ThisLValVal.getPointer(CGF));
9561 CombinedInfo.Sizes.push_back(
9562 Elt: CGF.Builder.CreateIntCast(V: CGF.getTypeSize(Ty: CGF.getContext().VoidPtrTy),
9563 DestTy: CGF.Int64Ty, /*isSigned=*/true));
9564 CombinedInfo.Types.push_back(
9565 Elt: OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9566 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9567 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9568 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9569 CombinedInfo.Mappers.push_back(Elt: nullptr);
9570 }
9571 for (const LambdaCapture &LC : RD->captures()) {
9572 if (!LC.capturesVariable())
9573 continue;
9574 const VarDecl *VD = cast<VarDecl>(Val: LC.getCapturedVar());
9575 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
9576 continue;
9577 auto It = Captures.find(Val: VD);
9578 assert(It != Captures.end() && "Found lambda capture without field.");
9579 LValue VarLVal = CGF.EmitLValueForFieldInitialization(Base: VDLVal, Field: It->second);
9580 if (LC.getCaptureKind() == LCK_ByRef) {
9581 LValue VarLValVal = CGF.EmitLValueForField(Base: VDLVal, Field: It->second);
9582 LambdaPointers.try_emplace(Key: VarLVal.getPointer(CGF),
9583 Args: VDLVal.getPointer(CGF));
9584 CombinedInfo.Exprs.push_back(Elt: VD);
9585 CombinedInfo.BasePointers.push_back(Elt: VarLVal.getPointer(CGF));
9586 CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
9587 CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
9588 CombinedInfo.Pointers.push_back(Elt: VarLValVal.getPointer(CGF));
9589 CombinedInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
9590 V: CGF.getTypeSize(
9591 Ty: VD->getType().getCanonicalType().getNonReferenceType()),
9592 DestTy: CGF.Int64Ty, /*isSigned=*/true));
9593 } else {
9594 RValue VarRVal = CGF.EmitLoadOfLValue(V: VarLVal, Loc: RD->getLocation());
9595 LambdaPointers.try_emplace(Key: VarLVal.getPointer(CGF),
9596 Args: VDLVal.getPointer(CGF));
9597 CombinedInfo.Exprs.push_back(Elt: VD);
9598 CombinedInfo.BasePointers.push_back(Elt: VarLVal.getPointer(CGF));
9599 CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
9600 CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
9601 CombinedInfo.Pointers.push_back(Elt: VarRVal.getScalarVal());
9602 CombinedInfo.Sizes.push_back(Elt: llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: 0));
9603 }
9604 CombinedInfo.Types.push_back(
9605 Elt: OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9606 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9607 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9608 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9609 CombinedInfo.Mappers.push_back(Elt: nullptr);
9610 }
9611 }
9612
9613 /// Set correct indices for lambdas captures.
9614 void adjustMemberOfForLambdaCaptures(
9615 llvm::OpenMPIRBuilder &OMPBuilder,
9616 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9617 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9618 MapFlagsArrayTy &Types) const {
9619 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9620 // Set correct member_of idx for all implicit lambda captures.
9621 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9622 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9623 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9624 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
9625 continue;
9626 llvm::Value *BasePtr = LambdaPointers.lookup(Val: BasePointers[I]);
9627 assert(BasePtr && "Unable to find base lambda address.");
9628 int TgtIdx = -1;
9629 for (unsigned J = I; J > 0; --J) {
9630 unsigned Idx = J - 1;
9631 if (Pointers[Idx] != BasePtr)
9632 continue;
9633 TgtIdx = Idx;
9634 break;
9635 }
9636 assert(TgtIdx != -1 && "Unable to find parent lambda.");
9637 // All other current entries will be MEMBER_OF the combined entry
9638 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9639 // 0xFFFF in the MEMBER_OF field).
9640 OpenMPOffloadMappingFlags MemberOfFlag =
9641 OMPBuilder.getMemberOfFlag(Position: TgtIdx);
9642 OMPBuilder.setCorrectMemberOfFlag(Flags&: Types[I], MemberOfFlag);
9643 }
9644 }
9645
  /// Populate component lists for non-lambda captured variables from map,
  /// is_device_ptr and has_device_addr clause info.
  ///
  /// \param VD The captured declaration (may be null for a 'this' capture).
  /// \param DeclComponentLists [out] Receives one MapData tuple per
  ///        component-list referring to \p VD.
  /// \param StorageForImplicitlyAddedComponentLists Backing storage for any
  ///        component-lists synthesized for implicit attach-pointer maps.
  void populateComponentListsForNonLambdaCaptureFromClauses(
      const ValueDecl *VD, MapDataArrayTy &DeclComponentLists,
      SmallVectorImpl<
          SmallVector<OMPClauseMappableExprCommon::MappableComponent, 8>>
          &StorageForImplicitlyAddedComponentLists) const {
    // Lambdas are handled separately; see generateInfoForLambdaCaptures.
    if (VD && LambdasMap.count(Val: VD))
      return;

    // For member fields list in is_device_ptr, store it in
    // DeclComponentLists for generating components info.
    static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
    auto It = DevPointersMap.find(Val: VD);
    if (It != DevPointersMap.end())
      for (const auto &MCL : It->second)
        DeclComponentLists.emplace_back(Args: MCL, Args: OMPC_MAP_to, Args: Unknown,
                                        /*IsImplicit=*/ Args: true, Args: nullptr,
                                        Args: nullptr);
    // Likewise for has_device_addr members, which are mapped tofrom.
    auto I = HasDevAddrsMap.find(Val: VD);
    if (I != HasDevAddrsMap.end())
      for (const auto &MCL : I->second)
        DeclComponentLists.emplace_back(Args: MCL, Args: OMPC_MAP_tofrom, Args: Unknown,
                                        /*IsImplicit=*/ Args: true, Args: nullptr,
                                        Args: nullptr);
    assert(isa<const OMPExecutableDirective *>(CurDir) &&
           "Expect a executable directive");
    const auto *CurExecDir = cast<const OMPExecutableDirective *>(Val: CurDir);
    // Collect every component-list from explicit map clauses that refers to
    // this declaration.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(args&: VDecl, args&: Components, args&: Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Args&: Components, Args: C->getMapType(),
                                        Args: C->getMapTypeModifiers(),
                                        Args: C->isImplicit(), Args&: Mapper, Args&: E);
        ++EI;
      }
    }

    // For the target construct, if there's a map with a base-pointer that's
    // a member of an implicitly captured struct, of the current class,
    // we need to emit an implicit map on the pointer.
    if (isOpenMPTargetExecutionDirective(DKind: CurExecDir->getDirectiveKind()))
      addImplicitMapForAttachPtrBaseIfMemberOfCapturedVD(
          CapturedVD: VD, DeclComponentLists, ComponentVectorStorage&: StorageForImplicitlyAddedComponentLists);

    // Order the lists so 'present'-modified entries come first and 'alloc'
    // entries come last; stable_sort preserves the clause order otherwise.
    // Note: HasPresent/HasAllocs are deliberately computed from opposite
    // sides (LHS modifiers vs RHS type) so one comparison covers both rules.
    llvm::stable_sort(Range&: DeclComponentLists, C: [](const MapData &LHS,
                                                const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(t: LHS);
      OpenMPMapClauseKind MapType = std::get<1>(t: RHS);
      bool HasPresent =
          llvm::is_contained(Range&: MapModifiers, Element: clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(t: RHS);
      MapType = std::get<1>(t: LHS);
      bool HasPresentR =
          llvm::is_contained(Range&: MapModifiers, Element: clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });
  }
9714
9715 /// On a target construct, if there's an implicit map on a struct, or that of
9716 /// this[:], and an explicit map with a member of that struct/class as the
9717 /// base-pointer, we need to make sure that base-pointer is implicitly mapped,
9718 /// to make sure we don't map the full struct/class. For example:
9719 ///
9720 /// \code
9721 /// struct S {
9722 /// int dummy[10000];
9723 /// int *p;
9724 /// void f1() {
9725 /// #pragma omp target map(p[0:1])
9726 /// (void)this;
9727 /// }
9728 /// }; S s;
9729 ///
9730 /// void f2() {
9731 /// #pragma omp target map(s.p[0:10])
9732 /// (void)s;
9733 /// }
9734 /// \endcode
9735 ///
9736 /// Only `this-p` and `s.p` should be mapped in the two cases above.
9737 //
9738 // OpenMP 6.0: 7.9.6 map clause, pg 285
9739 // If a list item with an implicitly determined data-mapping attribute does
9740 // not have any corresponding storage in the device data environment prior to
9741 // a task encountering the construct associated with the map clause, and one
9742 // or more contiguous parts of the original storage are either list items or
9743 // base pointers to list items that are explicitly mapped on the construct,
9744 // only those parts of the original storage will have corresponding storage in
9745 // the device data environment as a result of the map clauses on the
9746 // construct.
9747 void addImplicitMapForAttachPtrBaseIfMemberOfCapturedVD(
9748 const ValueDecl *CapturedVD, MapDataArrayTy &DeclComponentLists,
9749 SmallVectorImpl<
9750 SmallVector<OMPClauseMappableExprCommon::MappableComponent, 8>>
9751 &ComponentVectorStorage) const {
9752 bool IsThisCapture = CapturedVD == nullptr;
9753
9754 for (const auto &ComponentsAndAttachPtr : AttachPtrExprMap) {
9755 OMPClauseMappableExprCommon::MappableExprComponentListRef
9756 ComponentsWithAttachPtr = ComponentsAndAttachPtr.first;
9757 const Expr *AttachPtrExpr = ComponentsAndAttachPtr.second;
9758 if (!AttachPtrExpr)
9759 continue;
9760
9761 const auto *ME = dyn_cast<MemberExpr>(Val: AttachPtrExpr);
9762 if (!ME)
9763 continue;
9764
9765 const Expr *Base = ME->getBase()->IgnoreParenImpCasts();
9766
9767 // If we are handling a "this" capture, then we are looking for
9768 // attach-ptrs of form `this->p`, either explicitly or implicitly.
9769 if (IsThisCapture && !ME->isImplicitCXXThis() && !isa<CXXThisExpr>(Val: Base))
9770 continue;
9771
9772 if (!IsThisCapture && (!isa<DeclRefExpr>(Val: Base) ||
9773 cast<DeclRefExpr>(Val: Base)->getDecl() != CapturedVD))
9774 continue;
9775
9776 // For non-this captures, we are looking for attach-ptrs of form
9777 // `s.p`.
9778 // For non-this captures, we are looking for attach-ptrs like `s.p`.
9779 if (!IsThisCapture && (ME->isArrow() || !isa<DeclRefExpr>(Val: Base) ||
9780 cast<DeclRefExpr>(Val: Base)->getDecl() != CapturedVD))
9781 continue;
9782
9783 // Check if we have an existing map on either:
9784 // this[:], s, this->p, or s.p, in which case, we don't need to add
9785 // an implicit one for the attach-ptr s.p/this->p.
9786 bool FoundExistingMap = false;
9787 for (const MapData &ExistingL : DeclComponentLists) {
9788 OMPClauseMappableExprCommon::MappableExprComponentListRef
9789 ExistingComponents = std::get<0>(t: ExistingL);
9790
9791 if (ExistingComponents.empty())
9792 continue;
9793
9794 // First check if we have a map like map(this->p) or map(s.p).
9795 const auto &FirstComponent = ExistingComponents.front();
9796 const Expr *FirstExpr = FirstComponent.getAssociatedExpression();
9797
9798 if (!FirstExpr)
9799 continue;
9800
9801 // First check if we have a map like map(this->p) or map(s.p).
9802 if (AttachPtrComparator.areEqual(LHS: FirstExpr, RHS: AttachPtrExpr)) {
9803 FoundExistingMap = true;
9804 break;
9805 }
9806
9807 // Check if we have a map like this[0:1]
9808 if (IsThisCapture) {
9809 if (const auto *OASE = dyn_cast<ArraySectionExpr>(Val: FirstExpr)) {
9810 if (isa<CXXThisExpr>(Val: OASE->getBase()->IgnoreParenImpCasts())) {
9811 FoundExistingMap = true;
9812 break;
9813 }
9814 }
9815 continue;
9816 }
9817
9818 // When the attach-ptr is something like `s.p`, check if
9819 // `s` itself is mapped explicitly.
9820 if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: FirstExpr)) {
9821 if (DRE->getDecl() == CapturedVD) {
9822 FoundExistingMap = true;
9823 break;
9824 }
9825 }
9826 }
9827
9828 if (FoundExistingMap)
9829 continue;
9830
9831 // If no base map is found, we need to create an implicit map for the
9832 // attach-pointer expr.
9833
9834 ComponentVectorStorage.emplace_back();
9835 auto &AttachPtrComponents = ComponentVectorStorage.back();
9836
9837 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
9838 bool SeenAttachPtrComponent = false;
9839 // For creating a map on the attach-ptr `s.p/this->p`, we copy all
9840 // components from the component-list which has `s.p/this->p`
9841 // as the attach-ptr, starting from the component which matches
9842 // `s.p/this->p`. This way, we'll have component-lists of
9843 // `s.p` -> `s`, and `this->p` -> `this`.
9844 for (size_t i = 0; i < ComponentsWithAttachPtr.size(); ++i) {
9845 const auto &Component = ComponentsWithAttachPtr[i];
9846 const Expr *ComponentExpr = Component.getAssociatedExpression();
9847
9848 if (!SeenAttachPtrComponent && ComponentExpr != AttachPtrExpr)
9849 continue;
9850 SeenAttachPtrComponent = true;
9851
9852 AttachPtrComponents.emplace_back(Args: Component.getAssociatedExpression(),
9853 Args: Component.getAssociatedDeclaration(),
9854 Args: Component.isNonContiguous());
9855 }
9856 assert(!AttachPtrComponents.empty() &&
9857 "Could not populate component-lists for mapping attach-ptr");
9858
9859 DeclComponentLists.emplace_back(
9860 Args&: AttachPtrComponents, Args: OMPC_MAP_tofrom, Args: Unknown,
9861 /*IsImplicit=*/Args: true, /*mapper=*/Args: nullptr, Args&: AttachPtrExpr);
9862 }
9863 }
9864
  /// For a capture that has an associated clause, generate the base pointers,
  /// section pointers, sizes, map types, and mappers (all included in
  /// \a CurCaptureVarInfo).
  ///
  /// Component-lists are grouped by their attach-ptr expression and processed
  /// in increasing order of attach-ptr complexity; only the first processed
  /// group may contribute the TARGET_PARAM kernel argument.
  void generateInfoForCaptureFromClauseInfo(
      const MapDataArrayTy &DeclComponentListsFromClauses,
      const CapturedStmt::Capture *Cap, llvm::Value *Arg,
      MapCombinedInfoTy &CurCaptureVarInfo, llvm::OpenMPIRBuilder &OMPBuilder,
      unsigned OffsetForMemberOfFlag) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // The captured declaration; null when the capture is 'this'.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // for map(to: lambda): skip here, processing it in
    // generateDefaultMapInfo
    if (LambdasMap.count(Val: VD))
      return;

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (VD && (DevPointersMap.count(Val: VD) || HasDevAddrsMap.count(Val: VD))) {
      CurCaptureVarInfo.Exprs.push_back(Elt: VD);
      CurCaptureVarInfo.BasePointers.emplace_back(Args&: Arg);
      CurCaptureVarInfo.DevicePtrDecls.emplace_back(Args&: VD);
      CurCaptureVarInfo.DevicePointers.emplace_back(Args: DeviceInfoTy::Pointer);
      CurCaptureVarInfo.Pointers.push_back(Elt: Arg);
      CurCaptureVarInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
          V: CGF.getTypeSize(Ty: CGF.getContext().VoidPtrTy), DestTy: CGF.Int64Ty,
          /*isSigned=*/true));
      CurCaptureVarInfo.Types.push_back(
          Elt: OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
      CurCaptureVarInfo.Mappers.push_back(Elt: nullptr);
      return;
    }

    // Emits map info for one group of component-lists (all sharing the same
    // attach-ptr), including the combined struct entry and attach entry.
    auto GenerateInfoForComponentLists =
        [&](ArrayRef<MapData> DeclComponentListsFromClauses,
            bool IsEligibleForTargetParamFlag) {
          MapCombinedInfoTy CurInfoForComponentLists;
          StructRangeInfoTy PartialStruct;
          AttachInfoTy AttachInfo;

          if (DeclComponentListsFromClauses.empty())
            return;

          generateInfoForCaptureFromComponentLists(
              VD, DeclComponentLists: DeclComponentListsFromClauses, CurComponentListInfo&: CurInfoForComponentLists,
              PartialStruct, AttachInfo, IsListEligibleForTargetParamFlag: IsEligibleForTargetParamFlag);

          // If there is an entry in PartialStruct it means we have a
          // struct with individual members mapped. Emit an extra combined
          // entry.
          if (PartialStruct.Base.isValid()) {
            CurCaptureVarInfo.append(CurInfo&: PartialStruct.PreliminaryMapData);
            emitCombinedEntry(
                CombinedInfo&: CurCaptureVarInfo, CurTypes&: CurInfoForComponentLists.Types,
                PartialStruct, AttachInfo, IsMapThis: Cap->capturesThis(), OMPBuilder,
                /*VD=*/nullptr, OffsetForMemberOfFlag,
                /*NotTargetParams*/ !IsEligibleForTargetParamFlag);
          }

          // We do the appends to get the entries in the following order:
          // combined-entry -> individual-field-entries -> attach-entry,
          CurCaptureVarInfo.append(CurInfo&: CurInfoForComponentLists);
          if (AttachInfo.isValid())
            emitAttachEntry(CGF, CombinedInfo&: CurCaptureVarInfo, AttachInfo);
        };

    // Group component lists by their AttachPtrExpr and process them in order
    // of increasing complexity (nullptr first, then simple expressions like p,
    // then more complex ones like p[0], etc.)
    //
    // This ensures that we:
    // * handle maps that can contribute towards setting the kernel argument,
    //   (e.g. map(ps), or map(ps[0])), before any that cannot (e.g. ps->pt->d).
    // * allocate a single contiguous storage for all exprs with the same
    //   captured var and having the same attach-ptr.
    //
    // Example: The map clauses below should be handled grouped together based
    // on their attachable-base-pointers:
    // map-clause                | attachable-base-pointer
    // --------------------------+------------------------
    // map(p, ps)                | nullptr
    // map(p[0])                 | p
    // map(p[0]->b, p[0]->c)     | p[0]
    // map(ps->d, ps->e, ps->pt) | ps
    // map(ps->pt->d, ps->pt->e) | ps->pt

    // First, collect all MapData entries with their attach-ptr exprs.
    SmallVector<std::pair<const Expr *, MapData>, 16> AttachPtrMapDataPairs;

    for (const MapData &L : DeclComponentListsFromClauses) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
          std::get<0>(t: L);
      const Expr *AttachPtrExpr = getAttachPtrExpr(Components);
      AttachPtrMapDataPairs.emplace_back(Args&: AttachPtrExpr, Args: L);
    }

    // Next, sort by increasing order of their complexity.
    llvm::stable_sort(Range&: AttachPtrMapDataPairs,
                      C: [this](const auto &LHS, const auto &RHS) {
                        return AttachPtrComparator(LHS.first, RHS.first);
                      });

    // If default mapping already produced entries for this capture, no group
    // below may claim the TARGET_PARAM flag.
    bool NoDefaultMappingDoneForVD = CurCaptureVarInfo.BasePointers.empty();
    bool IsFirstGroup = true;

    // And finally, process them all in order, grouping those with
    // equivalent attach-ptr exprs together.
    auto *It = AttachPtrMapDataPairs.begin();
    while (It != AttachPtrMapDataPairs.end()) {
      const Expr *AttachPtrExpr = It->first;

      // Consume the run of entries whose attach-ptr is equivalent to the
      // current one.
      MapDataArrayTy GroupLists;
      while (It != AttachPtrMapDataPairs.end() &&
             (It->first == AttachPtrExpr ||
              AttachPtrComparator.areEqual(LHS: It->first, RHS: AttachPtrExpr))) {
        GroupLists.push_back(Elt: It->second);
        ++It;
      }
      assert(!GroupLists.empty() && "GroupLists should not be empty");

      // Determine if this group of component-lists is eligible for TARGET_PARAM
      // flag. Only the first group processed should be eligible, and only if no
      // default mapping was done.
      bool IsEligibleForTargetParamFlag =
          IsFirstGroup && NoDefaultMappingDoneForVD;

      GenerateInfoForComponentLists(GroupLists, IsEligibleForTargetParamFlag);
      IsFirstGroup = false;
    }
  }
10002
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to \a DeclComponentLists for a given capture
  /// \a VD (all included in \a CurComponentListInfo).
  ///
  /// First detects pairs of component-lists where one is a prefix of the other
  /// (an "overlap", e.g. map(s) together with map(s.x)). Overlapped base lists
  /// are emitted first, with their overlapped sub-lists sorted in record
  /// layout order, then all remaining lists are emitted. At most the first
  /// emitted list may carry the TARGET_PARAM flag.
  void generateInfoForCaptureFromComponentLists(
      const ValueDecl *VD, ArrayRef<MapData> DeclComponentLists,
      MapCombinedInfoTy &CurComponentListInfo, StructRangeInfoTy &PartialStruct,
      AttachInfoTy &AttachInfo, bool IsListEligibleForTargetParamFlag) const {
    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(args&: Components, args&: MapType, args&: MapModifiers, args&: IsImplicit, args&: Mapper, args&: VarRef) =
          L;
      ++Count;
      // Compare only against later lists, so each unordered pair is examined
      // exactly once.
      for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(N: Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(args&: Components1, args&: MapType, args&: MapModifiers, args&: IsImplicit, args&: Mapper,
                 args&: VarRef) = L1;
        // Walk both lists from the base (reverse iteration) while they agree.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(Val: It->getAssociatedExpression()) ||
              (std::prev(x: It)->getAssociatedDeclaration() &&
               std::prev(x: It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(x: It) != CE && std::next(x: It) != SE))
            continue;
          // The shorter list is the base; the longer one is recorded as its
          // overlapped sub-list.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          OverlappedData[&BaseData].push_back(Elt: SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointer/array types down to the underlying record type of VD,
      // whose field layout orders the overlapped sub-lists.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(RD: CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(in_start: RD->field_begin(), in_end: RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Range&: Pair.getSecond(),
          C: [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            // Skip the common prefix of the two lists (base-first order).
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Otherwise, order by the position of the first differing field
            // within the record layout.
            const auto *FD1 = cast<FieldDecl>(Val: CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(Val: SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Range&: Layout, P: [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool AddTargetParamFlag = IsListEligibleForTargetParamFlag;
    MapCombinedInfoTy StructBaseCombinedInfo;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(args&: Components, args&: MapType, args&: MapModifiers, args&: IsImplicit, args&: Mapper, args&: VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, MotionModifiers: {}, Components, CombinedInfo&: CurComponentListInfo,
          StructBaseCombinedInfo, PartialStruct, AttachInfo, IsFirstComponentList: AddTargetParamFlag,
          IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
          /*ForDeviceAddr=*/false, BaseDecl: VD, MapExpr: VarRef, OverlappedElements: OverlappedComponents);
      // Only the very first emitted list may become the kernel argument.
      AddTargetParamFlag = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(args&: Components, args&: MapType, args&: MapModifiers, args&: IsImplicit, args&: Mapper, args&: VarRef) =
          L;
      auto It = OverlappedData.find(Val: &L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(
            MapType, MapModifiers, MotionModifiers: {}, Components, CombinedInfo&: CurComponentListInfo,
            StructBaseCombinedInfo, PartialStruct, AttachInfo,
            IsFirstComponentList: AddTargetParamFlag, IsImplicit, /*GenerateAllInfoForClauses*/ false,
            Mapper, /*ForDeviceAddr=*/false, BaseDecl: VD, MapExpr: VarRef,
            /*OverlappedElements*/ {});
      AddTargetParamFlag = false;
    }
  }
10175
10176 /// Check if a variable should be treated as firstprivate due to explicit
10177 /// firstprivate clause or defaultmap(firstprivate:...).
10178 bool isEffectivelyFirstprivate(const VarDecl *VD, QualType Type) const {
10179 // Check explicit firstprivate clauses (not implicit from defaultmap)
10180 auto I = FirstPrivateDecls.find(Val: VD);
10181 if (I != FirstPrivateDecls.end() && !I->getSecond())
10182 return true; // Explicit firstprivate only
10183
10184 // Check defaultmap(firstprivate:scalar) for scalar types
10185 if (DefaultmapFirstprivateKinds.count(V: OMPC_DEFAULTMAP_scalar)) {
10186 if (Type->isScalarType())
10187 return true;
10188 }
10189
10190 // Check defaultmap(firstprivate:pointer) for pointer types
10191 if (DefaultmapFirstprivateKinds.count(V: OMPC_DEFAULTMAP_pointer)) {
10192 if (Type->isAnyPointerType())
10193 return true;
10194 }
10195
10196 // Check defaultmap(firstprivate:aggregate) for aggregate types
10197 if (DefaultmapFirstprivateKinds.count(V: OMPC_DEFAULTMAP_aggregate)) {
10198 if (Type->isAggregateType())
10199 return true;
10200 }
10201
10202 // Check defaultmap(firstprivate:all) for all types
10203 return DefaultmapFirstprivateKinds.count(V: OMPC_DEFAULTMAP_all);
10204 }
10205
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Handles the three capture forms: 'this' (to/from map of the pointee),
  /// by-copy (passed as a literal; zero-size for firstprivate/implicit
  /// pointers), and by-reference (mapped with the default to/tofrom
  /// modifiers, or passed as a literal when effectively firstprivate). The
  /// single resulting entry is always flagged as a TARGET_PARAM.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(Elt: nullptr);
      CombinedInfo.BasePointers.push_back(Elt: CV);
      CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
      CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(Elt: CV);
      const auto *PtrTy = cast<PointerType>(Val: RI.getType().getTypePtr());
      // Map the entire object 'this' points to, not just the pointer.
      CombinedInfo.Sizes.push_back(
          Elt: CGF.Builder.CreateIntCast(V: CGF.getTypeSize(Ty: PtrTy->getPointeeType()),
                                    DestTy: CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(Elt: OpenMPOffloadMappingFlags::OMP_MAP_TO |
                                   OpenMPOffloadMappingFlags::OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(Elt: VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(Elt: CV);
      CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
      CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(Elt: CV);
      bool IsFirstprivate =
          isEffectivelyFirstprivate(VD, Type: RI.getType().getNonReferenceType());

      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(
            Elt: OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
            V: CGF.getTypeSize(Ty: RI.getType()), DestTy: CGF.Int64Ty, /*isSigned=*/true));
      } else if (IsFirstprivate) {
        // Firstprivate pointers should be passed by value (as literals)
        // without performing a present table lookup at runtime.
        CombinedInfo.Types.push_back(
            Elt: OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
        // Use zero size for pointer literals (just passing the pointer value)
        CombinedInfo.Sizes.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.Int64Ty));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(Elt: OpenMPOffloadMappingFlags::OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.Int64Ty));
      }
      // FirstPrivateDecls records whether the firstprivate was implicit.
      auto I = FirstPrivateDecls.find(Val: VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(Val: RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      const VarDecl *VD = CI.getCapturedVar();
      bool IsFirstprivate = isEffectivelyFirstprivate(VD, Type: ElementType);
      CombinedInfo.Exprs.push_back(Elt: VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(Elt: CV);
      CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
      CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);

      // For firstprivate pointers, pass by value instead of dereferencing
      if (IsFirstprivate && ElementType->isAnyPointerType()) {
        // Treat as a literal value (pass the pointer value itself)
        CombinedInfo.Pointers.push_back(Elt: CV);
        // Use zero size for pointer literals
        CombinedInfo.Sizes.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.Int64Ty));
        CombinedInfo.Types.push_back(
            Elt: OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
      } else {
        CombinedInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
            V: CGF.getTypeSize(Ty: ElementType), DestTy: CGF.Int64Ty, /*isSigned=*/true));
        // The default map type for a scalar/complex type is 'to' because by
        // default the value doesn't have to be retrieved. For an aggregate
        // type, the default is 'tofrom'.
        CombinedInfo.Types.push_back(Elt: getMapModifiersForPrivateClauses(Cap: CI));
        CombinedInfo.Pointers.push_back(Elt: CV);
      }
      auto I = FirstPrivateDecls.find(Val: VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |=
        OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(Elt: nullptr);
  }
10302};
10303} // anonymous namespace
10304
10305// Try to extract the base declaration from a `this->x` expression if possible.
10306static ValueDecl *getDeclFromThisExpr(const Expr *E) {
10307 if (!E)
10308 return nullptr;
10309
10310 if (const auto *OASE = dyn_cast<ArraySectionExpr>(Val: E->IgnoreParenCasts()))
10311 if (const MemberExpr *ME =
10312 dyn_cast<MemberExpr>(Val: OASE->getBase()->IgnoreParenImpCasts()))
10313 return ME->getMemberDecl();
10314 return nullptr;
10315}
10316
10317/// Emit a string constant containing the names of the values mapped to the
10318/// offloading runtime library.
10319static llvm::Constant *
10320emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
10321 MappableExprsHandler::MappingExprInfo &MapExprs) {
10322
10323 uint32_t SrcLocStrSize;
10324 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
10325 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
10326
10327 SourceLocation Loc;
10328 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
10329 if (const ValueDecl *VD = getDeclFromThisExpr(E: MapExprs.getMapExpr()))
10330 Loc = VD->getLocation();
10331 else
10332 Loc = MapExprs.getMapExpr()->getExprLoc();
10333 } else {
10334 Loc = MapExprs.getMapDecl()->getLocation();
10335 }
10336
10337 std::string ExprName;
10338 if (MapExprs.getMapExpr()) {
10339 PrintingPolicy P(CGF.getContext().getLangOpts());
10340 llvm::raw_string_ostream OS(ExprName);
10341 MapExprs.getMapExpr()->printPretty(OS, Helper: nullptr, Policy: P);
10342 } else {
10343 ExprName = MapExprs.getMapDecl()->getNameAsString();
10344 }
10345
10346 std::string FileName;
10347 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
10348 if (auto *DbgInfo = CGF.getDebugInfo())
10349 FileName = DbgInfo->remapDIPath(PLoc.getFilename());
10350 else
10351 FileName = PLoc.getFilename();
10352 return OMPBuilder.getOrCreateSrcLocStr(FunctionName: FileName, FileName: ExprName, Line: PLoc.getLine(),
10353 Column: PLoc.getColumn(), SrcLocStrSize);
10354}
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// \param CombinedInfo The already-collected map information for the region.
/// \param Info Receives the emitted runtime arguments (\c Info.RTArgs) and
///        device-address/mapper bookkeeping.
/// \param IsNonContiguous Whether non-contiguous map entries may be present.
/// \param ForEndCall Whether the arrays are for the "end" (unmapping) call.
static void emitOffloadingArraysAndArgs(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false, bool ForEndCall = false) {
  CodeGenModule &CGM = CGF.CGM;

  // The builder needs two insertion points: one for allocas (the function's
  // alloca insertion point) and one for the code itself (current position).
  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());

  // Record the device address materialized for each use_device_ptr/addr
  // declaration so later codegen can substitute it.
  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(Key: DevVD, Args&: NewDecl);
    }
  };

  // Resolve (emitting on demand) the user-defined mapper function for entry
  // I, or return null when the entry has no declared mapper.
  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Function *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          D: cast<OMPDeclareMapperDecl>(Val: CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };
  // Delegate the actual array/argument emission to the OpenMPIRBuilder.
  cantFail(Err: OMPBuilder.emitOffloadingArraysAndArgs(
      AllocaIP, CodeGenIP, Info, RTArgs&: Info.RTArgs, CombinedInfo, CustomMapperCB,
      IsNonContiguous, ForEndCall, DeviceAddrCB));
}
10389
/// Check for inner distribute directive.
///
/// Given a target-style directive \p D, return the nested distribute-kind
/// directive (looking through a single intervening 'teams' level for plain
/// 'target'), or null when no such nested directive exists. Combined
/// target+distribute directives are expected to be handled by the caller and
/// hit the llvm_unreachable below if passed in.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  // A "single compound child" is the only statement of the captured body;
  // anything else means there is no uniquely nested directive.
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(Val: ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // For now, treat 'target' with nested 'teams loop' as if it's
      // distributed (target teams distribute).
      if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
        return NestedDir;
      // 'target' + 'teams': look one level deeper for a distribute directive
      // nested inside the teams region.
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(Val: ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      // Directly nested distribute inside 'target teams'.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      // These target forms cannot contain a nested distribute of interest.
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      // Callers must only pass the target-style kinds handled above.
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
10501
10502/// Emit the user-defined mapper function. The code generation follows the
10503/// pattern in the example below.
10504/// \code
10505/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
10506/// void *base, void *begin,
10507/// int64_t size, int64_t type,
10508/// void *name = nullptr) {
10509/// // Allocate space for an array section first.
10510/// if ((size > 1 || (base != begin)) && !maptype.IsDelete)
10511/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
10512/// size*sizeof(Ty), clearToFromMember(type));
10513/// // Map members.
10514/// for (unsigned i = 0; i < size; i++) {
10515/// // For each component specified by this mapper:
10516/// for (auto c : begin[i]->all_components) {
10517/// if (c.hasMapper())
10518/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
10519/// c.arg_type, c.arg_name);
10520/// else
10521/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
10522/// c.arg_begin, c.arg_size, c.arg_type,
10523/// c.arg_name);
10524/// }
10525/// }
10526/// // Delete the array section.
10527/// if (size > 1 && maptype.IsDelete)
10528/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
10529/// size*sizeof(Ty), clearToFromMember(type));
10530/// }
10531/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Emit each mapper only once; UDMMap acts as the cache.
  if (UDMMap.count(Val: D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  // The variable declared in the mapper (the "var" in declare mapper(var)),
  // which gets privatized to each array element during mapping.
  auto *MapperVarDecl =
      cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D->getMapperVarRef())->getDecl());
  CharUnits ElementSize = C.getTypeSizeInChars(T: Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(T: Ty);

  // A dedicated CodeGenFunction for the mapper function body; the actual
  // function is created by the OpenMPIRBuilder below.
  CodeGenFunction MapperCGF(CGM);
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  // Callback invoked by the IRBuilder inside the per-element loop: bind the
  // mapper variable to the current element, then collect map-clause info.
  auto PrivatizeAndGenMapInfoCB =
      [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, llvm::Value *PtrPHI,
          llvm::Value *BeginArg) -> llvm::OpenMPIRBuilder::MapInfosTy & {
    MapperCGF.Builder.restoreIP(IP: CodeGenIP);

    // Privatize the declared variable of mapper to be the current array
    // element.
    Address PtrCurrent(
        PtrPHI, ElemTy,
        Address(BeginArg, MapperCGF.VoidPtrTy, CGM.getPointerAlign())
            .getAlignment()
            .alignmentOfArrayElement(elementSize: ElementSize));
    CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
    Scope.addPrivate(LocalVD: MapperVarDecl, Addr: PtrCurrent);
    (void)Scope.Privatize();

    // Get map clause information.
    MappableExprsHandler MEHandler(*D, MapperCGF);
    MEHandler.generateAllInfoForMapper(CombinedInfo, OMPBuilder);

    // With debug info enabled, also record printable names for the mapped
    // expressions so the runtime can report them.
    auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
      return emitMappingInformation(CGF&: MapperCGF, OMPBuilder, MapExprs&: MapExpr);
    };
    if (CGM.getCodeGenOpts().getDebugInfo() !=
        llvm::codegenoptions::NoDebugInfo) {
      CombinedInfo.Names.resize(N: CombinedInfo.Exprs.size());
      llvm::transform(Range&: CombinedInfo.Exprs, d_first: CombinedInfo.Names.begin(),
                      F: FillInfoMap);
    }

    return CombinedInfo;
  };

  // Callback resolving a nested user-defined mapper for component I, if any.
  // Note: reads CombinedInfo, which is populated by the callback above.
  auto CustomMapperCB = [&](unsigned I) {
    llvm::Function *MapperFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      // Call the corresponding mapper function.
      MapperFunc = getOrCreateUserDefinedMapperFunc(
          D: cast<OMPDeclareMapperDecl>(Val: CombinedInfo.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
    }
    return MapperFunc;
  };

  // Build a unique name from the mangled mapped type and the mapper's name.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(T: Ty, Out);
  std::string Name = getName(Parts: {"omp_mapper", TyStr, D->getName()});

  llvm::Function *NewFn = cantFail(ValOrErr: OMPBuilder.emitUserDefinedMapper(
      PrivAndGenMapInfoCB: PrivatizeAndGenMapInfoCB, ElemTy, FuncName: Name, CustomMapperCB));
  // Cache the function; also remember it per emitting function when we have
  // one, so it can be deferred/re-emitted as needed.
  UDMMap.try_emplace(Key: D, Args&: NewFn);
  if (CGF)
    FunctionUDMMap[CGF->CurFn].push_back(Elt: D);
}
10600
10601llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10602 const OMPDeclareMapperDecl *D) {
10603 auto I = UDMMap.find(Val: D);
10604 if (I != UDMMap.end())
10605 return I->second;
10606 emitUserDefinedMapper(D);
10607 return UDMMap.lookup(Val: D);
10608}
10609
10610llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
10611 CodeGenFunction &CGF, const OMPExecutableDirective &D,
10612 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10613 const OMPLoopDirective &D)>
10614 SizeEmitter) {
10615 OpenMPDirectiveKind Kind = D.getDirectiveKind();
10616 const OMPExecutableDirective *TD = &D;
10617 // Get nested teams distribute kind directive, if any. For now, treat
10618 // 'target_teams_loop' as if it's really a target_teams_distribute.
10619 if ((!isOpenMPDistributeDirective(DKind: Kind) || !isOpenMPTeamsDirective(DKind: Kind)) &&
10620 Kind != OMPD_target_teams_loop)
10621 TD = getNestedDistributeDirective(Ctx&: CGM.getContext(), D);
10622 if (!TD)
10623 return llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: 0);
10624
10625 const auto *LD = cast<OMPLoopDirective>(Val: TD);
10626 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
10627 return NumIterations;
10628 return llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: 0);
10629}
10630
10631static void
10632emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10633 const OMPExecutableDirective &D,
10634 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10635 bool RequiresOuterTask, const CapturedStmt &CS,
10636 bool OffloadingMandatory, CodeGenFunction &CGF) {
10637 if (OffloadingMandatory) {
10638 CGF.Builder.CreateUnreachable();
10639 } else {
10640 if (RequiresOuterTask) {
10641 CapturedVars.clear();
10642 CGF.GenerateOpenMPCapturedVars(S: CS, CapturedVars);
10643 }
10644 llvm::SmallVector<llvm::Value *, 16> Args(CapturedVars.begin(),
10645 CapturedVars.end());
10646 Args.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.Builder.getPtrTy()));
10647 OMPRuntime->emitOutlinedFunctionCall(CGF, Loc: D.getBeginLoc(), OutlinedFn,
10648 Args);
10649 }
10650}
10651
10652static llvm::Value *emitDeviceID(
10653 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10654 CodeGenFunction &CGF) {
10655 // Emit device ID if any.
10656 llvm::Value *DeviceID;
10657 if (Device.getPointer()) {
10658 assert((Device.getInt() == OMPC_DEVICE_unknown ||
10659 Device.getInt() == OMPC_DEVICE_device_num) &&
10660 "Expected device_num modifier.");
10661 llvm::Value *DevVal = CGF.EmitScalarExpr(E: Device.getPointer());
10662 DeviceID =
10663 CGF.Builder.CreateIntCast(V: DevVal, DestTy: CGF.Int64Ty, /*isSigned=*/true);
10664 } else {
10665 DeviceID = CGF.Builder.getInt64(C: OMP_DEVICEID_UNDEF);
10666 }
10667 return DeviceID;
10668}
10669
// Emit the dynamic group-private (CGroup) memory size and fallback behavior
// for a target construct. Returns {size as i32, fallback kind}. The size
// comes from either a 'dyn_groupprivate' clause or the extension
// 'ompx_dyn_cgroup_mem' clause; with neither, the size is 0. The fallback
// defaults to Abort (also used by ompx_dyn_cgroup_mem, which has no fallback
// modifier of its own).
static std::pair<llvm::Value *, OMPDynGroupprivateFallbackType>
emitDynCGroupMem(const OMPExecutableDirective &D, CodeGenFunction &CGF) {
  llvm::Value *DynGP = CGF.Builder.getInt32(C: 0);
  auto DynGPFallback = OMPDynGroupprivateFallbackType::Abort;

  if (auto *DynGPClause = D.getSingleClause<OMPDynGroupprivateClause>()) {
    CodeGenFunction::RunCleanupsScope DynGPScope(CGF);
    llvm::Value *DynGPVal =
        CGF.EmitScalarExpr(E: DynGPClause->getSize(), /*IgnoreResultAssign=*/true);
    // The runtime takes the size as an unsigned 32-bit value.
    DynGP = CGF.Builder.CreateIntCast(V: DynGPVal, DestTy: CGF.Int32Ty,
                                      /*isSigned=*/false);
    auto FallbackModifier = DynGPClause->getDynGroupprivateFallbackModifier();
    switch (FallbackModifier) {
    case OMPC_DYN_GROUPPRIVATE_FALLBACK_abort:
      DynGPFallback = OMPDynGroupprivateFallbackType::Abort;
      break;
    case OMPC_DYN_GROUPPRIVATE_FALLBACK_null:
      DynGPFallback = OMPDynGroupprivateFallbackType::Null;
      break;
    case OMPC_DYN_GROUPPRIVATE_FALLBACK_default_mem:
    case OMPC_DYN_GROUPPRIVATE_FALLBACK_unknown:
      // This is the default for dyn_groupprivate.
      DynGPFallback = OMPDynGroupprivateFallbackType::DefaultMem;
      break;
    default:
      llvm_unreachable("Unknown fallback modifier for OpenMP dyn_groupprivate");
    }
  } else if (auto *OMPXDynCGClause =
                 D.getSingleClause<OMPXDynCGroupMemClause>()) {
    CodeGenFunction::RunCleanupsScope DynCGMemScope(CGF);
    llvm::Value *DynCGMemVal = CGF.EmitScalarExpr(E: OMPXDynCGClause->getSize(),
                                                  /*IgnoreResultAssign=*/true);
    DynGP = CGF.Builder.CreateIntCast(V: DynCGMemVal, DestTy: CGF.Int32Ty,
                                      /*isSigned=*/false);
  }
  return {DynGP, DynGPFallback};
}
10707
// Generate map information for every capture of the target region's captured
// statement, appending the per-capture entries to \p CombinedInfo. Every
// declaration that gets mapped here is recorded in \p MappedVarSet so the
// caller can skip it when mapping the remaining clause list items.
static void genMapInfoForCaptures(
    MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
    const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
    llvm::OpenMPIRBuilder &OMPBuilder,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
    MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {

  // Tracks lambda captures so their MEMBER_OF flags can be fixed up after all
  // captures have been processed (see adjustMemberOfForLambdaCaptures below).
  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
  // The captured record fields (RI), the captured values (CV), and the
  // capture descriptors (CI) are iterated in lockstep.
  auto RI = CS.getCapturedRecordDecl()->field_begin();
  auto *CV = CapturedVars.begin();
  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                            CE = CS.capture_end();
       CI != CE; ++CI, ++RI, ++CV) {
    MappableExprsHandler::MapCombinedInfoTy CurInfo;

    // VLA sizes are passed to the outlined region by copy and do not have map
    // information associated.
    if (CI->capturesVariableArrayType()) {
      CurInfo.Exprs.push_back(Elt: nullptr);
      CurInfo.BasePointers.push_back(Elt: *CV);
      CurInfo.DevicePtrDecls.push_back(Elt: nullptr);
      CurInfo.DevicePointers.push_back(
          Elt: MappableExprsHandler::DeviceInfoTy::None);
      CurInfo.Pointers.push_back(Elt: *CV);
      CurInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
          V: CGF.getTypeSize(Ty: RI->getType()), DestTy: CGF.Int64Ty, /*isSigned=*/true));
      // Copy to the device as an argument. No need to retrieve it.
      CurInfo.Types.push_back(Elt: OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                              OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
                              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CurInfo.Mappers.push_back(Elt: nullptr);
    } else {
      // 'this' captures have no declaration; others use the canonical decl.
      const ValueDecl *CapturedVD =
          CI->capturesThis() ? nullptr
                             : CI->getCapturedVar()->getCanonicalDecl();
      bool HasEntryWithCVAsAttachPtr = false;
      if (CapturedVD)
        HasEntryWithCVAsAttachPtr =
            MEHandler.hasAttachEntryForCapturedVar(VD: CapturedVD);

      // Populate component lists for the captured variable from clauses.
      MappableExprsHandler::MapDataArrayTy DeclComponentLists;
      SmallVector<
          SmallVector<OMPClauseMappableExprCommon::MappableComponent, 8>, 4>
          StorageForImplicitlyAddedComponentLists;
      MEHandler.populateComponentListsForNonLambdaCaptureFromClauses(
          VD: CapturedVD, DeclComponentLists,
          StorageForImplicitlyAddedComponentLists);

      // OpenMP 6.0, 15.8, target construct, restrictions:
      // * A list item in a map clause that is specified on a target construct
      // must have a base variable or base pointer.
      //
      // Map clauses on a target construct must either have a base pointer, or a
      // base-variable. So, if we don't have a base-pointer, that means that it
      // must have a base-variable, i.e. we have a map like `map(s)`, `map(s.x)`
      // etc. In such cases, we do not need to handle default map generation
      // for `s`.
      bool HasEntryWithoutAttachPtr =
          llvm::any_of(Range&: DeclComponentLists, P: [&](const auto &MapData) {
            OMPClauseMappableExprCommon::MappableExprComponentListRef
                Components = std::get<0>(MapData);
            return !MEHandler.getAttachPtrExpr(Components);
          });

      // Generate default map info first if there's no direct map with CV as
      // the base-variable, or attach pointer.
      if (DeclComponentLists.empty() ||
          (!HasEntryWithCVAsAttachPtr && !HasEntryWithoutAttachPtr))
        MEHandler.generateDefaultMapInfo(CI: *CI, RI: **RI, CV: *CV, CombinedInfo&: CurInfo);

      // If we have any information in the map clause, we use it, otherwise we
      // just do a default mapping.
      MEHandler.generateInfoForCaptureFromClauseInfo(
          DeclComponentListsFromClauses: DeclComponentLists, Cap: CI, Arg: *CV, CurCaptureVarInfo&: CurInfo, OMPBuilder,
          /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size());

      // Record the handled declaration (nullptr stands for 'this').
      if (!CI->capturesThis())
        MappedVarSet.insert(V: CI->getCapturedVar());
      else
        MappedVarSet.insert(V: nullptr);

      // Generate correct mapping for variables captured by reference in
      // lambdas.
      if (CI->capturesVariable())
        MEHandler.generateInfoForLambdaCaptures(VD: CI->getCapturedVar(), Arg: *CV,
                                                CombinedInfo&: CurInfo, LambdaPointers);
    }
    // We expect to have at least an element of information for this capture.
    assert(!CurInfo.BasePointers.empty() &&
           "Non-existing map pointer for capture!");
    assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
           CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
           CurInfo.BasePointers.size() == CurInfo.Types.size() &&
           CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
           "Inconsistent map information sizes!");

    // We need to append the results of this capture to what we already have.
    CombinedInfo.append(CurInfo);
  }
  // Adjust MEMBER_OF flags for the lambdas captures.
  MEHandler.adjustMemberOfForLambdaCaptures(
      OMPBuilder, LambdaPointers, BasePointers&: CombinedInfo.BasePointers,
      Pointers&: CombinedInfo.Pointers, Types&: CombinedInfo.Types);
}
10813static void
10814genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
10815 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
10816 llvm::OpenMPIRBuilder &OMPBuilder,
10817 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
10818 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
10819
10820 CodeGenModule &CGM = CGF.CGM;
10821 // Map any list items in a map clause that were not captures because they
10822 // weren't referenced within the construct.
10823 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkipVarSet: SkippedVarSet);
10824
10825 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10826 return emitMappingInformation(CGF, OMPBuilder, MapExprs&: MapExpr);
10827 };
10828 if (CGM.getCodeGenOpts().getDebugInfo() !=
10829 llvm::codegenoptions::NoDebugInfo) {
10830 CombinedInfo.Names.resize(N: CombinedInfo.Exprs.size());
10831 llvm::transform(Range&: CombinedInfo.Exprs, d_first: CombinedInfo.Names.begin(),
10832 F: FillInfoMap);
10833 }
10834}
10835
10836static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
10837 const CapturedStmt &CS,
10838 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10839 llvm::OpenMPIRBuilder &OMPBuilder,
10840 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
10841 // Get mappable expression information.
10842 MappableExprsHandler MEHandler(D, CGF);
10843 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10844
10845 genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
10846 MappedVarSet, CombinedInfo);
10847 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, SkippedVarSet: MappedVarSet);
10848}
10849
10850template <typename ClauseTy>
10851static void
10852emitClauseForBareTargetDirective(CodeGenFunction &CGF,
10853 const OMPExecutableDirective &D,
10854 llvm::SmallVectorImpl<llvm::Value *> &Values) {
10855 const auto *C = D.getSingleClause<ClauseTy>();
10856 assert(!C->varlist_empty() &&
10857 "ompx_bare requires explicit num_teams and thread_limit");
10858 CodeGenFunction::RunCleanupsScope Scope(CGF);
10859 for (auto *E : C->varlist()) {
10860 llvm::Value *V = CGF.EmitScalarExpr(E);
10861 Values.push_back(
10862 Elt: CGF.Builder.CreateIntCast(V, DestTy: CGF.Int32Ty, /*isSigned=*/true));
10863 }
10864}
10865
// Emit the offloading arrays for a target region and the kernel launch
// itself (via OpenMPIRBuilder::emitKernelLaunch), including the host
// fallback path used when the launch fails or reverse offloading is
// requested. InputInfo/MapTypesArray/MapNamesArray are outputs consumed by
// the task-based codegen path.
static void emitTargetCallKernelLaunch(
    CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
    const OMPExecutableDirective &D,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
    const CapturedStmt &CS, bool OffloadingMandatory,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
    llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter,
    CodeGenFunction &CGF, CodeGenModule &CGM) {
  llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();

  // Fill up the arrays with all the captured variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  CGOpenMPRuntime::TargetDataInfo Info;
  genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);

  // Append a null entry for the implicit dyn_ptr argument.
  using OpenMPOffloadMappingFlags = llvm::omp::OpenMPOffloadMappingFlags;
  auto *NullPtr = llvm::Constant::getNullValue(Ty: CGF.Builder.getPtrTy());
  CombinedInfo.BasePointers.push_back(Elt: NullPtr);
  CombinedInfo.Pointers.push_back(Elt: NullPtr);
  CombinedInfo.DevicePointers.push_back(
      Elt: llvm::OpenMPIRBuilder::DeviceInfoTy::None);
  CombinedInfo.Sizes.push_back(Elt: CGF.Builder.getInt64(C: 0));
  CombinedInfo.Types.push_back(Elt: OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
                               OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
  // Names is only populated when debug info is enabled; keep it in sync.
  if (!CombinedInfo.Names.empty())
    CombinedInfo.Names.push_back(Elt: NullPtr);
  CombinedInfo.Exprs.push_back(Elt: nullptr);
  CombinedInfo.Mappers.push_back(Elt: nullptr);
  CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);

  emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
                              /*IsNonContiguous=*/true, /*ForEndCall=*/false);

  // Publish the emitted arrays through InputInfo and the by-reference
  // out-parameters; ThenGen below (and the task-based path) read them.
  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
  InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                        CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.PointersArray =
      Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.SizesArray =
      Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
  InputInfo.MappersArray =
      Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  MapTypesArray = Info.RTArgs.MapTypesArray;
  MapNamesArray = Info.RTArgs.MapNamesArray;

  // The launch code itself; may be emitted inline or inside an outer task
  // region, hence the callback form.
  auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
                    RequiresOuterTask, &CS, OffloadingMandatory, Device,
                    OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;

    if (IsReverseOffloading) {
      // Reverse offloading is not supported, so just execute on the host.
      // FIXME: This fallback solution is incorrect since it ignores the
      // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
      // assert here and ensure SEMA emits an error.
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return;
    }

    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
    unsigned NumTargetItems = InputInfo.NumberOfTargetItems;

    llvm::Value *BasePointersArray =
        InputInfo.BasePointersArray.emitRawPointer(CGF);
    llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
    llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
    llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);

    // Host fallback used by emitKernelLaunch when the device launch fails.
    auto &&EmitTargetCallFallbackCB =
        [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
         OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
        -> llvm::OpenMPIRBuilder::InsertPointTy {
      CGF.Builder.restoreIP(IP);
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return CGF.Builder.saveIP();
    };

    // 'ompx_bare' kernels take explicit (possibly multi-dimensional)
    // num_teams/thread_limit lists; otherwise a single value each is derived
    // from the directive.
    bool IsBare = D.hasClausesOfKind<OMPXBareClause>();
    SmallVector<llvm::Value *, 3> NumTeams;
    SmallVector<llvm::Value *, 3> NumThreads;
    if (IsBare) {
      emitClauseForBareTargetDirective<OMPNumTeamsClause>(CGF, D, Values&: NumTeams);
      emitClauseForBareTargetDirective<OMPThreadLimitClause>(CGF, D,
                                                             Values&: NumThreads);
    } else {
      NumTeams.push_back(Elt: OMPRuntime->emitNumTeamsForTargetDirective(CGF, D));
      NumThreads.push_back(
          Elt: OMPRuntime->emitNumThreadsForTargetDirective(CGF, D));
    }

    llvm::Value *DeviceID = emitDeviceID(Device, CGF);
    llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, Loc: D.getBeginLoc());
    llvm::Value *NumIterations =
        OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
    auto [DynCGroupMem, DynCGroupMemFallback] = emitDynCGroupMem(D, CGF);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());

    // Bundle the runtime arguments and launch via the OpenMPIRBuilder.
    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
        BasePointersArray, PointersArray, SizesArray, MapTypesArray,
        nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);

    llvm::OpenMPIRBuilder::TargetKernelArgs Args(
        NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
        DynCGroupMem, HasNoWait, DynCGroupMemFallback);

    llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
        cantFail(ValOrErr: OMPRuntime->getOMPBuilder().emitKernelLaunch(
            Loc: CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
            RTLoc, AllocaIP));
    CGF.Builder.restoreIP(IP: AfterIP);
  };

  // Wrap the launch in an outer task when required (depend/nowait/...),
  // otherwise emit it inline.
  if (RequiresOuterTask)
    CGF.EmitOMPTargetTaskBasedDirective(S: D, BodyGen: ThenGen, InputInfo);
  else
    OMPRuntime->emitInlinedDirective(CGF, InnerKind: D.getDirectiveKind(), CodeGen: ThenGen);
}
10992
10993static void
10994emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10995 const OMPExecutableDirective &D,
10996 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10997 bool RequiresOuterTask, const CapturedStmt &CS,
10998 bool OffloadingMandatory, CodeGenFunction &CGF) {
10999
11000 // Notify that the host version must be executed.
11001 auto &&ElseGen =
11002 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
11003 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
11004 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
11005 RequiresOuterTask, CS, OffloadingMandatory, CGF);
11006 };
11007
11008 if (RequiresOuterTask) {
11009 CodeGenFunction::OMPTargetDataInfo InputInfo;
11010 CGF.EmitOMPTargetTaskBasedDirective(S: D, BodyGen: ElseGen, InputInfo);
11011 } else {
11012 OMPRuntime->emitInlinedDirective(CGF, InnerKind: D.getDirectiveKind(), CodeGen: ElseGen);
11013 }
11014}
11015
/// Emit code for a target construct: either launch the offloaded kernel on
/// the device (when an outlined function ID exists) or fall back to host
/// execution. An 'if' clause selects between the two paths at run time.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  // Host compilation with -fopenmp-offload-mandatory: there is no host
  // fallback version, so OutlinedFn may legitimately be null below.
  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
                                   CGM.getLangOpts().OpenMPOffloadMandatory;

  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");

  // depend/nowait/in_reduction (and, from OpenMP 5.1, thread_limit on some
  // directives) give the region task semantics, so it must be wrapped in an
  // outer target task.
  const bool RequiresOuterTask =
      D.hasClausesOfKind<OMPDependClause>() ||
      D.hasClausesOfKind<OMPNowaitClause>() ||
      D.hasClausesOfKind<OMPInReductionClause>() ||
      (CGM.getLangOpts().OpenMP >= 51 &&
       needsTaskBasedThreadLimit(DKind: D.getDirectiveKind()) &&
       D.hasClausesOfKind<OMPThreadLimitClause>());
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(RegionKind: OMPD_target);
  // Materialize the captured variables before choosing the then/else path so
  // both branches can reuse them.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(S: CS, CapturedVars);
  };
  emitInlinedDirective(CGF, InnerKind: OMPD_unknown, CodeGen: ArgsCodegen);

  // Filled in by the kernel-launch path; referenced by the lambdas below.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;

  // "then": emit the actual device kernel launch.
  auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
                          RequiresOuterTask, &CS, OffloadingMandatory, Device,
                          OutlinedFnID, &InputInfo, &MapTypesArray,
                          &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
                                                       PrePostActionTy &) {
    emitTargetCallKernelLaunch(OMPRuntime: this, OutlinedFn, D, CapturedVars,
                               RequiresOuterTask, CS, OffloadingMandatory,
                               Device, OutlinedFnID, InputInfo, MapTypesArray,
                               MapNamesArray, SizeEmitter, CGF, CGM);
  };

  // "else": execute the host fallback version of the region.
  auto &&TargetElseGen =
      [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
        emitTargetCallElse(OMPRuntime: this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
                           CS, OffloadingMandatory, CGF);
      };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, Cond: IfCond, ThenGen: TargetThenGen, ElseGen: TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
11084
/// Recursively scan \p S for target regions that must be emitted as device
/// entry points, emitting the corresponding device function for each one
/// found. \p ParentName is the mangled name of the enclosing host function,
/// used to form the target-region entry name.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Register vtable from device for target data and target directives.
  // Add this block here since scanForTargetRegionsFunctions ignores
  // target data by checking if S is a executable directive (target).
  if (auto *E = dyn_cast<OMPExecutableDirective>(Val: S);
      E && isOpenMPTargetDataManagementDirective(DKind: E->getDirectiveKind())) {
    // Don't need to check if it's device compile
    // since scanForTargetRegionsFunctions currently only called
    // in device compilation.
    registerVTable(D: *E);
  }

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(Val: S) &&
      isOpenMPTargetExecutionDirective(
          DKind: cast<OMPExecutableDirective>(Val: S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(Val: S);

    llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
        CGM, OMPBuilder, BeginLoc: E.getBeginLoc(), ParentName);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
      return;

    // Dispatch to the device-function emitter matching the directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   S: cast<OMPTargetDirective>(Val: E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetParallelDirective>(Val: E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetTeamsDirective>(Val: E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetTeamsDistributeDirective>(Val: E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetTeamsDistributeSimdDirective>(Val: E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetParallelForDirective>(Val: E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetParallelForSimdDirective>(Val: E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetSimdDirective>(Val: E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          S: cast<OMPTargetTeamsDistributeParallelForDirective>(Val: E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              S: cast<OMPTargetTeamsDistributeParallelForSimdDirective>(Val: E));
      break;
    case OMPD_target_teams_loop:
      CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetTeamsGenericLoopDirective>(Val: E));
      break;
    case OMPD_target_parallel_loop:
      CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetParallelGenericLoopDirective>(Val: E));
      break;
    // All remaining directive kinds are not target execution directives and
    // must never reach this switch (guarded by RequiresDeviceCodegen above).
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // Non-target executable directive: recurse into the directive's raw
  // associated statement (if any) rather than its children.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(Val: S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(S: E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(Val: S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(S: II, ParentName);
}
11251
11252static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
11253 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
11254 OMPDeclareTargetDeclAttr::getDeviceType(VD);
11255 if (!DevTy)
11256 return false;
11257 // Do not emit device_type(nohost) functions for the host.
11258 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
11259 return true;
11260 // Do not emit device_type(host) functions for the device.
11261 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
11262 return true;
11263 return false;
11264}
11265
/// Decide whether codegen of the function \p GD should be skipped here.
/// Returns true when the function must NOT go through normal code generation
/// (either it is excluded by device_type, or we are compiling for the device
/// and the function is not a declare-target function). As a side effect on
/// the device side, scans the function body for target regions.
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(Val: GD.getDecl()))
      // Skip host emission only for device_type(nohost) functions.
      if (isAssumedToBeNotEmitted(VD: cast<ValueDecl>(Val: FD),
                                  IsDevice: CGM.getLangOpts().OpenMPIsTargetDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(Val: GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(Val: VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(S: FD->getBody(), ParentName: Name);
    // Skip device emission for device_type(host) functions.
    if (isAssumedToBeNotEmitted(VD: cast<ValueDecl>(Val: FD),
                                IsDevice: CGM.getLangOpts().OpenMPIsTargetDevice))
      return true;
  }

  // Do not to emit function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(V: VD) == 0;
}
11291
/// Decide whether codegen of the global variable \p GD should be skipped.
/// Returns true when emission is deferred or suppressed; false to let normal
/// code generation proceed. On the device side also scans any ctors/dtors of
/// the variable's record type for target regions.
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  // Variables excluded by device_type for this compilation side are skipped.
  if (isAssumedToBeNotEmitted(VD: cast<ValueDecl>(Val: GD.getDecl()),
                              IsDevice: CGM.getLangOpts().OpenMPIsTargetDevice))
    return true;

  // On the host, nothing more to check: emit normally.
  if (!CGM.getLangOpts().OpenMPIsTargetDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(Val: GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GD: GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(S: Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GD: GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(S: Dtor->getBody(), ParentName);
    }
  }

  // Do not to emit variable if it is not marked as declare target.
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          VD: cast<VarDecl>(Val: GD.getDecl()));
  // Link-clause variables, and to/enter/local variables under unified shared
  // memory, are deferred and emitted later (see emitDeferredTargetDecls).
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
        *Res == OMPDeclareTargetDeclAttr::MT_Enter ||
        *Res == OMPDeclareTargetDeclAttr::MT_Local) &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(V: cast<VarDecl>(Val: GD.getDecl()));
    return true;
  }
  return false;
}
11331
/// Register the global variable \p VD (with address \p Addr) with the
/// OpenMPIRBuilder's offloading machinery so host and device agree on its
/// entry. No-op when there are no offload targets and we are not compiling
/// for a device.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsTargetDevice)
    return;

  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);

  // If this is an 'extern' declaration we defer to the canonical definition and
  // do not emit an offloading entry.
  if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
      VD->hasExternalStorage())
    return;

  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsTargetDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(GD: VD);
      EmittedNonTargetVariables.try_emplace(Key: VarName, Args&: Addr);
    }
    return;
  }

  // Lazy accessors handed to the IRBuilder so the global's address/linkage
  // are only computed if the registration actually needs them.
  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(GD: VD); };
  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  std::vector<llvm::GlobalVariable *> GeneratedRefs;
  OMPBuilder.registerTargetGlobalVariable(
      CaptureClause: convertCaptureClause(VD), DeviceClause: convertDeviceClause(VD),
      IsDeclaration: VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      IsExternallyVisible: VD->isExternallyVisible(),
      EntryInfo: getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  BeginLoc: VD->getCanonicalDecl()->getBeginLoc()),
      MangledName: CGM.getMangledName(GD: VD), GeneratedRefs, OpenMPSIMD: CGM.getLangOpts().OpenMPSimd,
      TargetTriple: CGM.getLangOpts().OMPTargetTriples, GlobalInitializer: AddrOfGlobal, VariableLinkage: LinkageForVariable,
      LlvmPtrTy: CGM.getTypes().ConvertTypeForMem(
          T: CGM.getContext().getPointerType(T: VD->getType())),
      Addr);

  // Keep any generated reference globals alive through optimization.
  for (auto *ref : GeneratedRefs)
    CGM.addCompilerUsedGlobal(GV: ref);
}
11378
11379bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
11380 if (isa<FunctionDecl>(Val: GD.getDecl()) ||
11381 isa<OMPDeclareReductionDecl>(Val: GD.getDecl()))
11382 return emitTargetFunctions(GD);
11383
11384 return emitTargetGlobalVariable(GD);
11385}
11386
/// Emit all declare-target globals whose emission was deferred by
/// emitTargetGlobalVariable: to/enter/local variables without unified shared
/// memory are emitted as normal globals; link-clause variables (and
/// to/enter/local under unified shared memory) only get their declare-target
/// reference pointer materialized.
void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    // The declare-target attribute may have been dropped/changed; skip then.
    if (!Res)
      continue;
    if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
         *Res == OMPDeclareTargetDeclAttr::MT_Enter ||
         *Res == OMPDeclareTargetDeclAttr::MT_Local) &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(D: VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
                *Res == OMPDeclareTargetDeclAttr::MT_Enter ||
                *Res == OMPDeclareTargetDeclAttr::MT_Local) &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}
11409
/// Hook for adjusting mapped data for lambdas in target regions. The generic
/// host implementation only validates that \p D is a target execution
/// directive and does nothing else; device-specific runtimes presumably
/// override this with real work — confirm against CGOpenMPRuntimeGPU.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
11415
/// Record the effects of an OpenMP 'requires' directive: the
/// unified_shared_memory clause toggles a module-wide flag (mirrored into the
/// OpenMPIRBuilder config), and atomic_default_mem_order selects the default
/// llvm::AtomicOrdering used for atomics in this module.
void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
      OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Val: Clause)) {
      // Map the OpenMP memory-order keyword to the LLVM atomic ordering.
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        // Invalid/unparsed kind: keep the current default ordering.
        break;
      }
    }
  }
}
11439
/// Default atomic ordering for this module, as set by a 'requires
/// atomic_default_mem_order' directive (see processRequiresDirective).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
11443
11444bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
11445 LangAS &AS) {
11446 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
11447 return false;
11448 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
11449 switch(A->getAllocatorType()) {
11450 case OMPAllocateDeclAttr::OMPNullMemAlloc:
11451 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11452 // Not supported, fallback to the default mem space.
11453 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11454 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11455 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11456 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11457 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11458 case OMPAllocateDeclAttr::OMPConstMemAlloc:
11459 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11460 AS = LangAS::Default;
11461 return true;
11462 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11463 llvm_unreachable("Expected predefined allocator for the variables with the "
11464 "static storage.");
11465 }
11466 return false;
11467}
11468
/// True when a 'requires unified_shared_memory' directive was seen in this
/// module (see processRequiresDirective).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
11472
/// RAII constructor: during device compilation, temporarily stop implicitly
/// marking referenced declarations as declare-target (saving the previous
/// state for restoration in the destructor). No effect on the host.
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}
11481
/// RAII destructor: restore the ShouldMarkAsGlobal flag saved by the
/// constructor (device compilation only).
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsTargetDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}
11486
/// During device compilation, record that the function \p GD is referenced
/// and must be kept. Returns true when the function should be treated as
/// already emitted (or emission is not needed); false when it still needs to
/// be emitted.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  // Host compilation, or auto-marking disabled via
  // DisableAutoDeclareTargetRAII: nothing to track here.
  if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(Val: GD.getDecl());
  // Do not to emit function if it is marked as declare target as it was already
  // emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD: D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(V: D) == 0) {
      // If an LLVM function exists, its definition status decides; a mere
      // declaration still needs a body emitted.
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              Val: CGM.GetGlobalValue(Ref: CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // First time we see this non-declare-target function: insert() returns
  // true, so report "needs emission" (false) exactly once.
  return !AlreadyEmittedTargetDecls.insert(V: D).second;
}
11506
/// Emit the runtime call implementing a 'teams' construct by forwarding the
/// outlined function and its captured variables to __kmpc_fork_teams.
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  // Run cleanups for any temporaries created while materializing arguments.
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(C: CapturedVars.size()), // Number of captured vars
      OutlinedFn};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(in_start: std::begin(arr&: Args), in_end: std::end(arr&: Args));
  RealArgs.append(in_start: CapturedVars.begin(), in_end: CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      M&: CGM.getModule(), FnID: OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(callee: RTLFn, args: RealArgs);
}
11531
/// Emit a call to __kmpc_push_num_teams communicating the num_teams and
/// thread_limit clause values to the runtime; a null expression for either
/// clause is encoded as 0 (runtime default).
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: NumTeams),
                                      DestTy: CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(C: 0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: ThreadLimit),
                                      DestTy: CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(C: 0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                          M&: CGM.getModule(), FnID: OMPRTL___kmpc_push_num_teams),
                      args: PushNumTeamsArgs);
}
11560
/// Emit a call to __kmpc_set_thread_limit communicating the thread_limit
/// clause value to the runtime; a null expression is encoded as 0
/// (runtime default).
void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
                                            const Expr *ThreadLimit,
                                            SourceLocation Loc) {
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: ThreadLimit),
                                      DestTy: CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(C: 0);

  // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
  llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
                                    ThreadLimitVal};
  CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                          M&: CGM.getModule(), FnID: OMPRTL___kmpc_set_thread_limit),
                      args: ThreadLimitArgs);
}
11578
/// Emit the enter/exit mapping calls for a 'target data' region via
/// OpenMPIRBuilder::createTargetData, wiring up callbacks that (a) produce
/// the map-clause info, (b) generate the region body in the priv/no-priv
/// phases, (c) record use_device_ptr/addr replacements, and (d) resolve
/// user-defined mappers.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

  llvm::Value *IfCondVal = nullptr;
  if (IfCond)
    IfCondVal = CGF.EvaluateExprAsBool(E: IfCond);

  // Emit device ID if any.
  llvm::Value *DeviceID = nullptr;
  if (Device) {
    DeviceID = CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: Device),
                                         DestTy: CGF.Int64Ty, /*isSigned=*/true);
  } else {
    DeviceID = CGF.Builder.getInt64(C: OMP_DEVICEID_UNDEF);
  }

  // Fill up the arrays with all the mapped variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  auto GenMapInfoCB =
      [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
    CGF.Builder.restoreIP(IP: CodeGenIP);
    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);

    // With debug info, also produce per-entry name strings for the runtime.
    auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
      return emitMappingInformation(CGF, OMPBuilder, MapExprs&: MapExpr);
    };
    if (CGM.getCodeGenOpts().getDebugInfo() !=
        llvm::codegenoptions::NoDebugInfo) {
      CombinedInfo.Names.resize(N: CombinedInfo.Exprs.size());
      llvm::transform(Range&: CombinedInfo.Exprs, d_first: CombinedInfo.Names.begin(),
                      F: FillInfoMap);
    }

    return CombinedInfo;
  };
  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
  // Body generator: which phases run depends on whether there are
  // use_device_ptr/use_device_addr captures to privatize.
  auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
    CGF.Builder.restoreIP(IP: CodeGenIP);
    switch (BodyGenType) {
    case BodyGenTy::Priv:
      if (!Info.CaptureDeviceAddrMap.empty())
        CodeGen(CGF);
      break;
    case BodyGenTy::DupNoPriv:
      if (!Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    case BodyGenTy::NoPriv:
      if (Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    }
    return InsertPointTy(CGF.Builder.GetInsertBlock(),
                         CGF.Builder.GetInsertPoint());
  };

  // Record the device-side replacement value for each use_device_* capture.
  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(Key: DevVD, Args&: NewDecl);
    }
  };

  // Resolve (creating on demand) the function for a user-defined mapper.
  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Function *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          D: cast<OMPDeclareMapperDecl>(Val: CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };

  // Source location for the ident struct
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc: D.getBeginLoc());

  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());
  llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
  llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
      cantFail(ValOrErr: OMPBuilder.createTargetData(
          Loc: OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCond: IfCondVal, Info, GenMapInfoCB,
          CustomMapperCB,
          /*MapperFunc=*/nullptr, BodyGenCB: BodyCB, DeviceAddrCB, SrcLocInfo: RTLoc));
  CGF.Builder.restoreIP(IP: AfterIP);
}
11682
/// Emit the runtime call for a standalone data-movement directive
/// ('target enter data', 'target exit data', or 'target update'): build the
/// offloading argument arrays and invoke the matching __tgt_target_data_*
/// mapper entry point, wrapping everything in a target task when depend or
/// nowait clauses are present.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: Device),
                                           DestTy: CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(C: OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(C: InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc: D.getBeginLoc());

    SmallVector<llvm::Value *, 13> OffloadingArgs(
        {RTLoc, DeviceID, PointerNum,
         InputInfo.BasePointersArray.emitRawPointer(CGF),
         InputInfo.PointersArray.emitRawPointer(CGF),
         InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
         InputInfo.MappersArray.emitRawPointer(CGF)});

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // Any other directive kind cannot reach here (guarded by the assert at
    // function entry).
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    // The nowait variants take four extra trailing arguments (dependence
    // counts/lists); pass empty values here.
    if (HasNowait) {
      OffloadingArgs.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.Int32Ty));
      OffloadingArgs.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.VoidPtrTy));
      OffloadingArgs.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.Int32Ty));
      OffloadingArgs.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.VoidPtrTy));
    }
    CGF.EmitRuntimeCall(
        callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(), FnID: RTLFn),
        args: OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
    CGOpenMPRuntime::TargetDataInfo Info;
    MappableExprsHandler MEHandler(D, CGF);
    genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
    emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
                                /*IsNonContiguous=*/true, /*ForEndCall=*/false);

    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();

    // Publish the generated arrays so ThenGen (above) can read them.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                          CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
                                      CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.RTArgs.MapTypesArray;
    MapNamesArray = Info.RTArgs.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(S: D, BodyGen: ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, InnerKind: D.getDirectiveKind(), CodeGen: ThenGen);
  };

  // An 'if' clause that evaluates false makes the directive a no-op.
  if (IfCond) {
    emitIfClause(CGF, Cond: IfCond, ThenGen: TargetThenGen,
                 ElseGen: [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11860
11861static unsigned
11862evaluateCDTSize(const FunctionDecl *FD,
11863 ArrayRef<llvm::OpenMPIRBuilder::DeclareSimdAttrTy> ParamAttrs) {
11864 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11865 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11866 // of that clause. The VLEN value must be power of 2.
11867 // In other case the notion of the function`s "characteristic data type" (CDT)
11868 // is used to compute the vector length.
11869 // CDT is defined in the following order:
11870 // a) For non-void function, the CDT is the return type.
11871 // b) If the function has any non-uniform, non-linear parameters, then the
11872 // CDT is the type of the first such parameter.
11873 // c) If the CDT determined by a) or b) above is struct, union, or class
11874 // type which is pass-by-value (except for the type that maps to the
11875 // built-in complex data type), the characteristic data type is int.
11876 // d) If none of the above three cases is applicable, the CDT is int.
11877 // The VLEN is then determined based on the CDT and the size of vector
11878 // register of that ISA for which current vector version is generated. The
11879 // VLEN is computed using the formula below:
11880 // VLEN = sizeof(vector_register) / sizeof(CDT),
11881 // where vector register size specified in section 3.2.1 Registers and the
11882 // Stack Frame of original AMD64 ABI document.
11883 QualType RetType = FD->getReturnType();
11884 if (RetType.isNull())
11885 return 0;
11886 ASTContext &C = FD->getASTContext();
11887 QualType CDT;
11888 if (!RetType.isNull() && !RetType->isVoidType()) {
11889 CDT = RetType;
11890 } else {
11891 unsigned Offset = 0;
11892 if (const auto *MD = dyn_cast<CXXMethodDecl>(Val: FD)) {
11893 if (ParamAttrs[Offset].Kind ==
11894 llvm::OpenMPIRBuilder::DeclareSimdKindTy::Vector)
11895 CDT = C.getPointerType(T: C.getCanonicalTagType(TD: MD->getParent()));
11896 ++Offset;
11897 }
11898 if (CDT.isNull()) {
11899 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11900 if (ParamAttrs[I + Offset].Kind ==
11901 llvm::OpenMPIRBuilder::DeclareSimdKindTy::Vector) {
11902 CDT = FD->getParamDecl(i: I)->getType();
11903 break;
11904 }
11905 }
11906 }
11907 }
11908 if (CDT.isNull())
11909 CDT = C.IntTy;
11910 CDT = CDT->getCanonicalTypeUnqualified();
11911 if (CDT->isRecordType() || CDT->isUnionType())
11912 CDT = C.IntTy;
11913 return C.getTypeSize(T: CDT);
11914}
11915
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11921
11922/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
11923static bool getAArch64MTV(QualType QT,
11924 llvm::OpenMPIRBuilder::DeclareSimdKindTy Kind) {
11925 QT = QT.getCanonicalType();
11926
11927 if (QT->isVoidType())
11928 return false;
11929
11930 if (Kind == llvm::OpenMPIRBuilder::DeclareSimdKindTy::Uniform)
11931 return false;
11932
11933 if (Kind == llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearUVal ||
11934 Kind == llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearRef)
11935 return false;
11936
11937 if ((Kind == llvm::OpenMPIRBuilder::DeclareSimdKindTy::Linear ||
11938 Kind == llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearVal) &&
11939 !QT->isReferenceType())
11940 return false;
11941
11942 return true;
11943}
11944
11945/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11946static bool getAArch64PBV(QualType QT, ASTContext &C) {
11947 QT = QT.getCanonicalType();
11948 unsigned Size = C.getTypeSize(T: QT);
11949
11950 // Only scalars and complex within 16 bytes wide set PVB to true.
11951 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11952 return false;
11953
11954 if (QT->isFloatingType())
11955 return true;
11956
11957 if (QT->isIntegerType())
11958 return true;
11959
11960 if (QT->isPointerType())
11961 return true;
11962
11963 // TODO: Add support for complex types (section 3.1.2, item 2).
11964
11965 return false;
11966}
11967
11968/// Computes the lane size (LS) of a return type or of an input parameter,
11969/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11970/// TODO: Add support for references, section 3.2.1, item 1.
11971static unsigned getAArch64LS(QualType QT,
11972 llvm::OpenMPIRBuilder::DeclareSimdKindTy Kind,
11973 ASTContext &C) {
11974 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11975 QualType PTy = QT.getCanonicalType()->getPointeeType();
11976 if (getAArch64PBV(QT: PTy, C))
11977 return C.getTypeSize(T: PTy);
11978 }
11979 if (getAArch64PBV(QT, C))
11980 return C.getTypeSize(T: QT);
11981
11982 return C.getTypeSize(T: C.getUIntPtrType());
11983}
11984
11985// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11986// signature of the scalar function, as defined in 3.2.2 of the
11987// AAVFABI.
11988static std::tuple<unsigned, unsigned, bool>
11989getNDSWDS(const FunctionDecl *FD,
11990 ArrayRef<llvm::OpenMPIRBuilder::DeclareSimdAttrTy> ParamAttrs) {
11991 QualType RetType = FD->getReturnType().getCanonicalType();
11992
11993 ASTContext &C = FD->getASTContext();
11994
11995 bool OutputBecomesInput = false;
11996
11997 llvm::SmallVector<unsigned, 8> Sizes;
11998 if (!RetType->isVoidType()) {
11999 Sizes.push_back(Elt: getAArch64LS(
12000 QT: RetType, Kind: llvm::OpenMPIRBuilder::DeclareSimdKindTy::Vector, C));
12001 if (!getAArch64PBV(QT: RetType, C) && getAArch64MTV(QT: RetType, Kind: {}))
12002 OutputBecomesInput = true;
12003 }
12004 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
12005 QualType QT = FD->getParamDecl(i: I)->getType().getCanonicalType();
12006 Sizes.push_back(Elt: getAArch64LS(QT, Kind: ParamAttrs[I].Kind, C));
12007 }
12008
12009 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
12010 // The LS of a function parameter / return value can only be a power
12011 // of 2, starting from 8 bits, up to 128.
12012 assert(llvm::all_of(Sizes,
12013 [](unsigned Size) {
12014 return Size == 8 || Size == 16 || Size == 32 ||
12015 Size == 64 || Size == 128;
12016 }) &&
12017 "Invalid size");
12018
12019 return std::make_tuple(args&: *llvm::min_element(Range&: Sizes), args&: *llvm::max_element(Range&: Sizes),
12020 args&: OutputBecomesInput);
12021}
12022
12023static llvm::OpenMPIRBuilder::DeclareSimdBranch
12024convertDeclareSimdBranch(OMPDeclareSimdDeclAttr::BranchStateTy State) {
12025 switch (State) {
12026 case OMPDeclareSimdDeclAttr::BS_Undefined:
12027 return llvm::OpenMPIRBuilder::DeclareSimdBranch::Undefined;
12028 case OMPDeclareSimdDeclAttr::BS_Inbranch:
12029 return llvm::OpenMPIRBuilder::DeclareSimdBranch::Inbranch;
12030 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
12031 return llvm::OpenMPIRBuilder::DeclareSimdBranch::Notinbranch;
12032 }
12033 llvm_unreachable("unexpected declare simd branch state");
12034}
12035
12036// Check the values provided via `simdlen` by the user.
12037static bool validateAArch64Simdlen(CodeGenModule &CGM, SourceLocation SLoc,
12038 unsigned UserVLEN, unsigned WDS, char ISA) {
12039 // 1. A `simdlen(1)` doesn't produce vector signatures.
12040 if (UserVLEN == 1) {
12041 CGM.getDiags().Report(Loc: SLoc, DiagID: diag::warn_simdlen_1_no_effect);
12042 return false;
12043 }
12044
12045 // 2. Section 3.3.1, item 1: user input must be a power of 2 for Advanced
12046 // SIMD.
12047 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(Value: UserVLEN)) {
12048 CGM.getDiags().Report(Loc: SLoc, DiagID: diag::warn_simdlen_requires_power_of_2);
12049 return false;
12050 }
12051
12052 // 3. Section 3.4.1: SVE fixed length must obey the architectural limits.
12053 if (ISA == 's' && UserVLEN != 0 &&
12054 ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0))) {
12055 CGM.getDiags().Report(Loc: SLoc, DiagID: diag::warn_simdlen_must_fit_lanes) << WDS;
12056 return false;
12057 }
12058
12059 return true;
12060}
12061
// Emit the vector-variant metadata for a function carrying one or more
// 'declare simd' attributes. Walks the whole redeclaration chain, translates
// each attribute's uniform/aligned/linear clauses into per-parameter
// DeclareSimdAttrTy entries, then hands them to the OpenMPIRBuilder for the
// X86 or AArch64 (SVE/NEON) mangling path.
void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  // Each redeclaration may carry its own 'declare simd' attributes.
  while (FD) {
    // Map params to their positions in function decl.
    llvm::DenseMap<const Decl *, unsigned> ParamPositions;
    // For methods, slot 0 is reserved for the implicit 'this' (keyed by FD).
    if (isa<CXXMethodDecl>(Val: FD))
      ParamPositions.try_emplace(Key: FD, Args: 0);
    unsigned ParamPos = ParamPositions.size();
    for (const ParmVarDecl *P : FD->parameters()) {
      ParamPositions.try_emplace(Key: P->getCanonicalDecl(), Args&: ParamPos);
      ++ParamPos;
    }
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      llvm::SmallVector<llvm::OpenMPIRBuilder::DeclareSimdAttrTy, 8> ParamAttrs(
          ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(Val: E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(Val: PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
        }
        ParamAttrs[Pos].Kind =
            llvm::OpenMPIRBuilder::DeclareSimdKindTy::Uniform;
      }
      // Get alignment info.
      auto *NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(Val: E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(Val: PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          ParmTy = PVD->getType();
        }
        // Use the explicit alignment when given; otherwise fall back to the
        // OpenMP default simd alignment for the parameter type.
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(Ctx: C)
                : llvm::APSInt::getUnsigned(
                      X: C.toCharUnitsFromBits(BitSize: C.getOpenMPDefaultSimdAlign(T: ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters. SI/MI iterate the per-linear-clause step and
      // modifier lists in lockstep with the linear expressions.
      auto *SI = Attr->steps_begin();
      auto *MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        bool IsReferenceType = false;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(Val: E)) {
          Pos = ParamPositions[FD];
          auto *P = cast<PointerType>(Val: E->getType());
          PtrRescalingFactor = CGM.getContext()
                                   .getTypeSizeInChars(T: P->getPointeeType())
                                   .getQuantity();
        } else {
          const auto *PVD = cast<ParmVarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(Val: PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          if (auto *P = dyn_cast<PointerType>(Val: PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(T: P->getPointeeType())
                                     .getQuantity();
          else if (PVD->getType()->isReferenceType()) {
            IsReferenceType = true;
            PtrRescalingFactor =
                CGM.getContext()
                    .getTypeSizeInChars(T: PVD->getType().getNonReferenceType())
                    .getQuantity();
          }
        }
        // Choose the linear kind from the clause modifier first, then from
        // the declared parameter type.
        llvm::OpenMPIRBuilder::DeclareSimdAttrTy &ParamAttr = ParamAttrs[Pos];
        if (*MI == OMPC_LINEAR_ref)
          ParamAttr.Kind = llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearRef;
        else if (*MI == OMPC_LINEAR_uval)
          ParamAttr.Kind = llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearUVal;
        else if (IsReferenceType)
          ParamAttr.Kind = llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearVal;
        else
          ParamAttr.Kind = llvm::OpenMPIRBuilder::DeclareSimdKindTy::Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(X: 1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, Ctx: C, AllowSideEffects: Expr::SE_AllowSideEffects)) {
            // Non-constant step: when it names another parameter, record that
            // parameter's position as a variable stride.
            if (const auto *DRE =
                    cast<DeclRefExpr>(Val: (*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(Val: DRE->getDecl())) {
                ParamAttr.HasVarStride = true;
                auto It = ParamPositions.find(Val: StridePVD->getCanonicalDecl());
                assert(It != ParamPositions.end() &&
                       "Function parameter not found");
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(X: It->second);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (!ParamAttr.HasVarStride &&
            (ParamAttr.Kind ==
                 llvm::OpenMPIRBuilder::DeclareSimdKindTy::Linear ||
             ParamAttr.Kind ==
                 llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearRef))
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      // Evaluate the explicit simdlen, if any, and the branch state.
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(Ctx: C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      llvm::OpenMPIRBuilder::DeclareSimdBranch State =
          convertDeclareSimdBranch(State: Attr->getBranchState());
      if (CGM.getTriple().isX86()) {
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        OMPBuilder.emitX86DeclareSimdFunction(Fn, NumElements: NumElts, VLENVal, ParamAttrs,
                                              Branch: State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        // Get basic data for building the vector signature.
        const auto Data = getNDSWDS(FD, ParamAttrs);
        const unsigned NDS = std::get<0>(t: Data);
        const unsigned WDS = std::get<1>(t: Data);
        const bool OutputBecomesInput = std::get<2>(t: Data);
        // SVE takes precedence over NEON when the target supports both.
        if (CGM.getTarget().hasFeature(Feature: "sve")) {
          if (validateAArch64Simdlen(CGM, SLoc: ExprLoc, UserVLEN: VLEN, WDS, ISA: 's'))
            OMPBuilder.emitAArch64DeclareSimdFunction(
                Fn, VLENVal: VLEN, ParamAttrs, Branch: State, ISA: 's', NarrowestDataSize: NDS, OutputBecomesInput);
        } else if (CGM.getTarget().hasFeature(Feature: "neon")) {
          if (validateAArch64Simdlen(CGM, SLoc: ExprLoc, UserVLEN: VLEN, WDS, ISA: 'n'))
            OMPBuilder.emitAArch64DeclareSimdFunction(
                Fn, VLENVal: VLEN, ParamAttrs, Branch: State, ISA: 'n', NarrowestDataSize: NDS, OutputBecomesInput);
        }
      }
    }
    FD = FD->getPreviousDecl();
  }
}
12230
12231namespace {
12232/// Cleanup action for doacross support.
12233class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12234public:
12235 static const int DoacrossFinArgs = 2;
12236
12237private:
12238 llvm::FunctionCallee RTLFn;
12239 llvm::Value *Args[DoacrossFinArgs];
12240
12241public:
12242 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12243 ArrayRef<llvm::Value *> CallArgs)
12244 : RTLFn(RTLFn) {
12245 assert(CallArgs.size() == DoacrossFinArgs);
12246 std::copy(first: CallArgs.begin(), last: CallArgs.end(), result: std::begin(arr&: Args));
12247 }
12248 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12249 if (!CGF.HaveInsertPoint())
12250 return;
12251 CGF.EmitRuntimeCall(callee: RTLFn, args: Args);
12252 }
12253};
12254} // namespace
12255
// Emit the doacross initialization for loop directive \p D: builds an array
// of kmp_dim descriptors (one per collapsed loop, from \p NumIterations),
// calls __kmpc_doacross_init, and registers a cleanup that emits
// __kmpc_doacross_fini on region exit.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Build (or reuse the cached) kmp_dim record type.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim { // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord(Name: "kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, DC: RD, FieldTy: Int64Ty);
    addFieldToRecordDecl(C, DC: RD, FieldTy: Int64Ty);
    addFieldToRecordDecl(C, DC: RD, FieldTy: Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getCanonicalTagType(TD: RD);
  } else {
    RD = KmpDimTy->castAsRecordDecl();
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy = C.getConstantArrayType(EltTy: KmpDimTy, ArySize: Size, SizeExpr: nullptr,
                                            ASM: ArraySizeModifier::Normal, IndexTypeQuals: 0);

  // One kmp_dim per loop dimension; null-initialize so 'lo' stays 0.
  Address DimsAddr = CGF.CreateMemTemp(T: ArrayTy, Name: "dims");
  CGF.EmitNullInitialization(DestPtr: DimsAddr, Ty: ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        Addr: CGF.Builder.CreateConstArrayGEP(Addr: DimsAddr, Index: I), T: KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        Base: DimsLVal, Field: *std::next(x: RD->field_begin(), n: UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        Src: CGF.EmitScalarExpr(E: NumIterations[I]), SrcTy: NumIterations[I]->getType(),
        DstTy: Int64Ty, Loc: NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(value: NumIterVal, lvalue: UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        Base: DimsLVal, Field: *std::next(x: RD->field_begin(), n: StrideFD));
    CGF.EmitStoreOfScalar(value: llvm::ConstantInt::getSigned(Ty: CGM.Int64Ty, /*V=*/1),
                          lvalue: StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc: D.getBeginLoc()),
      getThreadID(CGF, Loc: D.getBeginLoc()),
      llvm::ConstantInt::getSigned(Ty: CGM.Int32Ty, V: NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          V: CGF.Builder.CreateConstArrayGEP(Addr: DimsAddr, Index: 0).emitRawPointer(CGF),
          DestTy: CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      M&: CGM.getModule(), FnID: OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(callee: RTLFn, args: Args);
  // Push a cleanup that calls __kmpc_doacross_fini at the directive's end.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, Loc: D.getEndLoc()), getThreadID(CGF, Loc: D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      M&: CGM.getModule(), FnID: OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(Kind: NormalAndEHCleanup, A: FiniRTLFn,
                                             A: llvm::ArrayRef(FiniArgs));
}
12326
/// Shared implementation for doacross dependences: materializes the loop
/// iteration vector from clause \p C into a temporary kmp_int64 array and
/// calls __kmpc_doacross_post (source) or __kmpc_doacross_wait (sink).
/// \tparam T OMPDependClause or OMPDoacrossClause (queried via
///           OMPDoacrossKind).
template <typename T>
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
                                const T *C, llvm::Value *ULoc,
                                llvm::Value *ThreadID) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      EltTy: Int64Ty, ArySize: Size, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal, IndexTypeQuals: 0);
  Address CntAddr = CGF.CreateMemTemp(T: ArrayTy, Name: ".cnt.addr");
  // Store each loop counter, converted to kmp_int64, into the array.
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        Src: CGF.EmitScalarExpr(E: CounterVal), SrcTy: CounterVal->getType(), DstTy: Int64Ty,
        Loc: CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(Value: CntVal, Addr: CGF.Builder.CreateConstArrayGEP(Addr: CntAddr, Index: I),
                          /*Volatile=*/false, Ty: Int64Ty);
  }
  llvm::Value *Args[] = {
      ULoc, ThreadID,
      CGF.Builder.CreateConstArrayGEP(Addr: CntAddr, Index: 0).emitRawPointer(CGF)};
  llvm::FunctionCallee RTLFn;
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  // Select post vs. wait depending on the clause's source/sink modifier.
  OMPDoacrossKind<T> ODK;
  if (ODK.isSource(C)) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(),
                                                  FnID: OMPRTL___kmpc_doacross_post);
  } else {
    assert(ODK.isSink(C) && "Expect sink modifier.");
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(),
                                                  FnID: OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(callee: RTLFn, args: Args);
}
12362
12363void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12364 const OMPDependClause *C) {
12365 return EmitDoacrossOrdered<OMPDependClause>(
12366 CGF, CGM, C, ULoc: emitUpdateLocation(CGF, Loc: C->getBeginLoc()),
12367 ThreadID: getThreadID(CGF, Loc: C->getBeginLoc()));
12368}
12369
12370void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12371 const OMPDoacrossClause *C) {
12372 return EmitDoacrossOrdered<OMPDoacrossClause>(
12373 CGF, CGM, C, ULoc: emitUpdateLocation(CGF, Loc: C->getBeginLoc()),
12374 ThreadID: getThreadID(CGF, Loc: C->getBeginLoc()));
12375}
12376
12377void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12378 llvm::FunctionCallee Callee,
12379 ArrayRef<llvm::Value *> Args) const {
12380 assert(Loc.isValid() && "Outlined function call location must be valid.");
12381 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: Loc);
12382
12383 if (auto *Fn = dyn_cast<llvm::Function>(Val: Callee.getCallee())) {
12384 if (Fn->doesNotThrow()) {
12385 CGF.EmitNounwindRuntimeCall(callee: Fn, args: Args);
12386 return;
12387 }
12388 }
12389 CGF.EmitRuntimeCall(callee: Callee, args: Args);
12390}
12391
// Emit a call to an outlined OpenMP region function by delegating to
// emitCall, which attaches an artificial debug location and picks the
// nounwind call form when possible.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, Callee: OutlinedFn, Args);
}
12397
12398void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12399 if (const auto *FD = dyn_cast<FunctionDecl>(Val: D))
12400 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD: FD))
12401 HasEmittedDeclareTargetRegion = true;
12402}
12403
// Default implementation: no native/target parameter translation is needed,
// so TargetParam is unused and the native parameter's local address is
// returned directly.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(VD: NativeParam);
}
12409
12410/// Return allocator value from expression, or return a null allocator (default
12411/// when no allocator specified).
12412static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
12413 const Expr *Allocator) {
12414 llvm::Value *AllocVal;
12415 if (Allocator) {
12416 AllocVal = CGF.EmitScalarExpr(E: Allocator);
12417 // According to the standard, the original allocator type is a enum
12418 // (integer). Convert to pointer type, if required.
12419 AllocVal = CGF.EmitScalarConversion(Src: AllocVal, SrcTy: Allocator->getType(),
12420 DstTy: CGF.getContext().VoidPtrTy,
12421 Loc: Allocator->getExprLoc());
12422 } else {
12423 // If no allocator specified, it defaults to the null allocator.
12424 AllocVal = llvm::Constant::getNullValue(
12425 Ty: CGF.CGM.getTypes().ConvertType(T: CGF.getContext().VoidPtrTy));
12426 }
12427 return AllocVal;
12428}
12429
12430/// Return the alignment from an allocate directive if present.
12431static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
12432 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
12433
12434 if (!AllocateAlignment)
12435 return nullptr;
12436
12437 return llvm::ConstantInt::get(Ty: CGM.SizeTy, V: AllocateAlignment->getQuantity());
12438}
12439
// Return the address to use for local variable \p VD. Handles two special
// cases: locals of untied tasks (whose storage lives in a task-persistent
// slot), and variables with an 'omp allocate' attribute (allocated via
// __kmpc_alloc/__kmpc_aligned_alloc with a matching __kmpc_free cleanup).
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  // Look up any untied-task replacement addresses recorded for this
  // function: the stored slot and the real address.
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(Val: CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(Key: VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(D: CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA: the size is only known at run time.
      Size = CGF.getTypeSize(Ty: CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          LHS: Size, RHS: CGM.getSize(numChars: Align - CharUnits::fromQuantity(Quantity: 1)));
      Size = CGF.Builder.CreateUDiv(LHS: Size, RHS: CGM.getSize(numChars: Align));
      Size = CGF.Builder.CreateNUWMul(LHS: Size, RHS: CGM.getSize(numChars: Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(T: CVD->getType());
      Size = CGM.getSize(numChars: Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, Loc: CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment = getAlignmentValue(CGM, VD: CVD);
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(Elt: ThreadID);
    if (Alignment)
      Args.push_back(Elt: Alignment);
    Args.push_back(Elt: Size);
    Args.push_back(Elt: AllocVal);
    // With an explicit alignment, use the aligned allocation entry point.
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(), FnID), args: Args,
        name: getName(Parts: {CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        M&: CGM.getModule(), FnID: OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(T: CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        V: Addr, DestTy: CGF.ConvertTypeForMem(T: Ty), Name: getName(Parts: {CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Value: Addr, Addr: UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      // Source location stored as a raw encoding so it survives in the
      // cleanup object.
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        // Call __kmpc_free(gtid, ptr, allocator) on scope exit.
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, Loc: SourceLocation::getFromRawEncoding(Encoding: LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            V: Addr.emitRawPointer(CGF), DestTy: CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator: AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(callee: RTLFn, args: Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(T: CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        Kind: NormalAndEHCleanup, A: FiniRTLFn, A: CVD->getLocation().getRawEncoding(),
        A: VDAddr, A: Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
12539
12540bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12541 const VarDecl *VD) const {
12542 auto It = FunctionToUntiedTaskStackMap.find(Val: CGF.CurFn);
12543 if (It == FunctionToUntiedTaskStackMap.end())
12544 return false;
12545 return UntiedLocalVarsStack[It->second].count(Key: VD) > 0;
12546}
12547
12548CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12549 CodeGenModule &CGM, const OMPLoopDirective &S)
12550 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12551 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12552 if (!NeedToPush)
12553 return;
12554 NontemporalDeclsSet &DS =
12555 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12556 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12557 for (const Stmt *Ref : C->private_refs()) {
12558 const auto *SimpleRefExpr = cast<Expr>(Val: Ref)->IgnoreParenImpCasts();
12559 const ValueDecl *VD;
12560 if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: SimpleRefExpr)) {
12561 VD = DRE->getDecl();
12562 } else {
12563 const auto *ME = cast<MemberExpr>(Val: SimpleRefExpr);
12564 assert((ME->isImplicitCXXThis() ||
12565 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12566 "Expected member of current class.");
12567 VD = ME->getMemberDecl();
12568 }
12569 DS.insert(V: VD);
12570 }
12571 }
12572}
12573
12574CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12575 if (!NeedToPush)
12576 return;
12577 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12578}
12579
// RAII guard for the local variables of an untied task region: registers the
// current function as the owner of a new top-of-stack entry and pushes the
// decl -> (original address, private address) map onto UntiedLocalVarsStack.
// Nothing is pushed when the map is empty.
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  // Remember which stack slot belongs to CGF.CurFn so lookups (see
  // isLocalVarInUntiedTask) can find it by function.
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      Key: CGF.CurFn, Args: CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(Elt: LocalVars);
}
12591
12592CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12593 if (!NeedToPush)
12594 return;
12595 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12596}
12597
12598bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12599 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12600
12601 return llvm::any_of(
12602 Range&: CGM.getOpenMPRuntime().NontemporalDeclsStack,
12603 P: [VD](const NontemporalDeclsSet &Set) { return Set.contains(V: VD); });
12604}
12605
12606void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12607 const OMPExecutableDirective &S,
12608 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12609 const {
12610 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12611 // Vars in target/task regions must be excluded completely.
12612 if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()) ||
12613 isOpenMPTaskingDirective(Kind: S.getDirectiveKind())) {
12614 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12615 getOpenMPCaptureRegions(CaptureRegions, DKind: S.getDirectiveKind());
12616 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: CaptureRegions.front());
12617 for (const CapturedStmt::Capture &Cap : CS->captures()) {
12618 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12619 NeedToCheckForLPCs.insert(V: Cap.getCapturedVar());
12620 }
12621 }
12622 // Exclude vars in private clauses.
12623 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12624 for (const Expr *Ref : C->varlist()) {
12625 if (!Ref->getType()->isScalarType())
12626 continue;
12627 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
12628 if (!DRE)
12629 continue;
12630 NeedToCheckForLPCs.insert(V: DRE->getDecl());
12631 }
12632 }
12633 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12634 for (const Expr *Ref : C->varlist()) {
12635 if (!Ref->getType()->isScalarType())
12636 continue;
12637 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
12638 if (!DRE)
12639 continue;
12640 NeedToCheckForLPCs.insert(V: DRE->getDecl());
12641 }
12642 }
12643 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12644 for (const Expr *Ref : C->varlist()) {
12645 if (!Ref->getType()->isScalarType())
12646 continue;
12647 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
12648 if (!DRE)
12649 continue;
12650 NeedToCheckForLPCs.insert(V: DRE->getDecl());
12651 }
12652 }
12653 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12654 for (const Expr *Ref : C->varlist()) {
12655 if (!Ref->getType()->isScalarType())
12656 continue;
12657 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
12658 if (!DRE)
12659 continue;
12660 NeedToCheckForLPCs.insert(V: DRE->getDecl());
12661 }
12662 }
12663 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12664 for (const Expr *Ref : C->varlist()) {
12665 if (!Ref->getType()->isScalarType())
12666 continue;
12667 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
12668 if (!DRE)
12669 continue;
12670 NeedToCheckForLPCs.insert(V: DRE->getDecl());
12671 }
12672 }
12673 for (const Decl *VD : NeedToCheckForLPCs) {
12674 for (const LastprivateConditionalData &Data :
12675 llvm::reverse(C&: CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12676 if (Data.DeclToUniqueName.count(Key: VD) > 0) {
12677 if (!Data.Disabled)
12678 NeedToAddForLPCsAsDisabled.insert(V: VD);
12679 break;
12680 }
12681 }
12682 }
12683}
12684
// RAII guard that, for OpenMP >= 5.0 directives carrying at least one
// 'lastprivate(conditional:...)' clause, pushes a LastprivateConditionalData
// entry mapping each tracked declaration to a unique global-variable name
// ("pl_cond" prefix). The matching destructor pops the entry.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(Range: S.getClausesOfKind<OMPLastprivateClause>(),
                           P: [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    // Only the 'conditional' flavor of lastprivate participates.
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlist()) {
      Data.DeclToUniqueName.insert(KV: std::make_pair(
          x: cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts())->getDecl(),
          y: SmallString<16>(generateUniqueName(CGM, Prefix: "pl_cond", Ref))));
    }
  }
  // Remember the loop iteration variable and owning function; both are needed
  // later to decide which update wins (see emitLastprivateConditionalUpdate).
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
12716
// "Disable" form of the RAII guard (used via the static disable() factory):
// if any declarations privatized by \p S are tracked by an active
// lastprivate-conditional region, push a Disabled entry naming them so inner
// codegen skips the conditional-update checks for those declarations.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    // Only the keys matter for a disabled entry; the unique names stay empty.
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.try_emplace(Key: VD);
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}
12735
// Factory for the "disable" form of the guard (constructor above); returned
// by value so the caller's scope controls when the entry is popped.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12741
12742CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12743 if (CGM.getLangOpts().OpenMP < 50)
12744 return;
12745 if (Action == ActionToDo::DisableLastprivateConditional) {
12746 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12747 "Expected list of disabled private vars.");
12748 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12749 }
12750 if (Action == ActionToDo::PushAsLastprivateConditional) {
12751 assert(
12752 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12753 "Expected list of lastprivate conditional vars.");
12754 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12755 }
12756}
12757
// Allocates (or re-uses) the per-function bookkeeping for a
// lastprivate-conditional variable: an implicit record
// { <VD's type> value; char Fired; } held in a local temporary. The Fired
// flag is zero-initialized here; inner parallel regions set it (see
// checkAndEmitLastprivateConditional). Returns the address of the value
// field, which becomes the private copy of VD.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.try_emplace(Key: CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(Val: VD);
  if (VI == I->getSecond().end()) {
    // First use of VD in this function: build the implicit record and cache
    // its type, fields, and base lvalue.
    // NOTE(review): "lasprivate" looks like a typo for "lastprivate"; it only
    // names the implicit record (IR type name), so confirm downstream tools
    // before renaming.
    RecordDecl *RD = C.buildImplicitRecord(Name: "lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, DC: RD, FieldTy: VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, DC: RD, FieldTy: C.CharTy);
    RD->completeDefinition();
    NewType = C.getCanonicalTagType(TD: RD);
    Address Addr = CGF.CreateMemTemp(T: NewType, Align: C.getDeclAlign(D: VD), Name: VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, T: NewType, Source: AlignmentSource::Decl);
    I->getSecond().try_emplace(Key: VD, Args&: NewType, Args&: VDField, Args&: FiredField, Args&: BaseLVal);
  } else {
    // Already materialized earlier in this function: unpack the cached tuple.
    NewType = std::get<0>(t&: VI->getSecond());
    VDField = std::get<1>(t&: VI->getSecond());
    FiredField = std::get<2>(t&: VI->getSecond());
    BaseLVal = std::get<3>(t&: VI->getSecond());
  }
  // Fired = 0; the flag records whether an inner region updated the value.
  LValue FiredLVal =
      CGF.EmitLValueForField(Base: BaseLVal, Field: FiredField);
  CGF.EmitStoreOfScalar(
      value: llvm::ConstantInt::getNullValue(Ty: CGF.ConvertTypeForMem(T: C.CharTy)),
      lvalue: FiredLVal);
  return CGF.EmitLValueForField(Base: BaseLVal, Field: VDField).getAddress();
}
12790
12791namespace {
12792/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  /// Stack of active lastprivate-conditional regions, outermost first.
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  // Data describing the matched reference; filled by the Visit* methods and
  // read back via getFoundData().
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  /// Direct variable reference: scan the region stack innermost-first for a
  /// region tracking this decl. A hit in a Disabled region aborts the search
  /// without a match.
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(C&: LPM)) {
      auto It = D.DeclToUniqueName.find(Key: E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  /// Member of the current class ('this->member'); same innermost-first scan
  /// keyed on the member declaration.
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E: E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(C&: LPM)) {
      auto It = D.DeclToUniqueName.find(Key: E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  /// Generic fallback: recurse into children, but only into glvalue
  /// subexpressions (an rvalue child cannot be the assigned-to reference).
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Val: Child))
        if (!E->isGLValue())
          continue;
      if (Visit(S: Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  /// Returns (expr, canonical decl, unique name, loop IV lvalue, owning
  /// function) for the matched reference; all-null/empty when no match.
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(args: FoundE, args: FoundD, args: UniqueDeclName, args: IVLVal, args: FoundFn);
  }
};
12860} // namespace
12861
// Emits the "conditional" part of lastprivate(conditional): keep, in a pair
// of internal globals (<unique>.iv and <unique>), the loop-iteration number
// and value of the logically-last update seen so far, replacing them whenever
// the current iteration is >= the stored one. The compare-and-store runs
// inside a named critical region (keyed by the unique name) unless compiling
// in -fopenmp-simd mode, where no parallelism can exist.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(T: IVLVal.getType());
  llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
      Ty: LLIVTy, Name: getName(Parts: {UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(Val: LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal =
      CGF.MakeNaturalAlignRawAddrLValue(V: LastIV, T: IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
      Ty: CGF.ConvertTypeForMem(T: LVal.getType()), Name: UniqueDeclName);
  cast<llvm::GlobalVariable>(Val: Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeRawAddrLValue(V: Last, T: LVal.getType(), Alignment: LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(lvalue: IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(lvalue: LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LHS: LastIVVal, RHS: IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LHS: LastIVVal, RHS: IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock(name: "lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: "lp_cond_exit");
    CGF.Builder.CreateCondBr(Cond: CmpRes, True: ThenBB, False: ExitBB);
    // {
    CGF.EmitBlock(BB: ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(value: IVVal, lvalue: LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(T: LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(lvalue: LVal, Loc);
      CGF.EmitStoreOfScalar(value: PrivVal, lvalue: LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(src: LVal, loc: Loc);
      CGF.EmitStoreOfComplex(V: PrivVal, dest: LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(Block: ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(BB: ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, CriticalName: UniqueDeclName, CriticalOpGen: CodeGen, Loc);
  }
}
12949
// Called when \p LHS is assigned to: if it refers to a tracked
// lastprivate-conditional variable, either emit the conditional update
// directly (when the tracking region belongs to the current function) or,
// for inner parallel regions, atomically set the Fired flag in the outer
// function's bookkeeping record so the outer region performs the update.
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(S: LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(args&: FoundE, args&: FoundD, args&: UniqueDeclName, args&: IVLVal, args&: FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(Val: FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(t&: It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(t&: It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(E: FoundE);
    // priv_a is the first field of the implicit record, so its address can be
    // reinterpreted as the record's address to reach the Fired field.
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr: PrivLVal.getAddress(),
        Ty: CGF.ConvertTypeForMem(T: CGF.getContext().getPointerType(T: StructTy)),
        ElementTy: CGF.ConvertTypeForMem(T: StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(Addr: StructAddr, T: StructTy, Source: AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(Base: BaseLVal, Field: FiredDecl);
    // Atomic store: other threads in the inner region may set it too.
    CGF.EmitAtomicStore(rvalue: RValue::get(V: llvm::ConstantInt::get(
                            Ty: CGF.ConvertTypeForMem(T: FiredDecl->getType()), V: 1)),
                        lvalue: FiredLVal, AO: llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(E: FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   Loc: FoundE->getExprLoc());
}
12993
12994void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12995 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12996 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12997 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12998 return;
12999 auto Range = llvm::reverse(C&: LastprivateConditionalStack);
13000 auto It = llvm::find_if(
13001 Range, P: [](const LastprivateConditionalData &D) { return !D.Disabled; });
13002 if (It == Range.end() || It->Fn != CGF.CurFn)
13003 return;
13004 auto LPCI = LastprivateConditionalToTypes.find(Val: It->Fn);
13005 assert(LPCI != LastprivateConditionalToTypes.end() &&
13006 "Lastprivates must be registered already.");
13007 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
13008 getOpenMPCaptureRegions(CaptureRegions, DKind: D.getDirectiveKind());
13009 const CapturedStmt *CS = D.getCapturedStmt(RegionKind: CaptureRegions.back());
13010 for (const auto &Pair : It->DeclToUniqueName) {
13011 const auto *VD = cast<VarDecl>(Val: Pair.first->getCanonicalDecl());
13012 if (!CS->capturesVariable(Var: VD) || IgnoredDecls.contains(V: VD))
13013 continue;
13014 auto I = LPCI->getSecond().find(Val: Pair.first);
13015 assert(I != LPCI->getSecond().end() &&
13016 "Lastprivate must be rehistered already.");
13017 // bool Cmp = priv_a.Fired != 0;
13018 LValue BaseLVal = std::get<3>(t&: I->getSecond());
13019 LValue FiredLVal =
13020 CGF.EmitLValueForField(Base: BaseLVal, Field: std::get<2>(t&: I->getSecond()));
13021 llvm::Value *Res = CGF.EmitLoadOfScalar(lvalue: FiredLVal, Loc: D.getBeginLoc());
13022 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Arg: Res);
13023 llvm::BasicBlock *ThenBB = CGF.createBasicBlock(name: "lpc.then");
13024 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(name: "lpc.done");
13025 // if (Cmp) {
13026 CGF.Builder.CreateCondBr(Cond: Cmp, True: ThenBB, False: DoneBB);
13027 CGF.EmitBlock(BB: ThenBB);
13028 Address Addr = CGF.GetAddrOfLocalVar(VD);
13029 LValue LVal;
13030 if (VD->getType()->isReferenceType())
13031 LVal = CGF.EmitLoadOfReferenceLValue(RefAddr: Addr, RefTy: VD->getType(),
13032 Source: AlignmentSource::Decl);
13033 else
13034 LVal = CGF.MakeAddrLValue(Addr, T: VD->getType().getNonReferenceType(),
13035 Source: AlignmentSource::Decl);
13036 emitLastprivateConditionalUpdate(CGF, IVLVal: It->IVLVal, UniqueDeclName: Pair.second, LVal,
13037 Loc: D.getBeginLoc());
13038 auto AL = ApplyDebugLocation::CreateArtificial(CGF);
13039 CGF.EmitBlock(BB: DoneBB, /*IsFinal=*/IsFinished: true);
13040 // }
13041 }
13042}
13043
// Final step of lastprivate(conditional): copy the winning value from the
// internal global (created lazily by emitLastprivateConditionalUpdate) back
// into the variable's private lvalue. If the global was never created, the
// variable was never conditionally updated and nothing is copied.
// NOTE(review): assumes LastprivateConditionalStack is non-empty here —
// callers appear to guarantee a pushed region; confirm before reuse.
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(Key: VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(Name: UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeRawAddrLValue(
      V: GV, T: PrivLVal.getType().getNonReferenceType(), Alignment: PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(lvalue: LPLVal, Loc);
  CGF.EmitStoreOfScalar(value: Res, lvalue: PrivLVal);
}
13062
// Outlining a 'parallel' region requires the OpenMP runtime; unreachable in
// SIMD-only (-fopenmp-simd) mode.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13069
// Outlining a 'teams' region requires the OpenMP runtime; unreachable in
// SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13076
// Task outlining requires the OpenMP runtime; unreachable in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13084
// Launching a parallel region requires the OpenMP runtime; unreachable in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitParallelCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
    ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
    llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
    OpenMPSeverityClauseKind Severity, const Expr *Message) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13092
// 'critical' regions need runtime locks; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13099
// 'master' regions need runtime thread-id checks; unreachable in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13105
// 'masked' regions need runtime thread-id checks; unreachable in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13112
// 'taskyield' is a runtime call; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13117
// 'taskgroup' regions require the OpenMP runtime; unreachable in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13123
// 'single' regions (with copyprivate support) require the OpenMP runtime;
// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13131
// 'ordered' regions require the OpenMP runtime; unreachable in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13138
// Barriers are runtime calls; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13146
// Dynamic-schedule loop dispatch is a runtime service; unreachable in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13153
// Counterpart of emitForDispatchInit; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
                                                SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13158
// Static worksharing-loop init is a runtime call; unreachable in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13164
// 'distribute' static init is a runtime call; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13170
// Ordered-iteration bookkeeping is a runtime call; unreachable in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13177
// Static worksharing-loop finish is a runtime call; unreachable in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13183
// Fetching the next dynamic chunk is a runtime call; unreachable in
// SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13191
// 'num_threads' has no meaning without runtime threads; unreachable in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(
    CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
    OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity,
    SourceLocation SeverityLoc, const Expr *Message,
    SourceLocation MessageLoc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13199
// 'proc_bind' affects runtime thread placement; unreachable in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13205
// Threadprivate storage is managed by the runtime; unreachable in SIMD-only
// mode.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13212
// Threadprivate definitions need runtime registration; unreachable in
// SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13218
// Artificial threadprivate storage is a runtime service; unreachable in
// SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13223
// 'flush' is a runtime/memory-ordering call; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13230
// Tasks require the OpenMP runtime; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13239
// 'taskloop' requires the OpenMP runtime; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13246
// The only reduction form emittable without the runtime is the simple
// (serial) one; delegate it to the base implementation.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, OrgPrivates: Privates, OrgLHSExprs: LHSExprs, OrgRHSExprs: RHSExprs,
                                 OrgReductionOps: ReductionOps, Options);
}
13255
// Task reductions require the OpenMP runtime; unreachable in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13261
// Task-reduction teardown is a runtime call; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13267
// Task-reduction fixups are runtime-dependent; unreachable in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13274
// Task-reduction item lookup is a runtime call; unreachable in SIMD-only
// mode.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13281
// 'taskwait' is a runtime call; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13287
// Cancellation points are runtime calls; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13293
// 'cancel' is a runtime call; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13299
// Target offloading requires the OpenMP runtime; unreachable in SIMD-only
// mode.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13306
// Launching a target region requires the OpenMP runtime; unreachable in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13316
// Device-side function emission is never requested in SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13320
// Device-side global-variable emission is never requested in SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13324
// No target-specific handling in SIMD-only mode: report "not handled" so the
// caller emits the global normally.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
13328
// Launching a 'teams' region requires the OpenMP runtime; unreachable in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13336
// 'num_teams'/'thread_limit' have no meaning without the runtime;
// unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13343
// 'target data' mapping requires the OpenMP runtime; unreachable in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13350
// Standalone target-data directives (enter/exit data, update) are runtime
// calls; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13356
// Doacross loop setup is a runtime call; unreachable in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13362
// Doacross ordering via 'depend' clause is a runtime call; unreachable in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13367
// Doacross ordering via 'doacross' clause is a runtime call; unreachable in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDoacrossClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13372
// Parameter translation is only needed for runtime-outlined functions;
// unreachable in SIMD-only mode.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13378
// Parameter address mapping is only needed for runtime-outlined functions;
// unreachable in SIMD-only mode.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13385