1//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This provides a class for OpenMP runtime code generation.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGOpenMPRuntime.h"
14#include "ABIInfoImpl.h"
15#include "CGCXXABI.h"
16#include "CGCleanup.h"
17#include "CGDebugInfo.h"
18#include "CGRecordLayout.h"
19#include "CodeGenFunction.h"
20#include "TargetInfo.h"
21#include "clang/AST/APValue.h"
22#include "clang/AST/Attr.h"
23#include "clang/AST/Decl.h"
24#include "clang/AST/OpenMPClause.h"
25#include "clang/AST/StmtOpenMP.h"
26#include "clang/AST/StmtVisitor.h"
27#include "clang/Basic/DiagnosticFrontend.h"
28#include "clang/Basic/OpenMPKinds.h"
29#include "clang/Basic/SourceManager.h"
30#include "clang/CodeGen/ConstantInitBuilder.h"
31#include "llvm/ADT/ArrayRef.h"
32#include "llvm/ADT/SmallSet.h"
33#include "llvm/ADT/SmallVector.h"
34#include "llvm/ADT/StringExtras.h"
35#include "llvm/Bitcode/BitcodeReader.h"
36#include "llvm/IR/Constants.h"
37#include "llvm/IR/DerivedTypes.h"
38#include "llvm/IR/GlobalValue.h"
39#include "llvm/IR/InstrTypes.h"
40#include "llvm/IR/Value.h"
41#include "llvm/Support/AtomicOrdering.h"
42#include "llvm/Support/raw_ostream.h"
43#include <cassert>
44#include <cstdint>
45#include <numeric>
46#include <optional>
47
48using namespace clang;
49using namespace CodeGen;
50using namespace llvm::omp;
51
52namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Constructor for regions backed by a CapturedStmt (outlined constructs).
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Constructor for regions with no backing CapturedStmt (inlined
  /// constructs).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task-switching point for untied tasks; no-op by default,
  /// overridden by regions that support untied tasks.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Whether the region was created for a directive that may contain a
  /// 'cancel' construct.
  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Discriminator used by the derived classes' classof().
  CGOpenMPRegionKind RegionKind;
  /// Code generation sequence for the body of the region.
  RegionCodeGenTy CodeGen;
  /// Directive kind this region was created for.
  OpenMPDirectiveKind Kind;
  /// See hasCancel().
  bool HasCancel;
};
114
/// API for captured statement code generation in OpenMP constructs.
/// Used for regions outlined into a helper function for 'parallel'.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param ThreadIDVar Variable/parameter carrying the global thread id;
  ///        must be non-null (asserted below).
  /// \param HelperName Name to use for the outlined helper function.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Val: Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name for the outlined helper function (see getHelperName()).
  StringRef HelperName;
};
147
/// API for captured statement code generation in OpenMP constructs.
/// Used for regions outlined for 'task' directives; supports the extra
/// part-id switching machinery required by untied tasks.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the dispatch switch allowing an untied task
  /// to resume at the correct "part" after being rescheduled.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True for untied tasks (note: the ctor takes Tied and negates it).
    bool Untied;
    /// Variable holding the current part id of the untied task.
    const VarDecl *PartIDVar;
    /// Code generation to run at each switch point of the untied task.
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch over the part id; one case is added per emitted switch point.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        // Load the current part id and switch on it; the default destination
        // falls through to the function's return (task done).
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            Ptr: CGF.GetAddrOfLocalVar(VD: PartIDVar),
            PtrTy: PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(lvalue: PartIdLVal, Loc: PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(name: ".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(V: Res, Dest: DoneBB);
        CGF.EmitBlock(BB: DoneBB);
        CGF.EmitBranchThroughCleanup(Dest: CGF.ReturnBlock);
        // Part id 0 resumes at the start of the task body.
        CGF.EmitBlock(BB: CGF.createBasicBlock(name: ".untied.jmp."));
        UntiedSwitch->addCase(OnVal: CGF.Builder.getInt32(C: 0),
                              Dest: CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one task-switching point: store the next part id, run the
    /// untied codegen, branch out to the return block, and register the
    /// continuation block as a new case of the dispatch switch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            Ptr: CGF.GetAddrOfLocalVar(VD: PartIDVar),
            PtrTy: PartIDVar->getType()->castAs<PointerType>());
        // The number of existing cases doubles as the next part id.
        CGF.EmitStoreOfScalar(value: CGF.Builder.getInt32(C: UntiedSwitch->getNumCases()),
                              lvalue: PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(Name: ".untied.next.");
        CGF.EmitBranch(Block: CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(BB: CGF.createBasicBlock(name: ".untied.jmp."));
        UntiedSwitch->addCase(OnVal: CGF.Builder.getInt32(C: UntiedSwitch->getNumCases()),
                              Dest: CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(Dest: CurPoint);
        CGF.EmitBlock(BB: CurPoint.getBlock());
      }
    }
    /// Number of parts the untied task was split into so far.
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Val: Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
236
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries are delegated to the enclosing (outer) region
/// info when one exists.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  /// \param OldCSI The CapturedStmtInfo active before entering this inlined
  ///        region; restored by InlinedOpenMPRegionRAII on exit.
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(Val: OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region,no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  /// NOTE(review): unlike the other delegating methods, this queries
  /// getOldCSI() (any CGCapturedStmtInfo), not the OpenMP-only
  /// OuterRegionInfo — presumably so a non-OpenMP outer capture can still
  /// supply the name; confirm before "unifying" this with its siblings.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Val: Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info, or null if the outer capture
  /// is not an OpenMP region.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
319
/// API for captured statement code generation in OpenMP target
/// constructs. For this captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param HelperName Client-provided, application-unique name for the
  ///        target region's outlined function.
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Val: Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Unique name of the target region (see getHelperName()).
  StringRef HelperName;
};
348
/// Placeholder RegionCodeGenTy callback for CGOpenMPInnerExprInfo, which
/// only captures expressions and must never emit a region body.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already local; only globals need
      // privatization here.
      if (VD->isLocalVarDeclOrParm())
        continue;

      // Build a synthetic DeclRefExpr so EmitLValue produces the address of
      // the captured (global) variable, then register it as the private copy.
      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(LocalVD: VD, Addr: CGF.EmitLValue(E: &DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    // Globals were privatized in the constructor, so no field is needed.
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
410
/// RAII for emitting code of OpenMP constructs. Installs a
/// CGOpenMPInlinedRegionInfo on entry and restores the previous
/// CapturedStmtInfo (and, optionally, lambda/block capture state) on exit.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Saved lambda capture map, swapped out while the region is active.
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  /// Saved lambda 'this' capture field.
  FieldDecl *LambdaThisCaptureField = nullptr;
  /// Saved block info.
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  /// Whether lambda/block capture state was stashed and must be restored.
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  /// \param NoInheritance If true, the region does not inherit the enclosing
  /// lambda/block capture state; it is stashed here and cleared in CGF.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(a&: CGF.LambdaCaptureFields, b&: LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(Val: CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(a&: CGF.LambdaCaptureFields, b&: LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
453
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  /// Note: intentionally shares the 0x40 value with OMP_IDENT_BARRIER_IMPL.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
482
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                                 The string is composed of semi-colon separated
///                                 fields which describe the source file,
///                                 the function and a pair of line numbers that
///                                 delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
523
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
555
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  /// Action whose Exit() hook is run when the cleanup fires; not owned.
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    // Skip the exit action if the region terminated abnormally and there is
    // no insert point to emit into.
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};
569
570} // anonymous namespace
571
/// Run the stored codegen callback inside a fresh cleanups scope.
/// When a pre/post action is attached, its Exit() is registered as an EH/
/// normal cleanup so it runs even on abnormal exits from the region.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(Kind: NormalAndEHCleanup, A: PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    // No user action: pass a no-op action so Callback has something to call.
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
582
/// Check whether \p ReductionOp is a call whose callee (an OpaqueValueExpr
/// wrapping a DeclRefExpr) refers to a user-defined reduction, and if so
/// return the OMPDeclareReductionDecl; otherwise return null.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(Val: ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(Val: CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(Val: OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(Val: DRE->getDecl()))
          return DRD;
  return nullptr;
}
595
/// Emit initialization of \p Private from a user-defined reduction.
/// If the UDR has an explicit initializer, emit a call to the generated
/// initializer function with LHS/RHS privatized to \p Private / \p Original.
/// Otherwise, default-initialize \p Private from a null-constant global.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // Reduction.second is the UDR initializer function.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(D: DRD);
    const auto *CE = cast<CallExpr>(Val: InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(Val: CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(Val: cast<UnaryOperator>(Val: LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(Val: cast<UnaryOperator>(Val: RHS)->getSubExpr());
    // Map the initializer's priv/orig placeholder variables to the actual
    // addresses, then emit the call expression.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(LocalVD: cast<VarDecl>(Val: LHSDRE->getDecl()), Addr: Private);
    PrivateScope.addPrivate(LocalVD: cast<VarDecl>(Val: RHSDRE->getDecl()), Addr: Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(V: Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(E: InitOp);
  } else {
    // No explicit initializer: materialize a zero-initialized global of the
    // element type and copy from it according to the evaluation kind.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(T: Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName(Parts: {"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(V: GV, T: Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(T: Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(V: LV, Loc: DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(C: CGF.EmitLoadOfComplex(src: LV, loc: DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      // Aggregates are copied directly into Private via an lvalue mapping;
      // no RValue is produced, so return early.
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(E: &OVE, Location: Private, Quals: Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(E: &OVE, Location: Private, Quals: Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
649
/// Emit initialization of arrays of complex types.
/// Emits an element-by-element while-do loop over the destination array,
/// initializing each element either via the UDR initializer (walking the
/// source array in lockstep) or via a plain init expression.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(arrayType: ArrayTy, baseType&: ElementTy, addr&: DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(ElemTy: DestAddr.getElementType());

  // SrcBegin is only needed when a UDR initializer reads the original array.
  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(Ty: DestAddr.getElementType(), Ptr: DestBegin, IdxList: NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock(name: "omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock(name: "omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHS: DestBegin, RHS: DestEnd, Name: "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(Cond: IsEmpty, True: DoneBB, False: BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BB: BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(T: ElementTy);

  // PHIs carry the current source/destination element pointers across
  // loop iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(Ty: SrcBegin->getType(), NumReservedValues: 2,
                                          Name: "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(V: SrcBegin, BB: EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      Ty: DestBegin->getType(), NumReservedValues: 2, Name: "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(V: DestBegin, BB: EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize));

  // Emit copy.
  {
    // Scope so per-element cleanups run before advancing the pointers.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, InitOp: Init, Private: DestElementCurrent,
                                       Original: SrcElementCurrent, Ty: ElementTy);
    } else
      CGF.EmitAnyExprToMem(E: Init, Location: DestElementCurrent, Quals: ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the IR value name says "dest" but this advances the
    // *source* pointer; cosmetic only — the GEP operates on SrcElementPHI.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        Ty: SrcAddr.getElementType(), Ptr: SrcElementPHI, /*Idx0=*/1,
        Name: "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(V: SrcElementNext, BB: CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      Ty: DestAddr.getElementType(), Ptr: DestElementPHI, /*Idx0=*/1,
      Name: "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHS: DestElementNext, RHS: DestEnd, Name: "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Cond: Done, True: DoneBB, False: BodyBB);
  DestElementPHI->addIncoming(V: DestElementNext, BB: CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(BB: DoneBB, /*IsFinished=*/true);
}
738
/// Emit the lvalue for a shared reduction expression by delegating to
/// CodeGenFunction's generic OpenMP shared-lvalue emission.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
742
/// Emit the upper-bound lvalue of an array-section reduction expression.
/// Returns a default-constructed LValue for non-array-section expressions,
/// which have no separate upper bound.
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(Val: E))
    return CGF.EmitArraySectionExpr(E: OASE, /*IsLowerBound=*/false);
  return LValue();
}
749
/// Emit initialization of the N-th aggregate reduction item.
/// Chooses between the UDR initializer (when the UDR has one, or the private
/// copy has no init of its own) and the private variable's own initializer,
/// then delegates the element loop to EmitOMPAggregateInit.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(Val: cast<DeclRefExpr>(Val: ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, DestAddr: PrivateAddr, Type: PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       Init: EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                 : PrivateVD->getInit(),
                       DRD, SrcAddr: SharedAddr);
}
766
767ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
768 ArrayRef<const Expr *> Origs,
769 ArrayRef<const Expr *> Privates,
770 ArrayRef<const Expr *> ReductionOps) {
771 ClausesData.reserve(N: Shareds.size());
772 SharedAddresses.reserve(N: Shareds.size());
773 Sizes.reserve(N: Shareds.size());
774 BaseDecls.reserve(N: Shareds.size());
775 const auto *IOrig = Origs.begin();
776 const auto *IPriv = Privates.begin();
777 const auto *IRed = ReductionOps.begin();
778 for (const Expr *Ref : Shareds) {
779 ClausesData.emplace_back(Args&: Ref, Args: *IOrig, Args: *IPriv, Args: *IRed);
780 std::advance(i&: IOrig, n: 1);
781 std::advance(i&: IPriv, n: 1);
782 std::advance(i&: IRed, n: 1);
783 }
784}
785
786void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
787 assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
788 "Number of generated lvalues must be exactly N.");
789 LValue First = emitSharedLValue(CGF, E: ClausesData[N].Shared);
790 LValue Second = emitSharedLValueUB(CGF, E: ClausesData[N].Shared);
791 SharedAddresses.emplace_back(Args&: First, Args&: Second);
792 if (ClausesData[N].Shared == ClausesData[N].Ref) {
793 OrigAddresses.emplace_back(Args&: First, Args&: Second);
794 } else {
795 LValue First = emitSharedLValue(CGF, E: ClausesData[N].Ref);
796 LValue Second = emitSharedLValueUB(CGF, E: ClausesData[N].Ref);
797 OrigAddresses.emplace_back(Args&: First, Args&: Second);
798 }
799}
800
/// Compute and record the size (in chars and in elements) of the N-th
/// reduction item. For variably-modified types, also binds the computed
/// element count to the VLA size expression and re-emits the type.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(Val: ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-size item: record byte size only; element count stays null.
    Sizes.emplace_back(
        Args: CGF.getTypeSize(Ty: OrigAddresses[N].first.getType().getNonReferenceType()),
        Args: nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(Ty: ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1 (section bounds are inclusive),
    // then bytes = count * sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(ElemTy: ElemType,
                                     LHS: OrigAddresses[N].second.getPointer(CGF),
                                     RHS: OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateZExtOrTrunc(V: Size, DestTy: ElemSizeOf->getType());
    Size = CGF.Builder.CreateNUWAdd(
        LHS: Size, RHS: llvm::ConstantInt::get(Ty: Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(LHS: Size, RHS: ElemSizeOf);
  } else {
    // Whole-object VLA: take byte size and derive the element count.
    SizeInChars =
        CGF.getTypeSize(Ty: OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(LHS: SizeInChars, RHS: ElemSizeOf);
  }
  Sizes.emplace_back(Args&: SizeInChars, Args&: Size);
  // Bind the element count to the VLA's size expression so the variably
  // modified private type can be (re-)emitted with a concrete size.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          Val: CGF.getContext().getAsVariableArrayType(T: PrivateType)->getSizeExpr()),
      RValue::get(V: Size));
  CGF.EmitVariablyModifiedType(Ty: PrivateType);
}
835
836void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
837 llvm::Value *Size) {
838 QualType PrivateType = getPrivateType(N);
839 if (!PrivateType->isVariablyModifiedType()) {
840 assert(!Size && !Sizes[N].second &&
841 "Size should be nullptr for non-variably modified reduction "
842 "items.");
843 return;
844 }
845 CodeGenFunction::OpaqueValueMapping OpaqueMap(
846 CGF,
847 cast<OpaqueValueExpr>(
848 Val: CGF.getContext().getAsVariableArrayType(T: PrivateType)->getSizeExpr()),
849 RValue::get(V: Size));
850 CGF.EmitVariablyModifiedType(Ty: PrivateType);
851}
852
/// Emit the initializer for the N-th private reduction copy, choosing between
/// aggregate init, a user-defined 'declare reduction' initializer, and the
/// private variable's own initializer.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(Val: cast<DeclRefExpr>(Val: ClausesData[N].Private)->getDecl());
  // DRD is non-null when this item uses a user-defined reduction.
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ReductionOp: ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(T: PrivateVD->getType())) {
    // Array item: run DefaultInit first only when the user-defined reduction
    // supplies its own initializer, then emit element-wise initialization.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar item with a user-defined reduction initializer (or no private
    // default init): emit the 'declare reduction' initializer.
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, InitOp: ClausesData[N].ReductionOp,
                                     Private: PrivateAddr, Original: SharedAddr, Ty: SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(Init: PrivateVD->getInit())) {
    // Fall back to the private variable's own (non-trivial) initializer when
    // DefaultInit did not already handle the initialization.
    CGF.EmitAnyExprToMem(E: PrivateVD->getInit(), Location: PrivateAddr,
                         Quals: PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
877
878bool ReductionCodeGen::needCleanups(unsigned N) {
879 QualType PrivateType = getPrivateType(N);
880 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
881 return DTorKind != QualType::DK_none;
882}
883
884void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
885 Address PrivateAddr) {
886 QualType PrivateType = getPrivateType(N);
887 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
888 if (needCleanups(N)) {
889 PrivateAddr =
890 PrivateAddr.withElementType(ElemTy: CGF.ConvertTypeForMem(T: PrivateType));
891 CGF.pushDestroy(dtorKind: DTorKind, addr: PrivateAddr, type: PrivateType);
892 }
893}
894
/// Dereference through the pointer/reference levels of \p BaseTy, loading
/// through \p BaseLV at each level, until the element type \p ElTy is reached.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(T1: BaseTy, T2: ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      // Pointer level: load the pointee address.
      BaseLV = CGF.EmitLoadOfPointerLValue(Ptr: BaseLV.getAddress(), PtrTy);
    } else {
      // Reference level: load the referenced address.
      LValue RefLVal = CGF.MakeAddrLValue(Addr: BaseLV.getAddress(), T: BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // Rebuild the lvalue using the element type's memory representation while
  // preserving the original base info and TBAA metadata.
  return CGF.MakeAddrLValue(
      Addr: BaseLV.getAddress().withElementType(ElemTy: CGF.ConvertTypeForMem(T: ElTy)),
      T: BaseLV.getType(), BaseInfo: BaseLV.getBaseInfo(),
      TBAAInfo: CGF.CGM.getTBAAInfoForSubobject(Base: BaseLV, AccessType: BaseLV.getType()));
}
913
/// Build a chain of temporaries mirroring the pointer/reference levels of
/// \p BaseTy and store \p Addr at the innermost level, so the result can be
/// used wherever an lvalue of the original base type is expected.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  RawAddress Tmp = RawAddress::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  // Allocate one temporary per pointer/reference level; each temp stores the
  // address of the next inner one. MostTopTmp ends up as the outermost temp.
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(T1: BaseTy, T2: ElTy)) {
    Tmp = CGF.CreateMemTemp(T: BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Val: Tmp.getPointer(), Addr: TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    // Store the adjusted address into the innermost temporary and hand back
    // the outermost one.
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        V: Addr, DestTy: Tmp.getElementType());
    CGF.Builder.CreateStore(Val: Addr, Addr: Tmp);
    return MostTopTmp;
  }

  // No indirection levels: simply retarget the original base address at Addr.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      V: Addr, DestTy: OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(NewPointer: Addr, IsKnownNonNull: NotKnownNonNull);
}
942
943static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
944 const VarDecl *OrigVD = nullptr;
945 if (const auto *OASE = dyn_cast<ArraySectionExpr>(Val: Ref)) {
946 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
947 while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Val: Base))
948 Base = TempOASE->getBase()->IgnoreParenImpCasts();
949 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Val: Base))
950 Base = TempASE->getBase()->IgnoreParenImpCasts();
951 DE = cast<DeclRefExpr>(Val: Base);
952 OrigVD = cast<VarDecl>(Val: DE->getDecl());
953 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Val: Ref)) {
954 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
955 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Val: Base))
956 Base = TempASE->getBase()->IgnoreParenImpCasts();
957 DE = cast<DeclRefExpr>(Val: Base);
958 OrigVD = cast<VarDecl>(Val: DE->getDecl());
959 }
960 return OrigVD;
961}
962
/// Adjust the private copy's address so it lines up with the original base
/// variable when the reduction item is an array section/subscript that may
/// start in the middle of the base.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(Ref: ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(Args&: OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(E: DE);
    // Walk through any pointer/reference indirection to the start of the base.
    LValue BaseLValue =
        loadToBegin(CGF, BaseTy: OrigVD->getType(), ElTy: SharedAddresses[N].first.getType(),
                    BaseLV: OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    // Offset (in elements) of the base start relative to the shared address.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        ElemTy: SharedAddr.getElementType(), LHS: BaseLValue.getPointer(CGF),
        RHS: SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            V: PrivateAddr.emitRawPointer(CGF), DestTy: SharedAddr.getType());
    // Apply the same displacement to the private copy.
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        Ty: SharedAddr.getElementType(), Ptr: PrivatePointer, IdxList: Adjustment);
    // Wrap the adjusted pointer so it is usable through the base declaration's
    // original pointer/reference structure.
    return castToBase(CGF, BaseTy: OrigVD->getType(),
                      ElTy: SharedAddresses[N].first.getType(),
                      OriginalBaseAddress: OriginalBaseLValue.getAddress(), Addr: Ptr);
  }
  // Plain variable reference: no adjustment required.
  BaseDecls.emplace_back(
      Args: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
989
990bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
991 const OMPDeclareReductionDecl *DRD =
992 getReductionInit(ReductionOp: ClausesData[N].ReductionOp);
993 return DRD && DRD->getInitializer();
994}
995
996LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
997 return CGF.EmitLoadOfPointerLValue(
998 Ptr: CGF.GetAddrOfLocalVar(VD: getThreadIDVariable()),
999 PtrTy: getThreadIDVariable()->getType()->castAs<PointerType>());
1000}
1001
/// Emit the statements of an OpenMP region body under a terminate scope.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  // Nothing to emit if the current block is already terminated.
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  // Enforce this by emitting the body inside a terminate scope: an escaping
  // exception terminates instead of unwinding out of the region.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1016
1017LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1018 CodeGenFunction &CGF) {
1019 return CGF.MakeAddrLValue(Addr: CGF.GetAddrOfLocalVar(VD: getThreadIDVariable()),
1020 T: getThreadIDVariable()->getType(),
1021 Source: AlignmentSource::Decl);
1022}
1023
1024static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1025 QualType FieldTy) {
1026 auto *Field = FieldDecl::Create(
1027 C, DC, StartLoc: SourceLocation(), IdLoc: SourceLocation(), /*Id=*/nullptr, T: FieldTy,
1028 TInfo: C.getTrivialTypeSourceInfo(T: FieldTy, Loc: SourceLocation()),
1029 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1030 Field->setAccess(AS_public);
1031 DC->addDecl(D: Field);
1032 return Field;
1033}
1034
/// Construct the OpenMP runtime support object and configure the underlying
/// OpenMPIRBuilder for this translation unit.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  // kmp_critical_name is represented as an array of eight i32s.
  KmpCriticalNameTy = llvm::ArrayType::get(ElementType: CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  Config.setDefaultTargetAS(
      CGM.getContext().getTargetInfo().getTargetAddressSpace(AS: LangAS::Default));
  Config.setRuntimeCC(CGM.getRuntimeCC());

  // The configuration must be installed before initialize() runs.
  OMPBuilder.setConfig(Config);
  OMPBuilder.initialize();
  // When compiling for the device, pre-load offload entry metadata from the
  // host IR file so device entries can be matched with their host side.
  OMPBuilder.loadOffloadInfoMetadata(VFS&: *CGM.getFileSystem(),
                                     HostFilePath: CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}
1061
1062void CGOpenMPRuntime::clear() {
1063 InternalVars.clear();
1064 // Clean non-target variable declarations possibly used only in debug info.
1065 for (const auto &Data : EmittedNonTargetVariables) {
1066 if (!Data.getValue().pointsToAliveValue())
1067 continue;
1068 auto *GV = dyn_cast<llvm::GlobalVariable>(Val: Data.getValue());
1069 if (!GV)
1070 continue;
1071 if (!GV->isDeclaration() || GV->getNumUses() > 0)
1072 continue;
1073 GV->eraseFromParent();
1074 }
1075}
1076
1077std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1078 return OMPBuilder.createPlatformSpecificName(Parts);
1079}
1080
/// Emit the outlined combiner or initializer function for a user-defined
/// reduction: 'void .omp_combiner.(Ty *out, Ty *in)' (or .omp_initializer.),
/// mapping the 'omp_in'/'omp_out' (or orig/priv) variables onto the
/// dereferenced parameters.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(T: Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(Elt: &OmpOutParm);
  Args.push_back(Elt: &OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(Info: FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      Parts: {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(Ty: FnTy, Linkage: llvm::GlobalValue::InternalLinkage,
                                    N: Name, M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: FnInfo);
  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
    Fn->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
  // These helpers are trivial; force-inline them when optimizing.
  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
    Fn->removeFnAttr(Kind: llvm::Attribute::NoInline);
    Fn->removeFnAttr(Kind: llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(Kind: llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo, Args, Loc: In->getLocation(),
                    StartLoc: Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(VD: &OmpInParm);
  Scope.addPrivate(
      LocalVD: In, Addr: CGF.EmitLoadOfPointerLValue(Ptr: AddrIn, PtrTy: PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(VD: &OmpOutParm);
  Scope.addPrivate(
      LocalVD: Out, Addr: CGF.EmitLoadOfPointerLValue(Ptr: AddrOut, PtrTy: PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  // For an initializer without an explicit init expression, emit the 'priv'
  // variable's own non-trivial initializer into the out parameter.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Init: Out->getInit())) {
    CGF.EmitAnyExprToMem(E: Out->getInit(), Location: CGF.GetAddrOfLocalVar(VD: Out),
                         Quals: Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(E: CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1137
1138void CGOpenMPRuntime::emitUserDefinedReduction(
1139 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1140 if (UDRMap.count(Val: D) > 0)
1141 return;
1142 llvm::Function *Combiner = emitCombinerOrInitializer(
1143 CGM, Ty: D->getType(), CombinerInitializer: D->getCombiner(),
1144 In: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D->getCombinerIn())->getDecl()),
1145 Out: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D->getCombinerOut())->getDecl()),
1146 /*IsCombiner=*/true);
1147 llvm::Function *Initializer = nullptr;
1148 if (const Expr *Init = D->getInitializer()) {
1149 Initializer = emitCombinerOrInitializer(
1150 CGM, Ty: D->getType(),
1151 CombinerInitializer: D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
1152 : nullptr,
1153 In: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D->getInitOrig())->getDecl()),
1154 Out: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D->getInitPriv())->getDecl()),
1155 /*IsCombiner=*/false);
1156 }
1157 UDRMap.try_emplace(Key: D, Args&: Combiner, Args&: Initializer);
1158 if (CGF)
1159 FunctionUDRMap[CGF->CurFn].push_back(Elt: D);
1160}
1161
1162std::pair<llvm::Function *, llvm::Function *>
1163CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1164 auto I = UDRMap.find(Val: D);
1165 if (I != UDRMap.end())
1166 return I->second;
1167 emitUserDefinedReduction(/*CGF=*/nullptr, D);
1168 return UDRMap.lookup(Val: D);
1169}
1170
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present. Pushes a finalization callback on construction
// and pops it on destruction; a null builder makes both operations no-ops.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      // Route cancellation through clang's cleanup machinery at IP.
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(Kind: OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
      return llvm::Error::success();
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(FI: std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Null when no IR builder is in use; then this RAII does nothing.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1216
1217static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1218 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1219 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1220 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1221 assert(ThreadIDVar->getType()->isPointerType() &&
1222 "thread id variable must be of type kmp_int32 *");
1223 CodeGenFunction CGF(CGM, true);
1224 bool HasCancel = false;
1225 if (const auto *OPD = dyn_cast<OMPParallelDirective>(Val: &D))
1226 HasCancel = OPD->hasCancel();
1227 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(Val: &D))
1228 HasCancel = OPD->hasCancel();
1229 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(Val: &D))
1230 HasCancel = OPSD->hasCancel();
1231 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(Val: &D))
1232 HasCancel = OPFD->hasCancel();
1233 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(Val: &D))
1234 HasCancel = OPFD->hasCancel();
1235 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(Val: &D))
1236 HasCancel = OPFD->hasCancel();
1237 else if (const auto *OPFD =
1238 dyn_cast<OMPTeamsDistributeParallelForDirective>(Val: &D))
1239 HasCancel = OPFD->hasCancel();
1240 else if (const auto *OPFD =
1241 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(Val: &D))
1242 HasCancel = OPFD->hasCancel();
1243
1244 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1245 // parallel region to make cancellation barriers work properly.
1246 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1247 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1248 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1249 HasCancel, OutlinedHelperName);
1250 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1251 return CGF.GenerateOpenMPCapturedStmtFunction(S: *CS, D);
1252}
1253
1254std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
1255 std::string Suffix = getName(Parts: {"omp_outlined"});
1256 return (Name + Suffix).str();
1257}
1258
1259std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
1260 return getOutlinedHelperName(Name: CGF.CurFn->getName());
1261}
1262
1263std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
1264 std::string Suffix = getName(Parts: {"omp", "reduction", "reduction_func"});
1265 return (Name + Suffix).str();
1266}
1267
1268llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1269 CodeGenFunction &CGF, const OMPExecutableDirective &D,
1270 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1271 const RegionCodeGenTy &CodeGen) {
1272 const CapturedStmt *CS = D.getCapturedStmt(RegionKind: OMPD_parallel);
1273 return emitParallelOrTeamsOutlinedFunction(
1274 CGM, D, CS, ThreadIDVar, InnermostKind, OutlinedHelperName: getOutlinedHelperName(CGF),
1275 CodeGen);
1276}
1277
1278llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1279 CodeGenFunction &CGF, const OMPExecutableDirective &D,
1280 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1281 const RegionCodeGenTy &CodeGen) {
1282 const CapturedStmt *CS = D.getCapturedStmt(RegionKind: OMPD_teams);
1283 return emitParallelOrTeamsOutlinedFunction(
1284 CGM, D, CS, ThreadIDVar, InnermostKind, OutlinedHelperName: getOutlinedHelperName(CGF),
1285 CodeGen);
1286}
1287
/// Emit the outlined function for a 'task'/'taskloop' region, wiring up the
/// untied-task re-dispatch action when the task is not tied.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // For untied tasks: re-enqueue the task via __kmpc_omp_task so it can
  // resume at the next part id.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, Loc: D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc: D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(Ptr: CGF.GetAddrOfLocalVar(VD: TaskTVar),
                                    PtrTy: TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_task),
                        args: TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop directives capture their body under OMPD_taskloop.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(DKind: D.getDirectiveKind()) ? OMPD_taskloop
                                                        : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(RegionKind: Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(Val: &D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(Val: &D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(Val: &D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(Val: &D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(S: *CS);
  // Untied tasks report how many resumable parts were generated.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1334
1335void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1336 bool AtCurrentPoint) {
1337 auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1338 assert(!Elem.ServiceInsertPt && "Insert point is set already.");
1339
1340 llvm::Value *Undef = llvm::UndefValue::get(T: CGF.Int32Ty);
1341 if (AtCurrentPoint) {
1342 Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt",
1343 CGF.Builder.GetInsertBlock());
1344 } else {
1345 Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1346 Elem.ServiceInsertPt->insertAfter(InsertPos: CGF.AllocaInsertPt->getIterator());
1347 }
1348}
1349
1350void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1351 auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1352 if (Elem.ServiceInsertPt) {
1353 llvm::Instruction *Ptr = Elem.ServiceInsertPt;
1354 Elem.ServiceInsertPt = nullptr;
1355 Ptr->eraseFromParent();
1356 }
1357}
1358
1359static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1360 SourceLocation Loc,
1361 SmallString<128> &Buffer) {
1362 llvm::raw_svector_ostream OS(Buffer);
1363 // Build debug location
1364 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1365 OS << ";";
1366 if (auto *DbgInfo = CGF.getDebugInfo())
1367 OS << DbgInfo->remapDIPath(PLoc.getFilename());
1368 else
1369 OS << PLoc.getFilename();
1370 OS << ";";
1371 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(Val: CGF.CurFuncDecl))
1372 OS << FD->getQualifiedNameAsString();
1373 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1374 return OS.str();
1375}
1376
/// Build (or reuse) the ident_t value describing \p Loc with the given
/// runtime \p Flags for passing to OpenMP runtime calls.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  // Without debug info (unless EmitLoc forces emission) or with an invalid
  // location, fall back to the default source-location string.
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    std::string FileName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(Val: CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    // Prefer the debug-info-remapped path when debug info is enabled.
    if (auto *DbgInfo = CGF.getDebugInfo())
      FileName = DbgInfo->remapDIPath(PLoc.getFilename());
    else
      FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, Flags: llvm::omp::IdentFlag(Flags), Reserve2Flags: Reserved2Flags);
}
1405
/// Obtain the OpenMP thread id for the current function, reusing a cached
/// value, the outlined region's thread-id argument, or a fresh
/// __kmpc_global_thread_num call, in that order of preference.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(Loc: CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        LocStr: getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        Ident: OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(Val: CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only load from the argument when it is safe: no landing pads, or the
      // load happens in the entry block / the current block so the value
      // dominates every use.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(Val: LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(Val: LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(Val: LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(lvalue: LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock)
          OpenMPLocThreadIDMap[CGF.CurFn].ThreadID = ThreadID;
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (!Elem.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the runtime call at the service insert point (function entry) so the
  // cached value dominates all uses.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.ServiceInsertPt);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: Loc);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      Callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(),
                                                FnID: OMPRTL___kmpc_global_thread_num),
      Args: emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.ThreadID = Call;
  return Call;
}
1473
1474void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1475 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1476 if (OpenMPLocThreadIDMap.count(Val: CGF.CurFn)) {
1477 clearLocThreadIdInsertPt(CGF);
1478 OpenMPLocThreadIDMap.erase(Val: CGF.CurFn);
1479 }
1480 if (auto I = FunctionUDRMap.find(Val: CGF.CurFn); I != FunctionUDRMap.end()) {
1481 for (const auto *D : I->second)
1482 UDRMap.erase(Val: D);
1483 FunctionUDRMap.erase(I);
1484 }
1485 if (auto I = FunctionUDMMap.find(Val: CGF.CurFn); I != FunctionUDMMap.end()) {
1486 for (const auto *D : I->second)
1487 UDMMap.erase(Val: D);
1488 FunctionUDMMap.erase(I);
1489 }
1490 LastprivateConditionalToTypes.erase(Val: CGF.CurFn);
1491 FunctionToUntiedTaskStackMap.erase(Val: CGF.CurFn);
1492}
1493
1494llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1495 return OMPBuilder.IdentPtr;
1496}
1497
1498static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1499convertDeviceClause(const VarDecl *VD) {
1500 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1501 OMPDeclareTargetDeclAttr::getDeviceType(VD);
1502 if (!DevTy)
1503 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1504
1505 switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1506 case OMPDeclareTargetDeclAttr::DT_Host:
1507 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1508 break;
1509 case OMPDeclareTargetDeclAttr::DT_NoHost:
1510 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1511 break;
1512 case OMPDeclareTargetDeclAttr::DT_Any:
1513 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1514 break;
1515 default:
1516 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1517 break;
1518 }
1519}
1520
1521static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1522convertCaptureClause(const VarDecl *VD) {
1523 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1524 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1525 if (!MapType)
1526 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1527 switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1528 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1529 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1530 break;
1531 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1532 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Local:
1533 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1534 break;
1535 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1536 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1537 break;
1538 default:
1539 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1540 break;
1541 }
1542}
1543
/// Build a TargetRegionEntryInfo (file/line-based unique offload-entry id)
/// from the presumed source location \p BeginLoc, optionally tagged with
/// \p ParentName.
static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
    CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
    SourceLocation BeginLoc, llvm::StringRef ParentName = "") {

  // Deferred callback: the builder invokes it only when it actually needs the
  // file/line pair to compute the unique id.
  auto FileInfoCallBack = [&]() {
    SourceManager &SM = CGM.getContext().getSourceManager();
    PresumedLoc PLoc = SM.getPresumedLoc(Loc: BeginLoc);

    // If the filename produced by #line directives does not exist on disk,
    // fall back to the real spelling location so the entry stays resolvable.
    if (!CGM.getFileSystem()->exists(Path: PLoc.getFilename()))
      PLoc = SM.getPresumedLoc(Loc: BeginLoc, /*UseLineDirectives=*/false);

    return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
  };

  return OMPBuilder.getTargetEntryUniqueInfo(CallBack: FileInfoCallBack,
                                             VFS&: *CGM.getFileSystem(), ParentName);
}
1561
/// Return the address to be used for the 'declare target' variable \p VD,
/// delegating creation of the device-side reference / offload entry to the
/// OpenMPIRBuilder. Returns an invalid address when the builder decides no
/// special declare-target address is needed.
ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // Deferred callbacks: the global and its linkage are only materialized if
  // the builder actually needs them.
  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(GD: VD); };

  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  std::vector<llvm::GlobalVariable *> GeneratedRefs;

  // LLVM type of a pointer to VD; also the value type of the returned
  // ConstantAddress.
  llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
      T: CGM.getContext().getPointerType(T: VD->getType()));
  llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
      CaptureClause: convertCaptureClause(VD), DeviceClause: convertDeviceClause(VD),
      IsDeclaration: VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      IsExternallyVisible: VD->isExternallyVisible(),
      EntryInfo: getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                           BeginLoc: VD->getCanonicalDecl()->getBeginLoc()),
      MangledName: CGM.getMangledName(GD: VD), GeneratedRefs, OpenMPSIMD: CGM.getLangOpts().OpenMPSimd,
      TargetTriple: CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, GlobalInitializer: AddrOfGlobal,
      VariableLinkage: LinkageForVariable);

  if (!addr)
    return ConstantAddress::invalid();
  return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(D: VD));
}
1587
/// Return (creating on first use) the internal cache global that
/// __kmpc_threadprivate_cached uses for variable \p VD. Only meaningful when
/// TLS-based threadprivate is not in effect (see the assertion).
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName(Parts: {"cache", ""});
  return OMPBuilder.getOrCreateInternalVariable(
      Ty: CGM.Int8PtrPtrTy, Name: Twine(CGM.getMangledName(GD: VD)).concat(Suffix).str());
}
1597
/// Return the address of the current thread's copy of threadprivate variable
/// \p VD. When native TLS is used, \p VDAddr is already thread-local and is
/// returned as-is; otherwise emit a __kmpc_threadprivate_cached call that
/// fetches (or lazily creates) this thread's copy.
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  // Args: (loc, gtid, original var address, size, per-variable cache).
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreatePointerCast(V: VDAddr.emitRawPointer(CGF), DestTy: CGM.Int8PtrTy),
      CGM.getSize(numChars: CGM.GetTargetTypeStoreSize(Ty: VarTy)),
      getOrCreateThreadPrivateCache(VD)};
  // The runtime returns an i8* to the per-thread copy; keep the original
  // alignment of the variable.
  return Address(
      CGF.EmitRuntimeCall(
          callee: OMPBuilder.getOrCreateRuntimeFunction(
              M&: CGM.getModule(), FnID: OMPRTL___kmpc_threadprivate_cached),
          args: Args),
      CGF.Int8Ty, VDAddr.getAlignment());
}
1619
/// Emit the runtime calls that hook up ctor/copy-ctor/dtor handling for one
/// threadprivate variable: first __kmpc_global_thread_num (which forces
/// runtime library initialization), then __kmpc_threadprivate_register.
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                          M&: CGM.getModule(), FnID: OMPRTL___kmpc_global_thread_num),
                      args: OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc,
      CGF.Builder.CreatePointerCast(V: VDAddr.emitRawPointer(CGF), DestTy: CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      callee: OMPBuilder.getOrCreateRuntimeFunction(
          M&: CGM.getModule(), FnID: OMPRTL___kmpc_threadprivate_register),
      args: Args);
}
1640
/// Emit the constructor/destructor support functions for a threadprivate
/// variable definition and register them with the runtime.
///
/// Emission happens at most once per mangled name (tracked via
/// ThreadPrivateWithDefinition). When native TLS handles threadprivate,
/// nothing is emitted. Returns a newly created global init function when one
/// is needed and no CodeGenFunction \p CGF was supplied (so the caller can
/// schedule it as a global initializer); returns nullptr otherwise.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(C&: CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(key: CGM.getMangledName(GD: VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // Signature: void *__kmpc_global_ctor_(void *Dst) — receives the
      // address of the per-thread copy and returns it.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(Elt: &Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          resultType: CGM.getContext().VoidPtrTy, args: Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(Info: FI);
      std::string Name = getName(Parts: {"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(ty: FTy, name: Name, FI, Loc);
      CtorCGF.StartFunction(GD: GlobalDecl(), RetTy: CGM.getContext().VoidPtrTy, Fn, FnInfo: FI,
                            Args, Loc, StartLoc: Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          Addr: CtorCGF.GetAddrOfLocalVar(VD: &Dst), /*Volatile=*/false,
          Ty: CGM.getContext().VoidPtrTy, Loc: Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(T: ASTTy),
                  VDAddr.getAlignment());
      // Run the variable's initializer expression into the per-thread copy.
      CtorCGF.EmitAnyExprToMem(E: Init, Location: Arg, Quals: Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Return the destination pointer, as the runtime expects.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          Addr: CtorCGF.GetAddrOfLocalVar(VD: &Dst), /*Volatile=*/false,
          Ty: CGM.getContext().VoidPtrTy, Loc: Dst.getLocation());
      CtorCGF.Builder.CreateStore(Val: ArgVal, Addr: CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // Signature: void __kmpc_global_dtor_(void *Dst) — destroys the
      // per-thread copy passed in by the runtime.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(Elt: &Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          resultType: CGM.getContext().VoidTy, args: Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(Info: FI);
      std::string Name = getName(Parts: {"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(ty: FTy, name: Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CGF&: DtorCGF);
      DtorCGF.StartFunction(GD: GlobalDecl(), RetTy: CGM.getContext().VoidTy, Fn, FnInfo: FI, Args,
                            Loc, StartLoc: Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(CGF&: DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          Addr: DtorCGF.GetAddrOfLocalVar(VD: &Dst),
          /*Volatile=*/false, Ty: CGM.getContext().VoidPtrTy, Loc: Dst.getLocation());
      DtorCGF.emitDestroy(
          addr: Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), type: ASTTy,
          destroyer: DtorCGF.getDestroyer(destructionKind: ASTTy.isDestructedType()),
          useEHCleanupForArray: DtorCGF.needsEHCleanup(kind: ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(Ty: CGM.DefaultPtrTy);
    if (Ctor == nullptr) {
      Ctor = llvm::Constant::getNullValue(Ty: CGM.DefaultPtrTy);
    }
    if (Dtor == nullptr) {
      Dtor = llvm::Constant::getNullValue(Ty: CGM.DefaultPtrTy);
    }
    if (!CGF) {
      // No enclosing function: create a dedicated init function the caller
      // can register as a global initializer.
      auto *InitFunctionTy =
          llvm::FunctionType::get(Result: CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName(Parts: {"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          ty: InitFunctionTy, name: Name, FI: CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GD: GlobalDecl(), RetTy: CGM.getContext().VoidTy, Fn: InitFunction,
                            FnInfo: CGM.getTypes().arrangeNullaryFunction(), Args: ArgList,
                            Loc, StartLoc: Loc);
      emitThreadPrivateVarInit(CGF&: InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration inline into the provided function.
    emitThreadPrivateVarInit(CGF&: *CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1750
/// Handle a 'declare target ... indirect' function \p FD: create a device
/// global holding the function's address and register it in the offload-entry
/// table so the runtime can resolve indirect calls.
void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
                                                llvm::GlobalValue *GV) {
  std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
      OMPDeclareTargetDeclAttr::getActiveAttr(VD: FD);

  // We only need to handle active 'indirect' declare target functions.
  if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
    return;

  // Get a mangled name to store the new device global in.
  llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
      CGM, OMPBuilder, BeginLoc: FD->getCanonicalDecl()->getBeginLoc(), ParentName: FD->getName());
  SmallString<128> Name;
  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);

  // We need to generate a new global to hold the address of the indirectly
  // called device function. Doing this allows us to keep the visibility and
  // linkage of the associated function unchanged while allowing the runtime to
  // access its value.
  llvm::GlobalValue *Addr = GV;
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    // Function pointers live in the program address space; the global holding
    // one lives in the default globals address space.
    llvm::PointerType *FnPtrTy = llvm::PointerType::get(
        C&: CGM.getLLVMContext(),
        AddressSpace: CGM.getModule().getDataLayout().getProgramAddressSpace());
    Addr = new llvm::GlobalVariable(
        CGM.getModule(), FnPtrTy,
        /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
        nullptr, llvm::GlobalValue::NotThreadLocal,
        CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
    Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
  }

  // Register the indirect Vtable:
  // This is similar to OMPTargetGlobalVarEntryIndirect, except that the
  // size field refers to the size of memory pointed to, not the size of
  // the pointer symbol itself (which is implicitly the size of a pointer).
  OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName: Name, Addr, VarSize: CGM.GetTargetTypeStoreSize(Ty: CGM.VoidPtrTy).getQuantity(),
      Flags: llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
      Linkage: llvm::GlobalValue::WeakODRLinkage);
}
1792
/// Register \p VTable as an offload entry so the device runtime can map
/// vtable pointers for dynamic objects appearing in map clauses of the
/// directive that references \p VD.
void CGOpenMPRuntime::registerVTableOffloadEntry(llvm::GlobalVariable *VTable,
                                                 const VarDecl *VD) {
  // TODO: add logic to avoid duplicate vtable registrations per
  // translation unit; though for external linkage, this should no
  // longer be an issue - or at least we can avoid the issue by
  // checking for an existing offloading entry. But, perhaps the
  // better approach is to defer emission of the vtables and offload
  // entries until later (by tracking a list of items that need to be
  // emitted).

  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

  // Generate a new externally visible global to point to the
  // internally visible vtable. Doing this allows us to keep the
  // visibility and linkage of the associated vtable unchanged while
  // allowing the runtime to access its value. The externally
  // visible global var needs to be emitted with a unique mangled
  // name that won't conflict with similarly named (internal)
  // vtables in other translation units.

  // Register vtable with source location of dynamic object in map
  // clause.
  llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
      CGM, OMPBuilder, BeginLoc: VD->getCanonicalDecl()->getBeginLoc(),
      ParentName: VTable->getName());

  llvm::GlobalVariable *Addr = VTable;
  SmallString<128> AddrName;
  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name&: AddrName, EntryInfo);
  // Suffix distinguishes the address-holding global from the entry name.
  AddrName.append(RHS: "addr");

  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    // On the device, emit the externally visible pointer global described
    // above, initialized with the vtable's address.
    Addr = new llvm::GlobalVariable(
        CGM.getModule(), VTable->getType(),
        /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, VTable,
        AddrName,
        /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
        CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
    Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
  }
  // NOTE(review): the entry is registered with the vtable itself (not the
  // Addr wrapper created above) and with the vtable initializer's full
  // allocation size — confirm this asymmetry with Addr is intentional.
  OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName: AddrName, Addr: VTable,
      VarSize: CGM.getDataLayout().getTypeAllocSize(Ty: VTable->getInitializer()->getType()),
      Flags: llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable,
      Linkage: llvm::GlobalValue::WeakODRLinkage);
}
1839
1840void CGOpenMPRuntime::emitAndRegisterVTable(CodeGenModule &CGM,
1841 CXXRecordDecl *CXXRecord,
1842 const VarDecl *VD) {
1843 // Register C++ VTable to OpenMP Offload Entry if it's a new
1844 // CXXRecordDecl.
1845 if (CXXRecord && CXXRecord->isDynamicClass() &&
1846 !CGM.getOpenMPRuntime().VTableDeclMap.contains(Val: CXXRecord)) {
1847 auto Res = CGM.getOpenMPRuntime().VTableDeclMap.try_emplace(Key: CXXRecord, Args&: VD);
1848 if (Res.second) {
1849 CGM.EmitVTable(Class: CXXRecord);
1850 CodeGenVTables VTables = CGM.getVTables();
1851 llvm::GlobalVariable *VTablesAddr = VTables.GetAddrOfVTable(RD: CXXRecord);
1852 assert(VTablesAddr && "Expected non-null VTable address");
1853 CGM.getOpenMPRuntime().registerVTableOffloadEntry(VTable: VTablesAddr, VD);
1854 // Emit VTable for all the fields containing dynamic CXXRecord
1855 for (const FieldDecl *Field : CXXRecord->fields()) {
1856 if (CXXRecordDecl *RecordDecl = Field->getType()->getAsCXXRecordDecl())
1857 emitAndRegisterVTable(CGM, CXXRecord: RecordDecl, VD);
1858 }
1859 // Emit VTable for all dynamic parent class
1860 for (CXXBaseSpecifier &Base : CXXRecord->bases()) {
1861 if (CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl())
1862 emitAndRegisterVTable(CGM, CXXRecord: BaseDecl, VD);
1863 }
1864 }
1865 }
1866}
1867
/// Scan the map clauses of directive \p D and register vtables for every
/// mapped variable whose (possibly pointee/reference) type is a C++ class.
void CGOpenMPRuntime::registerVTable(const OMPExecutableDirective &D) {
  // Register VTable by scanning through the map clause of OpenMP target region.
  // Get CXXRecordDecl and VarDecl from Expr.
  auto GetVTableDecl = [](const Expr *E) {
    QualType VDTy = E->getType();
    CXXRecordDecl *CXXRecord = nullptr;
    // Look through an lvalue reference, then through one level of pointer,
    // to find the underlying class type (if any).
    if (const auto *RefType = VDTy->getAs<LValueReferenceType>())
      VDTy = RefType->getPointeeType();
    if (VDTy->isPointerType())
      CXXRecord = VDTy->getPointeeType()->getAsCXXRecordDecl();
    else
      CXXRecord = VDTy->getAsCXXRecordDecl();

    // The associated variable: either the mapped variable itself, or the
    // base variable of a member access (e.g. map(s.field)).
    const VarDecl *VD = nullptr;
    if (auto *DRE = dyn_cast<DeclRefExpr>(Val: E)) {
      VD = cast<VarDecl>(Val: DRE->getDecl());
    } else if (auto *MRE = dyn_cast<MemberExpr>(Val: E)) {
      if (auto *BaseDRE = dyn_cast<DeclRefExpr>(Val: MRE->getBase())) {
        if (auto *BaseVD = dyn_cast<VarDecl>(Val: BaseDRE->getDecl()))
          VD = BaseVD;
      }
    }
    return std::pair<CXXRecordDecl *, const VarDecl *>(CXXRecord, VD);
  };
  // Collect VTable from OpenMP map clause.
  for (const auto *C : D.getClausesOfKind<OMPMapClause>()) {
    for (const auto *E : C->varlist()) {
      auto DeclPair = GetVTableDecl(E);
      // Ensure VD is not null
      if (DeclPair.second)
        emitAndRegisterVTable(CGM, CXXRecord: DeclPair.first, VD: DeclPair.second);
    }
  }
}
1902
/// Return a per-thread address for a compiler-generated ("artificial")
/// threadprivate value identified by \p Name. Uses a real TLS global when the
/// target supports it; otherwise falls back to __kmpc_threadprivate_cached
/// with a dedicated cache global.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName(Parts: {"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(T: VarType);
  llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
      Ty: VarLVType, Name: Twine(Name).concat(Suffix).str());
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    // Fast path: mark the internal global thread_local and use it directly.
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(T: VarType));
  }
  // Slow path: ask the runtime for this thread's cached copy.
  std::string CacheSuffix = getName(Parts: {"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc: SourceLocation()),
      getThreadID(CGF, Loc: SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(V: GAddr, DestTy: CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(V: CGF.getTypeSize(Ty: VarType), DestTy: CGM.SizeTy,
                                /*isSigned=*/false),
      OMPBuilder.getOrCreateInternalVariable(
          Ty: CGM.VoidPtrPtrTy,
          Name: Twine(Name).concat(Suffix).concat(Suffix: CacheSuffix).str())};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          V: CGF.EmitRuntimeCall(
              callee: OMPBuilder.getOrCreateRuntimeFunction(
                  M&: CGM.getModule(), FnID: OMPRTL___kmpc_threadprivate_cached),
              args: Args),
          DestTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(T: VarType));
}
1935
/// Emit an OpenMP 'if' clause: run \p ThenGen when \p Cond is true and
/// \p ElseGen otherwise. Constant-folds the condition when possible so only
/// the live arm is emitted.
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, Result&: CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock(name: "omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock(name: "omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock(name: "omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock: ThenBlock, FalseBlock: ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(BB: ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(Block: ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(BB: ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(Block: ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(BB: ContBlock, /*IsFinished=*/true);
}
1974
/// Emit code for a 'parallel' region. With no if-clause (or one that holds),
/// call __kmpc_fork_call on \p OutlinedFn; when the if-clause fails, emit the
/// serialized path: __kmpc_serialized_parallel, a direct call to the outlined
/// function, then __kmpc_end_serialized_parallel.
/// NOTE(review): NumThreads, NumThreadsModifier, Severity and Message are
/// unused in this overload — presumably consumed by callers or target-specific
/// overrides; confirm before relying on them here.
void CGOpenMPRuntime::emitParallelCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
    ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
    llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
    OpenMPSeverityClauseKind Severity, const Expr *Message) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(C: CapturedVars.size()), // Number of captured vars
        OutlinedFn};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(in_start: std::begin(arr&: Args), in_end: std::end(arr&: Args));
    RealArgs.append(in_start: CapturedVars.begin(), in_end: CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, FnID: OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(callee: RTLFn, args: RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                            M, FnID: OMPRTL___kmpc_serialized_parallel),
                        args: Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    RawAddress ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(Ty: CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(Val: CGF.Builder.getInt32(/*C*/ 0), Addr: ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(Elt: ThreadIDAddr.emitRawPointer(CGF));
    OutlinedFnArgs.push_back(Elt: ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(in_start: CapturedVars.begin(), in_end: CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    // handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(Kind: llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(Kind: llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, Args: OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                            M, FnID: OMPRTL___kmpc_end_serialized_parallel),
                        args: EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, Cond: IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2045
// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  // Inside an outlined region: reuse the gtid parameter's address.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();

  // Serial code: fetch the thread id from the runtime and spill it to a temp.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(T: Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(value: ThreadID,
                        lvalue: CGF.MakeAddrLValue(Addr: ThreadIDTemp, T: Int32Ty));

  return ThreadIDTemp;
}
2068
2069llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2070 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2071 std::string Name = getName(Parts: {Prefix, "var"});
2072 return OMPBuilder.getOrCreateInternalVariable(Ty: KmpCriticalNameTy, Name);
2073}
2074
namespace {
/// Common pre(post)-action for different OpenMP constructs.
/// Emits a runtime "enter" call before the region and an "exit" call after.
/// When \c Conditional is set, the enter call's non-zero result guards the
/// region body (used for master/masked-style constructs).
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;   // Runtime function called on entry.
  ArrayRef<llvm::Value *> EnterArgs;  // Arguments for the entry call.
  llvm::FunctionCallee ExitCallee;    // Runtime function called on exit.
  ArrayRef<llvm::Value *> ExitArgs;   // Arguments for the exit call.
  bool Conditional;                   // Guard the body on the entry result?
  llvm::BasicBlock *ContBlock = nullptr;  // Continuation block (conditional).

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  // Emit the entry call; in conditional mode also emit the guarding branch.
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(callee: EnterCallee, args: EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(Arg: EnterRes);
      auto *ThenBlock = CGF.createBasicBlock(name: "omp_if.then");
      ContBlock = CGF.createBasicBlock(name: "omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(Cond: CallBool, True: ThenBlock, False: ContBlock);
      CGF.EmitBlock(BB: ThenBlock);
    }
  }
  // Close the conditional region opened by Enter(); callers must invoke this
  // after the body when Conditional was requested.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(Block: ContBlock);
    CGF.EmitBlock(BB: ContBlock, IsFinished: true);
  }
  // Emit the exit call unconditionally.
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(callee: ExitCallee, args: ExitArgs);
  }
};
} // anonymous namespace
2113
/// Emit a 'critical' region: bracket \p CriticalOpGen with
/// __kmpc_critical[_with_hint] / __kmpc_end_critical on the named lock.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::FunctionCallee RuntimeFcn = OMPBuilder.getOrCreateRuntimeFunction(
      M&: CGM.getModule(),
      FnID: Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical);
  llvm::Value *LockVar = getCriticalRegionLock(CriticalName);
  // Cast the lock to the runtime function's expected address space if the
  // internal global lives in a different one (e.g. on GPU targets).
  unsigned LockVarArgIdx = 2;
  if (cast<llvm::GlobalVariable>(Val: LockVar)->getAddressSpace() !=
      RuntimeFcn.getFunctionType()
          ->getParamType(i: LockVarArgIdx)
          ->getPointerAddressSpace())
    LockVar = CGF.Builder.CreateAddrSpaceCast(
        V: LockVar, DestTy: RuntimeFcn.getFunctionType()->getParamType(i: LockVarArgIdx));
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         LockVar};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(arr&: Args),
                                                std::end(arr&: Args));
  // The hint variant takes an extra trailing hint argument.
  if (Hint) {
    EnterArgs.push_back(Elt: CGF.Builder.CreateIntCast(
        V: CGF.EmitScalarExpr(E: Hint), DestTy: CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(RuntimeFcn, EnterArgs,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_critical),
                        Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, InnermostKind: OMPD_critical, CodeGen: CriticalOpGen);
}
2150
/// Emit a 'master' region: body runs only on the thread for which
/// __kmpc_master returns non-zero, followed by __kmpc_end_master.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, InnermostKind: OMPD_master, CodeGen: MasterOpGen);
  // Close the conditional region opened by the action's Enter().
  Action.Done(CGF);
}
2173
/// Emit a 'masked' region: body runs only on threads matching the filter
/// (defaults to thread 0 when no filter expression is given).
void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(iden_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(E: Filter, IgnoreResultAssign: CGF.Int32Ty)
                               : llvm::ConstantInt::get(Ty: CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, InnermostKind: OMPD_masked, CodeGen: MaskedOpGen);
  // Close the conditional region opened by the action's Enter().
  Action.Done(CGF);
}
2202
/// Emit a 'taskyield' construct, via the OpenMPIRBuilder when enabled, or a
/// direct __kmpc_omp_taskyield call otherwise. Also emits the untied-task
/// resume switch when inside an untied task region.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(Loc: CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(Ty: CGM.IntTy, /*V=*/0, /*isSigned=*/IsSigned: true)};
    CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_taskyield),
                        args: Args);
  }

  // Taskyield is a scheduling point: untied tasks may resume elsewhere.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
2222
/// Emit a 'taskgroup' region, bracketing the body with __kmpc_taskgroup and
/// __kmpc_end_taskgroup.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, InnermostKind: OMPD_taskgroup, CodeGen: TaskgroupOpGen);
}
2242
2243/// Given an array of pointers to variables, project the address of a
2244/// given variable.
2245static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2246 unsigned Index, const VarDecl *Var) {
2247 // Pull out the pointer to the variable.
2248 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Addr: Array, Index);
2249 llvm::Value *Ptr = CGF.Builder.CreateLoad(Addr: PtrAddr);
2250
2251 llvm::Type *ElemTy = CGF.ConvertTypeForMem(T: Var->getType());
2252 return Address(Ptr, ElemTy, CGF.getContext().getDeclAlign(D: Var));
2253}
2254
/// Emit the helper function
///   void .omp.copyprivate.copy_func(void *LHSArg, void *RHSArg)
/// which copies each copyprivate variable: both arguments are arrays of
/// void* pointers to the variables, and each destination element is assigned
/// from the corresponding source element via the given assignment expression.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(Elt: &LHSArg);
  Args.push_back(Elt: &RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
  // Create an internal-linkage function with a runtime-unique mangled name.
  std::string Name =
      CGM.getOpenMPRuntime().getName(Parts: {"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(Ty: CGM.getTypes().GetFunctionType(Info: CGFI),
                                    Linkage: llvm::GlobalValue::InternalLinkage, N: Name,
                                    M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: CGFI);
  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
    Fn->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo: CGFI, Args, Loc, StartLoc: Loc);
  // Reinterpret both void* arguments as pointers to the void*[n] array type:
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  V: CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(VD: &LHSArg)),
                  DestTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  V: CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(VD: &RHSArg)),
                  DestTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
              ArgsElemType, CGF.getPointerAlign());
  // Emit one element-wise assignment per copyprivate variable:
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(Val: cast<DeclRefExpr>(Val: DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, Array: LHS, Index: I, Var: DestVar);

    const auto *SrcVar =
        cast<VarDecl>(Val: cast<DeclRefExpr>(Val: SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, Array: RHS, Index: I, Var: SrcVar);

    // Use the original variable's type for the copy; AssignmentOps[I] carries
    // the (possibly user-defined) assignment to perform.
    const auto *VD = cast<DeclRefExpr>(Val: CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(OriginalType: Type, DestAddr, SrcAddr, DestVD: DestVar, SrcVD: SrcVar, Copy: AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2312
// Emit a 'single' region, including copyprivate support:
//   int32 did_it = 0;
//   if (__kmpc_single(ident_t *, gtid)) {
//     SingleOpGen();
//     __kmpc_end_single(ident_t *, gtid);
//     did_it = 1;
//   }
//   __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
//                      <copy_func>, did_it);
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four copyprivate arrays are parallel: one entry per variable.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0; -- records whether this thread executed the region.
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(T: KmpInt32Ty, Name: ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(Val: CGF.Builder.getInt32(C: 0), Addr: DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Conditional action: the region body is emitted under the branch taken
  // only when __kmpc_single returns nonzero.
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, InnermostKind: OMPD_single, CodeGen: SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1; -- still inside the conditional region.
    CGF.Builder.CreateStore(Val: CGF.Builder.getInt32(C: 1), Addr: DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    // Build a void*[n] array holding the address of each copyprivate var.
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        EltTy: C.VoidPtrTy, ArySize: ArraySize, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(T: CopyprivateArrayTy, Name: ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(Addr: CopyprivateList, Index: I);
      CGF.Builder.CreateStore(
          Val: CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              V: CGF.EmitLValue(E: CopyprivateVars[I]).getPointer(CGF),
              DestTy: CGF.VoidPtrTy),
          Addr: Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, ArgsElemType: CGF.ConvertTypeForMem(T: CopyprivateArrayTy), CopyprivateVars,
        DestExprs: SrcExprs, SrcExprs: DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(Ty: CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr: CopyprivateList, Ty: CGF.VoidPtrTy, ElementTy: CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(Addr: DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.emitRawPointer(CGF),       // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_copyprivate),
                        args: Args);
  }
}
2399
2400void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2401 const RegionCodeGenTy &OrderedOpGen,
2402 SourceLocation Loc, bool IsThreads) {
2403 if (!CGF.HaveInsertPoint())
2404 return;
2405 // __kmpc_ordered(ident_t *, gtid);
2406 // OrderedOpGen();
2407 // __kmpc_end_ordered(ident_t *, gtid);
2408 // Prepare arguments and build a call to __kmpc_ordered
2409 if (IsThreads) {
2410 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2411 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2412 M&: CGM.getModule(), FnID: OMPRTL___kmpc_ordered),
2413 Args,
2414 OMPBuilder.getOrCreateRuntimeFunction(
2415 M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_ordered),
2416 Args);
2417 OrderedOpGen.setAction(Action);
2418 emitInlinedDirective(CGF, InnermostKind: OMPD_ordered, CodeGen: OrderedOpGen);
2419 return;
2420 }
2421 emitInlinedDirective(CGF, InnermostKind: OMPD_ordered, CodeGen: OrderedOpGen);
2422}
2423
2424unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2425 unsigned Flags;
2426 if (Kind == OMPD_for)
2427 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2428 else if (Kind == OMPD_sections)
2429 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2430 else if (Kind == OMPD_single)
2431 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2432 else if (Kind == OMPD_barrier)
2433 Flags = OMP_IDENT_BARRIER_EXPL;
2434 else
2435 Flags = OMP_IDENT_BARRIER_IMPL;
2436 return Flags;
2437}
2438
// Choose the default schedule/chunk for a loop directive. Only doacross
// loops (an 'ordered' clause with a parameter) force 'schedule(static, 1)';
// otherwise the out-parameters are left untouched.
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          Range: S.getClausesOfKind<OMPOrderedClause>(),
          P: [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case. Build it as a synthetic 32-bit unsigned
    // integer literal with no source location.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        C: CGF.getContext(), V: ChunkSize,
        type: CGF.getContext().getIntTypeForBitwidth(DestWidth: 32, /*Signed=*/0),
        l: SourceLocation());
  }
}
2456
// Emit a barrier: via OpenMPIRBuilder when enabled, otherwise as
// __kmpc_cancel_barrier (inside a cancellable region, optionally followed by
// an exit-on-cancel check) or plain __kmpc_barrier.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
        cantFail(ValOrErr: OMPBuilder.createBarrier(Loc: CGF.Builder, Kind, ForceSimpleCall,
                                          CheckCancelFlag: EmitChecks));
    CGF.Builder.restoreIP(IP: AfterIP);
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      // The enclosing region is cancellable: use the cancellation-aware
      // barrier, which returns nonzero when cancellation was requested.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(),
                                                FnID: OMPRTL___kmpc_cancel_barrier),
          args: Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: ".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(name: ".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Arg: Result);
        CGF.Builder.CreateCondBr(Cond: Cmp, True: ExitBB, False: ContBB);
        CGF.EmitBlock(BB: ExitBB);
        // exit from construct; branch through cleanups to the region's
        // cancellation destination.
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(Kind: OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(Dest: CancelDestination);
        CGF.EmitBlock(BB: ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Non-cancellable case: plain barrier.
  CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                          M&: CGM.getModule(), FnID: OMPRTL___kmpc_barrier),
                      args: Args);
}
2508
2509void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2510 Expr *ME, bool IsFatal) {
2511 llvm::Value *MVL = ME ? CGF.EmitScalarExpr(E: ME)
2512 : llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
2513 // Build call void __kmpc_error(ident_t *loc, int severity, const char
2514 // *message)
2515 llvm::Value *Args[] = {
2516 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/EmitLoc: true),
2517 llvm::ConstantInt::get(Ty: CGM.Int32Ty, V: IsFatal ? 2 : 1),
2518 CGF.Builder.CreatePointerCast(V: MVL, DestTy: CGM.Int8PtrTy)};
2519 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
2520 M&: CGM.getModule(), FnID: OMPRTL___kmpc_error),
2521 args: Args);
2522}
2523
2524/// Map the OpenMP loop schedule to the runtime enumeration.
2525static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2526 bool Chunked, bool Ordered) {
2527 switch (ScheduleKind) {
2528 case OMPC_SCHEDULE_static:
2529 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2530 : (Ordered ? OMP_ord_static : OMP_sch_static);
2531 case OMPC_SCHEDULE_dynamic:
2532 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2533 case OMPC_SCHEDULE_guided:
2534 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2535 case OMPC_SCHEDULE_runtime:
2536 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2537 case OMPC_SCHEDULE_auto:
2538 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2539 case OMPC_SCHEDULE_unknown:
2540 assert(!Chunked && "chunk was specified but schedule kind not known");
2541 return Ordered ? OMP_ord_static : OMP_sch_static;
2542 }
2543 llvm_unreachable("Unexpected runtime schedule");
2544}
2545
2546/// Map the OpenMP distribute schedule to the runtime enumeration.
2547static OpenMPSchedType
2548getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2549 // only static is allowed for dist_schedule
2550 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2551}
2552
2553bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2554 bool Chunked) const {
2555 OpenMPSchedType Schedule =
2556 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2557 return Schedule == OMP_sch_static;
2558}
2559
2560bool CGOpenMPRuntime::isStaticNonchunked(
2561 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2562 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2563 return Schedule == OMP_dist_sch_static;
2564}
2565
2566bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2567 bool Chunked) const {
2568 OpenMPSchedType Schedule =
2569 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2570 return Schedule == OMP_sch_static_chunked;
2571}
2572
2573bool CGOpenMPRuntime::isStaticChunked(
2574 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2575 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2576 return Schedule == OMP_dist_sch_static_chunked;
2577}
2578
2579bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2580 OpenMPSchedType Schedule =
2581 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2582 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2583 return Schedule != OMP_sch_static;
2584}
2585
2586static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2587 OpenMPScheduleClauseModifier M1,
2588 OpenMPScheduleClauseModifier M2) {
2589 int Modifier = 0;
2590 switch (M1) {
2591 case OMPC_SCHEDULE_MODIFIER_monotonic:
2592 Modifier = OMP_sch_modifier_monotonic;
2593 break;
2594 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2595 Modifier = OMP_sch_modifier_nonmonotonic;
2596 break;
2597 case OMPC_SCHEDULE_MODIFIER_simd:
2598 if (Schedule == OMP_sch_static_chunked)
2599 Schedule = OMP_sch_static_balanced_chunked;
2600 break;
2601 case OMPC_SCHEDULE_MODIFIER_last:
2602 case OMPC_SCHEDULE_MODIFIER_unknown:
2603 break;
2604 }
2605 switch (M2) {
2606 case OMPC_SCHEDULE_MODIFIER_monotonic:
2607 Modifier = OMP_sch_modifier_monotonic;
2608 break;
2609 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2610 Modifier = OMP_sch_modifier_nonmonotonic;
2611 break;
2612 case OMPC_SCHEDULE_MODIFIER_simd:
2613 if (Schedule == OMP_sch_static_chunked)
2614 Schedule = OMP_sch_static_balanced_chunked;
2615 break;
2616 case OMPC_SCHEDULE_MODIFIER_last:
2617 case OMPC_SCHEDULE_MODIFIER_unknown:
2618 break;
2619 }
2620 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2621 // If the static schedule kind is specified or if the ordered clause is
2622 // specified, and if the nonmonotonic modifier is not specified, the effect is
2623 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2624 // modifier is specified, the effect is as if the nonmonotonic modifier is
2625 // specified.
2626 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2627 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2628 Schedule == OMP_sch_static_balanced_chunked ||
2629 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2630 Schedule == OMP_dist_sch_static_chunked ||
2631 Schedule == OMP_dist_sch_static))
2632 Modifier = OMP_sch_modifier_nonmonotonic;
2633 }
2634 return Schedule | Modifier;
2635}
2636
// Emit the dynamic-dispatch loop initialization call. The static schedules
// are rejected here (asserted) unless 'ordered' forces dispatch-based
// execution.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind: ScheduleKind.Schedule, Chunked: DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(N: IVSize, C: 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(C: addMonoNonMonoModifier(
          CGM, Schedule, M1: ScheduleKind.M1, M2: ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                  // Lower
      DispatchValues.UB,                                  // Upper
      CGF.Builder.getIntN(N: IVSize, C: 1),                    // Stride
      Chunk                                               // Chunk
  };
  // The runtime entry point is selected by induction-variable size/signedness
  // (__kmpc_dispatch_init_4, _4u, _8, _8u).
  CGF.EmitRuntimeCall(callee: OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
                      args: Args);
}
2670
2671void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
2672 SourceLocation Loc) {
2673 if (!CGF.HaveInsertPoint())
2674 return;
2675 // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
2676 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2677 CGF.EmitRuntimeCall(callee: OMPBuilder.createDispatchDeinitFunction(), args: Args);
2678}
2679
/// Shared helper for emitting the __kmpc_for_static_init call used by both
/// worksharing-loop/sections and distribute initialization. Asserts that the
/// schedule is one of the static variants and that 'ordered' is not set.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(N: Values.IVSize, C: 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  // The bound/stride addresses are passed by pointer; the runtime fills them
  // in with this thread's portion of the iteration space.
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(C: addMonoNonMonoModifier(CGM&: CGF.CGM, Schedule, M1,
                                                M2)), // Schedule type
      Values.IL.emitRawPointer(CGF),                  // &isLastIter
      Values.LB.emitRawPointer(CGF),                  // &LB
      Values.UB.emitRawPointer(CGF),                  // &UB
      Values.ST.emitRawPointer(CGF),                  // &Stride
      CGF.Builder.getIntN(N: Values.IVSize, C: 1),        // Incr
      Chunk                                           // Chunk
  };
  CGF.EmitRuntimeCall(callee: ForStaticInitFunction, args: Args);
}
2728
// Emit static-schedule initialization for a worksharing loop or sections
// directive by delegating to emitForStaticInitCall with the appropriate
// ident_t work flag.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind: ScheduleKind.Schedule, Chunked: Values.Chunk != nullptr, Ordered: Values.Ordered);
  assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
         "Expected loop-based or sections-based directive.");
  // Tag the source location with the kind of work being divided.
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                                    Flags: isOpenMPLoopDirective(DKind)
                                                        ? OMP_IDENT_WORK_LOOP
                                                        : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      OMPBuilder.createForStaticInitFunction(IVSize: Values.IVSize, IVSigned: Values.IVSigned,
                                             IsGPUDistribute: false);
  // Use an artificial debug location for the runtime call.
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: Loc);
  emitForStaticInitCall(CGF, UpdateLocation: UpdatedLocation, ThreadId, ForStaticInitFunction: StaticInitFunction,
                        Schedule: ScheduleNum, M1: ScheduleKind.M1, M2: ScheduleKind.M2, Values);
}
2750
2751void CGOpenMPRuntime::emitDistributeStaticInit(
2752 CodeGenFunction &CGF, SourceLocation Loc,
2753 OpenMPDistScheduleClauseKind SchedKind,
2754 const CGOpenMPRuntime::StaticRTInput &Values) {
2755 OpenMPSchedType ScheduleNum =
2756 getRuntimeSchedule(ScheduleKind: SchedKind, Chunked: Values.Chunk != nullptr);
2757 llvm::Value *UpdatedLocation =
2758 emitUpdateLocation(CGF, Loc, Flags: OMP_IDENT_WORK_DISTRIBUTE);
2759 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2760 llvm::FunctionCallee StaticInitFunction;
2761 bool isGPUDistribute =
2762 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU();
2763 StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2764 IVSize: Values.IVSize, IVSigned: Values.IVSigned, IsGPUDistribute: isGPUDistribute);
2765
2766 emitForStaticInitCall(CGF, UpdateLocation: UpdatedLocation, ThreadId, ForStaticInitFunction: StaticInitFunction,
2767 Schedule: ScheduleNum, M1: OMPC_SCHEDULE_MODIFIER_unknown,
2768 M2: OMPC_SCHEDULE_MODIFIER_unknown, Values);
2769}
2770
2771void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2772 SourceLocation Loc,
2773 OpenMPDirectiveKind DKind) {
2774 assert((DKind == OMPD_distribute || DKind == OMPD_for ||
2775 DKind == OMPD_sections) &&
2776 "Expected distribute, for, or sections directive kind");
2777 if (!CGF.HaveInsertPoint())
2778 return;
2779 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2780 llvm::Value *Args[] = {
2781 emitUpdateLocation(CGF, Loc,
2782 Flags: isOpenMPDistributeDirective(DKind) ||
2783 (DKind == OMPD_target_teams_loop)
2784 ? OMP_IDENT_WORK_DISTRIBUTE
2785 : isOpenMPLoopDirective(DKind)
2786 ? OMP_IDENT_WORK_LOOP
2787 : OMP_IDENT_WORK_SECTIONS),
2788 getThreadID(CGF, Loc)};
2789 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: Loc);
2790 if (isOpenMPDistributeDirective(DKind) &&
2791 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU())
2792 CGF.EmitRuntimeCall(
2793 callee: OMPBuilder.getOrCreateRuntimeFunction(
2794 M&: CGM.getModule(), FnID: OMPRTL___kmpc_distribute_static_fini),
2795 args: Args);
2796 else
2797 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
2798 M&: CGM.getModule(), FnID: OMPRTL___kmpc_for_static_fini),
2799 args: Args);
2800}
2801
2802void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2803 SourceLocation Loc,
2804 unsigned IVSize,
2805 bool IVSigned) {
2806 if (!CGF.HaveInsertPoint())
2807 return;
2808 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2809 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2810 CGF.EmitRuntimeCall(callee: OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2811 args: Args);
2812}
2813
2814llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2815 SourceLocation Loc, unsigned IVSize,
2816 bool IVSigned, Address IL,
2817 Address LB, Address UB,
2818 Address ST) {
2819 // Call __kmpc_dispatch_next(
2820 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2821 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2822 // kmp_int[32|64] *p_stride);
2823 llvm::Value *Args[] = {
2824 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2825 IL.emitRawPointer(CGF), // &isLastIter
2826 LB.emitRawPointer(CGF), // &Lower
2827 UB.emitRawPointer(CGF), // &Upper
2828 ST.emitRawPointer(CGF) // &Stride
2829 };
2830 llvm::Value *Call = CGF.EmitRuntimeCall(
2831 callee: OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), args: Args);
2832 return CGF.EmitScalarConversion(
2833 Src: Call, SrcTy: CGF.getContext().getIntTypeForBitwidth(DestWidth: 32, /*Signed=*/1),
2834 DstTy: CGF.getContext().BoolTy, Loc);
2835}
2836
2837llvm::Value *CGOpenMPRuntime::emitMessageClause(CodeGenFunction &CGF,
2838 const Expr *Message,
2839 SourceLocation Loc) {
2840 if (!Message)
2841 return llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
2842 return CGF.EmitScalarExpr(E: Message);
2843}
2844
2845llvm::Value *
2846CGOpenMPRuntime::emitSeverityClause(OpenMPSeverityClauseKind Severity,
2847 SourceLocation Loc) {
2848 // OpenMP 6.0, 10.4: "If no severity clause is specified then the effect is
2849 // as if sev-level is fatal."
2850 return llvm::ConstantInt::get(Ty: CGM.Int32Ty,
2851 V: Severity == OMPC_SEVERITY_warning ? 1 : 2);
2852}
2853
// Emit the runtime call for a 'num_threads' clause. With the 'strict'
// modifier the severity/message arguments are appended and the strict entry
// point is used instead.
void CGOpenMPRuntime::emitNumThreadsClause(
    CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
    OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity,
    SourceLocation SeverityLoc, const Expr *Message,
    SourceLocation MessageLoc) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::SmallVector<llvm::Value *, 4> Args(
      {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
       CGF.Builder.CreateIntCast(V: NumThreads, DestTy: CGF.Int32Ty, /*isSigned*/ true)});
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  // or __kmpc_push_num_threads_strict(&loc, global_tid, num_threads, severity,
  // message) if strict modifier is used.
  RuntimeFunction FnID = OMPRTL___kmpc_push_num_threads;
  if (Modifier == OMPC_NUMTHREADS_strict) {
    FnID = OMPRTL___kmpc_push_num_threads_strict;
    Args.push_back(Elt: emitSeverityClause(Severity, Loc: SeverityLoc));
    Args.push_back(Elt: emitMessageClause(CGF, Message, Loc: MessageLoc));
  }
  CGF.EmitRuntimeCall(
      callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(), FnID), args: Args);
}
2876
2877void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2878 ProcBindKind ProcBind,
2879 SourceLocation Loc) {
2880 if (!CGF.HaveInsertPoint())
2881 return;
2882 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2883 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2884 llvm::Value *Args[] = {
2885 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2886 llvm::ConstantInt::get(Ty: CGM.IntTy, V: unsigned(ProcBind), /*isSigned=*/IsSigned: true)};
2887 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
2888 M&: CGM.getModule(), FnID: OMPRTL___kmpc_push_proc_bind),
2889 args: Args);
2890}
2891
2892void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2893 SourceLocation Loc, llvm::AtomicOrdering AO) {
2894 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2895 OMPBuilder.createFlush(Loc: CGF.Builder);
2896 } else {
2897 if (!CGF.HaveInsertPoint())
2898 return;
2899 // Build call void __kmpc_flush(ident_t *loc)
2900 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
2901 M&: CGM.getModule(), FnID: OMPRTL___kmpc_flush),
2902 args: emitUpdateLocation(CGF, Loc));
2903 }
2904}
2905
namespace {
/// Indexes of fields for type kmp_task_t. The enumerators are positional
/// field indexes, so their order must mirror the kmp_task_t record layout.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2931
// Emit the offload-entry metadata via the OpenMPIRBuilder, providing a
// callback that maps metadata-emission errors back to clang diagnostics at
// the best source location we can reconstruct.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
    return;

  llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
      [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
             const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
    SourceLocation Loc;
    if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
      // Recover a SourceLocation by matching the entry's device/file unique
      // ID against the files known to the SourceManager.
      for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                E = CGM.getContext().getSourceManager().fileinfo_end();
           I != E; ++I) {
        if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
            I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
          Loc = CGM.getContext().getSourceManager().translateFileLineCol(
              SourceFile: I->getFirst(), Line: EntryInfo.Line, Col: 1);
          break;
        }
      }
    }
    // Translate the builder's error kind to the matching clang diagnostic.
    switch (Kind) {
    case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
      CGM.getDiags().Report(Loc,
                            DiagID: diag::err_target_region_offloading_entry_incorrect)
          << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
      CGM.getDiags().Report(
          Loc, DiagID: diag::err_target_var_offloading_entry_incorrect_with_parent)
          << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
      CGM.getDiags().Report(DiagID: diag::err_target_var_offloading_entry_incorrect);
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          L: DiagnosticsEngine::Error, FormatString: "Offloading entry for indirect declare "
                                     "target variable is incorrect: the "
                                     "address is invalid.");
      CGM.getDiags().Report(DiagID);
    } break;
    }
  };

  OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFunction&: ErrorReportFn);
}
2980
2981void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2982 if (!KmpRoutineEntryPtrTy) {
2983 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2984 ASTContext &C = CGM.getContext();
2985 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2986 FunctionProtoType::ExtProtoInfo EPI;
2987 KmpRoutineEntryPtrQTy = C.getPointerType(
2988 T: C.getFunctionType(ResultTy: KmpInt32Ty, Args: KmpRoutineEntryTyArgs, EPI));
2989 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(T: KmpRoutineEntryPtrQTy);
2990 }
2991}
2992
2993namespace {
2994struct PrivateHelpersTy {
2995 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
2996 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
2997 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
2998 PrivateElemInit(PrivateElemInit) {}
2999 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3000 const Expr *OriginalRef = nullptr;
3001 const VarDecl *Original = nullptr;
3002 const VarDecl *PrivateCopy = nullptr;
3003 const VarDecl *PrivateElemInit = nullptr;
3004 bool isLocalPrivate() const {
3005 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3006 }
3007};
3008typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3009} // anonymous namespace
3010
3011static bool isAllocatableDecl(const VarDecl *VD) {
3012 const VarDecl *CVD = VD->getCanonicalDecl();
3013 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3014 return false;
3015 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3016 // Use the default allocation.
3017 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3018 !AA->getAllocator());
3019}
3020
3021static RecordDecl *
3022createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3023 if (!Privates.empty()) {
3024 ASTContext &C = CGM.getContext();
3025 // Build struct .kmp_privates_t. {
3026 // /* private vars */
3027 // };
3028 RecordDecl *RD = C.buildImplicitRecord(Name: ".kmp_privates.t");
3029 RD->startDefinition();
3030 for (const auto &Pair : Privates) {
3031 const VarDecl *VD = Pair.second.Original;
3032 QualType Type = VD->getType().getNonReferenceType();
3033 // If the private variable is a local variable with lvalue ref type,
3034 // allocate the pointer instead of the pointee type.
3035 if (Pair.second.isLocalPrivate()) {
3036 if (VD->getType()->isLValueReferenceType())
3037 Type = C.getPointerType(T: Type);
3038 if (isAllocatableDecl(VD))
3039 Type = C.getPointerType(T: Type);
3040 }
3041 FieldDecl *FD = addFieldToRecordDecl(C, DC: RD, FieldTy: Type);
3042 if (VD->hasAttrs()) {
3043 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3044 E(VD->getAttrs().end());
3045 I != E; ++I)
3046 FD->addAttr(A: *I);
3047 }
3048 }
3049 RD->completeDefinition();
3050 return RD;
3051 }
3052 return nullptr;
3053}
3054
/// Builds the implicit record type mirroring the runtime's kmp_task_t.
/// The field order below is load-bearing: later code indexes fields by the
/// KmpTaskT* enumerators (part_id, shareds, lb, ub, st, liter, reductions),
/// so do not reorder the addFieldToRecordDecl calls.
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t data1;
  //         kmp_cmplrdata_t data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  // First build union kmp_cmplrdata_t { kmp_int32; kmp_routine_entry_t; }
  // used for the data1/data2 fields.
  RecordDecl *UD = C.buildImplicitRecord(Name: "kmp_cmplrdata_t", TK: TagTypeKind::Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, DC: UD, FieldTy: KmpInt32Ty);
  addFieldToRecordDecl(C, DC: UD, FieldTy: KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  CanQualType KmpCmplrdataTy = C.getCanonicalTagType(TD: UD);
  RecordDecl *RD = C.buildImplicitRecord(Name: "kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
  addFieldToRecordDecl(C, DC: RD, FieldTy: KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, DC: RD, FieldTy: KmpInt32Ty);
  addFieldToRecordDecl(C, DC: RD, FieldTy: KmpCmplrdataTy);
  addFieldToRecordDecl(C, DC: RD, FieldTy: KmpCmplrdataTy);
  // Taskloop tasks carry extra bound/stride/last-iteration/reduction fields.
  if (isOpenMPTaskLoopDirective(DKind: Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, DC: RD, FieldTy: KmpUInt64Ty);
    addFieldToRecordDecl(C, DC: RD, FieldTy: KmpUInt64Ty);
    addFieldToRecordDecl(C, DC: RD, FieldTy: KmpInt64Ty);
    addFieldToRecordDecl(C, DC: RD, FieldTy: KmpInt32Ty);
    addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}
3100
3101static RecordDecl *
3102createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3103 ArrayRef<PrivateDataTy> Privates) {
3104 ASTContext &C = CGM.getContext();
3105 // Build struct kmp_task_t_with_privates {
3106 // kmp_task_t task_data;
3107 // .kmp_privates_t. privates;
3108 // };
3109 RecordDecl *RD = C.buildImplicitRecord(Name: "kmp_task_t_with_privates");
3110 RD->startDefinition();
3111 addFieldToRecordDecl(C, DC: RD, FieldTy: KmpTaskTQTy);
3112 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3113 addFieldToRecordDecl(C, DC: RD, FieldTy: C.getCanonicalTagType(TD: PrivateRD));
3114 RD->completeDefinition();
3115 return RD;
3116}
3117
3118/// Emit a proxy function which accepts kmp_task_t as the second
3119/// argument.
3120/// \code
3121/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3122/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3123/// For taskloops:
3124/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3125/// tt->reductions, tt->shareds);
3126/// return 0;
3127/// }
3128/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Build the (kmp_int32 gtid, kmp_task_t_with_privates *tt) signature the
  // runtime expects for a task entry point.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(Elt: &GtidArg);
  Args.push_back(Elt: &TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: KmpInt32Ty, args: Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(Info: TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(Parts: {"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      Ty: TaskEntryTy, Linkage: llvm::GlobalValue::InternalLinkage, N: Name, M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: TaskEntry, FI: TaskEntryFnInfo);
  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
    TaskEntry->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: KmpInt32Ty, Fn: TaskEntry, FnInfo: TaskEntryFnInfo, Args,
                    Loc, StartLoc: Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      Addr: CGF.GetAddrOfLocalVar(VD: &GtidArg), /*Volatile=*/false, Ty: KmpInt32Ty, Loc);
  // Dereference the task argument; the first field of the wrapper record is
  // the kmp_task_t payload (Base), the second (if present) the privates.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      Ptr: CGF.GetAddrOfLocalVar(VD: &TaskTypeArg),
      PtrTy: KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      KmpTaskTWithPrivatesQTy->castAsRecordDecl();
  LValue Base =
      CGF.EmitLValueForField(Base: TDBase, Field: *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
  // part_id is passed by address so the callee can update it.
  auto PartIdFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, Field: *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, Field: *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      V: CGF.EmitLoadOfScalar(lvalue: SharedsLVal, Loc),
      DestTy: CGF.ConvertTypeForMem(T: SharedsPtrTy));

  // Pass &tt->privates if the wrapper has a privates field, null otherwise.
  auto PrivatesFI = std::next(x: KmpTaskTWithPrivatesQTyRD->field_begin(), n: 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(Base: TDBase, Field: *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        V: PrivatesLVal.getPointer(CGF), DestTy: CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(Addr: TDBase.getAddress(),
                                               Ty: CGF.VoidPtrTy, ElementTy: CGF.Int8Ty)
          .emitRawPointer(CGF)};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(arr&: CommonArgs),
                                          std::end(arr&: CommonArgs));
  // Taskloops additionally receive lb/ub/st/liter/reductions loaded from the
  // task descriptor.
  if (isOpenMPTaskLoopDirective(DKind: Kind)) {
    auto LBFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, Field: *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(lvalue: LBLVal, Loc);
    auto UBFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, Field: *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(lvalue: UBLVal, Loc);
    auto StFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, Field: *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(lvalue: StLVal, Loc);
    auto LIFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, Field: *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(lvalue: LILVal, Loc);
    auto RFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, Field: *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(lvalue: RLVal, Loc);
    CallArgs.push_back(Elt: LBParam);
    CallArgs.push_back(Elt: UBParam);
    CallArgs.push_back(Elt: StParam);
    CallArgs.push_back(Elt: LIParam);
    CallArgs.push_back(Elt: RParam);
  }
  CallArgs.push_back(Elt: SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, OutlinedFn: TaskFunction,
                                                  Args: CallArgs);
  // The runtime contract is to return 0 from the task entry.
  CGF.EmitStoreThroughLValue(Src: RValue::get(V: CGF.Builder.getInt32(/*C=*/0)),
                             Dst: CGF.MakeAddrLValue(Addr: CGF.ReturnValue, T: KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3234
/// Emits the internal '.omp_task_destructor.' function with signature
/// (kmp_int32 gtid, kmp_task_t_with_privates *tt) that runs the destructors
/// of all private copies stored in the task's privates record.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  // Same (gtid, task) signature the runtime uses for the task entry point.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(Elt: &GtidArg);
  Args.push_back(Elt: &TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: KmpInt32Ty, args: Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(Info: DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName(Parts: {"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(Ty: DestructorFnTy, Linkage: llvm::GlobalValue::InternalLinkage,
                             N: Name, M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: DestructorFn,
                                    FI: DestructorFnInfo);
  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
    DestructorFn->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: KmpInt32Ty, Fn: DestructorFn, FnInfo: DestructorFnInfo,
                    Args, Loc, StartLoc: Loc);

  // Navigate to the privates record: second field of the wrapper struct.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      Ptr: CGF.GetAddrOfLocalVar(VD: &TaskTypeArg),
      PtrTy: KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      KmpTaskTWithPrivatesQTy->castAsRecordDecl();
  auto FI = std::next(x: KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, Field: *FI);
  // Push a destroy cleanup for every field whose type needs destruction.
  for (const auto *Field : FI->getType()->castAsRecordDecl()->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(dtorKind: DtorKind, addr: FieldLValue.getAddress(), type: Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3284
3285/// Emit a privates mapping function for correct handling of private and
3286/// firstprivate variables.
3287/// \code
3288/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3289/// **noalias priv1,..., <tyn> **noalias privn) {
3290/// *priv1 = &.privates.priv1;
3291/// ...;
3292/// *privn = &.privates.privn;
3293/// }
3294/// \endcode
3295static llvm::Value *
3296emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3297 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3298 ArrayRef<PrivateDataTy> Privates) {
3299 ASTContext &C = CGM.getContext();
3300 FunctionArgList Args;
3301 ImplicitParamDecl TaskPrivatesArg(
3302 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3303 C.getPointerType(T: PrivatesQTy).withConst().withRestrict(),
3304 ImplicitParamKind::Other);
3305 Args.push_back(Elt: &TaskPrivatesArg);
3306 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3307 unsigned Counter = 1;
3308 for (const Expr *E : Data.PrivateVars) {
3309 Args.push_back(Elt: ImplicitParamDecl::Create(
3310 C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
3311 T: C.getPointerType(T: C.getPointerType(T: E->getType()))
3312 .withConst()
3313 .withRestrict(),
3314 ParamKind: ImplicitParamKind::Other));
3315 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
3316 PrivateVarsPos[VD] = Counter;
3317 ++Counter;
3318 }
3319 for (const Expr *E : Data.FirstprivateVars) {
3320 Args.push_back(Elt: ImplicitParamDecl::Create(
3321 C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
3322 T: C.getPointerType(T: C.getPointerType(T: E->getType()))
3323 .withConst()
3324 .withRestrict(),
3325 ParamKind: ImplicitParamKind::Other));
3326 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
3327 PrivateVarsPos[VD] = Counter;
3328 ++Counter;
3329 }
3330 for (const Expr *E : Data.LastprivateVars) {
3331 Args.push_back(Elt: ImplicitParamDecl::Create(
3332 C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
3333 T: C.getPointerType(T: C.getPointerType(T: E->getType()))
3334 .withConst()
3335 .withRestrict(),
3336 ParamKind: ImplicitParamKind::Other));
3337 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
3338 PrivateVarsPos[VD] = Counter;
3339 ++Counter;
3340 }
3341 for (const VarDecl *VD : Data.PrivateLocals) {
3342 QualType Ty = VD->getType().getNonReferenceType();
3343 if (VD->getType()->isLValueReferenceType())
3344 Ty = C.getPointerType(T: Ty);
3345 if (isAllocatableDecl(VD))
3346 Ty = C.getPointerType(T: Ty);
3347 Args.push_back(Elt: ImplicitParamDecl::Create(
3348 C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
3349 T: C.getPointerType(T: C.getPointerType(T: Ty)).withConst().withRestrict(),
3350 ParamKind: ImplicitParamKind::Other));
3351 PrivateVarsPos[VD] = Counter;
3352 ++Counter;
3353 }
3354 const auto &TaskPrivatesMapFnInfo =
3355 CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
3356 llvm::FunctionType *TaskPrivatesMapTy =
3357 CGM.getTypes().GetFunctionType(Info: TaskPrivatesMapFnInfo);
3358 std::string Name =
3359 CGM.getOpenMPRuntime().getName(Parts: {"omp_task_privates_map", ""});
3360 auto *TaskPrivatesMap = llvm::Function::Create(
3361 Ty: TaskPrivatesMapTy, Linkage: llvm::GlobalValue::InternalLinkage, N: Name,
3362 M: &CGM.getModule());
3363 CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: TaskPrivatesMap,
3364 FI: TaskPrivatesMapFnInfo);
3365 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
3366 TaskPrivatesMap->addFnAttr(Kind: "sample-profile-suffix-elision-policy",
3367 Val: "selected");
3368 if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
3369 TaskPrivatesMap->removeFnAttr(Kind: llvm::Attribute::NoInline);
3370 TaskPrivatesMap->removeFnAttr(Kind: llvm::Attribute::OptimizeNone);
3371 TaskPrivatesMap->addFnAttr(Kind: llvm::Attribute::AlwaysInline);
3372 }
3373 CodeGenFunction CGF(CGM);
3374 CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn: TaskPrivatesMap,
3375 FnInfo: TaskPrivatesMapFnInfo, Args, Loc, StartLoc: Loc);
3376
3377 // *privi = &.privates.privi;
3378 LValue Base = CGF.EmitLoadOfPointerLValue(
3379 Ptr: CGF.GetAddrOfLocalVar(VD: &TaskPrivatesArg),
3380 PtrTy: TaskPrivatesArg.getType()->castAs<PointerType>());
3381 const auto *PrivatesQTyRD = PrivatesQTy->castAsRecordDecl();
3382 Counter = 0;
3383 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3384 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3385 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3386 LValue RefLVal =
3387 CGF.MakeAddrLValue(Addr: CGF.GetAddrOfLocalVar(VD), T: VD->getType());
3388 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3389 Ptr: RefLVal.getAddress(), PtrTy: RefLVal.getType()->castAs<PointerType>());
3390 CGF.EmitStoreOfScalar(value: FieldLVal.getPointer(CGF), lvalue: RefLoadLVal);
3391 ++Counter;
3392 }
3393 CGF.FinishFunction();
3394 return TaskPrivatesMap;
3395}
3396
/// Emit initialization for private variables in task-based directives.
/// Walks \p Privates in parallel with the fields of the privates record and
/// emits an initializer for each non-local private copy. When \p ForDup is
/// true this runs inside the task-dup function, so only non-trivial
/// constructor-based initializers are (re-)emitted.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // Privates live in the second field of kmp_task_t_with_privates.
  auto FI = std::next(x: KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(Base: TDBase, Field: *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(DKind: D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(RegionKind: Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(DKind: D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(DKind: D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        Addr: CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr: KmpTaskSharedsPtr, Ty: CGF.ConvertTypeForMem(T: SharedsPtrTy),
            ElementTy: CGF.ConvertTypeForMem(T: SharedsTy)),
        T: SharedsTy);
  }
  // From here on, FI iterates the fields of the privates record; it is
  // advanced in lock-step with the Privates array.
  FI = FI->getType()->castAsRecordDecl()->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the dup function only non-trivial constructor initializers matter;
    // trivially-copyable data was already duplicated by the runtime memcpy.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Val: Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(Base: PrivatesBase, Field: *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(VD: OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(Addr: CGF.GetAddrOfLocalVar(VD: OriginalVD), T: Type);
        } else if (ForDup) {
          // Read the original value out of the source task's shareds block,
          // re-aligned to the declared alignment of the original variable.
          SharedRefLValue = CGF.EmitLValueForField(Base: SrcBase, Field: SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Addr: SharedRefLValue.getAddress().withAlignment(
                  NewAlignment: C.getDeclAlign(D: OriginalVD)),
              T: SharedRefLValue.getType(), BaseInfo: LValueBaseInfo(AlignmentSource::Decl),
              TBAAInfo: SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Val: Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(Val: CGF.CurCodeDecl)) {
          // Lambda/block captures can be emitted directly.
          SharedRefLValue = CGF.EmitLValue(E: Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(E: Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Val: Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(Dest: PrivateLValue, Src: SharedRefLValue, EltTy: Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                DestAddr: PrivateLValue.getAddress(), SrcAddr: SharedRefLValue.getAddress(), OriginalType: Type,
                CopyGen: [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                 Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(LocalVD: Elem, Addr: SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(E: Init, Location: DestElement,
                                       Quals: Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/aggregate firstprivate: bind the element-init helper to the
          // shared value, then emit the initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(LocalVD: Elem, Addr: SharedRefLValue.getAddress());
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(init: Init, D: VD, lvalue: PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private: just run the copy's own initializer.
        CGF.EmitExprAsInit(init: Init, D: VD, lvalue: PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3515
3516/// Check if duplication function is required for taskloops.
3517static bool checkInitIsRequired(CodeGenFunction &CGF,
3518 ArrayRef<PrivateDataTy> Privates) {
3519 bool InitRequired = false;
3520 for (const PrivateDataTy &Pair : Privates) {
3521 if (Pair.second.isLocalPrivate())
3522 continue;
3523 const VarDecl *VD = Pair.second.PrivateCopy;
3524 const Expr *Init = VD->getAnyInitializer();
3525 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Val: Init) &&
3526 !CGF.isTrivialInitializer(Init));
3527 if (InitRequired)
3528 break;
3529 }
3530 return InitRequired;
3531}
3532
3533
3534/// Emit task_dup function (for initialization of
3535/// private/firstprivate/lastprivate vars and last_iter flag)
3536/// \code
3537/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3538/// lastpriv) {
3539/// // setup lastprivate flag
3540/// task_dst->last = lastpriv;
3541/// // could be constructor calls here...
3542/// }
3543/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Signature: (kmp_task_t *task_dst, kmp_task_t *task_src, int lastpriv).
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamKind::Other);
  Args.push_back(Elt: &DstArg);
  Args.push_back(Elt: &SrcArg);
  Args.push_back(Elt: &LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(Info: TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(Parts: {"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      Ty: TaskDupTy, Linkage: llvm::GlobalValue::InternalLinkage, N: Name, M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: TaskDup, FI: TaskDupFnInfo);
  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
    TaskDup->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn: TaskDup, FnInfo: TaskDupFnInfo, Args, Loc,
                    StartLoc: Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      Ptr: CGF.GetAddrOfLocalVar(VD: &DstArg),
      PtrTy: KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        Base: TDBase, Field: *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, Field: *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        Addr: CGF.GetAddrOfLocalVar(VD: &LastprivArg), /*Volatile=*/false, Ty: C.IntTy, Loc);
    CGF.EmitStoreOfScalar(value: Lastpriv, lvalue: LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  // Firstprivate initialization reads from the SOURCE task's shareds block;
  // load its pointer through the task_src argument.
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        Ptr: CGF.GetAddrOfLocalVar(VD: &SrcArg),
        PtrTy: KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        Base: TDBase, Field: *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(lvalue: CGF.EmitLValueForField(
                                 Base, Field: *std::next(x: KmpTaskTQTyRD->field_begin(),
                                                     n: KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(T: SharedsTy));
  }
  // Re-run the private initializers in duplication mode (ForDup=true).
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
3614
3615/// Checks if destructor function is required to be generated.
3616/// \return true if cleanups are required, false otherwise.
3617static bool
3618checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3619 ArrayRef<PrivateDataTy> Privates) {
3620 for (const PrivateDataTy &P : Privates) {
3621 if (P.second.isLocalPrivate())
3622 continue;
3623 QualType Ty = P.second.Original->getType().getNonReferenceType();
3624 if (Ty.isDestructedType())
3625 return true;
3626 }
3627 return false;
3628}
3629
namespace {
/// Loop generator for OpenMP iterator expression.
/// The constructor privatizes the iterator variables and emits the loop
/// headers (counter init, condition, body entry); the destructor emits the
/// matching counter increments, back-edges, and exit blocks in reverse
/// order, so user code emitted between construction and destruction runs
/// inside the innermost loop body.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator continue/exit targets, filled by the constructor and
  // consumed (innermost-first) by the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    // A null iterator expression makes this scope a no-op.
    if (!E)
      return;
    // First pass: evaluate upper bounds and create private storage for each
    // iterator variable and its helper counter.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(Elt: CGF.EmitScalarExpr(E: E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(Val: E->getIteratorDecl(I));
      addPrivate(LocalVD: VD, Addr: CGF.CreateMemTemp(T: VD->getType(), Name: VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          LocalVD: HelperData.CounterVD,
          Addr: CGF.CreateMemTemp(T: HelperData.CounterVD->getType(), Name: "counter.addr"));
    }
    Privatize();

    // Second pass: emit the nested loop headers, outermost first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(Addr: CGF.GetAddrOfLocalVar(VD: HelperData.CounterVD),
                             T: HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          value: llvm::ConstantInt::get(Ty: CLVal.getAddress().getElementType(), V: 0),
          lvalue: CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(Args: CGF.getJumpDestInCurrentScope(Name: "iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(Args: CGF.getJumpDestInCurrentScope(Name: "iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(BB: ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(lvalue: CLVal, Loc: HelperData.CounterVD->getLocation());
      // Pick a signed or unsigned comparison based on the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(LHS: CVal, RHS: N)
              : CGF.Builder.CreateICmpULT(LHS: CVal, RHS: N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock(name: "iter.body");
      CGF.Builder.CreateCondBr(Cond: Cmp, True: BodyBB, False: ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BB: BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(E: HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops innermost-first, mirroring the constructor.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I: I - 1);
      CGF.EmitIgnoredExpr(E: HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(Dest: ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(BB: ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
3705
3706static std::pair<llvm::Value *, llvm::Value *>
3707getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
3708 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(Val: E);
3709 llvm::Value *Addr;
3710 if (OASE) {
3711 const Expr *Base = OASE->getBase();
3712 Addr = CGF.EmitScalarExpr(E: Base);
3713 } else {
3714 Addr = CGF.EmitLValue(E).getPointer(CGF);
3715 }
3716 llvm::Value *SizeVal;
3717 QualType Ty = E->getType();
3718 if (OASE) {
3719 SizeVal = CGF.getTypeSize(Ty: OASE->getBase()->getType()->getPointeeType());
3720 for (const Expr *SE : OASE->getDimensions()) {
3721 llvm::Value *Sz = CGF.EmitScalarExpr(E: SE);
3722 Sz = CGF.EmitScalarConversion(
3723 Src: Sz, SrcTy: SE->getType(), DstTy: CGF.getContext().getSizeType(), Loc: SE->getExprLoc());
3724 SizeVal = CGF.Builder.CreateNUWMul(LHS: SizeVal, RHS: Sz);
3725 }
3726 } else if (const auto *ASE =
3727 dyn_cast<ArraySectionExpr>(Val: E->IgnoreParenImpCasts())) {
3728 LValue UpAddrLVal = CGF.EmitArraySectionExpr(E: ASE, /*IsLowerBound=*/false);
3729 Address UpAddrAddress = UpAddrLVal.getAddress();
3730 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3731 Ty: UpAddrAddress.getElementType(), Ptr: UpAddrAddress.emitRawPointer(CGF),
3732 /*Idx0=*/1);
3733 SizeVal = CGF.Builder.CreatePtrDiff(LHS: UpAddr, RHS: Addr, Name: "", /*IsNUW=*/true);
3734 } else {
3735 SizeVal = CGF.getTypeSize(Ty);
3736 }
3737 return std::make_pair(x&: Addr, y&: SizeVal);
3738}
3739
3740/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
3741static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3742 QualType FlagsTy = C.getIntTypeForBitwidth(DestWidth: 32, /*Signed=*/false);
3743 if (KmpTaskAffinityInfoTy.isNull()) {
3744 RecordDecl *KmpAffinityInfoRD =
3745 C.buildImplicitRecord(Name: "kmp_task_affinity_info_t");
3746 KmpAffinityInfoRD->startDefinition();
3747 addFieldToRecordDecl(C, DC: KmpAffinityInfoRD, FieldTy: C.getIntPtrType());
3748 addFieldToRecordDecl(C, DC: KmpAffinityInfoRD, FieldTy: C.getSizeType());
3749 addFieldToRecordDecl(C, DC: KmpAffinityInfoRD, FieldTy: FlagsTy);
3750 KmpAffinityInfoRD->completeDefinition();
3751 KmpTaskAffinityInfoTy = C.getCanonicalTagType(TD: KmpAffinityInfoRD);
3752 }
3753}
3754
/// Emits the allocation and initialization of a task descriptor for a
/// task-like directive (task, taskloop, or target task): lays out the
/// kmp_task_t-with-privates record, builds the proxy task-entry function,
/// calls __kmpc_omp_task_alloc (or __kmpc_omp_target_task_alloc for
/// nowait target tasks), copies shareds, initializes private copies and
/// wires up destructors, priority, the detach event and affinity info.
/// Returns the handles callers need to actually enqueue the task.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
    Privates.emplace_back(
        Args: C.getDeclAlign(D: VD),
        Args: PrivateHelpersTy(E, VD, cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally carry the init expression used to copy the
  // original value into the private copy.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
    Privates.emplace_back(
        Args: C.getDeclAlign(D: VD),
        Args: PrivateHelpersTy(
            E, VD, cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *I)->getDecl()),
            cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
    Privates.emplace_back(
        Args: C.getDeclAlign(D: VD),
        Args: PrivateHelpersTy(E, VD, cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(Args: CGM.getPointerAlign(), Args: PrivateHelpersTy(VD));
    else
      Privates.emplace_back(Args: C.getDeclAlign(D: VD), Args: PrivateHelpersTy(VD));
  }
  // Sort by decreasing alignment so the generated privates record needs no
  // internal padding; stable to keep declaration order among equal
  // alignments.
  llvm::stable_sort(Range&: Privates,
                    C: [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). The taskloop variant carries
  // extra loop-bound fields, so it is cached separately.
  if (isOpenMPTaskLoopDirective(DKind: D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getCanonicalTagType(TD: createKmpTaskTRecordDecl(
          CGM, Kind: D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPointerQTy: KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getCanonicalTagType(TD: createKmpTaskTRecordDecl(
          CGM, Kind: D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPointerQTy: KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  CanQualType KmpTaskTWithPrivatesQTy =
      C.getCanonicalTagType(TD: KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(T: KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy = CGF.Builder.getPtrTy(AddrSpace: 0);
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(Ty: KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(T: SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map is the 4th parameter of the outlined task function.
  llvm::Type *TaskPrivatesMapTy =
      std::next(x: TaskFunction->arg_begin(), n: 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(x: KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, PrivatesQTy: FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        V: TaskPrivatesMap, DestTy: TaskPrivatesMapTy);
  } else {
    // No privates: pass a null map to the task entry.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        T: cast<llvm::PointerType>(Val: TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, Kind: D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
    FreeAgentFlag = 0x80,
    TransparentFlag = 0x100,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    // Tell the runtime to call the destructors thunk if any private copy
    // has a non-trivial destructor.
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (const auto *Clause = D.getSingleClause<OMPThreadsetClause>()) {
    OpenMPThreadsetKind Kind = Clause->getThreadsetKind();
    if (Kind == OMPC_THREADSET_omp_pool)
      Flags = Flags | FreeAgentFlag;
  }
  if (D.getSingleClause<OMPTransparentClause>())
    Flags |= TransparentFlag;

  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // final clause: if the condition is not a compile-time constant, select
  // the FinalFlag bit at runtime.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(C: Data.Final.getPointer(),
                                     True: CGF.Builder.getInt32(C: FinalFlag),
                                     False: CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(C: Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(LHS: TaskFlags, RHS: CGF.Builder.getInt32(C: Flags));
  llvm::Value *SharedsSize = CGM.getSize(numChars: C.getTypeSizeInChars(T: SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          V: TaskEntry, DestTy: KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: Device),
                                           DestTy: CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(C: OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(Elt: DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        callee: OMPBuilder.getOrCreateRuntimeFunction(
            M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_target_task_alloc),
        args: AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                                M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_task_alloc),
                            args: AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(E: Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, Loc: DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, Loc: DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(V: Tid, DestTy: CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        callee: OMPBuilder.getOrCreateRuntimeFunction(
            M&: CGM.getModule(), FnID: OMPRTL___kmpc_task_allow_completion_event),
        args: {Loc, Tid, NewTask});
    // Store the returned event handle, converted to the handler's type.
    EvtVal = CGF.EmitScalarConversion(Src: EvtVal, SrcTy: C.VoidPtrTy, DstTy: Evt->getType(),
                                      Loc: Evt->getExprLoc());
    CGF.EmitStoreOfScalar(value: EvtVal, lvalue: EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // NumOfElements is a runtime value when any clause uses an iterator
    // modifier; NumAffinities counts the statically known entries.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Val: Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(E: IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(V: Sz, DestTy: CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(LHS: NumOfElements, RHS: Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(C&: CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized case: emit a VLA of affinity records.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          LHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: NumAffinities), RHS: NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(DestWidth: C.getTypeSize(T: C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(V: NumOfElements));
      KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
          EltTy: KmpTaskAffinityInfoTy, NumElts: OVE, ASM: ArraySizeModifier::Normal,
          /*IndexTypeQuals=*/0);
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, T: KmpTaskAffinityInfoArrayTy,
                                           ParamKind: ImplicitParamKind::Other);
      CGF.EmitVarDecl(D: *PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(VD: PD);
      NumOfElements = CGF.Builder.CreateIntCast(V: NumOfElements, DestTy: CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Statically-sized case: a plain constant array temporary.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          EltTy: KmpTaskAffinityInfoTy,
          ArySize: llvm::APInt(C.getTypeSize(T: C.getSizeType()), NumAffinities), SizeExpr: nullptr,
          ASM: ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(T: KmpTaskAffinityInfoArrayTy, Name: ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(Addr: AffinitiesArray, Index: 0);
      NumOfElements = llvm::ConstantInt::get(Ty: CGM.Int32Ty, V: NumAffinities,
                                             /*isSigned=*/IsSigned: false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlist()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(args&: Addr, args&: Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(Addr: CGF.Builder.CreateConstGEP(Addr: AffinitiesArray, Index: Pos),
                               T: KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, Field: *std::next(x: KmpAffinityInfoRD->field_begin(), n: BaseAddr));
        CGF.EmitStoreOfScalar(value: CGF.Builder.CreatePtrToInt(V: Addr, DestTy: CGF.IntPtrTy),
                              lvalue: BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, Field: *std::next(x: KmpAffinityInfoRD->field_begin(), n: Len));
        CGF.EmitStoreOfScalar(value: Size, lvalue: LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      // Iterator clauses fill the array inside runtime loops, so the
      // position must live in memory, seeded past the static entries.
      PosLVal = CGF.MakeAddrLValue(
          Addr: CGF.CreateMemTemp(T: C.getSizeType(), Name: "affs.counter.addr"),
          T: C.getSizeType());
      CGF.EmitStoreOfScalar(value: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: Pos), lvalue: PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Val: Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlist()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(args&: Addr, args&: Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(lvalue: PosLVal, Loc: E->getExprLoc());
        LValue Base =
            CGF.MakeAddrLValue(Addr: CGF.Builder.CreateGEP(CGF, Addr: AffinitiesArray, Index: Idx),
                               T: KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, Field: *std::next(x: KmpAffinityInfoRD->field_begin(), n: BaseAddr));
        CGF.EmitStoreOfScalar(value: CGF.Builder.CreatePtrToInt(V: Addr, DestTy: CGF.IntPtrTy),
                              lvalue: BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, Field: *std::next(x: KmpAffinityInfoRD->field_begin(), n: Len));
        CGF.EmitStoreOfScalar(value: Size, lvalue: LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            LHS: Idx, RHS: llvm::ConstantInt::get(Ty: Idx->getType(), V: 1));
        CGF.EmitStoreOfScalar(value: Idx, lvalue: PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        V: AffinitiesArray.emitRawPointer(CGF), DestTy: CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        callee: OMPBuilder.getOrCreateRuntimeFunction(
            M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_reg_task_with_affinity),
        args: {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // View the allocated task as kmp_task_t-with-privates; TDBase is the
  // plain kmp_task_t sub-record at its start.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          V: NewTask, DestTy: KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignRawAddrLValue(V: NewTaskNewTaskTTy,
                                                  T: KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, Field: *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->castAsRecordDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            lvalue: CGF.EmitLValueForField(
                Base: TDBase,
                Field: *std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(T: SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(Addr: KmpTaskSharedsPtr, T: SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Addr: Shareds, T: SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, EltTy: SharedsTy, MayOverlap: AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase: Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops that need per-chunk duplication of firstprivates or track
    // lastprivates also get a task-duplication helper.
    if (isOpenMPTaskLoopDirective(DKind: D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(x: KmpTaskTQTyRD->field_begin(), n: Data1);
  const auto *KmpCmplrdataUD = (*FI)->getType()->castAsRecordDecl();
  assert(KmpCmplrdataUD->isUnion());
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(Base: TDBase, Field: *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Base: Data1LV, Field: *std::next(x: KmpCmplrdataUD->field_begin(), n: Destructors));
    CGF.EmitStoreOfScalar(value: CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              V: DestructorFn, DestTy: KmpRoutineEntryPtrTy),
                          lvalue: DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        Base: TDBase, Field: *std::next(x: KmpTaskTQTyRD->field_begin(), n: Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Base: Data2LV, Field: *std::next(x: KmpCmplrdataUD->field_begin(), n: Priority));
    CGF.EmitStoreOfScalar(value: Data.Priority.getPointer(), lvalue: PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4152
4153/// Translates internal dependency kind into the runtime kind.
4154static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4155 RTLDependenceKindTy DepKind;
4156 switch (K) {
4157 case OMPC_DEPEND_in:
4158 DepKind = RTLDependenceKindTy::DepIn;
4159 break;
4160 // Out and InOut dependencies must use the same code.
4161 case OMPC_DEPEND_out:
4162 case OMPC_DEPEND_inout:
4163 DepKind = RTLDependenceKindTy::DepInOut;
4164 break;
4165 case OMPC_DEPEND_mutexinoutset:
4166 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4167 break;
4168 case OMPC_DEPEND_inoutset:
4169 DepKind = RTLDependenceKindTy::DepInOutSet;
4170 break;
4171 case OMPC_DEPEND_outallmemory:
4172 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4173 break;
4174 case OMPC_DEPEND_source:
4175 case OMPC_DEPEND_sink:
4176 case OMPC_DEPEND_depobj:
4177 case OMPC_DEPEND_inoutallmemory:
4178 case OMPC_DEPEND_unknown:
4179 llvm_unreachable("Unknown task dependence type");
4180 }
4181 return DepKind;
4182}
4183
4184/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4185static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4186 QualType &FlagsTy) {
4187 FlagsTy = C.getIntTypeForBitwidth(DestWidth: C.getTypeSize(T: C.BoolTy), /*Signed=*/false);
4188 if (KmpDependInfoTy.isNull()) {
4189 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord(Name: "kmp_depend_info");
4190 KmpDependInfoRD->startDefinition();
4191 addFieldToRecordDecl(C, DC: KmpDependInfoRD, FieldTy: C.getIntPtrType());
4192 addFieldToRecordDecl(C, DC: KmpDependInfoRD, FieldTy: C.getSizeType());
4193 addFieldToRecordDecl(C, DC: KmpDependInfoRD, FieldTy: FlagsTy);
4194 KmpDependInfoRD->completeDefinition();
4195 KmpDependInfoTy = C.getCanonicalTagType(TD: KmpDependInfoRD);
4196 }
4197}
4198
/// Returns the number of dependencies stored in a depobj together with an
/// lvalue for the first element of its kmp_depend_info array. The count
/// is kept by the runtime in the base_addr field of the element at index
/// -1, immediately before the dependency array the depobj points to.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  // Ensure kmp_depend_info is built (created lazily on first use).
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
  QualType KmpDependInfoPtrTy = C.getPointerType(T: KmpDependInfoTy);
  // Load the kmp_depend_info* stored in the depobj variable.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      Ptr: DepobjLVal.getAddress().withElementType(
          ElemTy: CGF.ConvertTypeForMem(T: KmpDependInfoPtrTy)),
      PtrTy: KmpDependInfoPtrTy->castAs<PointerType>());
  // Step one element back to reach the bookkeeping slot.
  Address DepObjAddr = CGF.Builder.CreateGEP(
      CGF, Addr: Base.getAddress(),
      Index: llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: -1, /*isSigned=*/IsSigned: true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Addr: DepObjAddr, T: KmpDependInfoTy, BaseInfo: Base.getBaseInfo(), TBAAInfo: Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base: NumDepsBase,
      Field: *std::next(x: KmpDependInfoRD->field_begin(),
                  n: static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(lvalue: BaseAddrLVal, Loc);
  return std::make_pair(x&: NumDeps, y&: Base);
}
4224
/// Fills kmp_depend_info array elements (base_addr, len, flags) for every
/// dependency expression in \p Data. \p Pos is either a pointer to a
/// compile-time index (when the element positions are statically known)
/// or an lvalue holding a runtime index; the latter is required when the
/// clause has an iterator modifier, because the stores below are then
/// emitted inside the runtime loops opened by OMPIteratorGeneratorScope.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(T: FlagsTy);

  // Open the iterator loops; a no-op when there is no iterator modifier.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Val: Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                  : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;

    // The expression will be a nullptr in the 'omp_all_memory' case.
    if (E) {
      std::tie(args&: Addr, args&: Size) = getPointerAndSize(CGF, E);
      Addr = CGF.Builder.CreatePtrToInt(V: Addr, DestTy: CGF.IntPtrTy);
    } else {
      // omp_all_memory: address 0, length 0.
      Addr = llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: 0);
      Size = llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 0);
    }
    LValue Base;
    if (unsigned *P = dyn_cast<unsigned *>(Val&: Pos)) {
      // Statically known position.
      Base = CGF.MakeAddrLValue(
          Addr: CGF.Builder.CreateConstGEP(Addr: DependenciesArray, Index: *P), T: KmpDependInfoTy);
    } else {
      // Runtime position, loaded from memory.
      assert(E && "Expected a non-null expression");
      LValue &PosLVal = *cast<LValue *>(Val&: Pos);
      llvm::Value *Idx = CGF.EmitLoadOfScalar(lvalue: PosLVal, Loc: E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Addr: CGF.Builder.CreateGEP(CGF, Addr: DependenciesArray, Index: Idx), T: KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base,
        Field: *std::next(x: KmpDependInfoRD->field_begin(),
                    n: static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
    CGF.EmitStoreOfScalar(value: Addr, lvalue: BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, Field: *std::next(x: KmpDependInfoRD->field_begin(),
                           n: static_cast<unsigned int>(RTLDependInfoFields::Len)));
    CGF.EmitStoreOfScalar(value: Size, lvalue: LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(K: Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base,
        Field: *std::next(x: KmpDependInfoRD->field_begin(),
                    n: static_cast<unsigned int>(RTLDependInfoFields::Flags)));
    CGF.EmitStoreOfScalar(
        value: llvm::ConstantInt::get(Ty: LLVMFlagsTy, V: static_cast<unsigned int>(DepKind)),
        lvalue: FlagsLVal);
    // Advance the position: in the compiler for the static case, in
    // emitted code for the runtime case.
    if (unsigned *P = dyn_cast<unsigned *>(Val&: Pos)) {
      ++(*P);
    } else {
      LValue &PosLVal = *cast<LValue *>(Val&: Pos);
      llvm::Value *Idx = CGF.EmitLoadOfScalar(lvalue: PosLVal, Loc: E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(LHS: Idx,
                                     RHS: llvm::ConstantInt::get(Ty: Idx->getType(), V: 1));
      CGF.EmitStoreOfScalar(value: Idx, lvalue: PosLVal);
    }
  }
}
4294
/// Computes, for each depobj expression in \p Data, the number of
/// kmp_depend_info elements that depobj holds, and returns the counts as
/// runtime values loaded after the iterator loops (if any) have closed.
SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
    CodeGenFunction &CGF, QualType &KmpDependInfoTy,
    const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  {
    // Inside this scope the emitted code may run once per iterator value.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Val: Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                    : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E: E->IgnoreParenImpCasts());
      std::tie(args&: NumDeps, args&: Base) =
          getDepobjElements(CGF, DepobjLVal, Loc: E->getExprLoc());
      // Keep the count in a memory temporary so it survives past the
      // iterator scope for the loads below.
      LValue NumLVal = CGF.MakeAddrLValue(
          Addr: CGF.CreateMemTemp(T: C.getUIntPtrType(), Name: "depobj.size.addr"),
          T: C.getUIntPtrType());
      // NOTE(review): the zero-store and reload below are emitted at the
      // current insertion point, i.e. inside any iterator-generated runtime
      // loop, so each runtime iteration appears to reset the accumulator
      // before adding — confirm the intended semantics for iterator depobjs.
      CGF.Builder.CreateStore(Val: llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: 0),
                              Addr: NumLVal.getAddress());
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(lvalue: NumLVal, Loc: E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(LHS: PrevVal, RHS: NumDeps);
      CGF.EmitStoreOfScalar(value: Add, lvalue: NumLVal);
      SizeLVals.push_back(Elt: NumLVal);
    }
  }
  // Load the final counts after the iterator loops have been closed.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(lvalue: SizeLVals[I], Loc: Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Elt: Size);
  }
  return Sizes;
}
4332
/// Copies the kmp_depend_info elements of every depobj in \p Data into
/// \p DependenciesArray at the runtime position held by \p PosLVal,
/// advancing the position by the number of copied elements. Emitted code
/// may run inside iterator-generated runtime loops.
void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
                                         QualType &KmpDependInfoTy,
                                         LValue PosLVal,
                                         const OMPTaskDataTy::DependData &Data,
                                         Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  // Size in bytes of one kmp_depend_info element.
  llvm::Value *ElSize = CGF.getTypeSize(Ty: KmpDependInfoTy);
  {
    // Open the iterator loops; a no-op when there is no iterator modifier.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Val: Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                    : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E: E->IgnoreParenImpCasts());
      std::tie(args&: NumDeps, args&: Base) =
          getDepobjElements(CGF, DepobjLVal, Loc: E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          LHS: ElSize,
          RHS: CGF.Builder.CreateIntCast(V: NumDeps, DestTy: CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(lvalue: PosLVal, Loc: E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(CGF, Addr: DependenciesArray, Index: Pos);
      CGF.Builder.CreateMemCpy(Dest: DepAddr, Src: Base.getAddress(), Size);

      // Increase pos.
      // pos += size; (position counts elements, so advance by NumDeps)
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(LHS: Pos, RHS: NumDeps);
      CGF.EmitStoreOfScalar(value: Add, lvalue: PosLVal);
    }
  }
}
4368
/// Emit the runtime dependence array for the 'depend' clause(s) of a task:
/// packs all plain, iterator-expanded, and depobj dependencies into a single
/// kmp_depend_info array and returns {number of elements, array address}.
/// Returns {nullptr, Address::invalid()} when every clause's expression list
/// is empty.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Range&: Dependencies, P: [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(x: nullptr, y: Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Statically-known count: plain deps only. depobj deps and deps under an
  // iterator are sized at runtime below.
  unsigned NumDependencies = std::accumulate(
      first: Dependencies.begin(), last: Dependencies.end(), init: 0,
      binary_op: [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, Data: D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(LHS: NumOfDepobjElements, RHS: Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(Val: D.IteratorExpr)) {
      // Runtime element count = product of all iterator trip counts, times
      // the number of dep expressions in this clause.
      llvm::Value *ClauseIteratorSpace =
          llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: 1);
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(E: IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(V: Sz, DestTy: CGF.IntPtrTy, /*isSigned=*/false);
        ClauseIteratorSpace = CGF.Builder.CreateNUWMul(LHS: Sz, RHS: ClauseIteratorSpace);
      }
      llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
          LHS: ClauseIteratorSpace,
          RHS: llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: D.DepExprs.size()));
      NumOfRegularWithIterators =
          CGF.Builder.CreateNUWAdd(LHS: NumOfRegularWithIterators, RHS: NumClauseDeps);
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total size only known at runtime: back the array with a VLA local.
    NumOfElements = llvm::ConstantInt::get(Ty: CGM.IntPtrTy, V: NumDependencies,
                                           /*isSigned=*/IsSigned: false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(LHS: NumOfDepobjElements, RHS: NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(LHS: NumOfRegularWithIterators, RHS: NumOfElements);
    }
    // Wrap the runtime count in an OpaqueValueExpr so it can serve as the
    // size expression of the variable array type below.
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(V: NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(EltTy: KmpDependInfoTy, NumElts: OVE, ASM: ArraySizeModifier::Normal,
                               /*IndexTypeQuals=*/0);
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, T: KmpDependInfoArrayTy,
                                         ParamKind: ImplicitParamKind::Other);
    CGF.EmitVarDecl(D: *PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(VD: PD);
    NumOfElements = CGF.Builder.CreateIntCast(V: NumOfElements, DestTy: CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // All counts known at compile time: use a constant-size temporary.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        EltTy: KmpDependInfoTy, ArySize: llvm::APInt(/*numBits=*/64, NumDependencies), SizeExpr: nullptr,
        ASM: ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(T: KmpDependInfoArrayTy, Name: ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(Addr: DependenciesArray, Index: 0);
    NumOfElements = llvm::ConstantInt::get(Ty: CGM.Int32Ty, V: NumDependencies,
                                           /*isSigned=*/IsSigned: false);
  }
  // Fill pass 1: plain deps at statically-tracked positions.
  unsigned Pos = 0;
  for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
    if (Dep.DepKind == OMPC_DEPEND_depobj || Dep.IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, Pos: &Pos, Data: Dep, DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  // Fill pass 2: switch to a runtime position counter seeded with Pos.
  LValue PosLVal = CGF.MakeAddrLValue(
      Addr: CGF.CreateMemTemp(T: C.getSizeType(), Name: "dep.counter.addr"), T: C.getSizeType());
  CGF.EmitStoreOfScalar(value: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: Pos), lvalue: PosLVal);
  for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
    if (Dep.DepKind == OMPC_DEPEND_depobj || !Dep.IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, Pos: &PosLVal, Data: Dep, DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
      if (Dep.DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Data: Dep, DependenciesArray);
    }
  }
  // Callers receive the array as a void* (i8*) address.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr: DependenciesArray, Ty: CGF.VoidPtrTy, ElementTy: CGF.Int8Ty);
  return std::make_pair(x&: NumOfElements, y&: DependenciesArray);
}
4492
/// Emit the dependence storage for an 'omp depobj' construct: allocates a
/// kmp_depend_info array on the heap via __kmpc_alloc, records the element
/// count in a reserved leading slot, fills in the dependencies, and returns
/// the address of the first real element (one past the header slot).
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(T: KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Val: Dependencies.IteratorExpr)) {
    // Dynamic element count: product of all iterator trip counts.
    NumDepsVal = llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(E: IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(V: Sz, DestTy: CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(LHS: NumDepsVal, RHS: Sz);
    }
    // Allocation size = (count + 1 header slot) * aligned record size.
    Size = CGF.Builder.CreateNUWAdd(LHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1),
                                    RHS: NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(T: KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(numChars: SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(LHS: Size, RHS: RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(V: NumDepsVal, DestTy: CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Static element count: size the array (including the header slot)
    // directly from the constant array type.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        EltTy: KmpDependInfoTy, ArySize: llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        SizeExpr: nullptr, ASM: ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(T: KmpDependInfoArrayTy);
    Size = CGM.getSize(numChars: Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                              M&: CGM.getModule(), FnID: OMPRTL___kmpc_alloc),
                          args: Args, name: ".dep.arr.addr");
  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(T: KmpDependInfoTy);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      V: Addr, DestTy: CGF.Builder.getPtrTy(AddrSpace: 0));
  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(Addr: DependenciesArray, T: KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base,
      Field: *std::next(x: KmpDependInfoRD->field_begin(),
                   n: static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  CGF.EmitStoreOfScalar(value: NumDepsVal, lvalue: BaseAddrLVal);
  // Write position starts at 1: slot 0 is the element-count header.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    // Iterator case: position must be tracked at runtime, in memory.
    PosLVal = CGF.MakeAddrLValue(
        Addr: CGF.CreateMemTemp(T: C.getSizeType(), Name: "iterator.counter.addr"),
        T: C.getSizeType());
    CGF.EmitStoreOfScalar(value: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: Idx), lvalue: PosLVal,
                          /*IsInit=*/isInit: true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Data: Dependencies, DependenciesArray);
  // Return the address of element 1, i.e. past the header slot.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr: CGF.Builder.CreateConstGEP(Addr: DependenciesArray, Index: 1), Ty: CGF.VoidPtrTy,
      ElementTy: CGF.Int8Ty);
  return DependenciesArray;
}
4578
4579void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4580 SourceLocation Loc) {
4581 ASTContext &C = CGM.getContext();
4582 QualType FlagsTy;
4583 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4584 LValue Base = CGF.EmitLoadOfPointerLValue(Ptr: DepobjLVal.getAddress(),
4585 PtrTy: C.VoidPtrTy.castAs<PointerType>());
4586 QualType KmpDependInfoPtrTy = C.getPointerType(T: KmpDependInfoTy);
4587 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4588 Addr: Base.getAddress(), Ty: CGF.ConvertTypeForMem(T: KmpDependInfoPtrTy),
4589 ElementTy: CGF.ConvertTypeForMem(T: KmpDependInfoTy));
4590 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4591 Ty: Addr.getElementType(), Ptr: Addr.emitRawPointer(CGF),
4592 IdxList: llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: -1, /*isSigned=*/IsSigned: true));
4593 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(V: DepObjAddr,
4594 DestTy: CGF.VoidPtrTy);
4595 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4596 // Use default allocator.
4597 llvm::Value *Allocator = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
4598 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4599
4600 // _kmpc_free(gtid, addr, nullptr);
4601 (void)CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
4602 M&: CGM.getModule(), FnID: OMPRTL___kmpc_free),
4603 args: Args);
4604}
4605
/// Emit 'omp depobj ... update(NewDepKind)': iterates over every
/// kmp_depend_info entry stored behind the depobj handle and rewrites its
/// 'flags' field to the runtime encoding of the new dependency kind.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(T: FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(args&: NumDeps, args&: Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Ty: Begin.getElementType(),
                                           Ptr: Begin.emitRawPointer(CGF), IdxList: NumDeps);
  // The basic structure here is a while-do loop.
  // NOTE(review): the body block is entered unconditionally, so this assumes
  // the depobj array is never empty -- confirm against the depobj emitters.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock(name: "omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock(name: "omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BB: BodyBB);
  // PHI carries the current element pointer across loop iterations.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Ty: Begin.getType(), NumReservedValues: 2, Name: "omp.elementPast");
  ElementPHI->addIncoming(V: Begin.emitRawPointer(CGF), BB: EntryBB);
  Begin = Begin.withPointer(NewPointer: ElementPHI, IsKnownNonNull: KnownNonNull);
  Base = CGF.MakeAddrLValue(Addr: Begin, T: KmpDependInfoTy, BaseInfo: Base.getBaseInfo(),
                            TBAAInfo: Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(K: NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, Field: *std::next(x: KmpDependInfoRD->field_begin(),
                         n: static_cast<unsigned int>(RTLDependInfoFields::Flags)));
  CGF.EmitStoreOfScalar(
      value: llvm::ConstantInt::get(Ty: LLVMFlagsTy, V: static_cast<unsigned int>(DepKind)),
      lvalue: FlagsLVal);

  // Shift the address forward by one element.
  llvm::Value *ElementNext =
      CGF.Builder.CreateConstGEP(Addr: Begin, /*Index=*/1, Name: "omp.elementNext")
          .emitRawPointer(CGF);
  ElementPHI->addIncoming(V: ElementNext, BB: CGF.Builder.GetInsertBlock());
  // Loop until the advanced pointer reaches one past the last element.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHS: ElementNext, RHS: End, Name: "omp.isempty");
  CGF.Builder.CreateCondBr(Cond: IsEmpty, True: DoneBB, False: BodyBB);
  // Done.
  CGF.EmitBlock(BB: DoneBB, /*IsFinished=*/true);
}
4653
/// Emit the code for an 'omp task' directive: allocates and initializes the
/// task via emitTaskInit, materializes any 'depend' clauses, and then either
/// enqueues the task (__kmpc_omp_task / __kmpc_omp_task_with_deps) or, when
/// the 'if' clause evaluates to false, runs it serially between
/// __kmpc_omp_task_begin_if0/__kmpc_omp_task_complete_if0.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(args&: NumOfElements, args&: DependenciesArray) =
      emitDependClause(CGF, Dependencies: Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
    DepTaskArgs[5] = CGF.Builder.getInt32(C: 0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
  }
  // Then-branch: enqueue the task with the runtime for deferred execution.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start executing at part id 0.
      auto PartIdFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(Base: TDBase, Field: *PartIdFI);
      CGF.EmitStoreOfScalar(value: CGF.Builder.getInt32(C: 0), lvalue: PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          callee: OMPBuilder.getOrCreateRuntimeFunction(
              M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_task_with_deps),
          args: DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                              M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_task),
                          args: TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(C: 0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
    DepWaitTaskArgs[6] =
        llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: Data.HasNowaitClause);
  }
  auto &M = CGM.getModule();
  // Else-branch ('if' clause false): wait on dependencies, then run the task
  // entry point directly, bracketed by the if0 begin/complete runtime calls.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                              M, FnID: OMPRTL___kmpc_omp_taskwait_deps_51),
                          args: DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, OutlinedFn: TaskEntry,
                                                          Args: OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, FnID: OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, FnID: OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, Cond: IfCond, ThenGen: ThenCodeGen, ElseGen: ElseCodeGen);
  } else {
    // No 'if' clause: always take the enqueue path.
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
4773
/// Emit the code for an 'omp taskloop' directive: allocates the task record,
/// seeds its lower-bound/upper-bound/stride/reductions fields from the loop
/// directive, and calls __kmpc_taskloop (or __kmpc_taskloop_5 when a
/// grainsize/num_tasks 'strict' modifier is present).
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // 'if' clause value is passed to the runtime rather than branching here.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(V: CGF.EvaluateExprAsBool(E: IfCond), DestTy: CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(Ty: CGF.IntTy, /*V=*/1);
  }

  // Initialize the lb/ub/st fields of the task record from the loop-bound
  // variables' initializers.
  LValue LBLVal = CGF.EmitLValueForField(
      Base: Result.TDBase,
      Field: *std::next(x: Result.KmpTaskTQTyRD->field_begin(), n: KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(E: LBVar->getInit(), Location: LBLVal.getAddress(), Quals: LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Base: Result.TDBase,
      Field: *std::next(x: Result.KmpTaskTQTyRD->field_begin(), n: KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(E: UBVar->getInit(), Location: UBLVal.getAddress(), Quals: UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Base: Result.TDBase,
      Field: *std::next(x: Result.KmpTaskTQTyRD->field_begin(), n: KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(E: StVar->getInit(), Location: StLVal.getAddress(), Quals: StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Base: Result.TDBase,
      Field: *std::next(x: Result.KmpTaskTQTyRD->field_begin(), n: KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(value: Data.Reductions, lvalue: RedLVal);
  } else {
    CGF.EmitNullInitialization(DestPtr: RedLVal.getAddress(),
                               Ty: CGF.getContext().VoidPtrTy);
  }
  // Encodings for the 'sched' argument of __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::SmallVector<llvm::Value *, 12> TaskArgs{
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(lvalue: StLVal, Loc),
      llvm::ConstantInt::getSigned(
          Ty: CGF.IntTy, V: 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          Ty: CGF.IntTy, V: Data.Schedule.getPointer()
                           ? Data.Schedule.getInt() ? NumTasks : Grainsize
                           : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(V: Data.Schedule.getPointer(), DestTy: CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(Ty: CGF.Int64Ty, /*V=*/0)};
  // __kmpc_taskloop_5 takes an extra 'modifier' argument before task_dup.
  if (Data.HasModifier)
    TaskArgs.push_back(Elt: llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: 1));

  TaskArgs.push_back(Elt: Result.TaskDupFn
                         ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                               V: Result.TaskDupFn, DestTy: CGF.VoidPtrTy)
                         : llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy));
  CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                          M&: CGM.getModule(), FnID: Data.HasModifier
                                                 ? OMPRTL___kmpc_taskloop_5
                                                 : OMPRTL___kmpc_taskloop),
                      args: TaskArgs);
}
4862
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional expressions forwarded unchanged to
/// \p RedOpGen on every element.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(VD: LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(VD: RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(arrayType: ArrayTy, baseType&: ElementTy, addr&: LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
  llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(Ty: LHSAddr.getElementType(), Ptr: LHSBegin, IdxList: NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock(name: "omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock(name: "omp.arraycpy.done");
  // Skip the loop entirely for a zero-length array.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHS: LHSBegin, RHS: LHSEnd, Name: "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(Cond: IsEmpty, True: DoneBB, False: BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BB: BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(T: ElementTy);

  // PHIs carry the current source/destination element pointers.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      Ty: RHSBegin->getType(), NumReservedValues: 2, Name: "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(V: RHSBegin, BB: EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      Ty: LHSBegin->getType(), NumReservedValues: 2, Name: "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(V: LHSBegin, BB: EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize));

  // Emit copy.
  // Privatize LHSVar/RHSVar to the current element addresses so RedOpGen's
  // expressions refer to the per-element locations.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LocalVD: LHSVar, Addr: LHSElementCurrent);
  Scope.addPrivate(LocalVD: RHSVar, Addr: RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      Ty: LHSAddr.getElementType(), Ptr: LHSElementPHI, /*Idx0=*/1,
      Name: "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      Ty: RHSAddr.getElementType(), Ptr: RHSElementPHI, /*Idx0=*/1,
      Name: "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHS: LHSElementNext, RHS: LHSEnd, Name: "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Cond: Done, True: DoneBB, False: BodyBB);
  LHSElementPHI->addIncoming(V: LHSElementNext, BB: CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(V: RHSElementNext, BB: CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(BB: DoneBB, /*IsFinished=*/true);
}
4945
4946/// Emit reduction combiner. If the combiner is a simple expression emit it as
4947/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4948/// UDR combiner function.
4949static void emitReductionCombiner(CodeGenFunction &CGF,
4950 const Expr *ReductionOp) {
4951 if (const auto *CE = dyn_cast<CallExpr>(Val: ReductionOp))
4952 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(Val: CE->getCallee()))
4953 if (const auto *DRE =
4954 dyn_cast<DeclRefExpr>(Val: OVE->getSourceExpr()->IgnoreImpCasts()))
4955 if (const auto *DRD =
4956 dyn_cast<OMPDeclareReductionDecl>(Val: DRE->getDecl())) {
4957 std::pair<llvm::Function *, llvm::Function *> Reduction =
4958 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(D: DRD);
4959 RValue Func = RValue::get(V: Reduction.first);
4960 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4961 CGF.EmitIgnoredExpr(E: ReductionOp);
4962 return;
4963 }
4964 CGF.EmitIgnoredExpr(E: ReductionOp);
4965}
4966
/// Emits the reduction function
///   void <reduction-func-name>(void *LHSArg, void *RHSArg)
/// where each argument points to an array of pointers to the reduction
/// variables. The body privatizes every LHS/RHS variable to its slot in the
/// argument arrays and emits LHS[i] = RedOp_i(LHS[i], RHS[i]) for each
/// reduction operation, element-wise for array-typed reductions.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(Elt: &LHSArg);
  Args.push_back(Elt: &RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
  std::string Name = getReductionFuncName(Name: ReducerName);
  auto *Fn = llvm::Function::Create(Ty: CGM.getTypes().GetFunctionType(Info: CGFI),
                                    Linkage: llvm::GlobalValue::InternalLinkage, N: Name,
                                    M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: CGFI);
  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
    Fn->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo: CGFI, Args, Loc, StartLoc: Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  V: CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(VD: &LHSArg)),
                  DestTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  V: CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(VD: &RHSArg)),
                  DestTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
              ArgsElemType, CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  // Remap every reduction variable to its slot in the argument arrays.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSExprs[I])->getDecl());
    Scope.addPrivate(LocalVD: RHSVar, Addr: emitAddrOfVarFromArray(CGF, Array: RHS, Index: Idx, Var: RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSExprs[I])->getDecl());
    Scope.addPrivate(LocalVD: LHSVar, Addr: emitAddrOfVarFromArray(CGF, Array: LHS, Index: Idx, Var: LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The VLA size occupies the next slot of the argument array, so an
      // extra index is consumed here.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(Addr: LHS, Index: Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Addr: Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(T: PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(Val: VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(V: CGF.Builder.CreatePtrToInt(V: Ptr, DestTy: CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(Ty: PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, Type: (*IPriv)->getType(), LHSVar, RHSVar,
          RedOpGen: [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, ReductionOp: E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, ReductionOp: E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5058
5059void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5060 const Expr *ReductionOp,
5061 const Expr *PrivateRef,
5062 const DeclRefExpr *LHS,
5063 const DeclRefExpr *RHS) {
5064 if (PrivateRef->getType()->isArrayType()) {
5065 // Emit reduction for array section.
5066 const auto *LHSVar = cast<VarDecl>(Val: LHS->getDecl());
5067 const auto *RHSVar = cast<VarDecl>(Val: RHS->getDecl());
5068 EmitOMPAggregateReduction(
5069 CGF, Type: PrivateRef->getType(), LHSVar, RHSVar,
5070 RedOpGen: [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5071 emitReductionCombiner(CGF, ReductionOp);
5072 });
5073 } else {
5074 // Emit reduction for array subscript or single variable.
5075 emitReductionCombiner(CGF, ReductionOp);
5076 }
5077}
5078
5079static std::string generateUniqueName(CodeGenModule &CGM,
5080 llvm::StringRef Prefix, const Expr *Ref);
5081
/// Emit the reduction of a single privatized list item into a module-internal
/// shared accumulator, then broadcast the combined value back to each
/// thread's copy and finally merge it with the original list item.
/// \param Privates   The privatized reduction variable.
/// \param LHSExprs / \param RHSExprs  DeclRefExprs for the combiner operands.
/// \param ReductionOps  The combiner expression (built-in op or UDR call).
void CGOpenMPRuntime::emitPrivateReduction(
    CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates,
    const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps) {

  // Create a shared global variable (__shared_reduction_var) to accumulate the
  // final result.
  //
  // Call __kmpc_barrier to synchronize threads before initialization.
  //
  // The master thread (thread_id == 0) initializes __shared_reduction_var
  // with the identity value or initializer.
  //
  // Call __kmpc_barrier to synchronize before combining.
  // For each i:
  // - Thread enters critical section.
  // - Reads its private value from LHSExprs[i].
  // - Updates __shared_reduction_var[i] = RedOp_i(__shared_reduction_var[i],
  //   Privates[i]).
  // - Exits critical section.
  //
  // Call __kmpc_barrier after combining.
  //
  // Each thread copies __shared_reduction_var[i] back to RHSExprs[i].
  //
  // Final __kmpc_barrier to synchronize after broadcasting
  QualType PrivateType = Privates->getType();
  llvm::Type *LLVMType = CGF.ConvertTypeForMem(T: PrivateType);

  const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOp: ReductionOps);
  std::string ReductionVarNameStr;
  if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: Privates->IgnoreParenCasts()))
    ReductionVarNameStr =
        generateUniqueName(CGM, Prefix: DRE->getDecl()->getNameAsString(), Ref: Privates);
  else
    ReductionVarNameStr = "unnamed_priv_var";

  // Create an internal shared variable
  // NOTE(review): "internal_pivate_" misspells "private", but the string is
  // part of the emitted symbol name — renaming it would change generated IR.
  std::string SharedName =
      CGM.getOpenMPRuntime().getName(Parts: {"internal_pivate_", ReductionVarNameStr});
  llvm::GlobalVariable *SharedVar = OMPBuilder.getOrCreateInternalVariable(
      Ty: LLVMType, Name: ".omp.reduction." + SharedName);

  // getTypeAlign() is in bits; the global's alignment is in bytes.
  SharedVar->setAlignment(
      llvm::MaybeAlign(CGF.getContext().getTypeAlign(T: PrivateType) / 8));

  Address SharedResult =
      CGF.MakeNaturalAlignRawAddrLValue(V: SharedVar, T: PrivateType).getAddress();

  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, Flags: OMP_ATOMIC_REDUCE);
  llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};

  llvm::BasicBlock *InitBB = CGF.createBasicBlock(name: "init");
  llvm::BasicBlock *InitEndBB = CGF.createBasicBlock(name: "init.end");

  // Despite the name, this selects thread 0 (the master), which alone
  // performs the shared-accumulator initialization below.
  llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
      LHS: ThreadId, RHS: llvm::ConstantInt::get(Ty: ThreadId->getType(), V: 0));
  CGF.Builder.CreateCondBr(Cond: IsWorker, True: InitBB, False: InitEndBB);

  CGF.EmitBlock(BB: InitBB);

  // Initialize the shared accumulator: UDR initializer if present, else the
  // private variable's own initializer, else zero/default initialization.
  auto EmitSharedInit = [&]() {
    if (UDR) { // Check if it's a User-Defined Reduction
      if (const Expr *UDRInitExpr = UDR->getInitializer()) {
        std::pair<llvm::Function *, llvm::Function *> FnPair =
            getUserDefinedReduction(D: UDR);
        llvm::Function *InitializerFn = FnPair.second;
        if (InitializerFn) {
          if (const auto *CE =
                  dyn_cast<CallExpr>(Val: UDRInitExpr->IgnoreParenImpCasts())) {
            // Redirect the initializer's "omp_priv" (arg 0) to the shared
            // accumulator so the UDR init writes directly into it.
            const auto *OutDRE = cast<DeclRefExpr>(
                Val: cast<UnaryOperator>(Val: CE->getArg(Arg: 0)->IgnoreParenImpCasts())
                    ->getSubExpr());
            const VarDecl *OutVD = cast<VarDecl>(Val: OutDRE->getDecl());

            CodeGenFunction::OMPPrivateScope LocalScope(CGF);
            LocalScope.addPrivate(LocalVD: OutVD, Addr: SharedResult);

            (void)LocalScope.Privatize();
            if (const auto *OVE = dyn_cast<OpaqueValueExpr>(
                    Val: CE->getCallee()->IgnoreParenImpCasts())) {
              // The callee is opaque; map it to the initializer function
              // before emitting the call.
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  CGF, OVE, RValue::get(V: InitializerFn));
              CGF.EmitIgnoredExpr(E: CE);
            } else {
              CGF.EmitAnyExprToMem(E: UDRInitExpr, Location: SharedResult,
                                   Quals: PrivateType.getQualifiers(),
                                   /*IsInitializer=*/true);
            }
          } else {
            CGF.EmitAnyExprToMem(E: UDRInitExpr, Location: SharedResult,
                                 Quals: PrivateType.getQualifiers(),
                                 /*IsInitializer=*/true);
          }
        } else {
          CGF.EmitAnyExprToMem(E: UDRInitExpr, Location: SharedResult,
                               Quals: PrivateType.getQualifiers(),
                               /*IsInitializer=*/true);
        }
      } else {
        // EmitNullInitialization handles default construction for C++ classes
        // and zeroing for scalars, which is a reasonable default.
        CGF.EmitNullInitialization(DestPtr: SharedResult, Ty: PrivateType);
      }
      return; // UDR initialization handled
    }
    if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: Privates)) {
      if (const auto *VD = dyn_cast<VarDecl>(Val: DRE->getDecl())) {
        if (const Expr *InitExpr = VD->getInit()) {
          CGF.EmitAnyExprToMem(E: InitExpr, Location: SharedResult,
                               Quals: PrivateType.getQualifiers(), IsInitializer: true);
          return;
        }
      }
    }
    CGF.EmitNullInitialization(DestPtr: SharedResult, Ty: PrivateType);
  };
  EmitSharedInit();
  CGF.Builder.CreateBr(Dest: InitEndBB);
  CGF.EmitBlock(BB: InitEndBB);

  // Barrier: all threads wait until the master finished initialization.
  CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                          M&: CGM.getModule(), FnID: OMPRTL___kmpc_barrier),
                      args: BarrierArgs);

  const Expr *ReductionOp = ReductionOps;
  const OMPDeclareReductionDecl *CurrentUDR = getReductionInit(ReductionOp);
  LValue SharedLV = CGF.MakeAddrLValue(Addr: SharedResult, T: PrivateType);
  LValue LHSLV = CGF.EmitLValue(E: Privates);

  // Wrap a combiner in a named critical region so threads update the shared
  // accumulator one at a time.
  auto EmitCriticalReduction = [&](auto ReductionGen) {
    std::string CriticalName = getName(Parts: {"reduction_critical"});
    emitCriticalRegion(CGF, CriticalName, CriticalOpGen: ReductionGen, Loc);
  };

  if (CurrentUDR) {
    // Handle user-defined reduction.
    auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      std::pair<llvm::Function *, llvm::Function *> FnPair =
          getUserDefinedReduction(D: CurrentUDR);
      if (FnPair.first) {
        // Remap omp_out (arg 0) to the shared accumulator and omp_in (arg 1)
        // to this thread's private value, then emit the combiner call.
        if (const auto *CE = dyn_cast<CallExpr>(Val: ReductionOp)) {
          const auto *OutDRE = cast<DeclRefExpr>(
              Val: cast<UnaryOperator>(Val: CE->getArg(Arg: 0)->IgnoreParenImpCasts())
                  ->getSubExpr());
          const auto *InDRE = cast<DeclRefExpr>(
              Val: cast<UnaryOperator>(Val: CE->getArg(Arg: 1)->IgnoreParenImpCasts())
                  ->getSubExpr());
          CodeGenFunction::OMPPrivateScope LocalScope(CGF);
          LocalScope.addPrivate(LocalVD: cast<VarDecl>(Val: OutDRE->getDecl()),
                                Addr: SharedLV.getAddress());
          LocalScope.addPrivate(LocalVD: cast<VarDecl>(Val: InDRE->getDecl()),
                                Addr: LHSLV.getAddress());
          (void)LocalScope.Privatize();
          emitReductionCombiner(CGF, ReductionOp);
        }
      }
    };
    EmitCriticalReduction(ReductionGen);
  } else {
    // Handle built-in reduction operations.
#ifndef NDEBUG
    // Debug-only sanity check: the reduction op must be an assignment
    // ("omp_out = omp_out <op> omp_in") in either operator or call form.
    const Expr *ReductionClauseExpr = ReductionOp->IgnoreParenCasts();
    if (const auto *Cleanup = dyn_cast<ExprWithCleanups>(ReductionClauseExpr))
      ReductionClauseExpr = Cleanup->getSubExpr()->IgnoreParenCasts();

    const Expr *AssignRHS = nullptr;
    if (const auto *BinOp = dyn_cast<BinaryOperator>(ReductionClauseExpr)) {
      if (BinOp->getOpcode() == BO_Assign)
        AssignRHS = BinOp->getRHS();
    } else if (const auto *OpCall =
                   dyn_cast<CXXOperatorCallExpr>(ReductionClauseExpr)) {
      if (OpCall->getOperator() == OO_Equal)
        AssignRHS = OpCall->getArg(1);
    }

    assert(AssignRHS &&
           "Private Variable Reduction : Invalid ReductionOp expression");
#endif

    auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      const auto *OmpOutDRE =
          dyn_cast<DeclRefExpr>(Val: LHSExprs->IgnoreParenImpCasts());
      const auto *OmpInDRE =
          dyn_cast<DeclRefExpr>(Val: RHSExprs->IgnoreParenImpCasts());
      assert(
          OmpOutDRE && OmpInDRE &&
          "Private Variable Reduction : LHSExpr/RHSExpr must be DeclRefExprs");
      const VarDecl *OmpOutVD = cast<VarDecl>(Val: OmpOutDRE->getDecl());
      const VarDecl *OmpInVD = cast<VarDecl>(Val: OmpInDRE->getDecl());
      CodeGenFunction::OMPPrivateScope LocalScope(CGF);
      LocalScope.addPrivate(LocalVD: OmpOutVD, Addr: SharedLV.getAddress());
      LocalScope.addPrivate(LocalVD: OmpInVD, Addr: LHSLV.getAddress());
      (void)LocalScope.Privatize();
      // Emit the actual reduction operation
      CGF.EmitIgnoredExpr(E: ReductionOp);
    };
    EmitCriticalReduction(ReductionGen);
  }

  // Barrier: wait for every thread to fold its private value in.
  CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                          M&: CGM.getModule(), FnID: OMPRTL___kmpc_barrier),
                      args: BarrierArgs);

  // Broadcast final result
  bool IsAggregate = PrivateType->isAggregateType();
  LValue SharedLV1 = CGF.MakeAddrLValue(Addr: SharedResult, T: PrivateType);
  llvm::Value *FinalResultVal = nullptr;
  Address FinalResultAddr = Address::invalid();

  if (IsAggregate)
    FinalResultAddr = SharedResult;
  else
    FinalResultVal = CGF.EmitLoadOfScalar(lvalue: SharedLV1, Loc);

  LValue TargetLHSLV = CGF.EmitLValue(E: RHSExprs);
  if (IsAggregate) {
    CGF.EmitAggregateCopy(Dest: TargetLHSLV,
                          Src: CGF.MakeAddrLValue(Addr: FinalResultAddr, T: PrivateType),
                          EltTy: PrivateType, MayOverlap: AggValueSlot::DoesNotOverlap, isVolatile: false);
  } else {
    CGF.EmitStoreOfScalar(value: FinalResultVal, lvalue: TargetLHSLV);
  }
  // Final synchronization barrier
  CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                          M&: CGM.getModule(), FnID: OMPRTL___kmpc_barrier),
                      args: BarrierArgs);

  // Combiner with original list item
  auto OriginalListCombiner = [&](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitSingleReductionCombiner(CGF, ReductionOp: ReductionOps, PrivateRef: Privates,
                                LHS: cast<DeclRefExpr>(Val: LHSExprs),
                                RHS: cast<DeclRefExpr>(Val: RHSExprs));
  };
  EmitCriticalReduction(OriginalListCombiner);
}
5322
/// Emit a full OpenMP reduction: shared variables go through the
/// __kmpc_reduce{_nowait} runtime protocol (tree/critical or atomic paths),
/// while entries flagged in Options.IsPrivateVarReduction are handled
/// separately via emitPrivateReduction at the end.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> OrgPrivates,
                                    ArrayRef<const Expr *> OrgLHSExprs,
                                    ArrayRef<const Expr *> OrgRHSExprs,
                                    ArrayRef<const Expr *> OrgReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime calls needed: combine each pair in place and return.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = OrgPrivates.begin();
    const auto *ILHS = OrgLHSExprs.begin();
    const auto *IRHS = OrgRHSExprs.begin();
    for (const Expr *E : OrgReductionOps) {
      emitSingleReductionCombiner(CGF, ReductionOp: E, PrivateRef: *IPriv, LHS: cast<DeclRefExpr>(Val: *ILHS),
                                  RHS: cast<DeclRefExpr>(Val: *IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // Filter out shared reduction variables based on IsPrivateVarReduction flag.
  // Only keep entries where the corresponding variable is not private.
  SmallVector<const Expr *> FilteredPrivates, FilteredLHSExprs,
      FilteredRHSExprs, FilteredReductionOps;
  for (unsigned I : llvm::seq<unsigned>(
           Size: std::min(a: OrgReductionOps.size(), b: OrgLHSExprs.size()))) {
    if (!Options.IsPrivateVarReduction[I]) {
      FilteredPrivates.emplace_back(Args: OrgPrivates[I]);
      FilteredLHSExprs.emplace_back(Args: OrgLHSExprs[I]);
      FilteredRHSExprs.emplace_back(Args: OrgRHSExprs[I]);
      FilteredReductionOps.emplace_back(Args: OrgReductionOps[I]);
    }
  }
  // Wrap filtered vectors in ArrayRef for downstream shared reduction
  // processing.
  ArrayRef<const Expr *> Privates = FilteredPrivates;
  ArrayRef<const Expr *> LHSExprs = FilteredLHSExprs;
  ArrayRef<const Expr *> RHSExprs = FilteredRHSExprs;
  ArrayRef<const Expr *> ReductionOps = FilteredReductionOps;

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy = C.getConstantArrayType(
      EltTy: C.VoidPtrTy, ArySize: ArraySize, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal,
      /*IndexTypeQuals=*/0);
  RawAddress ReductionList =
      CGF.CreateMemTemp(T: ReductionArrayTy, Name: ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(Addr: ReductionList, Index: Idx);
    CGF.Builder.CreateStore(
        Val: CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            V: CGF.EmitLValue(E: RHSExprs[I]).getPointer(CGF), DestTy: CGF.VoidPtrTy),
        Addr: Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(Addr: ReductionList, Index: Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          V: CGF.getVLASize(
                 vla: CGF.getContext().getAsVariableArrayType(T: (*IPriv)->getType()))
              .NumElts,
          DestTy: CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(Val: CGF.Builder.CreateIntToPtr(V: Size, DestTy: CGF.VoidPtrTy),
                              Addr: Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      ReducerName: CGF.CurFn->getName(), Loc, ArgsElemType: CGF.ConvertTypeForMem(T: ReductionArrayTy),
      Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName(Parts: {"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(CriticalName: Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, Flags: OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(Ty: ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      V: ReductionList.getPointer(), DestTy: CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(C: RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      callee: OMPBuilder.getOrCreateRuntimeFunction(
          M&: CGM.getModule(),
          FnID: WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      args: Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(name: ".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(V: Res, Dest: DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(name: ".omp.reduction.case1");
  SwInst->addCase(OnVal: CGF.Builder.getInt32(C: 1), Dest: Case1BB);
  CGF.EmitBlock(BB: Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, ReductionOp: E, PrivateRef: *IPriv, LHS: cast<DeclRefExpr>(Val: *ILHS),
                                     RHS: cast<DeclRefExpr>(Val: *IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // The CommonActionTy exit action emits the matching __kmpc_end_reduce call
  // after the combiners.
  CommonActionTy Action(
      nullptr, {},
      OMPBuilder.getOrCreateRuntimeFunction(
          M&: CGM.getModule(), FnID: WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                         : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(Block: DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(name: ".omp.reduction.case2");
  SwInst->addCase(OnVal: CGF.Builder.getInt32(C: 2), Dest: Case2BB);
  CGF.EmitBlock(BB: Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(Val: E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                Val: RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(Val: RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(E: XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(E: EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              AO: llvm::AtomicOrdering::Monotonic, Loc,
              CommonGen: [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Fallback path when a simple atomic op is not possible:
                // materialize X in a temp, privatize, re-evaluate the update.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(T: VD->getType());
                CGF.emitOMPSimpleStore(
                    LVal: CGF.MakeAddrLValue(Addr: LHSTemp, T: VD->getType()), RVal: XRValue,
                    RValTy: VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(LocalVD: VD, Addr: LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(E: UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, Type: (*IPriv)->getType(), LHSVar: VD, RHSVar,
                                    RedOpGen: AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName(Parts: {"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, CriticalName: Name,
              CriticalOpGen: [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, ReductionOp: E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, Type: (*IPriv)->getType(), LHSVar, RHSVar,
                                    RedOpGen: CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, {},
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(Block: DefaultBB);
  CGF.EmitBlock(BB: DefaultBB, /*IsFinished=*/true);
  // Private-variable reductions are handled outside the runtime protocol,
  // after all threads have passed through the switch above.
  assert(OrgLHSExprs.size() == OrgPrivates.size() &&
         "PrivateVarReduction: Privates size mismatch");
  assert(OrgLHSExprs.size() == OrgReductionOps.size() &&
         "PrivateVarReduction: ReductionOps size mismatch");
  for (unsigned I : llvm::seq<unsigned>(
           Size: std::min(a: OrgReductionOps.size(), b: OrgLHSExprs.size()))) {
    if (Options.IsPrivateVarReduction[I])
      emitPrivateReduction(CGF, Loc, Privates: OrgPrivates[I], LHSExprs: OrgLHSExprs[I],
                           RHSExprs: OrgRHSExprs[I], ReductionOps: OrgReductionOps[I]);
  }
}
5656
5657/// Generates unique name for artificial threadprivate variables.
5658/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5659static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5660 const Expr *Ref) {
5661 SmallString<256> Buffer;
5662 llvm::raw_svector_ostream Out(Buffer);
5663 const clang::DeclRefExpr *DE;
5664 const VarDecl *D = ::getBaseDecl(Ref, DE);
5665 if (!D)
5666 D = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: Ref)->getDecl());
5667 D = D->getCanonicalDecl();
5668 std::string Name = CGM.getOpenMPRuntime().getName(
5669 Parts: {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(GD: D)});
5670 Out << Prefix << Name << "_"
5671 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5672 return std::string(Out.str());
5673}
5674
5675/// Emits reduction initializer function:
5676/// \code
5677/// void @.red_init(void* %arg, void* %orig) {
5678/// %0 = bitcast void* %arg to <type>*
5679/// store <type> <init>, <type>* %0
5680/// ret void
5681/// }
5682/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are restrict-qualified void*: %arg (private copy) and
  // %orig (original reduction item).
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamKind::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamKind::Other);
  Args.emplace_back(Args: &Param);
  Args.emplace_back(Args: &ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(Info: FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(Parts: {"red_init", ""});
  auto *Fn = llvm::Function::Create(Ty: FnTy, Linkage: llvm::GlobalValue::InternalLinkage,
                                    N: Name, M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: FnInfo);
  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
    Fn->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo, Args, Loc, StartLoc: Loc);
  QualType PrivateType = RCG.getPrivateType(N);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      Ptr: CGF.GetAddrOfLocalVar(VD: &Param).withElementType(ElemTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
      PtrTy: C.getPointerType(T: PrivateType)->castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, VarType: CGM.getContext().getSizeType(),
        Name: generateUniqueName(CGM, Prefix: "reduction_size", Ref: RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(Addr: SizeAddr, /*Volatile=*/false,
                                Ty: CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(VD: &ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        Ptr: SharedAddr,
        PtrTy: CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedAddr: OrigAddr,
                         DefaultInit: [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5741
5742/// Emits reduction combiner function:
5743/// \code
5744/// void @.red_comb(void* %arg0, void* %arg1) {
5745/// %lhs = bitcast void* %arg0 to <type>*
5746/// %rhs = bitcast void* %arg1 to <type>*
5747/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5748/// store <type> %2, <type>* %lhs
5749/// ret void
5750/// }
5751/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  // LHS/RHS are the DeclRefExprs for omp_out/omp_in; their decls get remapped
  // to the function's void* parameters below.
  const auto *LHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  Args.emplace_back(Args: &ParamInOut);
  Args.emplace_back(Args: &ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(Info: FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(Parts: {"red_comb", ""});
  auto *Fn = llvm::Function::Create(Ty: FnTy, Linkage: llvm::GlobalValue::InternalLinkage,
                                    N: Name, M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: FnInfo);
  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
    Fn->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo, Args, Loc, StartLoc: Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, VarType: CGM.getContext().getSizeType(),
        Name: generateUniqueName(CGM, Prefix: "reduction_size", Ref: RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(Addr: SizeAddr, /*Volatile=*/false,
                                Ty: CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(
      LocalVD: LHSVD,
      // Pull out the pointer to the variable.
      Addr: CGF.EmitLoadOfPointer(
          Ptr: CGF.GetAddrOfLocalVar(VD: &ParamInOut)
              .withElementType(ElemTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
          PtrTy: C.getPointerType(T: LHSVD->getType())->castAs<PointerType>()));
  PrivateScope.addPrivate(
      LocalVD: RHSVD,
      // Pull out the pointer to the variable.
      Addr: CGF.EmitLoadOfPointer(
          Ptr: CGF.GetAddrOfLocalVar(VD: &ParamIn).withElementType(
              ElemTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
          PtrTy: C.getPointerType(T: RHSVD->getType())->castAs<PointerType>()));
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, LHS: cast<DeclRefExpr>(Val: LHS),
      RHS: cast<DeclRefExpr>(Val: RHS));
  CGF.FinishFunction();
  return Fn;
}
5819
5820/// Emits reduction finalizer function:
5821/// \code
5822/// void @.red_fini(void* %arg) {
5823/// %0 = bitcast void* %arg to <type>*
5824/// <destroy>(<type>* %0)
5825/// ret void
5826/// }
5827/// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer is needed when the reduction item has no cleanups
  // (e.g. trivially destructible types); the caller stores a null pointer.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamKind::Other);
  Args.emplace_back(Args: &Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(Info: FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(Parts: {"red_fini", ""});
  auto *Fn = llvm::Function::Create(Ty: FnTy, Linkage: llvm::GlobalValue::InternalLinkage,
                                    N: Name, M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: FnInfo);
  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
    Fn->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo, Args, Loc, StartLoc: Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      Ptr: CGF.GetAddrOfLocalVar(VD: &Param), PtrTy: C.VoidPtrTy.castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, VarType: CGM.getContext().getSizeType(),
        Name: generateUniqueName(CGM, Prefix: "reduction_size", Ref: RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(Addr: SizeAddr, /*Volatile=*/false,
                                Ty: CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(EndLoc: Loc);
  return Fn;
}
5869
/// Emits initialization of task reductions: builds an on-stack array of
/// kmp_taskred_input_t descriptors (one per reduction item, each carrying the
/// shared/original addresses, the item size, and init/fini/comb helper
/// functions) and passes it to __kmpc_taskred_modifier_init or
/// __kmpc_taskred_init. Returns the taskgroup descriptor produced by the
/// runtime, or nullptr if there is nothing to emit.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  // void *reduce_shar; // shared reduction item
  // void *reduce_orig; // original reduction item used for initialization
  // size_t reduce_size; // size of data item
  // void *reduce_init; // data initialization routine
  // void *reduce_fini; // data finalization routine
  // void *reduce_comb; // data combiner routine
  // kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord(Name: "kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, DC: RD, FieldTy: C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  CanQualType RDType = C.getCanonicalTagType(TD: RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType =
      C.getConstantArrayType(EltTy: RDType, ArySize: ArraySize, SizeExpr: nullptr,
                             ASM: ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  RawAddress TaskRedInput = CGF.CreateMemTemp(T: ArrayRDType, Name: ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one descriptor per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(Ty: CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(Ty: CGM.SizeTy, V: Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        ElemTy: TaskRedInput.getElementType(), Ptr: TaskRedInput.getPointer(), IdxList: Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        Name: ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(V: GEP, T: RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: SharedFD);
    RCG.emitSharedOrigLValue(CGF, N: Cnt);
    llvm::Value *Shared = RCG.getSharedLValue(N: Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(value: Shared, lvalue: SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: OrigFD);
    llvm::Value *Orig = RCG.getOrigLValue(N: Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(value: Orig, lvalue: OrigLVal);
    RCG.emitAggregateType(CGF, N: Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(args&: SizeValInChars, args&: SizeVal) = RCG.getSizes(N: Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(V: SizeValInChars, DestTy: CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: SizeFD);
    CGF.EmitStoreOfScalar(value: SizeValInChars, lvalue: SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: InitFD);
    llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, N: Cnt);
    CGF.EmitStoreOfScalar(value: InitAddr, lvalue: InitLVal);
    // ElemLVal.reduce_fini = fini; (null when the item needs no cleanup --
    // emitReduceFiniFunction returns nullptr in that case).
    LValue FiniLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, N: Cnt);
    llvm::Value *FiniAddr =
        Fini ? Fini : llvm::ConstantPointerNull::get(T: CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(value: FiniAddr, lvalue: FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: CombFD);
    llvm::Value *CombAddr = emitReduceCombFunction(
        CGM, Loc, RCG, N: Cnt, ReductionOp: Data.ReductionOps[Cnt], LHS: LHSExprs[Cnt],
        RHS: RHSExprs[Cnt], PrivateRef: Data.ReductionCopies[Cnt]);
    CGF.EmitStoreOfScalar(value: CombAddr, lvalue: CombLVal);
    // ElemLVal.flags = 0; (or 1 to request lazy/delayed item creation when
    // the item size is only known at run time).
    LValue FlagsLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          value: llvm::ConstantInt::get(Ty: CGM.Int32Ty, /*V=*/1, /*isSigned=*/IsSigned: true),
          lvalue: FlagsLVal);
    } else
      CGF.EmitNullInitialization(DestPtr: FlagsLVal.getAddress(), Ty: FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(V: getThreadID(CGF, Loc),
                                                  DestTy: CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(Ty: CGM.IntTy, V: Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/IsSigned: true),
        llvm::ConstantInt::get(Ty: CGM.IntTy, V: Size, /*isSigned=*/IsSigned: true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            V: TaskRedInput.getPointer(), DestTy: CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        callee: OMPBuilder.getOrCreateRuntimeFunction(
            M&: CGM.getModule(), FnID: OMPRTL___kmpc_taskred_modifier_init),
        args: Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(V: getThreadID(CGF, Loc), DestTy: CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(Ty: CGM.IntTy, V: Size, /*isSigned=*/IsSigned: true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(V: TaskRedInput.getPointer(),
                                                      DestTy: CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                                 M&: CGM.getModule(), FnID: OMPRTL___kmpc_taskred_init),
                             args: Args);
}
5994
/// Finalizes a modifier-based task reduction region by calling the runtime
/// teardown entry point that matches __kmpc_taskred_modifier_init.
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(V: getThreadID(CGF, Loc),
                                                DestTy: CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(Ty: CGM.IntTy,
                                                V: IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/IsSigned: true)};
  (void)CGF.EmitRuntimeCall(
      callee: OMPBuilder.getOrCreateRuntimeFunction(
          M&: CGM.getModule(), FnID: OMPRTL___kmpc_task_reduction_modifier_fini),
      args: Args);
}
6012
/// For reduction items whose size is only known at run time (VLAs/array
/// sections), stores the size into the artificial threadprivate variable
/// that the generated init/comb/fini helper functions read it back from.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit the store only if the size is non-constant (Sizes.second != nullptr);
  // constant-size items embed the size directly in the descriptor.
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(V: Sizes.second, DestTy: CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, VarType: CGM.getContext().getSizeType(),
        Name: generateUniqueName(CGM, Prefix: "reduction_size", Ref: RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(Val: SizeVal, Addr: SizeAddr, /*IsVolatile=*/false);
  }
}
6029
/// Returns the address of the thread-specific instance of a task reduction
/// item, obtained from the runtime via __kmpc_task_reduction_get_th_data
/// (\p ReductionsPtr is the taskgroup descriptor, \p SharedLVal the shared
/// copy used as the lookup key).
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(V: getThreadID(CGF, Loc),
                                                   DestTy: CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             V: SharedLVal.getPointer(CGF), DestTy: CGM.VoidPtrTy)};
  // The runtime returns an opaque byte pointer; reuse the shared item's
  // alignment for the resulting address.
  return Address(
      CGF.EmitRuntimeCall(
          callee: OMPBuilder.getOrCreateRuntimeFunction(
              M&: CGM.getModule(), FnID: OMPRTL___kmpc_task_reduction_get_th_data),
          args: Args),
      CGF.Int8Ty, SharedLVal.getAlignment());
}
6049
/// Emits code for the 'taskwait' directive: either via the OpenMPIRBuilder
/// (dependence-free case only) or by calling __kmpc_omp_taskwait /
/// __kmpc_omp_taskwait_deps_51 directly.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(Loc: CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    // Materialize the depend clause (if any) into an array of
    // kmp_depend_info_t entries plus their count.
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    std::tie(args&: NumOfElements, args&: DependenciesArray) =
        emitDependClause(CGF, Dependencies: Data.Dependences, Loc);
    if (!Data.Dependences.empty()) {
      llvm::Value *DepWaitTaskArgs[7];
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(C: 0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
      DepWaitTaskArgs[6] =
          llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: Data.HasNowaitClause);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
      // kmp_int32 has_no_wait); if dependence info is specified.
      CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                              M, FnID: OMPRTL___kmpc_omp_taskwait_deps_51),
                          args: DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          callee: OMPBuilder.getOrCreateRuntimeFunction(M, FnID: OMPRTL___kmpc_omp_taskwait),
          args: Args);
    }
  }

  // Inside an untied-task region, emit the re-entry switch point.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
6102
/// Emits the body of an OpenMP directive inline (no outlining), wrapping it
/// in an InlinedOpenMPRegionRAII so captured-statement lookups resolve
/// against the enclosing region.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  // critical/master/masked keep the outer untied-task info; all other inner
  // kinds get their own (last RAII argument = NoInheritance).
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
                                 InnerKind != OMPD_critical &&
                                     InnerKind != OMPD_master &&
                                     InnerKind != OMPD_masked);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}
6115
namespace {
/// Cancellation kinds passed as the 'cncl_kind' argument of the runtime's
/// __kmpc_cancel / __kmpc_cancellationpoint entry points.
enum RTCancelKind {
  CancelNoreq = 0,     // no cancellation requested
  CancelParallel = 1,  // cancel innermost parallel region
  CancelLoop = 2,      // cancel worksharing loop ('for') region
  CancelSections = 3,  // cancel sections region
  CancelTaskgroup = 4  // cancel taskgroup region
};
} // anonymous namespace
6125
6126static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6127 RTCancelKind CancelKind = CancelNoreq;
6128 if (CancelRegion == OMPD_parallel)
6129 CancelKind = CancelParallel;
6130 else if (CancelRegion == OMPD_for)
6131 CancelKind = CancelLoop;
6132 else if (CancelRegion == OMPD_sections)
6133 CancelKind = CancelSections;
6134 else {
6135 assert(CancelRegion == OMPD_taskgroup);
6136 CancelKind = CancelTaskgroup;
6137 }
6138 return CancelKind;
6139}
6140
/// Emits code for the 'cancellation point' directive: queries the runtime
/// and, if cancellation was activated, branches out of the construct through
/// any pending cleanups.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(C: getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          callee: OMPBuilder.getOrCreateRuntimeFunction(
              M&: CGM.getModule(), FnID: OMPRTL___kmpc_cancellationpoint),
          args: Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: ".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(name: ".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Arg: Result);
      CGF.Builder.CreateCondBr(Cond: Cmp, True: ExitBB, False: ContBB);
      CGF.EmitBlock(BB: ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, Kind: OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(Kind: OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(Dest: CancelDest);
      CGF.EmitBlock(BB: ContBB, /*IsFinished=*/true);
    }
  }
}
6180
/// Emits code for the 'cancel' directive: calls __kmpc_cancel (optionally
/// guarded by the directive's if-clause) and, when cancellation is active,
/// branches out of the construct through pending cleanups.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo)) {
    // 'Then' branch of the if-clause: actually emit the cancel call.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(C: getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          callee: OMPBuilder.getOrCreateRuntimeFunction(M, FnID: OMPRTL___kmpc_cancel), args: Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: ".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(name: ".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Arg: Result);
      CGF.Builder.CreateCondBr(Cond: Cmp, True: ExitBB, False: ContBB);
      CGF.EmitBlock(BB: ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, Kind: OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(Kind: OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(Dest: CancelDest);
      CGF.EmitBlock(BB: ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // if-clause present: cancel only when the condition is true.
      emitIfClause(CGF, Cond: IfCond, ThenGen,
                   ElseGen: [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6226
namespace {
/// Cleanup action for uses_allocators support: initializes each
/// (allocator, traits) pair on region entry via emitUsesAllocatorsInit and
/// destroys each allocator on region exit via emitUsesAllocatorsFini.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  /// (allocator expr, allocator-traits expr) pairs from the clause.
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  /// Region entry: create every listed allocator.
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, Allocator: AllocatorData.first, AllocatorTraits: AllocatorData.second);
    }
  }
  /// Region exit: destroy every listed allocator.
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        Allocator: AllocatorData.first);
    }
  }
};
} // namespace
6254
6255void CGOpenMPRuntime::emitTargetOutlinedFunction(
6256 const OMPExecutableDirective &D, StringRef ParentName,
6257 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6258 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6259 assert(!ParentName.empty() && "Invalid target entry parent name!");
6260 HasEmittedTargetRegion = true;
6261 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6262 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6263 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6264 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6265 if (!D.AllocatorTraits)
6266 continue;
6267 Allocators.emplace_back(Args: D.Allocator, Args: D.AllocatorTraits);
6268 }
6269 }
6270 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6271 CodeGen.setAction(UsesAllocatorAction);
6272 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6273 IsOffloadEntry, CodeGen);
6274}
6275
/// Emits initialization of a uses_allocators allocator: calls
/// __kmpc_init_allocator with the traits array and stores the returned
/// handle into the allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Loc: Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(V: ThreadId, DestTy: CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
  // Number of traits = number of elements of the constant traits array.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      Ty: CGF.IntTy, V: cast<ConstantArrayType>(
                       Val: AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                       ->getSize()
                       .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(E: AllocatorTraits);
  // Pass the traits array as a void** to the runtime.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr: AllocatorTraitsLVal.getAddress(), Ty: CGF.VoidPtrPtrTy, ElementTy: CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, T: CGF.getContext().VoidPtrTy,
                                           BaseInfo: AllocatorTraitsLVal.getBaseInfo(),
                                           TBAAInfo: AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits = Addr.emitRawPointer(CGF);

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                              M&: CGM.getModule(), FnID: OMPRTL___kmpc_init_allocator),
                          args: {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator (emitting its local alloca first).
  CGF.EmitAutoVarAlloca(var: *cast<VarDecl>(
      Val: cast<DeclRefExpr>(Val: Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(E: Allocator->IgnoreParenImpCasts());
  // Convert the runtime's void* result to the declared allocator type.
  AllocatorVal =
      CGF.EmitScalarConversion(Src: AllocatorVal, SrcTy: CGF.getContext().VoidPtrTy,
                               DstTy: Allocator->getType(), Loc: Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(value: AllocatorVal, lvalue: AllocatorLVal);
}
6309
/// Emits destruction of a uses_allocators allocator: loads the allocator
/// handle and passes it to __kmpc_destroy_allocator.
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Loc: Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(V: ThreadId, DestTy: CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(E: Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(lvalue: AllocatorLVal, Loc: Allocator->getExprLoc());
  // Convert the declared allocator type back to the runtime's void*.
  AllocatorVal = CGF.EmitScalarConversion(Src: AllocatorVal, SrcTy: Allocator->getType(),
                                          DstTy: CGF.getContext().VoidPtrTy,
                                          Loc: Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(),
                                            FnID: OMPRTL___kmpc_destroy_allocator),
      args: {ThreadId, AllocatorVal});
}
6325
/// Computes the default min/max teams and threads for a target kernel,
/// combining clause-derived bounds with any CUDA launch-bounds /
/// AMDGPU flat-work-group-size attributes on the directive.
void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
    const OMPExecutableDirective &D, CodeGenFunction &CGF,
    llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
  assert(Attrs.MaxTeams.size() == 1 && Attrs.MaxThreads.size() == 1 &&
         "invalid default attrs structure");
  int32_t &MaxTeamsVal = Attrs.MaxTeams.front();
  int32_t &MaxThreadsVal = Attrs.MaxThreads.front();

  // Clause-derived bounds first (num_teams / thread_limit etc.).
  getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal&: Attrs.MinTeams, MaxTeamsVal);
  getNumThreadsExprForTargetDirective(CGF, D, UpperBound&: MaxThreadsVal,
                                      /*UpperBoundOnly=*/true);

  // Then tighten with any [[ompx::attribute(...)]] launch attributes.
  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      // -1 means "no limit" for the max values.
      int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
      int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
      if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(Val: A))
        CGM.handleCUDALaunchBoundsAttr(F: nullptr, A: Attr, MaxThreadsVal: &AttrMaxThreadsVal,
                                       MinBlocksVal: &AttrMinBlocksVal, MaxClusterRankVal: &AttrMaxBlocksVal);
      else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(Val: A))
        CGM.handleAMDGPUFlatWorkGroupSizeAttr(
            F: nullptr, A: Attr, /*ReqdWGS=*/nullptr, MinThreadsVal: &AttrMinThreadsVal,
            MaxThreadsVal: &AttrMaxThreadsVal);
      else
        continue;

      // Minimums grow to the largest requirement; maximums shrink to the
      // smallest positive limit seen so far.
      Attrs.MinThreads = std::max(a: Attrs.MinThreads, b: AttrMinThreadsVal);
      if (AttrMaxThreadsVal > 0)
        MaxThreadsVal = MaxThreadsVal > 0
                            ? std::min(a: MaxThreadsVal, b: AttrMaxThreadsVal)
                            : AttrMaxThreadsVal;
      Attrs.MinTeams = std::max(a: Attrs.MinTeams, b: AttrMinBlocksVal);
      if (AttrMaxBlocksVal > 0)
        MaxTeamsVal = MaxTeamsVal > 0 ? std::min(a: MaxTeamsVal, b: AttrMaxBlocksVal)
                                      : AttrMaxBlocksVal;
    }
  }
}
6364
/// Outlines the target region body through the OpenMPIRBuilder: builds the
/// target-entry info from the directive location, generates the captured
/// statement function on demand, and applies target-specific attributes to
/// the resulting function.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {

  llvm::TargetRegionEntryInfo EntryInfo =
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder, BeginLoc: D.getBeginLoc(), ParentName);

  CodeGenFunction CGF(CGM, true);
  // Callback invoked by the builder with the chosen entry-function name.
  llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
      [&CGF, &D, &CodeGen, this](StringRef EntryFnName) {
        const CapturedStmt &CS = *D.getCapturedStmt(RegionKind: OMPD_target);

        CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        // Non-GPU device compilation passes captures as one aggregate.
        if (CGM.getLangOpts().OpenMPIsTargetDevice && !isGPU())
          return CGF.GenerateOpenMPCapturedStmtFunctionAggregate(S: CS, D);
        return CGF.GenerateOpenMPCapturedStmtFunction(S: CS, D);
      };

  cantFail(Err: OMPBuilder.emitTargetRegionFunction(
      EntryInfo, GenerateFunctionCallback&: GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
      OutlinedFnID));

  // The builder may not have produced a function (e.g. region not required).
  if (!OutlinedFn)
    return;

  CGM.getTargetCodeGenInfo().setTargetAttributes(D: nullptr, GV: OutlinedFn, M&: CGM);

  // Propagate AMDGPU waves-per-EU hints from ompx attributes.
  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(Val: A))
        CGM.handleAMDGPUWavesPerEUAttr(F: OutlinedFn, A: Attr);
    }
  }
  registerVTable(D);
}
6402
6403/// Checks if the expression is constant or does not have non-trivial function
6404/// calls.
6405static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6406 // We can skip constant expressions.
6407 // We can skip expressions with trivial calls or simple expressions.
6408 return (E->isEvaluatable(Ctx, AllowSideEffects: Expr::SE_AllowUndefinedBehavior) ||
6409 !E->hasNonTrivialCall(Ctx)) &&
6410 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6411}
6412
/// Drills through compound statements and ignorable statements (trivial
/// expressions, asm/null statements, flush/barrier/taskyield directives,
/// unused or non-local declarations) to find the single "meaningful" child
/// of \p Body. Returns nullptr if there is more than one such child.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Val: Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(Val: S)) {
        // Constant/side-effect-free expressions do not count as children.
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(Val: S) || isa<NullStmt>(Val: S) || isa<OMPFlushDirective>(Val: S) ||
          isa<OMPBarrierDirective>(Val: S) || isa<OMPTaskyieldDirective>(Val: S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(Val: S)) {
        // A DeclStmt is ignorable if every declaration in it is either a
        // non-variable entity or a global/unused variable.
        if (llvm::all_of(Range: DS->decls(), P: [](const Decl *D) {
              if (isa<EmptyDecl>(Val: D) || isa<DeclContext>(Val: D) ||
                  isa<TypeDecl>(Val: D) || isa<PragmaCommentDecl>(Val: D) ||
                  isa<PragmaDetectMismatchDecl>(Val: D) || isa<UsingDecl>(Val: D) ||
                  isa<UsingDirectiveDecl>(Val: D) ||
                  isa<OMPDeclareReductionDecl>(Val: D) ||
                  isa<OMPThreadPrivateDecl>(Val: D) || isa<OMPAllocateDecl>(Val: D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(Val: D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Unwrap containers around the found child and repeat in case it is
    // itself a compound statement.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6454
/// Determines the number of teams for a target-based directive. Returns the
/// num_teams clause expression if one applies (setting Min/MaxTeamsVal to its
/// constant value when it is an ICE), otherwise returns nullptr with
/// Min/MaxTeamsVal set to: 1 (single team), 0 (runtime default), or -1
/// (plain 'target' with unknown nested construct - no teams region needed).
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    // Look through the captured statement for a nested teams directive.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(Ctx&: CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(Val: ChildStmt)) {
      if (isOpenMPTeamsDirective(DKind: NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()
                                     ->getNumTeams()
                                     .front();
          if (NumTeams->isIntegerConstantExpr(Ctx: CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(Ctx: CGF.getContext()))
              MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
          return NumTeams;
        }
        // Nested teams without num_teams: runtime chooses.
        MinTeamsVal = MaxTeamsVal = 0;
        return nullptr;
      }
      // Nested non-teams construct: exactly one team.
      MinTeamsVal = MaxTeamsVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region
    MinTeamsVal = MaxTeamsVal = -1;
    return nullptr;
  }
  case OMPD_target_teams_loop:
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams: the num_teams clause is on this directive.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams().front();
      if (NumTeams->isIntegerConstantExpr(Ctx: CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(Ctx: CGF.getContext()))
          MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
      return NumTeams;
    }
    MinTeamsVal = MaxTeamsVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_parallel_loop:
  case OMPD_target_simd:
    // target+parallel/simd combinations run with a single team.
    MinTeamsVal = MaxTeamsVal = 1;
    return nullptr;
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_loop:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    // Non-target directives: rejected by the assert above; fall through to
    // the unreachable.
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6582
/// Emits the (host-side) runtime value for the number of teams of a target
/// directive: evaluates the num_teams clause expression when present,
/// otherwise materializes the constant determined by
/// getNumTeamsExprForTargetDirective. Result is an i32.
llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t MinNT = -1, MaxNT = -1;
  const Expr *NumTeams =
      getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal&: MinNT, MaxTeamsVal&: MaxNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      // The clause lives on a nested teams directive; evaluate its expression
      // in the context of the inner captured statement.
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(E: NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(V: NumTeamsVal, DestTy: CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      // Combined directives: evaluate the clause expression directly.
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(E: NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(V: NumTeamsVal, DestTy: CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    default:
      break;
    }
  }

  // No clause expression: the min/max pair must be a single known constant.
  assert(MinNT == MaxNT && "Num threads ranges require handling here.");
  return llvm::ConstantInt::getSigned(Ty: CGF.Int32Ty, V: MinNT);
}
6624
6625/// Check for a num threads constant value (stored in \p DefaultVal), or
6626/// expression (stored in \p E). If the value is conditional (via an if-clause),
6627/// store the condition in \p CondVal. If \p E, and \p CondVal respectively, are
6628/// nullptr, no expression evaluation is perfomed.
6629static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6630 const Expr **E, int32_t &UpperBound,
6631 bool UpperBoundOnly, llvm::Value **CondVal) {
6632 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6633 Ctx&: CGF.getContext(), Body: CS->getCapturedStmt());
6634 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child);
6635 if (!Dir)
6636 return;
6637
6638 if (isOpenMPParallelDirective(DKind: Dir->getDirectiveKind())) {
6639 // Handle if clause. If if clause present, the number of threads is
6640 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6641 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6642 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6643 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6644 const OMPIfClause *IfClause = nullptr;
6645 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6646 if (C->getNameModifier() == OMPD_unknown ||
6647 C->getNameModifier() == OMPD_parallel) {
6648 IfClause = C;
6649 break;
6650 }
6651 }
6652 if (IfClause) {
6653 const Expr *CondExpr = IfClause->getCondition();
6654 bool Result;
6655 if (CondExpr->EvaluateAsBooleanCondition(Result, Ctx: CGF.getContext())) {
6656 if (!Result) {
6657 UpperBound = 1;
6658 return;
6659 }
6660 } else {
6661 CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
6662 if (const auto *PreInit =
6663 cast_or_null<DeclStmt>(Val: IfClause->getPreInitStmt())) {
6664 for (const auto *I : PreInit->decls()) {
6665 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6666 CGF.EmitVarDecl(D: cast<VarDecl>(Val: *I));
6667 } else {
6668 CodeGenFunction::AutoVarEmission Emission =
6669 CGF.EmitAutoVarAlloca(var: cast<VarDecl>(Val: *I));
6670 CGF.EmitAutoVarCleanups(emission: Emission);
6671 }
6672 }
6673 *CondVal = CGF.EvaluateExprAsBool(E: CondExpr);
6674 }
6675 }
6676 }
6677 }
6678 // Check the value of num_threads clause iff if clause was not specified
6679 // or is not evaluated to false.
6680 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6681 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6682 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6683 const auto *NumThreadsClause =
6684 Dir->getSingleClause<OMPNumThreadsClause>();
6685 const Expr *NTExpr = NumThreadsClause->getNumThreads();
6686 if (NTExpr->isIntegerConstantExpr(Ctx: CGF.getContext()))
6687 if (auto Constant = NTExpr->getIntegerConstantExpr(Ctx: CGF.getContext()))
6688 UpperBound =
6689 UpperBound
6690 ? Constant->getZExtValue()
6691 : std::min(a: UpperBound,
6692 b: static_cast<int32_t>(Constant->getZExtValue()));
6693 // If we haven't found a upper bound, remember we saw a thread limiting
6694 // clause.
6695 if (UpperBound == -1)
6696 UpperBound = 0;
6697 if (!E)
6698 return;
6699 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6700 if (const auto *PreInit =
6701 cast_or_null<DeclStmt>(Val: NumThreadsClause->getPreInitStmt())) {
6702 for (const auto *I : PreInit->decls()) {
6703 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6704 CGF.EmitVarDecl(D: cast<VarDecl>(Val: *I));
6705 } else {
6706 CodeGenFunction::AutoVarEmission Emission =
6707 CGF.EmitAutoVarAlloca(var: cast<VarDecl>(Val: *I));
6708 CGF.EmitAutoVarCleanups(emission: Emission);
6709 }
6710 }
6711 }
6712 *E = NTExpr;
6713 }
6714 return;
6715 }
6716 if (isOpenMPSimdDirective(DKind: Dir->getDirectiveKind()))
6717 UpperBound = 1;
6718}
6719
6720const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6721 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6722 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6723 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6724 "Clauses associated with the teams directive expected to be emitted "
6725 "only for the host!");
6726 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6727 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6728 "Expected target-based executable directive.");
6729
6730 const Expr *NT = nullptr;
6731 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6732
6733 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6734 if (E->isIntegerConstantExpr(Ctx: CGF.getContext())) {
6735 if (auto Constant = E->getIntegerConstantExpr(Ctx: CGF.getContext()))
6736 UpperBound = UpperBound ? Constant->getZExtValue()
6737 : std::min(a: UpperBound,
6738 b: int32_t(Constant->getZExtValue()));
6739 }
6740 // If we haven't found a upper bound, remember we saw a thread limiting
6741 // clause.
6742 if (UpperBound == -1)
6743 UpperBound = 0;
6744 if (EPtr)
6745 *EPtr = E;
6746 };
6747
6748 auto ReturnSequential = [&]() {
6749 UpperBound = 1;
6750 return NT;
6751 };
6752
6753 switch (DirectiveKind) {
6754 case OMPD_target: {
6755 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6756 getNumThreads(CGF, CS, E: NTPtr, UpperBound, UpperBoundOnly, CondVal);
6757 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6758 Ctx&: CGF.getContext(), Body: CS->getCapturedStmt());
6759 // TODO: The standard is not clear how to resolve two thread limit clauses,
6760 // let's pick the teams one if it's present, otherwise the target one.
6761 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6762 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child)) {
6763 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6764 ThreadLimitClause = TLC;
6765 if (ThreadLimitExpr) {
6766 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6767 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6768 CodeGenFunction::LexicalScope Scope(
6769 CGF,
6770 ThreadLimitClause->getThreadLimit().front()->getSourceRange());
6771 if (const auto *PreInit =
6772 cast_or_null<DeclStmt>(Val: ThreadLimitClause->getPreInitStmt())) {
6773 for (const auto *I : PreInit->decls()) {
6774 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6775 CGF.EmitVarDecl(D: cast<VarDecl>(Val: *I));
6776 } else {
6777 CodeGenFunction::AutoVarEmission Emission =
6778 CGF.EmitAutoVarAlloca(var: cast<VarDecl>(Val: *I));
6779 CGF.EmitAutoVarCleanups(emission: Emission);
6780 }
6781 }
6782 }
6783 }
6784 }
6785 }
6786 if (ThreadLimitClause)
6787 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6788 ThreadLimitExpr);
6789 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child)) {
6790 if (isOpenMPTeamsDirective(DKind: Dir->getDirectiveKind()) &&
6791 !isOpenMPDistributeDirective(DKind: Dir->getDirectiveKind())) {
6792 CS = Dir->getInnermostCapturedStmt();
6793 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6794 Ctx&: CGF.getContext(), Body: CS->getCapturedStmt());
6795 Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child);
6796 }
6797 if (Dir && isOpenMPParallelDirective(DKind: Dir->getDirectiveKind())) {
6798 CS = Dir->getInnermostCapturedStmt();
6799 getNumThreads(CGF, CS, E: NTPtr, UpperBound, UpperBoundOnly, CondVal);
6800 } else if (Dir && isOpenMPSimdDirective(DKind: Dir->getDirectiveKind()))
6801 return ReturnSequential();
6802 }
6803 return NT;
6804 }
6805 case OMPD_target_teams: {
6806 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6807 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6808 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6809 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6810 ThreadLimitExpr);
6811 }
6812 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6813 getNumThreads(CGF, CS, E: NTPtr, UpperBound, UpperBoundOnly, CondVal);
6814 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6815 Ctx&: CGF.getContext(), Body: CS->getCapturedStmt());
6816 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child)) {
6817 if (Dir->getDirectiveKind() == OMPD_distribute) {
6818 CS = Dir->getInnermostCapturedStmt();
6819 getNumThreads(CGF, CS, E: NTPtr, UpperBound, UpperBoundOnly, CondVal);
6820 }
6821 }
6822 return NT;
6823 }
6824 case OMPD_target_teams_distribute:
6825 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6826 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6827 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6828 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6829 ThreadLimitExpr);
6830 }
6831 getNumThreads(CGF, CS: D.getInnermostCapturedStmt(), E: NTPtr, UpperBound,
6832 UpperBoundOnly, CondVal);
6833 return NT;
6834 case OMPD_target_teams_loop:
6835 case OMPD_target_parallel_loop:
6836 case OMPD_target_parallel:
6837 case OMPD_target_parallel_for:
6838 case OMPD_target_parallel_for_simd:
6839 case OMPD_target_teams_distribute_parallel_for:
6840 case OMPD_target_teams_distribute_parallel_for_simd: {
6841 if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6842 const OMPIfClause *IfClause = nullptr;
6843 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6844 if (C->getNameModifier() == OMPD_unknown ||
6845 C->getNameModifier() == OMPD_parallel) {
6846 IfClause = C;
6847 break;
6848 }
6849 }
6850 if (IfClause) {
6851 const Expr *Cond = IfClause->getCondition();
6852 bool Result;
6853 if (Cond->EvaluateAsBooleanCondition(Result, Ctx: CGF.getContext())) {
6854 if (!Result)
6855 return ReturnSequential();
6856 } else {
6857 CodeGenFunction::RunCleanupsScope Scope(CGF);
6858 *CondVal = CGF.EvaluateExprAsBool(E: Cond);
6859 }
6860 }
6861 }
6862 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6863 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6864 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6865 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6866 ThreadLimitExpr);
6867 }
6868 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6869 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6870 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6871 CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6872 return NumThreadsClause->getNumThreads();
6873 }
6874 return NT;
6875 }
6876 case OMPD_target_teams_distribute_simd:
6877 case OMPD_target_simd:
6878 return ReturnSequential();
6879 default:
6880 break;
6881 }
6882 llvm_unreachable("Unsupported directive kind.");
6883}
6884
/// Emit an i32 value holding the number of threads for the given target
/// directive, combining num_threads, thread_limit and if clauses as:
///   <cond> ? (<numthreads> ? <numthreads> : 0) : 1
/// then clamping by thread_limit when both are present. 0 means runtime
/// choice.
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  llvm::Value *NumThreadsVal = nullptr;
  llvm::Value *CondVal = nullptr;
  llvm::Value *ThreadLimitVal = nullptr;
  const Expr *ThreadLimitExpr = nullptr;
  int32_t UpperBound = -1;

  const Expr *NT = getNumThreadsExprForTargetDirective(
      CGF, D, UpperBound, /* UpperBoundOnly */ false, CondVal: &CondVal,
      ThreadLimitExpr: &ThreadLimitExpr);

  // Thread limit expressions are used below, emit them.
  if (ThreadLimitExpr) {
    ThreadLimitVal =
        CGF.EmitScalarExpr(E: ThreadLimitExpr, /*IgnoreResultAssign=*/true);
    ThreadLimitVal = CGF.Builder.CreateIntCast(V: ThreadLimitVal, DestTy: CGF.Int32Ty,
                                               /*isSigned=*/false);
  }

  // Generate the num threads expression.
  if (UpperBound == 1) {
    // Known-sequential region: one thread.
    NumThreadsVal = CGF.Builder.getInt32(C: UpperBound);
  } else if (NT) {
    NumThreadsVal = CGF.EmitScalarExpr(E: NT, /*IgnoreResultAssign=*/true);
    NumThreadsVal = CGF.Builder.CreateIntCast(V: NumThreadsVal, DestTy: CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else if (ThreadLimitVal) {
    // If we do not have a num threads value but a thread limit, replace the
    // former with the latter. The thread limit expression was already handled
    // above, so clear it to skip the min() at the end.
    NumThreadsVal = ThreadLimitVal;
    ThreadLimitVal = nullptr;
  } else {
    // Default to "0" which means runtime choice.
    assert(!ThreadLimitVal && "Default not applicable with thread limit value");
    NumThreadsVal = CGF.Builder.getInt32(C: 0);
  }

  // Handle if clause. If if clause present, the number of threads is
  // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
  if (CondVal) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    NumThreadsVal = CGF.Builder.CreateSelect(C: CondVal, True: NumThreadsVal,
                                             False: CGF.Builder.getInt32(C: 1));
  }

  // If the thread limit and num threads expressions were both present, take
  // the minimum (unsigned comparison).
  if (ThreadLimitVal) {
    NumThreadsVal = CGF.Builder.CreateSelect(
        C: CGF.Builder.CreateICmpULT(LHS: ThreadLimitVal, RHS: NumThreadsVal),
        True: ThreadLimitVal, False: NumThreadsVal);
  }

  return NumThreadsVal;
}
6941
6942namespace {
6943LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6944
6945// Utility to handle information from clauses associated with a given
6946// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6947// It provides a convenient interface to obtain the information and generate
6948// code for that information.
6949class MappableExprsHandler {
6950public:
  /// Custom comparator for attach-pointer expressions that compares them by
  /// complexity (i.e. their component-depth) first, then by the order in which
  /// they were computed by collectAttachPtrExprInfo(), if they are semantically
  /// different.
  struct AttachPtrExprComparator {
    // Handler owning the depth and computation-order maps consulted below.
    const MappableExprsHandler &Handler;
    // Cache of previous equality comparison results.
    mutable llvm::DenseMap<std::pair<const Expr *, const Expr *>, bool>
        CachedEqualityComparisons;

    AttachPtrExprComparator(const MappableExprsHandler &H) : Handler(H) {}
    AttachPtrExprComparator() = delete;

    // Return true iff LHS is "less than" RHS.
    // Ordering: shallower component-depth first (std::nullopt lowest), then
    // computation order as tie-breaker for semantically distinct expressions.
    // Semantically equal expressions compare as equivalent (both orders
    // return false), as required for a strict weak ordering.
    bool operator()(const Expr *LHS, const Expr *RHS) const {
      if (LHS == RHS)
        return false;

      // First, compare by complexity (depth)
      const auto ItLHS = Handler.AttachPtrComponentDepthMap.find(Val: LHS);
      const auto ItRHS = Handler.AttachPtrComponentDepthMap.find(Val: RHS);

      std::optional<size_t> DepthLHS =
          (ItLHS != Handler.AttachPtrComponentDepthMap.end()) ? ItLHS->second
                                                              : std::nullopt;
      std::optional<size_t> DepthRHS =
          (ItRHS != Handler.AttachPtrComponentDepthMap.end()) ? ItRHS->second
                                                              : std::nullopt;

      // std::nullopt (no attach pointer) has lowest complexity
      if (!DepthLHS.has_value() && !DepthRHS.has_value()) {
        // Both have same complexity, now check semantic equality
        if (areEqual(LHS, RHS))
          return false;
        // Different semantically, compare by computation order
        return wasComputedBefore(LHS, RHS);
      }
      if (!DepthLHS.has_value())
        return true; // LHS has lower complexity
      if (!DepthRHS.has_value())
        return false; // RHS has lower complexity

      // Both have values, compare by depth (lower depth = lower complexity)
      if (DepthLHS.value() != DepthRHS.value())
        return DepthLHS.value() < DepthRHS.value();

      // Same complexity, now check semantic equality
      if (areEqual(LHS, RHS))
        return false;
      // Different semantically, compare by computation order
      return wasComputedBefore(LHS, RHS);
    }

  public:
    /// Return true if \p LHS and \p RHS are semantically equal. Uses pre-cached
    /// results, if available, otherwise does a recursive semantic comparison.
    bool areEqual(const Expr *LHS, const Expr *RHS) const {
      // Check cache first for faster lookup
      const auto CachedResultIt = CachedEqualityComparisons.find(Val: {LHS, RHS});
      if (CachedResultIt != CachedEqualityComparisons.end())
        return CachedResultIt->second;

      bool ComparisonResult = areSemanticallyEqual(LHS, RHS);

      // Cache the result for future lookups (both orders since semantic
      // equality is commutative)
      CachedEqualityComparisons[{LHS, RHS}] = ComparisonResult;
      CachedEqualityComparisons[{RHS, LHS}] = ComparisonResult;
      return ComparisonResult;
    }

    /// Compare the two attach-ptr expressions by their computation order.
    /// Returns true iff LHS was computed before RHS by
    /// collectAttachPtrExprInfo().
    /// NOTE(review): DenseMap::at() asserts that both expressions were
    /// previously recorded by collectAttachPtrExprInfo() -- confirm callers
    /// guarantee this.
    bool wasComputedBefore(const Expr *LHS, const Expr *RHS) const {
      const size_t &OrderLHS = Handler.AttachPtrComputationOrderMap.at(Val: LHS);
      const size_t &OrderRHS = Handler.AttachPtrComputationOrderMap.at(Val: RHS);

      return OrderLHS < OrderRHS;
    }

  private:
    /// Helper function to compare attach-pointer expressions semantically.
    /// This function handles various expression types that can be part of an
    /// attach-pointer.
    /// TODO: Not urgent, but we should ideally return true when comparing
    /// `p[10]`, `*(p + 10)`, `*(p + 5 + 5)`, `p[10:1]` etc.
    bool areSemanticallyEqual(const Expr *LHS, const Expr *RHS) const {
      if (LHS == RHS)
        return true;

      // If only one is null, they aren't equal
      if (!LHS || !RHS)
        return false;

      ASTContext &Ctx = Handler.CGF.getContext();
      // Strip away parentheses and no-op casts to get to the core expression
      LHS = LHS->IgnoreParenNoopCasts(Ctx);
      RHS = RHS->IgnoreParenNoopCasts(Ctx);

      // Direct pointer comparison of the underlying expressions
      if (LHS == RHS)
        return true;

      // Check if the expression classes match
      if (LHS->getStmtClass() != RHS->getStmtClass())
        return false;

      // Handle DeclRefExpr (variable references)
      if (const auto *LD = dyn_cast<DeclRefExpr>(Val: LHS)) {
        const auto *RD = dyn_cast<DeclRefExpr>(Val: RHS);
        if (!RD)
          return false;
        // Compare canonical decls so redeclarations compare equal.
        return LD->getDecl()->getCanonicalDecl() ==
               RD->getDecl()->getCanonicalDecl();
      }

      // Handle ArraySubscriptExpr (array indexing like a[i])
      if (const auto *LA = dyn_cast<ArraySubscriptExpr>(Val: LHS)) {
        const auto *RA = dyn_cast<ArraySubscriptExpr>(Val: RHS);
        if (!RA)
          return false;
        return areSemanticallyEqual(LHS: LA->getBase(), RHS: RA->getBase()) &&
               areSemanticallyEqual(LHS: LA->getIdx(), RHS: RA->getIdx());
      }

      // Handle MemberExpr (member access like s.m or p->m)
      if (const auto *LM = dyn_cast<MemberExpr>(Val: LHS)) {
        const auto *RM = dyn_cast<MemberExpr>(Val: RHS);
        if (!RM)
          return false;
        if (LM->getMemberDecl()->getCanonicalDecl() !=
            RM->getMemberDecl()->getCanonicalDecl())
          return false;
        return areSemanticallyEqual(LHS: LM->getBase(), RHS: RM->getBase());
      }

      // Handle UnaryOperator (unary operations like *p, &x, etc.)
      if (const auto *LU = dyn_cast<UnaryOperator>(Val: LHS)) {
        const auto *RU = dyn_cast<UnaryOperator>(Val: RHS);
        if (!RU)
          return false;
        if (LU->getOpcode() != RU->getOpcode())
          return false;
        return areSemanticallyEqual(LHS: LU->getSubExpr(), RHS: RU->getSubExpr());
      }

      // Handle BinaryOperator (binary operations like p + offset)
      if (const auto *LB = dyn_cast<BinaryOperator>(Val: LHS)) {
        const auto *RB = dyn_cast<BinaryOperator>(Val: RHS);
        if (!RB)
          return false;
        if (LB->getOpcode() != RB->getOpcode())
          return false;
        return areSemanticallyEqual(LHS: LB->getLHS(), RHS: RB->getLHS()) &&
               areSemanticallyEqual(LHS: LB->getRHS(), RHS: RB->getRHS());
      }

      // Handle ArraySectionExpr (array sections like a[0:1])
      // Attach pointers should not contain array-sections, but currently we
      // don't emit an error.
      if (const auto *LAS = dyn_cast<ArraySectionExpr>(Val: LHS)) {
        const auto *RAS = dyn_cast<ArraySectionExpr>(Val: RHS);
        if (!RAS)
          return false;
        return areSemanticallyEqual(LHS: LAS->getBase(), RHS: RAS->getBase()) &&
               areSemanticallyEqual(LHS: LAS->getLowerBound(),
                                    RHS: RAS->getLowerBound()) &&
               areSemanticallyEqual(LHS: LAS->getLength(), RHS: RAS->getLength());
      }

      // Handle CastExpr (explicit casts)
      if (const auto *LC = dyn_cast<CastExpr>(Val: LHS)) {
        const auto *RC = dyn_cast<CastExpr>(Val: RHS);
        if (!RC)
          return false;
        if (LC->getCastKind() != RC->getCastKind())
          return false;
        return areSemanticallyEqual(LHS: LC->getSubExpr(), RHS: RC->getSubExpr());
      }

      // Handle CXXThisExpr (this pointer)
      if (isa<CXXThisExpr>(Val: LHS) && isa<CXXThisExpr>(Val: RHS))
        return true;

      // Handle IntegerLiteral (integer constants)
      if (const auto *LI = dyn_cast<IntegerLiteral>(Val: LHS)) {
        const auto *RI = dyn_cast<IntegerLiteral>(Val: RHS);
        if (!RI)
          return false;
        return LI->getValue() == RI->getValue();
      }

      // Handle CharacterLiteral (character constants)
      if (const auto *LC = dyn_cast<CharacterLiteral>(Val: LHS)) {
        const auto *RC = dyn_cast<CharacterLiteral>(Val: RHS);
        if (!RC)
          return false;
        return LC->getValue() == RC->getValue();
      }

      // Handle FloatingLiteral (floating point constants)
      if (const auto *LF = dyn_cast<FloatingLiteral>(Val: LHS)) {
        const auto *RF = dyn_cast<FloatingLiteral>(Val: RHS);
        if (!RF)
          return false;
        // Use bitwise comparison for floating point literals
        return LF->getValue().bitwiseIsEqual(RHS: RF->getValue());
      }

      // Handle StringLiteral (string constants)
      if (const auto *LS = dyn_cast<StringLiteral>(Val: LHS)) {
        const auto *RS = dyn_cast<StringLiteral>(Val: RHS);
        if (!RS)
          return false;
        return LS->getString() == RS->getString();
      }

      // Handle CXXNullPtrLiteralExpr (nullptr)
      if (isa<CXXNullPtrLiteralExpr>(Val: LHS) && isa<CXXNullPtrLiteralExpr>(Val: RHS))
        return true;

      // Handle CXXBoolLiteralExpr (true/false)
      if (const auto *LB = dyn_cast<CXXBoolLiteralExpr>(Val: LHS)) {
        const auto *RB = dyn_cast<CXXBoolLiteralExpr>(Val: RHS);
        if (!RB)
          return false;
        return LB->getValue() == RB->getValue();
      }

      // Fallback for other forms - use the existing comparison method
      return Expr::isSameComparisonOperand(E1: LHS, E2: RHS);
    }
  };
7185
7186 /// Get the offset of the OMP_MAP_MEMBER_OF field.
7187 static unsigned getFlagMemberOffset() {
7188 unsigned Offset = 0;
7189 for (uint64_t Remain =
7190 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
7191 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
7192 !(Remain & 1); Remain = Remain >> 1)
7193 Offset++;
7194 return Offset;
7195 }
7196
  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    /// \param MapDecl Declaration the mapping refers to (may be null).
    /// \param MapExpr Original map-clause expression, defaults to null when
    ///        the mapping has no associated source expression.
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };
7213
  // Convenience aliases: array types shared with (or paralleling) the
  // OpenMPIRBuilder map-info infrastructure, plus clang-side companions.
  using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
  using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
  using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
  using MapNonContiguousArrayTy =
      llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
  // One entry per map-clause component list: components, map type/modifiers,
  // implicit flag, mapper declaration, and the originating expression.
  using MapData =
      std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>,
                 bool /*IsImplicit*/, const ValueDecl *, const Expr *>;
  using MapDataArrayTy = SmallVector<MapData, 4>;
7228
7229 /// This structure contains combined information generated for mappable
7230 /// clauses, including base pointers, pointers, sizes, map types, user-defined
7231 /// mappers, and non-contiguous information.
7232 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
7233 MapExprsArrayTy Exprs;
7234 MapValueDeclsArrayTy Mappers;
7235 MapValueDeclsArrayTy DevicePtrDecls;
7236
7237 /// Append arrays in \a CurInfo.
7238 void append(MapCombinedInfoTy &CurInfo) {
7239 Exprs.append(in_start: CurInfo.Exprs.begin(), in_end: CurInfo.Exprs.end());
7240 DevicePtrDecls.append(in_start: CurInfo.DevicePtrDecls.begin(),
7241 in_end: CurInfo.DevicePtrDecls.end());
7242 Mappers.append(in_start: CurInfo.Mappers.begin(), in_end: CurInfo.Mappers.end());
7243 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
7244 }
7245 };
7246
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    // Map entries gathered before the struct's combined entry is emitted.
    MapCombinedInfoTy PreliminaryMapData;
    // Lowest mapped field: index within the record and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    // Highest mapped field: index within the record and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    // True if any of the mapped elements is an array section.
    bool IsArraySection = false;
    // True if the whole record is mapped.
    bool HasCompleteRecord = false;
  };
7262
  /// A struct to store the attach pointer and pointee information, to be used
  /// when emitting an attach entry.
  struct AttachInfoTy {
    Address AttachPtrAddr = Address::invalid();
    Address AttachPteeAddr = Address::invalid();
    const ValueDecl *AttachPtrDecl = nullptr;
    const Expr *AttachMapExpr = nullptr;

    /// An attach entry can only be emitted once both the pointer and the
    /// pointee addresses have been computed.
    bool isValid() const {
      return AttachPtrAddr.isValid() && AttachPteeAddr.isValid();
    }
  };
7275
7276 /// Check if there's any component list where the attach pointer expression
7277 /// matches the given captured variable.
7278 bool hasAttachEntryForCapturedVar(const ValueDecl *VD) const {
7279 for (const auto &AttachEntry : AttachPtrExprMap) {
7280 if (AttachEntry.second) {
7281 // Check if the attach pointer expression is a DeclRefExpr that
7282 // references the captured variable
7283 if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: AttachEntry.second))
7284 if (DRE->getDecl() == VD)
7285 return true;
7286 }
7287 }
7288 return false;
7289 }
7290
7291 /// Get the previously-cached attach pointer for a component list, if-any.
7292 const Expr *getAttachPtrExpr(
7293 OMPClauseMappableExprCommon::MappableExprComponentListRef Components)
7294 const {
7295 const auto It = AttachPtrExprMap.find(Val: Components);
7296 if (It != AttachPtrExprMap.end())
7297 return It->second;
7298
7299 return nullptr;
7300 }
7301
private:
  /// Information gathered for a single map-clause component list: the
  /// components themselves plus the map type, modifiers, and per-entry flags
  /// used during map-info generation.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    // True if the address of this entry must be returned to the program
    // (use_device_ptr / use_device_addr handling).
    bool ReturnDevicePointer = false;
    // True if the mapping was generated implicitly rather than written by
    // the user.
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;
    bool HasUdpFbNullify = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false, bool HasUdpFbNullify = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr),
          HasUdpFbNullify(HasUdpFbNullify) {}
  };
7331
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Set of defaultmap clause kinds that use firstprivate behavior.
  llvm::SmallSet<OpenMPDefaultmapClauseKind, 4> DefaultmapFirstprivateKinds;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between device addr declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      HasDevAddrsMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;

  /// Map from component lists to their attach pointer expressions.
  llvm::DenseMap<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                 const Expr *>
      AttachPtrExprMap;

  /// Map from attach pointer expressions to their component depth.
  /// nullptr key has std::nullopt depth. This can be used to order attach-ptr
  /// expressions with increasing/decreasing depth.
  /// The component-depth of `nullptr` (i.e. no attach-ptr) is `std::nullopt`.
  /// TODO: Not urgent, but we should ideally use the number of pointer
  /// dereferences in an expr as an indicator of its complexity, instead of the
  /// component-depth. That would be needed for us to treat `p[1]`, `*(p + 10)`,
  /// `*(p + 5 + 5)` together.
  llvm::DenseMap<const Expr *, std::optional<size_t>>
      AttachPtrComponentDepthMap = {{nullptr, std::nullopt}};

  /// Map from attach pointer expressions to the order they were computed in, in
  /// collectAttachPtrExprInfo(). The nullptr sentinel is pre-seeded at order 0.
  llvm::DenseMap<const Expr *, size_t> AttachPtrComputationOrderMap = {
      {nullptr, 0}};

  /// An instance of attach-ptr-expr comparator that can be used throughout the
  /// lifetime of this handler.
  AttachPtrExprComparator AttachPtrComparator;
7390
/// Compute the number of bytes mapped for expression \p E, as a runtime
/// value of the target's size type. Handles array-shaping expressions,
/// array sections (including `[lb:]` open-ended sections), and falls back
/// to the static type size otherwise.
llvm::Value *getExprTypeSize(const Expr *E) const {
  QualType ExprTy = E->getType().getCanonicalType();

  // Calculate the size for array shaping expression: the element size of
  // the pointee multiplied by every shaping dimension.
  if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(Val: E)) {
    llvm::Value *Size =
        CGF.getTypeSize(Ty: OAE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OAE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(E: SE);
      // Each dimension is widened/converted to size_t before multiplying.
      Sz = CGF.EmitScalarConversion(Src: Sz, SrcTy: SE->getType(),
                                    DstTy: CGF.getContext().getSizeType(),
                                    Loc: SE->getExprLoc());
      Size = CGF.Builder.CreateNUWMul(LHS: Size, RHS: Sz);
    }
    return Size;
  }

  // Reference types are ignored for mapping purposes.
  if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
    ExprTy = RefTy->getPointeeType().getCanonicalType();

  // Given that an array section is considered a built-in type, we need to
  // do the calculation based on the length of the section instead of relying
  // on CGF.getTypeSize(E->getType()).
  if (const auto *OAE = dyn_cast<ArraySectionExpr>(Val: E)) {
    QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
                          Base: OAE->getBase()->IgnoreParenImpCasts())
                          .getCanonicalType();

    // If there is no length associated with the expression and lower bound is
    // not specified too, that means we are using the whole length of the
    // base.
    if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
        !OAE->getLowerBound())
      return CGF.getTypeSize(Ty: BaseTy);

    // Element size comes from the pointee type for pointer bases, and from
    // the element type for array bases.
    llvm::Value *ElemSize;
    if (const auto *PTy = BaseTy->getAs<PointerType>()) {
      ElemSize = CGF.getTypeSize(Ty: PTy->getPointeeType().getCanonicalType());
    } else {
      const auto *ATy = cast<ArrayType>(Val: BaseTy.getTypePtr());
      assert(ATy && "Expecting array type if not a pointer type.");
      ElemSize = CGF.getTypeSize(Ty: ATy->getElementType().getCanonicalType());
    }

    // If we don't have a length at this point, that is because we have an
    // array section with a single element.
    if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
      return ElemSize;

    // Explicit length: size = length * element size.
    if (const Expr *LenExpr = OAE->getLength()) {
      llvm::Value *LengthVal = CGF.EmitScalarExpr(E: LenExpr);
      LengthVal = CGF.EmitScalarConversion(Src: LengthVal, SrcTy: LenExpr->getType(),
                                           DstTy: CGF.getContext().getSizeType(),
                                           Loc: LenExpr->getExprLoc());
      return CGF.Builder.CreateNUWMul(LHS: LengthVal, RHS: ElemSize);
    }
    assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
           OAE->getLowerBound() && "expected array_section[lb:].");
    // Size = sizetype - lb * elemtype;
    llvm::Value *LengthVal = CGF.getTypeSize(Ty: BaseTy);
    llvm::Value *LBVal = CGF.EmitScalarExpr(E: OAE->getLowerBound());
    LBVal = CGF.EmitScalarConversion(Src: LBVal, SrcTy: OAE->getLowerBound()->getType(),
                                     DstTy: CGF.getContext().getSizeType(),
                                     Loc: OAE->getLowerBound()->getExprLoc());
    LBVal = CGF.Builder.CreateNUWMul(LHS: LBVal, RHS: ElemSize);
    // Clamp to zero via a select so the NUW subtraction cannot wrap when
    // the lower-bound offset exceeds the base size.
    llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LHS: LengthVal, RHS: LBVal);
    llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LHS: LengthVal, RHS: LBVal);
    LengthVal = CGF.Builder.CreateSelect(
        C: Cmp, True: TrueVal, False: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 0));
    return LengthVal;
  }
  return CGF.getTypeSize(Ty: ExprTy);
}
7465
7466 /// Return the corresponding bits for a given map clause modifier. Add
7467 /// a flag marking the map as a pointer if requested. Add a flag marking the
7468 /// map as the first one of a series of maps that relate to the same map
7469 /// expression.
7470 OpenMPOffloadMappingFlags getMapTypeBits(
7471 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7472 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7473 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7474 OpenMPOffloadMappingFlags Bits =
7475 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
7476 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7477 switch (MapType) {
7478 case OMPC_MAP_alloc:
7479 case OMPC_MAP_release:
7480 // alloc and release is the default behavior in the runtime library, i.e.
7481 // if we don't pass any bits alloc/release that is what the runtime is
7482 // going to do. Therefore, we don't need to signal anything for these two
7483 // type modifiers.
7484 break;
7485 case OMPC_MAP_to:
7486 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
7487 break;
7488 case OMPC_MAP_from:
7489 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7490 break;
7491 case OMPC_MAP_tofrom:
7492 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
7493 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7494 break;
7495 case OMPC_MAP_delete:
7496 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
7497 break;
7498 case OMPC_MAP_unknown:
7499 llvm_unreachable("Unexpected map type!");
7500 }
7501 if (AddPtrFlag)
7502 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7503 if (AddIsTargetParamFlag)
7504 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
7505 if (llvm::is_contained(Range&: MapModifiers, Element: OMPC_MAP_MODIFIER_always))
7506 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
7507 if (llvm::is_contained(Range&: MapModifiers, Element: OMPC_MAP_MODIFIER_close))
7508 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
7509 if (llvm::is_contained(Range&: MapModifiers, Element: OMPC_MAP_MODIFIER_present) ||
7510 llvm::is_contained(Range&: MotionModifiers, Element: OMPC_MOTION_MODIFIER_present))
7511 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
7512 if (llvm::is_contained(Range&: MapModifiers, Element: OMPC_MAP_MODIFIER_ompx_hold))
7513 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
7514 if (IsNonContiguous)
7515 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
7516 return Bits;
7517 }
7518
7519 /// Return true if the provided expression is a final array section. A
7520 /// final array section, is one whose length can't be proved to be one.
7521 bool isFinalArraySectionExpression(const Expr *E) const {
7522 const auto *OASE = dyn_cast<ArraySectionExpr>(Val: E);
7523
7524 // It is not an array section and therefore not a unity-size one.
7525 if (!OASE)
7526 return false;
7527
7528 // An array section with no colon always refer to a single element.
7529 if (OASE->getColonLocFirst().isInvalid())
7530 return false;
7531
7532 const Expr *Length = OASE->getLength();
7533
7534 // If we don't have a length we have to check if the array has size 1
7535 // for this dimension. Also, we should always expect a length if the
7536 // base type is pointer.
7537 if (!Length) {
7538 QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
7539 Base: OASE->getBase()->IgnoreParenImpCasts())
7540 .getCanonicalType();
7541 if (const auto *ATy = dyn_cast<ConstantArrayType>(Val: BaseQTy.getTypePtr()))
7542 return ATy->getSExtSize() != 1;
7543 // If we don't have a constant dimension length, we have to consider
7544 // the current section as having any size, so it is not necessarily
7545 // unitary. If it happen to be unity size, that's user fault.
7546 return true;
7547 }
7548
7549 // Check if the length evaluates to 1.
7550 Expr::EvalResult Result;
7551 if (!Length->EvaluateAsInt(Result, Ctx: CGF.getContext()))
7552 return true; // Can have more that size 1.
7553
7554 llvm::APSInt ConstLength = Result.Val.getInt();
7555 return ConstLength.getSExtValue() != 1;
7556 }
7557
/// Emit an attach entry into \p CombinedInfo, using the information from \p
/// AttachInfo. For example, for a map of form `int *p; ... map(p[1:10])`,
/// an attach entry has the following form:
/// &p, &p[1], sizeof(void*), ATTACH
void emitAttachEntry(CodeGenFunction &CGF, MapCombinedInfoTy &CombinedInfo,
                     const AttachInfoTy &AttachInfo) const {
  assert(AttachInfo.isValid() &&
         "Expected valid attach pointer/pointee information!");

  // Size is the size of the pointer itself - use pointer size, not BaseDecl
  // size
  llvm::Value *PointerSize = CGF.Builder.CreateIntCast(
      V: llvm::ConstantInt::get(
          Ty: CGF.CGM.SizeTy, V: CGF.getContext()
                                 .getTypeSizeInChars(T: CGF.getContext().VoidPtrTy)
                                 .getQuantity()),
      DestTy: CGF.Int64Ty, /*isSigned=*/true);

  // Push one parallel entry into every CombinedInfo array so the per-entry
  // vectors stay in lockstep: base = &ptr, pointer = &pointee.
  CombinedInfo.Exprs.emplace_back(Args: AttachInfo.AttachPtrDecl,
                                  Args: AttachInfo.AttachMapExpr);
  CombinedInfo.BasePointers.push_back(
      Elt: AttachInfo.AttachPtrAddr.emitRawPointer(CGF));
  CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
  CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
  CombinedInfo.Pointers.push_back(
      Elt: AttachInfo.AttachPteeAddr.emitRawPointer(CGF));
  CombinedInfo.Sizes.push_back(Elt: PointerSize);
  CombinedInfo.Types.push_back(Elt: OpenMPOffloadMappingFlags::OMP_MAP_ATTACH);
  CombinedInfo.Mappers.push_back(Elt: nullptr);
  // Attach entries are always contiguous scalar copies: one dummy dimension.
  CombinedInfo.NonContigInfo.Dims.push_back(Elt: 1);
}
7589
/// A helper class to copy structures with overlapped elements, i.e. those
/// which have mappings of both "s" and "s.mem". Consecutive elements that
/// are not explicitly copied have mapping nodes synthesized for them,
/// taking care to avoid generating zero-sized copies.
///
/// Usage: call processField() once per overlapped field (in order of
/// increasing field address), then copyUntilEnd() for the trailing gap.
class CopyOverlappedEntryGaps {
  CodeGenFunction &CGF;
  MapCombinedInfoTy &CombinedInfo;
  // Flags/decl/expr to stamp on every synthesized gap entry.
  OpenMPOffloadMappingFlags Flags = OpenMPOffloadMappingFlags::OMP_MAP_NONE;
  const ValueDecl *MapDecl = nullptr;
  const Expr *MapExpr = nullptr;
  Address BP = Address::invalid();
  bool IsNonContiguous = false;
  uint64_t DimSize = 0;
  // These elements track the position as the struct is iterated over
  // (in order of increasing element address).
  const RecordDecl *LastParent = nullptr;
  // Bit offset just past the end of the last processed field.
  uint64_t Cursor = 0;
  // -1u so that the very first field (index 0) compares equal to
  // LastIndex + 1 and triggers no gap copy.
  unsigned LastIndex = -1u;
  // Address one element past the last processed field; start of the next gap.
  Address LB = Address::invalid();

public:
  CopyOverlappedEntryGaps(CodeGenFunction &CGF,
                          MapCombinedInfoTy &CombinedInfo,
                          OpenMPOffloadMappingFlags Flags,
                          const ValueDecl *MapDecl, const Expr *MapExpr,
                          Address BP, Address LB, bool IsNonContiguous,
                          uint64_t DimSize)
      : CGF(CGF), CombinedInfo(CombinedInfo), Flags(Flags), MapDecl(MapDecl),
        MapExpr(MapExpr), BP(BP), IsNonContiguous(IsNonContiguous),
        DimSize(DimSize), LB(LB) {}

  /// Advance over field \p FD of component \p MC, synthesizing a copy entry
  /// for any gap between the previous field and this one.
  /// \p EmitMemberExprBase is used to materialize the base lvalue when the
  /// field is an lvalue reference.
  void processField(
      const OMPClauseMappableExprCommon::MappableComponent &MC,
      const FieldDecl *FD,
      llvm::function_ref<LValue(CodeGenFunction &, const MemberExpr *)>
          EmitMemberExprBase) {
    const RecordDecl *RD = FD->getParent();
    const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(D: RD);
    uint64_t FieldOffset = RL.getFieldOffset(FieldNo: FD->getFieldIndex());
    uint64_t FieldSize =
        CGF.getContext().getTypeSize(T: FD->getType().getCanonicalType());
    Address ComponentLB = Address::invalid();

    if (FD->getType()->isLValueReferenceType()) {
      // For reference-typed fields, map the referring pointer slot itself
      // rather than the referenced storage.
      const auto *ME = cast<MemberExpr>(Val: MC.getAssociatedExpression());
      LValue BaseLVal = EmitMemberExprBase(CGF, ME);
      ComponentLB =
          CGF.EmitLValueForFieldInitialization(Base: BaseLVal, Field: FD).getAddress();
    } else {
      ComponentLB =
          CGF.EmitOMPSharedLValue(E: MC.getAssociatedExpression()).getAddress();
    }

    if (!LastParent)
      LastParent = RD;
    if (FD->getParent() == LastParent) {
      // Same record: a gap exists iff field indices are non-consecutive.
      if (FD->getFieldIndex() != LastIndex + 1)
        copyUntilField(FD, ComponentLB);
    } else {
      // Crossed into a different record: fall back to comparing bit offsets.
      LastParent = FD->getParent();
      if (((int64_t)FieldOffset - (int64_t)Cursor) > 0)
        copyUntilField(FD, ComponentLB);
    }
    Cursor = FieldOffset + FieldSize;
    LastIndex = FD->getFieldIndex();
    LB = CGF.Builder.CreateConstGEP(Addr: ComponentLB, Index: 1);
  }

  /// Synthesize a copy for the gap [LB, ComponentLB).
  /// \p FD is currently unused; it is kept for call-site symmetry with
  /// processField().
  void copyUntilField(const FieldDecl *FD, Address ComponentLB) {
    llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
    llvm::Value *LBPtr = LB.emitRawPointer(CGF);
    llvm::Value *Size = CGF.Builder.CreatePtrDiff(LHS: ComponentLBPtr, RHS: LBPtr);
    copySizedChunk(Base: LBPtr, Size);
  }

  /// Synthesize a copy for the trailing gap from the last processed field up
  /// to one past \p HB. Skipped when the cursor already reached (or passed)
  /// the end of the record, to avoid a zero/negative-sized copy.
  void copyUntilEnd(Address HB) {
    if (LastParent) {
      const ASTRecordLayout &RL =
          CGF.getContext().getASTRecordLayout(D: LastParent);
      if ((uint64_t)CGF.getContext().toBits(CharSize: RL.getSize()) <= Cursor)
        return;
    }
    llvm::Value *LBPtr = LB.emitRawPointer(CGF);
    llvm::Value *Size = CGF.Builder.CreatePtrDiff(
        LHS: CGF.Builder.CreateConstGEP(Addr: HB, Index: 1).emitRawPointer(CGF), RHS: LBPtr);
    copySizedChunk(Base: LBPtr, Size);
  }

  /// Push one synthesized map entry of \p Size bytes starting at \p Base
  /// into every parallel CombinedInfo array.
  void copySizedChunk(llvm::Value *Base, llvm::Value *Size) {
    CombinedInfo.Exprs.emplace_back(Args&: MapDecl, Args&: MapExpr);
    CombinedInfo.BasePointers.push_back(Elt: BP.emitRawPointer(CGF));
    CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
    CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
    CombinedInfo.Pointers.push_back(Elt: Base);
    CombinedInfo.Sizes.push_back(
        Elt: CGF.Builder.CreateIntCast(V: Size, DestTy: CGF.Int64Ty, /*isSigned=*/false));
    CombinedInfo.Types.push_back(Elt: Flags);
    CombinedInfo.Mappers.push_back(Elt: nullptr);
    CombinedInfo.NonContigInfo.Dims.push_back(Elt: IsNonContiguous ? DimSize : 1);
  }
};
7691
7692 /// Generate the base pointers, section pointers, sizes, map type bits, and
7693 /// user-defined mappers (all included in \a CombinedInfo) for the provided
7694 /// map type, map or motion modifiers, and expression components.
7695 /// \a IsFirstComponent should be set to true if the provided set of
7696 /// components is the first associated with a capture.
7697 void generateInfoForComponentList(
7698 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7699 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7700 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7701 MapCombinedInfoTy &CombinedInfo,
7702 MapCombinedInfoTy &StructBaseCombinedInfo,
7703 StructRangeInfoTy &PartialStruct, AttachInfoTy &AttachInfo,
7704 bool IsFirstComponentList, bool IsImplicit,
7705 bool GenerateAllInfoForClauses, const ValueDecl *Mapper = nullptr,
7706 bool ForDeviceAddr = false, const ValueDecl *BaseDecl = nullptr,
7707 const Expr *MapExpr = nullptr,
7708 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7709 OverlappedElements = {}) const {
7710
7711 // The following summarizes what has to be generated for each map and the
7712 // types below. The generated information is expressed in this order:
7713 // base pointer, section pointer, size, flags
7714 // (to add to the ones that come from the map type and modifier).
7715 // Entries annotated with (+) are only generated for "target" constructs,
7716 // and only if the variable at the beginning of the expression is used in
7717 // the region.
7718 //
7719 // double d;
7720 // int i[100];
7721 // float *p;
7722 // int **a = &i;
7723 //
7724 // struct S1 {
7725 // int i;
7726 // float f[50];
7727 // }
7728 // struct S2 {
7729 // int i;
7730 // float f[50];
7731 // S1 s;
7732 // double *p;
7733 // double *&pref;
7734 // struct S2 *ps;
7735 // int &ref;
7736 // }
7737 // S2 s;
7738 // S2 *ps;
7739 //
7740 // map(d)
7741 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7742 //
7743 // map(i)
7744 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7745 //
7746 // map(i[1:23])
7747 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7748 //
7749 // map(p)
7750 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7751 //
7752 // map(p[1:24])
7753 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM // map pointee
7754 // &p, &p[1], sizeof(void*), ATTACH // attach pointer/pointee, if both
7755 // // are present, and either is new
7756 //
7757 // map(([22])p)
7758 // p, p, 22*sizeof(float), TARGET_PARAM | TO | FROM
7759 // &p, p, sizeof(void*), ATTACH
7760 //
7761 // map((*a)[0:3])
7762 // a, a, 0, TARGET_PARAM | IMPLICIT // (+)
7763 // (*a)[0], &(*a)[0], 3 * sizeof(int), TO | FROM
7764 // &(*a), &(*a)[0], sizeof(void*), ATTACH
7765 // (+) Only on target, if a is used in the region
7766 // Note: Since the attach base-pointer is `*a`, which is not a scalar
7767 // variable, it doesn't determine the clause on `a`. `a` is mapped using
7768 // a zero-length-array-section map by generateDefaultMapInfo, if it is
7769 // referenced in the target region, because it is a pointer.
7770 //
7771 // map(**a)
7772 // a, a, 0, TARGET_PARAM | IMPLICIT // (+)
7773 // &(*a)[0], &(*a)[0], sizeof(int), TO | FROM
7774 // &(*a), &(*a)[0], sizeof(void*), ATTACH
7775 // (+) Only on target, if a is used in the region
7776 //
7777 // map(s)
7778 // FIXME: This needs to also imply map(ref_ptr_ptee: s.ref), since the
7779 // effect is supposed to be same as if the user had a map for every element
7780 // of the struct. We currently do a shallow-map of s.
7781 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7782 //
7783 // map(s.i)
7784 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7785 //
7786 // map(s.s.f)
7787 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7788 //
7789 // map(s.p)
7790 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7791 //
7792 // map(to: s.p[:22])
7793 // &s, &(s.p), sizeof(double*), TARGET_PARAM | IMPLICIT // (+)
7794 // &(s.p[0]), &(s.p[0]), 22 * sizeof(double*), TO | FROM
7795 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7796 //
7797 // map(to: s.ref)
7798 // &s, &(ptr(s.ref)), sizeof(int*), TARGET_PARAM (*)
7799 // &s, &(ptee(s.ref)), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7800 // (*) alloc space for struct members, only this is a target parameter.
7801 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7802 // optimizes this entry out, same in the examples below)
7803 // (***) map the pointee (map: to)
7804 // Note: ptr(s.ref) represents the referring pointer of s.ref
7805 // ptee(s.ref) represents the referenced pointee of s.ref
7806 //
7807 // map(to: s.pref)
7808 // &s, &(ptr(s.pref)), sizeof(double**), TARGET_PARAM
7809 // &s, &(ptee(s.pref)), sizeof(double*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7810 //
7811 // map(to: s.pref[:22])
7812 // &s, &(ptr(s.pref)), sizeof(double**), TARGET_PARAM | IMPLICIT // (+)
7813 // &s, &(ptee(s.pref)), sizeof(double*), MEMBER_OF(1) | PTR_AND_OBJ | TO |
7814 // FROM | IMPLICIT // (+)
7815 // &(ptee(s.pref)[0]), &(ptee(s.pref)[0]), 22 * sizeof(double), TO
7816 // &(ptee(s.pref)), &(ptee(s.pref)[0]), sizeof(void*), ATTACH
7817 //
7818 // map(s.ps)
7819 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7820 //
7821 // map(from: s.ps->s.i)
7822 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7823 // &(s.ps[0]), &(s.ps->s.i), sizeof(int), FROM
7824 // &(s.ps), &(s.ps->s.i), sizeof(void*), ATTACH
7825 //
7826 // map(to: s.ps->ps)
7827 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7828 // &(s.ps[0]), &(s.ps->ps), sizeof(S2*), TO
7829 // &(s.ps), &(s.ps->ps), sizeof(void*), ATTACH
7830 //
7831 // map(s.ps->ps->ps)
7832 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7833 // &(s.ps->ps[0]), &(s.ps->ps->ps), sizeof(S2*), TO
7834 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(void*), ATTACH
7835 //
7836 // map(to: s.ps->ps->s.f[:22])
7837 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7838 // &(s.ps->ps[0]), &(s.ps->ps->s.f[0]), 22*sizeof(float), TO
7839 // &(s.ps->ps), &(s.ps->ps->s.f[0]), sizeof(void*), ATTACH
7840 //
7841 // map(ps)
7842 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7843 //
7844 // map(ps->i)
7845 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7846 // &ps, &(ps->i), sizeof(void*), ATTACH
7847 //
7848 // map(ps->s.f)
7849 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7850 // &ps, &(ps->s.f[0]), sizeof(ps), ATTACH
7851 //
7852 // map(from: ps->p)
7853 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7854 // &ps, &(ps->p), sizeof(ps), ATTACH
7855 //
7856 // map(to: ps->p[:22])
7857 // ps, &(ps[0]), 0, TARGET_PARAM | IMPLICIT // (+)
7858 // &(ps->p[0]), &(ps->p[0]), 22*sizeof(double), TO
7859 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7860 //
7861 // map(ps->ps)
7862 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7863 // &ps, &(ps->ps), sizeof(ps), ATTACH
7864 //
7865 // map(from: ps->ps->s.i)
7866 // ps, &(ps[0]), 0, TARGET_PARAM | IMPLICIT // (+)
7867 // &(ps->ps[0]), &(ps->ps->s.i), sizeof(int), FROM
7868 // &(ps->ps), &(ps->ps->s.i), sizeof(void*), ATTACH
7869 //
7870 // map(from: ps->ps->ps)
7871 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7872 // &(ps->ps[0]), &(ps->ps->ps), sizeof(S2*), FROM
7873 // &(ps->ps), &(ps->ps->ps), sizeof(void*), ATTACH
7874 //
7875 // map(ps->ps->ps->ps)
7876 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7877 // &(ps->ps->ps[0]), &(ps->ps->ps->ps), sizeof(S2*), FROM
7878 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(void*), ATTACH
7879 //
7880 // map(to: ps->ps->ps->s.f[:22])
7881 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7882 // &(ps->ps->ps[0]), &(ps->ps->ps->s.f[0]), 22*sizeof(float), TO
7883 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), sizeof(void*), ATTACH
7884 //
7885 // map(to: s.f[:22]) map(from: s.p[:33])
7886 // On target, and if s is used in the region:
7887 //
7888 // &s, &(s.f[0]), 50*sizeof(float) +
7889 // sizeof(struct S1) +
7890 // sizeof(double*) (**), TARGET_PARAM
7891 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7892 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) | TO |
7893 // FROM | IMPLICIT
7894 // &(s.p[0]), &(s.p[0]), 33*sizeof(double), FROM
7895 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7896 // (**) allocate contiguous space needed to fit all mapped members even if
7897 // we allocate space for members not mapped (in this example,
7898 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7899 // them as well because they fall between &s.f[0] and &s.p)
7900 //
7901 // On other constructs, and, if s is not used in the region, on target:
7902 // &s, &(s.f[0]), 22*sizeof(float), TO
7903 // &(s.p[0]), &(s.p[0]), 33*sizeof(double), FROM
7904 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7905 //
7906 // map(from: s.f[:22]) map(to: ps->p[:33])
7907 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7908 // &ps[0], &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7909 // &(ps->p[0]), &(ps->p[0]), 33*sizeof(double), TO
7910 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7911 //
7912 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7913 // &s, &(s.f[0]), 50*sizeof(float) +
7914 // sizeof(struct S1), TARGET_PARAM
7915 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7916 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7917 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7918 // &(ps->p[0]), &(ps->p[0]), 33*sizeof(double), TO
7919 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7920 //
7921 // map(p[:100], p)
7922 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7923 // p, &p[0], 100*sizeof(float), TO | FROM
7924 // &p, &p[0], sizeof(float*), ATTACH
7925
7926 // Track if the map information being generated is the first for a capture.
7927 bool IsCaptureFirstInfo = IsFirstComponentList;
7928 // When the variable is on a declare target link or in a to clause with
7929 // unified memory, a reference is needed to hold the host/device address
7930 // of the variable.
7931 bool RequiresReference = false;
7932
7933 // Scan the components from the base to the complete expression.
7934 auto CI = Components.rbegin();
7935 auto CE = Components.rend();
7936 auto I = CI;
7937
7938 // Track if the map information being generated is the first for a list of
7939 // components.
7940 bool IsExpressionFirstInfo = true;
7941 bool FirstPointerInComplexData = false;
7942 Address BP = Address::invalid();
7943 Address FinalLowestElem = Address::invalid();
7944 const Expr *AssocExpr = I->getAssociatedExpression();
7945 const auto *AE = dyn_cast<ArraySubscriptExpr>(Val: AssocExpr);
7946 const auto *OASE = dyn_cast<ArraySectionExpr>(Val: AssocExpr);
7947 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(Val: AssocExpr);
7948
7949 // Get the pointer-attachment base-pointer for the given list, if any.
7950 const Expr *AttachPtrExpr = getAttachPtrExpr(Components);
7951 auto [AttachPtrAddr, AttachPteeBaseAddr] =
7952 getAttachPtrAddrAndPteeBaseAddr(AttachPtrExpr, CGF);
7953
7954 bool HasAttachPtr = AttachPtrExpr != nullptr;
7955 bool FirstComponentIsForAttachPtr = AssocExpr == AttachPtrExpr;
7956 bool SeenAttachPtr = FirstComponentIsForAttachPtr;
7957
7958 if (FirstComponentIsForAttachPtr) {
7959 // No need to process AttachPtr here. It will be processed at the end
7960 // after we have computed the pointee's address.
7961 ++I;
7962 } else if (isa<MemberExpr>(Val: AssocExpr)) {
7963 // The base is the 'this' pointer. The content of the pointer is going
7964 // to be the base of the field being mapped.
7965 BP = CGF.LoadCXXThisAddress();
7966 } else if ((AE && isa<CXXThisExpr>(Val: AE->getBase()->IgnoreParenImpCasts())) ||
7967 (OASE &&
7968 isa<CXXThisExpr>(Val: OASE->getBase()->IgnoreParenImpCasts()))) {
7969 BP = CGF.EmitOMPSharedLValue(E: AssocExpr).getAddress();
7970 } else if (OAShE &&
7971 isa<CXXThisExpr>(Val: OAShE->getBase()->IgnoreParenCasts())) {
7972 BP = Address(
7973 CGF.EmitScalarExpr(E: OAShE->getBase()),
7974 CGF.ConvertTypeForMem(T: OAShE->getBase()->getType()->getPointeeType()),
7975 CGF.getContext().getTypeAlignInChars(T: OAShE->getBase()->getType()));
7976 } else {
7977 // The base is the reference to the variable.
7978 // BP = &Var.
7979 BP = CGF.EmitOMPSharedLValue(E: AssocExpr).getAddress();
7980 if (const auto *VD =
7981 dyn_cast_or_null<VarDecl>(Val: I->getAssociatedDeclaration())) {
7982 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7983 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7984 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7985 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7986 *Res == OMPDeclareTargetDeclAttr::MT_Enter ||
7987 *Res == OMPDeclareTargetDeclAttr::MT_Local) &&
7988 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7989 RequiresReference = true;
7990 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7991 }
7992 }
7993 }
7994
7995 // If the variable is a pointer and is being dereferenced (i.e. is not
7996 // the last component), the base has to be the pointer itself, not its
7997 // reference. References are ignored for mapping purposes.
7998 QualType Ty =
7999 I->getAssociatedDeclaration()->getType().getNonReferenceType();
8000 if (Ty->isAnyPointerType() && std::next(x: I) != CE) {
8001 // No need to generate individual map information for the pointer, it
8002 // can be associated with the combined storage if shared memory mode is
8003 // active or the base declaration is not global variable.
8004 const auto *VD = dyn_cast<VarDecl>(Val: I->getAssociatedDeclaration());
8005 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8006 !VD || VD->hasLocalStorage() || HasAttachPtr)
8007 BP = CGF.EmitLoadOfPointer(Ptr: BP, PtrTy: Ty->castAs<PointerType>());
8008 else
8009 FirstPointerInComplexData = true;
8010 ++I;
8011 }
8012 }
8013
8014 // Track whether a component of the list should be marked as MEMBER_OF some
8015 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
8016 // in a component list should be marked as MEMBER_OF, all subsequent entries
8017 // do not belong to the base struct. E.g.
8018 // struct S2 s;
8019 // s.ps->ps->ps->f[:]
8020 // (1) (2) (3) (4)
8021 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
8022 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
8023 // is the pointee of ps(2) which is not member of struct s, so it should not
8024 // be marked as such (it is still PTR_AND_OBJ).
8025 // The variable is initialized to false so that PTR_AND_OBJ entries which
8026 // are not struct members are not considered (e.g. array of pointers to
8027 // data).
8028 bool ShouldBeMemberOf = false;
8029
8030 // Variable keeping track of whether or not we have encountered a component
8031 // in the component list which is a member expression. Useful when we have a
8032 // pointer or a final array section, in which case it is the previous
8033 // component in the list which tells us whether we have a member expression.
8034 // E.g. X.f[:]
8035 // While processing the final array section "[:]" it is "f" which tells us
8036 // whether we are dealing with a member of a declared struct.
8037 const MemberExpr *EncounteredME = nullptr;
8038
8039 // Track for the total number of dimension. Start from one for the dummy
8040 // dimension.
8041 uint64_t DimSize = 1;
8042
8043 // Detects non-contiguous updates due to strided accesses.
8044 // Sets the 'IsNonContiguous' flag so that the 'MapType' bits are set
8045 // correctly when generating information to be passed to the runtime. The
8046 // flag is set to true if any array section has a stride not equal to 1, or
8047 // if the stride is not a constant expression (conservatively assumed
8048 // non-contiguous).
8049 bool IsNonContiguous =
8050 CombinedInfo.NonContigInfo.IsNonContiguous ||
8051 any_of(Range&: Components, P: [&](const auto &Component) {
8052 const auto *OASE =
8053 dyn_cast<ArraySectionExpr>(Component.getAssociatedExpression());
8054 if (!OASE)
8055 return false;
8056
8057 const Expr *StrideExpr = OASE->getStride();
8058 if (!StrideExpr)
8059 return false;
8060
8061 assert(StrideExpr->getType()->isIntegerType() &&
8062 "Stride expression must be of integer type");
8063
8064 // If stride is not evaluatable as a constant, treat as
8065 // non-contiguous.
8066 const auto Constant =
8067 StrideExpr->getIntegerConstantExpr(Ctx: CGF.getContext());
8068 if (!Constant)
8069 return true;
8070
8071 // Treat non-unitary strides as non-contiguous.
8072 return !Constant->isOne();
8073 });
8074
8075 bool IsPrevMemberReference = false;
8076
8077 bool IsPartialMapped =
8078 !PartialStruct.PreliminaryMapData.BasePointers.empty();
8079
8080 // We need to check if we will be encountering any MEs. If we do not
8081 // encounter any ME expression it means we will be mapping the whole struct.
8082 // In that case we need to skip adding an entry for the struct to the
8083 // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
8084 // list only when generating all info for clauses.
8085 bool IsMappingWholeStruct = true;
8086 if (!GenerateAllInfoForClauses) {
8087 IsMappingWholeStruct = false;
8088 } else {
8089 for (auto TempI = I; TempI != CE; ++TempI) {
8090 const MemberExpr *PossibleME =
8091 dyn_cast<MemberExpr>(Val: TempI->getAssociatedExpression());
8092 if (PossibleME) {
8093 IsMappingWholeStruct = false;
8094 break;
8095 }
8096 }
8097 }
8098
8099 bool SeenFirstNonBinOpExprAfterAttachPtr = false;
8100 for (; I != CE; ++I) {
8101 // If we have a valid attach-ptr, we skip processing all components until
8102 // after the attach-ptr.
8103 if (HasAttachPtr && !SeenAttachPtr) {
8104 SeenAttachPtr = I->getAssociatedExpression() == AttachPtrExpr;
8105 continue;
8106 }
8107
8108 // After finding the attach pointer, skip binary-ops, to skip past
8109 // expressions like (p + 10), for a map like map(*(p + 10)), where p is
8110 // the attach-ptr.
8111 if (HasAttachPtr && !SeenFirstNonBinOpExprAfterAttachPtr) {
8112 const auto *BO = dyn_cast<BinaryOperator>(Val: I->getAssociatedExpression());
8113 if (BO)
8114 continue;
8115
8116 // Found the first non-binary-operator component after attach
8117 SeenFirstNonBinOpExprAfterAttachPtr = true;
8118 BP = AttachPteeBaseAddr;
8119 }
8120
8121 // If the current component is member of a struct (parent struct) mark it.
8122 if (!EncounteredME) {
8123 EncounteredME = dyn_cast<MemberExpr>(Val: I->getAssociatedExpression());
8124 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
8125 // as MEMBER_OF the parent struct.
8126 if (EncounteredME) {
8127 ShouldBeMemberOf = true;
8128 // Do not emit as complex pointer if this is actually not array-like
8129 // expression.
8130 if (FirstPointerInComplexData) {
8131 QualType Ty = std::prev(x: I)
8132 ->getAssociatedDeclaration()
8133 ->getType()
8134 .getNonReferenceType();
8135 BP = CGF.EmitLoadOfPointer(Ptr: BP, PtrTy: Ty->castAs<PointerType>());
8136 FirstPointerInComplexData = false;
8137 }
8138 }
8139 }
8140
8141 auto Next = std::next(x: I);
8142
8143 // We need to generate the addresses and sizes if this is the last
8144 // component, if the component is a pointer or if it is an array section
8145 // whose length can't be proved to be one. If this is a pointer, it
8146 // becomes the base address for the following components.
8147
8148 // A final array section, is one whose length can't be proved to be one.
8149 // If the map item is non-contiguous then we don't treat any array section
8150 // as final array section.
8151 bool IsFinalArraySection =
8152 !IsNonContiguous &&
8153 isFinalArraySectionExpression(E: I->getAssociatedExpression());
8154
8155 // If we have a declaration for the mapping use that, otherwise use
8156 // the base declaration of the map clause.
8157 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
8158 ? I->getAssociatedDeclaration()
8159 : BaseDecl;
8160 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
8161 : MapExpr;
8162
8163 // Get information on whether the element is a pointer. Have to do a
8164 // special treatment for array sections given that they are built-in
8165 // types.
8166 const auto *OASE =
8167 dyn_cast<ArraySectionExpr>(Val: I->getAssociatedExpression());
8168 const auto *OAShE =
8169 dyn_cast<OMPArrayShapingExpr>(Val: I->getAssociatedExpression());
8170 const auto *UO = dyn_cast<UnaryOperator>(Val: I->getAssociatedExpression());
8171 const auto *BO = dyn_cast<BinaryOperator>(Val: I->getAssociatedExpression());
8172 bool IsPointer =
8173 OAShE ||
8174 (OASE && ArraySectionExpr::getBaseOriginalType(Base: OASE)
8175 .getCanonicalType()
8176 ->isAnyPointerType()) ||
8177 I->getAssociatedExpression()->getType()->isAnyPointerType();
8178 bool IsMemberReference = isa<MemberExpr>(Val: I->getAssociatedExpression()) &&
8179 MapDecl &&
8180 MapDecl->getType()->isLValueReferenceType();
8181 bool IsNonDerefPointer = IsPointer &&
8182 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
8183 !IsNonContiguous;
8184
8185 if (OASE)
8186 ++DimSize;
8187
8188 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
8189 IsFinalArraySection) {
8190 // If this is not the last component, we expect the pointer to be
8191 // associated with an array expression or member expression.
8192 assert((Next == CE ||
8193 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8194 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8195 isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
8196 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
8197 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8198 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8199 "Unexpected expression");
8200
8201 Address LB = Address::invalid();
8202 Address LowestElem = Address::invalid();
8203 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8204 const MemberExpr *E) {
8205 const Expr *BaseExpr = E->getBase();
8206 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
8207 // scalar.
8208 LValue BaseLV;
8209 if (E->isArrow()) {
8210 LValueBaseInfo BaseInfo;
8211 TBAAAccessInfo TBAAInfo;
8212 Address Addr =
8213 CGF.EmitPointerWithAlignment(Addr: BaseExpr, BaseInfo: &BaseInfo, TBAAInfo: &TBAAInfo);
8214 QualType PtrTy = BaseExpr->getType()->getPointeeType();
8215 BaseLV = CGF.MakeAddrLValue(Addr, T: PtrTy, BaseInfo, TBAAInfo);
8216 } else {
8217 BaseLV = CGF.EmitOMPSharedLValue(E: BaseExpr);
8218 }
8219 return BaseLV;
8220 };
8221 if (OAShE) {
8222 LowestElem = LB =
8223 Address(CGF.EmitScalarExpr(E: OAShE->getBase()),
8224 CGF.ConvertTypeForMem(
8225 T: OAShE->getBase()->getType()->getPointeeType()),
8226 CGF.getContext().getTypeAlignInChars(
8227 T: OAShE->getBase()->getType()));
8228 } else if (IsMemberReference) {
8229 const auto *ME = cast<MemberExpr>(Val: I->getAssociatedExpression());
8230 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8231 LowestElem = CGF.EmitLValueForFieldInitialization(
8232 Base: BaseLVal, Field: cast<FieldDecl>(Val: MapDecl))
8233 .getAddress();
8234 LB = CGF.EmitLoadOfReferenceLValue(RefAddr: LowestElem, RefTy: MapDecl->getType())
8235 .getAddress();
8236 } else {
8237 LowestElem = LB =
8238 CGF.EmitOMPSharedLValue(E: I->getAssociatedExpression())
8239 .getAddress();
8240 }
8241
8242 // Save the final LowestElem, to use it as the pointee in attach maps,
8243 // if emitted.
8244 if (Next == CE)
8245 FinalLowestElem = LowestElem;
8246
8247 // If this component is a pointer inside the base struct then we don't
8248 // need to create any entry for it - it will be combined with the object
8249 // it is pointing to into a single PTR_AND_OBJ entry.
8250 bool IsMemberPointerOrAddr =
8251 EncounteredME &&
8252 (((IsPointer || ForDeviceAddr) &&
8253 I->getAssociatedExpression() == EncounteredME) ||
8254 (IsPrevMemberReference && !IsPointer) ||
8255 (IsMemberReference && Next != CE &&
8256 !Next->getAssociatedExpression()->getType()->isPointerType()));
8257 if (!OverlappedElements.empty() && Next == CE) {
8258 // Handle base element with the info for overlapped elements.
8259 assert(!PartialStruct.Base.isValid() && "The base element is set.");
8260 assert(!IsPointer &&
8261 "Unexpected base element with the pointer type.");
8262 // Mark the whole struct as the struct that requires allocation on the
8263 // device.
8264 PartialStruct.LowestElem = {0, LowestElem};
8265 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8266 T: I->getAssociatedExpression()->getType());
8267 Address HB = CGF.Builder.CreateConstGEP(
8268 Addr: CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8269 Addr: LowestElem, Ty: CGF.VoidPtrTy, ElementTy: CGF.Int8Ty),
8270 Index: TypeSize.getQuantity() - 1);
8271 PartialStruct.HighestElem = {
8272 std::numeric_limits<decltype(
8273 PartialStruct.HighestElem.first)>::max(),
8274 HB};
8275 PartialStruct.Base = BP;
8276 PartialStruct.LB = LB;
8277 assert(
8278 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8279 "Overlapped elements must be used only once for the variable.");
8280 std::swap(a&: PartialStruct.PreliminaryMapData, b&: CombinedInfo);
8281 // Emit data for non-overlapped data.
8282 OpenMPOffloadMappingFlags Flags =
8283 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8284 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8285 /*AddPtrFlag=*/false,
8286 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8287 CopyOverlappedEntryGaps CopyGaps(CGF, CombinedInfo, Flags, MapDecl,
8288 MapExpr, BP, LB, IsNonContiguous,
8289 DimSize);
8290 // Do bitcopy of all non-overlapped structure elements.
8291 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8292 Component : OverlappedElements) {
8293 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8294 Component) {
8295 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8296 if (const auto *FD = dyn_cast<FieldDecl>(Val: VD)) {
8297 CopyGaps.processField(MC, FD, EmitMemberExprBase);
8298 }
8299 }
8300 }
8301 }
8302 CopyGaps.copyUntilEnd(HB);
8303 break;
8304 }
8305 llvm::Value *Size = getExprTypeSize(E: I->getAssociatedExpression());
8306 // Skip adding an entry in the CurInfo of this combined entry if the
8307 // whole struct is currently being mapped. The struct needs to be added
8308 // in the first position before any data internal to the struct is being
8309 // mapped.
8310 // Skip adding an entry in the CurInfo of this combined entry if the
8311 // PartialStruct.PreliminaryMapData.BasePointers has been mapped.
8312 if ((!IsMemberPointerOrAddr && !IsPartialMapped) ||
8313 (Next == CE && MapType != OMPC_MAP_unknown)) {
8314 if (!IsMappingWholeStruct) {
8315 CombinedInfo.Exprs.emplace_back(Args&: MapDecl, Args&: MapExpr);
8316 CombinedInfo.BasePointers.push_back(Elt: BP.emitRawPointer(CGF));
8317 CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
8318 CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
8319 CombinedInfo.Pointers.push_back(Elt: LB.emitRawPointer(CGF));
8320 CombinedInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
8321 V: Size, DestTy: CGF.Int64Ty, /*isSigned=*/true));
8322 CombinedInfo.NonContigInfo.Dims.push_back(Elt: IsNonContiguous ? DimSize
8323 : 1);
8324 } else {
8325 StructBaseCombinedInfo.Exprs.emplace_back(Args&: MapDecl, Args&: MapExpr);
8326 StructBaseCombinedInfo.BasePointers.push_back(
8327 Elt: BP.emitRawPointer(CGF));
8328 StructBaseCombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
8329 StructBaseCombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
8330 StructBaseCombinedInfo.Pointers.push_back(Elt: LB.emitRawPointer(CGF));
8331 StructBaseCombinedInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
8332 V: Size, DestTy: CGF.Int64Ty, /*isSigned=*/true));
8333 StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
8334 Elt: IsNonContiguous ? DimSize : 1);
8335 }
8336
8337 // If Mapper is valid, the last component inherits the mapper.
8338 bool HasMapper = Mapper && Next == CE;
8339 if (!IsMappingWholeStruct)
8340 CombinedInfo.Mappers.push_back(Elt: HasMapper ? Mapper : nullptr);
8341 else
8342 StructBaseCombinedInfo.Mappers.push_back(Elt: HasMapper ? Mapper
8343 : nullptr);
8344
8345 // We need to add a pointer flag for each map that comes from the
8346 // same expression except for the first one. We also need to signal
8347 // this map is the first one that relates with the current capture
8348 // (there is a set of entries for each capture).
8349 OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8350 MapType, MapModifiers, MotionModifiers, IsImplicit,
8351 AddPtrFlag: !IsExpressionFirstInfo || RequiresReference ||
8352 FirstPointerInComplexData || IsMemberReference,
8353 AddIsTargetParamFlag: IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8354
8355 if (!IsExpressionFirstInfo || IsMemberReference) {
8356 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8357 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8358 if (IsPointer || (IsMemberReference && Next != CE))
8359 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8360 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
8361 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
8362 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
8363 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
8364
8365 if (ShouldBeMemberOf) {
8366 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8367 // should be later updated with the correct value of MEMBER_OF.
8368 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
8369 // From now on, all subsequent PTR_AND_OBJ entries should not be
8370 // marked as MEMBER_OF.
8371 ShouldBeMemberOf = false;
8372 }
8373 }
8374
8375 if (!IsMappingWholeStruct)
8376 CombinedInfo.Types.push_back(Elt: Flags);
8377 else
8378 StructBaseCombinedInfo.Types.push_back(Elt: Flags);
8379 }
8380
8381 // If we have encountered a member expression so far, keep track of the
8382 // mapped member. If the parent is "*this", then the value declaration
8383 // is nullptr.
8384 if (EncounteredME) {
8385 const auto *FD = cast<FieldDecl>(Val: EncounteredME->getMemberDecl());
8386 unsigned FieldIndex = FD->getFieldIndex();
8387
8388 // Update info about the lowest and highest elements for this struct
8389 if (!PartialStruct.Base.isValid()) {
8390 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8391 if (IsFinalArraySection && OASE) {
8392 Address HB =
8393 CGF.EmitArraySectionExpr(E: OASE, /*IsLowerBound=*/false)
8394 .getAddress();
8395 PartialStruct.HighestElem = {FieldIndex, HB};
8396 } else {
8397 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8398 }
8399 PartialStruct.Base = BP;
8400 PartialStruct.LB = BP;
8401 } else if (FieldIndex < PartialStruct.LowestElem.first) {
8402 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8403 } else if (FieldIndex > PartialStruct.HighestElem.first) {
8404 if (IsFinalArraySection && OASE) {
8405 Address HB =
8406 CGF.EmitArraySectionExpr(E: OASE, /*IsLowerBound=*/false)
8407 .getAddress();
8408 PartialStruct.HighestElem = {FieldIndex, HB};
8409 } else {
8410 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8411 }
8412 }
8413 }
8414
8415 // Need to emit combined struct for array sections.
8416 if (IsFinalArraySection || IsNonContiguous)
8417 PartialStruct.IsArraySection = true;
8418
8419 // If we have a final array section, we are done with this expression.
8420 if (IsFinalArraySection)
8421 break;
8422
8423 // The pointer becomes the base for the next element.
8424 if (Next != CE)
8425 BP = IsMemberReference ? LowestElem : LB;
8426 if (!IsPartialMapped)
8427 IsExpressionFirstInfo = false;
8428 IsCaptureFirstInfo = false;
8429 FirstPointerInComplexData = false;
8430 IsPrevMemberReference = IsMemberReference;
8431 } else if (FirstPointerInComplexData) {
8432 QualType Ty = Components.rbegin()
8433 ->getAssociatedDeclaration()
8434 ->getType()
8435 .getNonReferenceType();
8436 BP = CGF.EmitLoadOfPointer(Ptr: BP, PtrTy: Ty->castAs<PointerType>());
8437 FirstPointerInComplexData = false;
8438 }
8439 }
8440 // If ran into the whole component - allocate the space for the whole
8441 // record.
8442 if (!EncounteredME)
8443 PartialStruct.HasCompleteRecord = true;
8444
8445 // Populate ATTACH information for later processing by emitAttachEntry.
8446 if (shouldEmitAttachEntry(PointerExpr: AttachPtrExpr, MapBaseDecl: BaseDecl, CGF, CurDir)) {
8447 AttachInfo.AttachPtrAddr = AttachPtrAddr;
8448 AttachInfo.AttachPteeAddr = FinalLowestElem;
8449 AttachInfo.AttachPtrDecl = BaseDecl;
8450 AttachInfo.AttachMapExpr = MapExpr;
8451 }
8452
8453 if (!IsNonContiguous)
8454 return;
8455
8456 const ASTContext &Context = CGF.getContext();
8457
8458 // For supporting stride in array section, we need to initialize the first
8459 // dimension size as 1, first offset as 0, and first count as 1
8460 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(Ty: CGF.CGM.Int64Ty, V: 0)};
8461 MapValuesArrayTy CurCounts;
8462 MapValuesArrayTy CurStrides = {llvm::ConstantInt::get(Ty: CGF.CGM.Int64Ty, V: 1)};
8463 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(Ty: CGF.CGM.Int64Ty, V: 1)};
8464 uint64_t ElementTypeSize;
8465
8466 // Collect Size information for each dimension and get the element size as
8467 // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8468 // should be [10, 10] and the first stride is 4 btyes.
8469 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8470 Components) {
8471 const Expr *AssocExpr = Component.getAssociatedExpression();
8472 const auto *OASE = dyn_cast<ArraySectionExpr>(Val: AssocExpr);
8473
8474 if (!OASE)
8475 continue;
8476
8477 QualType Ty = ArraySectionExpr::getBaseOriginalType(Base: OASE->getBase());
8478 auto *CAT = Context.getAsConstantArrayType(T: Ty);
8479 auto *VAT = Context.getAsVariableArrayType(T: Ty);
8480
8481 // We need all the dimension size except for the last dimension.
8482 assert((VAT || CAT || &Component == &*Components.begin()) &&
8483 "Should be either ConstantArray or VariableArray if not the "
8484 "first Component");
8485
8486 // Get element size if CurCounts is empty.
8487 if (CurCounts.empty()) {
8488 const Type *ElementType = nullptr;
8489 if (CAT)
8490 ElementType = CAT->getElementType().getTypePtr();
8491 else if (VAT)
8492 ElementType = VAT->getElementType().getTypePtr();
8493 else if (&Component == &*Components.begin()) {
8494 // If the base is a raw pointer (e.g. T *data with data[a:b:c]),
8495 // there was no earlier CAT/VAT/array handling to establish
8496 // ElementType. Capture the pointee type now so that subsequent
8497 // components (offset/length/stride) have a concrete element type to
8498 // work with. This makes pointer-backed sections behave consistently
8499 // with CAT/VAT/array bases.
8500 if (const auto *PtrType = Ty->getAs<PointerType>())
8501 ElementType = PtrType->getPointeeType().getTypePtr();
8502 } else {
8503 // Any component after the first should never have a raw pointer type;
8504 // by this point. ElementType must already be known (set above or in
8505 // prior array / CAT / VAT handling).
8506 assert(!Ty->isPointerType() &&
8507 "Non-first components should not be raw pointers");
8508 }
8509
8510 // At this stage, if ElementType was a base pointer and we are in the
8511 // first iteration, it has been computed.
8512 if (ElementType) {
8513 // For the case that having pointer as base, we need to remove one
8514 // level of indirection.
8515 if (&Component != &*Components.begin())
8516 ElementType = ElementType->getPointeeOrArrayElementType();
8517 ElementTypeSize =
8518 Context.getTypeSizeInChars(T: ElementType).getQuantity();
8519 CurCounts.push_back(
8520 Elt: llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: ElementTypeSize));
8521 }
8522 }
8523 // Get dimension value except for the last dimension since we don't need
8524 // it.
8525 if (DimSizes.size() < Components.size() - 1) {
8526 if (CAT)
8527 DimSizes.push_back(
8528 Elt: llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: CAT->getZExtSize()));
8529 else if (VAT)
8530 DimSizes.push_back(Elt: CGF.Builder.CreateIntCast(
8531 V: CGF.EmitScalarExpr(E: VAT->getSizeExpr()), DestTy: CGF.Int64Ty,
8532 /*IsSigned=*/isSigned: false));
8533 }
8534 }
8535
8536 // Skip the dummy dimension since we have already have its information.
8537 auto *DI = DimSizes.begin() + 1;
8538 // Product of dimension.
8539 llvm::Value *DimProd =
8540 llvm::ConstantInt::get(Ty: CGF.CGM.Int64Ty, V: ElementTypeSize);
8541
8542 // Collect info for non-contiguous. Notice that offset, count, and stride
8543 // are only meaningful for array-section, so we insert a null for anything
8544 // other than array-section.
8545 // Also, the size of offset, count, and stride are not the same as
8546 // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8547 // count, and stride are the same as the number of non-contiguous
8548 // declaration in target update to/from clause.
8549 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8550 Components) {
8551 const Expr *AssocExpr = Component.getAssociatedExpression();
8552
8553 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(Val: AssocExpr)) {
8554 llvm::Value *Offset = CGF.Builder.CreateIntCast(
8555 V: CGF.EmitScalarExpr(E: AE->getIdx()), DestTy: CGF.Int64Ty,
8556 /*isSigned=*/false);
8557 CurOffsets.push_back(Elt: Offset);
8558 CurCounts.push_back(Elt: llvm::ConstantInt::get(Ty: CGF.Int64Ty, /*V=*/1));
8559 CurStrides.push_back(Elt: CurStrides.back());
8560 continue;
8561 }
8562
8563 const auto *OASE = dyn_cast<ArraySectionExpr>(Val: AssocExpr);
8564
8565 if (!OASE)
8566 continue;
8567
8568 // Offset
8569 const Expr *OffsetExpr = OASE->getLowerBound();
8570 llvm::Value *Offset = nullptr;
8571 if (!OffsetExpr) {
8572 // If offset is absent, then we just set it to zero.
8573 Offset = llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: 0);
8574 } else {
8575 Offset = CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: OffsetExpr),
8576 DestTy: CGF.Int64Ty,
8577 /*isSigned=*/false);
8578 }
8579
8580 // Count
8581 const Expr *CountExpr = OASE->getLength();
8582 llvm::Value *Count = nullptr;
8583 if (!CountExpr) {
8584 // In Clang, once a high dimension is an array section, we construct all
8585 // the lower dimension as array section, however, for case like
8586 // arr[0:2][2], Clang construct the inner dimension as an array section
8587 // but it actually is not in an array section form according to spec.
8588 if (!OASE->getColonLocFirst().isValid() &&
8589 !OASE->getColonLocSecond().isValid()) {
8590 Count = llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: 1);
8591 } else {
8592 // OpenMP 5.0, 2.1.5 Array Sections, Description.
8593 // When the length is absent it defaults to ⌈(size −
8594 // lower-bound)/stride⌉, where size is the size of the array
8595 // dimension.
8596 const Expr *StrideExpr = OASE->getStride();
8597 llvm::Value *Stride =
8598 StrideExpr
8599 ? CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: StrideExpr),
8600 DestTy: CGF.Int64Ty, /*isSigned=*/false)
8601 : nullptr;
8602 if (Stride)
8603 Count = CGF.Builder.CreateUDiv(
8604 LHS: CGF.Builder.CreateNUWSub(LHS: *DI, RHS: Offset), RHS: Stride);
8605 else
8606 Count = CGF.Builder.CreateNUWSub(LHS: *DI, RHS: Offset);
8607 }
8608 } else {
8609 Count = CGF.EmitScalarExpr(E: CountExpr);
8610 }
8611 Count = CGF.Builder.CreateIntCast(V: Count, DestTy: CGF.Int64Ty, /*isSigned=*/false);
8612 CurCounts.push_back(Elt: Count);
8613
8614 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8615 // Offset_n' = Offset_n * (D_0 * D_1 ... * D_n-1) * Unit size
8616 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8617 // Offset Count Stride
8618 // D0 0 4 1 (int) <- dummy dimension
8619 // D1 0 2 8 (2 * (1) * 4)
8620 // D2 100 2 20 (1 * (1 * 5) * 4)
8621 // D3 0 2 200 (2 * (1 * 5 * 4) * 4)
8622 const Expr *StrideExpr = OASE->getStride();
8623 llvm::Value *Stride =
8624 StrideExpr
8625 ? CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: StrideExpr),
8626 DestTy: CGF.Int64Ty, /*isSigned=*/false)
8627 : nullptr;
8628 DimProd = CGF.Builder.CreateNUWMul(LHS: DimProd, RHS: *(DI - 1));
8629 if (Stride)
8630 CurStrides.push_back(Elt: CGF.Builder.CreateNUWMul(LHS: DimProd, RHS: Stride));
8631 else
8632 CurStrides.push_back(Elt: DimProd);
8633
8634 Offset = CGF.Builder.CreateNUWMul(LHS: DimProd, RHS: Offset);
8635 CurOffsets.push_back(Elt: Offset);
8636
8637 if (DI != DimSizes.end())
8638 ++DI;
8639 }
8640
8641 CombinedInfo.NonContigInfo.Offsets.push_back(Elt: CurOffsets);
8642 CombinedInfo.NonContigInfo.Counts.push_back(Elt: CurCounts);
8643 CombinedInfo.NonContigInfo.Strides.push_back(Elt: CurStrides);
8644 }
8645
8646 /// Return the adjusted map modifiers if the declaration a capture refers to
8647 /// appears in a first-private clause. This is expected to be used only with
8648 /// directives that start with 'target'.
8649 OpenMPOffloadMappingFlags
8650 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8651 assert(Cap.capturesVariable() && "Expected capture by reference only!");
8652
8653 // A first private variable captured by reference will use only the
8654 // 'private ptr' and 'map to' flag. Return the right flags if the captured
8655 // declaration is known as first-private in this handler.
8656 if (FirstPrivateDecls.count(Val: Cap.getCapturedVar())) {
8657 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8658 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8659 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
8660 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
8661 OpenMPOffloadMappingFlags::OMP_MAP_TO;
8662 }
8663 auto I = LambdasMap.find(Val: Cap.getCapturedVar()->getCanonicalDecl());
8664 if (I != LambdasMap.end())
8665 // for map(to: lambda): using user specified map type.
8666 return getMapTypeBits(
8667 MapType: I->getSecond()->getMapType(), MapModifiers: I->getSecond()->getMapTypeModifiers(),
8668 /*MotionModifiers=*/{}, IsImplicit: I->getSecond()->isImplicit(),
8669 /*AddPtrFlag=*/false,
8670 /*AddIsTargetParamFlag=*/false,
8671 /*isNonContiguous=*/IsNonContiguous: false);
8672 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8673 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
8674 }
8675
  /// Flatten the layout of \p RD into \p Layout as an ordered list of
  /// non-empty, non-bitfield fields, recursing into non-empty base classes in
  /// LLVM struct-field order. \p AsBase selects the base-subobject LLVM type
  /// (which excludes tail padding) instead of the complete-object type.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    // Scratch array indexed by LLVM field number; each slot is either a base
    // class to recurse into, a field to emit, or null (padding/bitfield).
    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;

      QualType BaseTy = I.getType();
      const auto *Base = BaseTy->getAsCXXRecordDecl();
      // Ignore empty bases: they occupy no LLVM field of their own.
      if (isEmptyRecordForLayout(Context: CGF.getContext(), T: BaseTy) ||
          CGF.getContext()
              .getASTRecordLayout(D: Base)
              .getNonVirtualSize()
              .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(RD: Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      QualType BaseTy = I.getType();
      // Ignore empty bases.
      if (isEmptyRecordForLayout(Context: CGF.getContext(), T: BaseTy))
        continue;

      const auto *Base = BaseTy->getAsCXXRecordDecl();
      unsigned FieldIndex = RL.getVirtualBaseIndex(base: Base);
      // A non-virtual base recorded above takes precedence for this slot.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() &&
          !isEmptyFieldForLayout(Context: CGF.getContext(), FD: Field)) {
        unsigned FieldIndex = RL.getLLVMFieldNo(FD: Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Walk the slots in LLVM field order: recurse into bases (as base
    // subobjects), append fields directly.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = dyn_cast<const CXXRecordDecl *>(Val: Data))
        getPlainLayout(RD: Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Elt: cast<const FieldDecl *>(Val: Data));
    }
  }
8741
8742 /// Returns the address corresponding to \p PointerExpr.
8743 static Address getAttachPtrAddr(const Expr *PointerExpr,
8744 CodeGenFunction &CGF) {
8745 assert(PointerExpr && "Cannot get addr from null attach-ptr expr");
8746 Address AttachPtrAddr = Address::invalid();
8747
8748 if (auto *DRE = dyn_cast<DeclRefExpr>(Val: PointerExpr)) {
8749 // If the pointer is a variable, we can use its address directly.
8750 AttachPtrAddr = CGF.EmitLValue(E: DRE).getAddress();
8751 } else if (auto *OASE = dyn_cast<ArraySectionExpr>(Val: PointerExpr)) {
8752 AttachPtrAddr =
8753 CGF.EmitArraySectionExpr(E: OASE, /*IsLowerBound=*/true).getAddress();
8754 } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(Val: PointerExpr)) {
8755 AttachPtrAddr = CGF.EmitLValue(E: ASE).getAddress();
8756 } else if (auto *ME = dyn_cast<MemberExpr>(Val: PointerExpr)) {
8757 AttachPtrAddr = CGF.EmitMemberExpr(E: ME).getAddress();
8758 } else if (auto *UO = dyn_cast<UnaryOperator>(Val: PointerExpr)) {
8759 assert(UO->getOpcode() == UO_Deref &&
8760 "Unexpected unary-operator on attach-ptr-expr");
8761 AttachPtrAddr = CGF.EmitLValue(E: UO).getAddress();
8762 }
8763 assert(AttachPtrAddr.isValid() &&
8764 "Failed to get address for attach pointer expression");
8765 return AttachPtrAddr;
8766 }
8767
8768 /// Get the address of the attach pointer, and a load from it, to get the
8769 /// pointee base address.
8770 /// \return A pair containing AttachPtrAddr and AttachPteeBaseAddr. The pair
8771 /// contains invalid addresses if \p AttachPtrExpr is null.
8772 static std::pair<Address, Address>
8773 getAttachPtrAddrAndPteeBaseAddr(const Expr *AttachPtrExpr,
8774 CodeGenFunction &CGF) {
8775
8776 if (!AttachPtrExpr)
8777 return {Address::invalid(), Address::invalid()};
8778
8779 Address AttachPtrAddr = getAttachPtrAddr(PointerExpr: AttachPtrExpr, CGF);
8780 assert(AttachPtrAddr.isValid() && "Invalid attach pointer addr");
8781
8782 QualType AttachPtrType =
8783 OMPClauseMappableExprCommon::getComponentExprElementType(Exp: AttachPtrExpr)
8784 .getCanonicalType();
8785
8786 Address AttachPteeBaseAddr = CGF.EmitLoadOfPointer(
8787 Ptr: AttachPtrAddr, PtrTy: AttachPtrType->castAs<PointerType>());
8788 assert(AttachPteeBaseAddr.isValid() && "Invalid attach pointee base addr");
8789
8790 return {AttachPtrAddr, AttachPteeBaseAddr};
8791 }
8792
8793 /// Returns whether an attach entry should be emitted for a map on
8794 /// \p MapBaseDecl on the directive \p CurDir.
8795 static bool
8796 shouldEmitAttachEntry(const Expr *PointerExpr, const ValueDecl *MapBaseDecl,
8797 CodeGenFunction &CGF,
8798 llvm::PointerUnion<const OMPExecutableDirective *,
8799 const OMPDeclareMapperDecl *>
8800 CurDir) {
8801 if (!PointerExpr)
8802 return false;
8803
8804 // Pointer attachment is needed at map-entering time or for declare
8805 // mappers.
8806 return isa<const OMPDeclareMapperDecl *>(Val: CurDir) ||
8807 isOpenMPTargetMapEnteringDirective(
8808 DKind: cast<const OMPExecutableDirective *>(Val&: CurDir)
8809 ->getDirectiveKind());
8810 }
8811
8812 /// Computes the attach-ptr expr for \p Components, and updates various maps
8813 /// with the information.
8814 /// It internally calls OMPClauseMappableExprCommon::findAttachPtrExpr()
8815 /// with the OpenMPDirectiveKind extracted from \p CurDir.
8816 /// It updates AttachPtrComputationOrderMap, AttachPtrComponentDepthMap, and
8817 /// AttachPtrExprMap.
8818 void collectAttachPtrExprInfo(
8819 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
8820 llvm::PointerUnion<const OMPExecutableDirective *,
8821 const OMPDeclareMapperDecl *>
8822 CurDir) {
8823
8824 OpenMPDirectiveKind CurDirectiveID =
8825 isa<const OMPDeclareMapperDecl *>(Val: CurDir)
8826 ? OMPD_declare_mapper
8827 : cast<const OMPExecutableDirective *>(Val&: CurDir)->getDirectiveKind();
8828
8829 const auto &[AttachPtrExpr, Depth] =
8830 OMPClauseMappableExprCommon::findAttachPtrExpr(Components,
8831 CurDirKind: CurDirectiveID);
8832
8833 AttachPtrComputationOrderMap.try_emplace(
8834 Key: AttachPtrExpr, Args: AttachPtrComputationOrderMap.size());
8835 AttachPtrComponentDepthMap.try_emplace(Key: AttachPtrExpr, Args: Depth);
8836 AttachPtrExprMap.try_emplace(Key: Components, Args: AttachPtrExpr);
8837 }
8838
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    // The MapKind enumerators double as indices into the per-declaration
    // vector of buckets below; 'Total' is the number of buckets.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(V: D))
            return;
          // try_emplace creates the Total-sized bucket vector on first use.
          auto It = Info.try_emplace(Key: D, Args: Total).first;
          It->second[Kind].emplace_back(
              Args&: L, Args&: MapType, Args&: MapModifiers, Args&: MotionModifiers, Args&: ReturnDevicePointer,
              Args&: IsImplicit, Args&: Mapper, Args&: VarRef, Args&: ForDeviceAddr);
        };

    // Collect information from 'map' clauses.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Val: Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(Range: C->getMapTypeModifiers(),
                             Element: OMPC_MAP_MODIFIER_present))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(t: L), Kind, std::get<1>(t: L), C->getMapType(),
                C->getMapTypeModifiers(), {},
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(t: L),
                E);
        ++EI;
      }
    }
    // Collect information from 'to' clauses (treated as map-type 'to').
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Val: Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(Range: C->getMotionModifiers(),
                             Element: OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      if (llvm::is_contained(Range: C->getMotionModifiers(),
                             Element: OMPC_MOTION_MODIFIER_iterator)) {
        // Emit the iterator variable declared by the 'iterator' modifier so
        // the component lists can reference it.
        if (auto *IteratorExpr = dyn_cast<OMPIteratorExpr>(
                Val: C->getIteratorModifier()->IgnoreParenImpCasts())) {
          const auto *VD = cast<VarDecl>(Val: IteratorExpr->getIteratorDecl(I: 0));
          CGF.EmitVarDecl(D: *VD);
        }
      }

      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(t: L), Kind, std::get<1>(t: L), OMPC_MAP_to, {},
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(t: L), *EI);
        ++EI;
      }
    }
    // Collect information from 'from' clauses (treated as map-type 'from').
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Val: Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(Range: C->getMotionModifiers(),
                             Element: OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      if (llvm::is_contained(Range: C->getMotionModifiers(),
                             Element: OMPC_MOTION_MODIFIER_iterator)) {
        // Same iterator-modifier handling as for 'to' clauses above.
        if (auto *IteratorExpr = dyn_cast<OMPIteratorExpr>(
                Val: C->getIteratorModifier()->IgnoreParenImpCasts())) {
          const auto *VD = cast<VarDecl>(Val: IteratorExpr->getIteratorDecl(I: 0));
          CGF.EmitVarDecl(D: *VD);
        }
      }

      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(t: L), Kind, std::get<1>(t: L), OMPC_MAP_from, {},
                C->getMotionModifiers(),
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(t: L),
                *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr and use_device_addr clauses information and
    // mark the existing map entries as such. If there is no map information for
    // an entry in the use_device_ptr and use_device_addr list, we create one
    // with map type 'return_param' and zero size section. It is the user's
    // fault if that was not mapped before. If there is no map information, then
    // we defer the emission of that entry until all the maps for the same VD
    // have been handled.
    MapCombinedInfoTy UseDeviceDataCombinedInfo;

    // Appends a standalone RETURN_PARAM entry (zero-size section) for a
    // use_device_ptr/addr operand that had no matching map entry.
    auto &&UseDeviceDataCombinedInfoGen =
        [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
                                     CodeGenFunction &CGF, bool IsDevAddr,
                                     bool HasUdpFbNullify = false) {
          UseDeviceDataCombinedInfo.Exprs.push_back(Elt: VD);
          UseDeviceDataCombinedInfo.BasePointers.emplace_back(Args&: Ptr);
          UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(Args&: VD);
          UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
              Args: IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          // FIXME: For use_device_addr on array-sections, this should
          // be the starting address of the section.
          // e.g. int *p;
          // ... use_device_addr(p[3])
          // &p[0], &p[3], /*size=*/0, RETURN_PARAM
          UseDeviceDataCombinedInfo.Pointers.push_back(Elt: Ptr);
          UseDeviceDataCombinedInfo.Sizes.push_back(
              Elt: llvm::Constant::getNullValue(Ty: CGF.Int64Ty));
          OpenMPOffloadMappingFlags Flags =
              OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
          if (HasUdpFbNullify)
            Flags |= OpenMPOffloadMappingFlags::OMP_MAP_FB_NULLIFY;
          UseDeviceDataCombinedInfo.Types.push_back(Elt: Flags);
          UseDeviceDataCombinedInfo.Mappers.push_back(Elt: nullptr);
        };

    auto &&MapInfoGen =
        [&UseDeviceDataCombinedInfoGen](
            CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
            OMPClauseMappableExprCommon::MappableExprComponentListRef
                Components,
            bool IsDevAddr, bool IEIsAttachPtrForDevAddr = false,
            bool HasUdpFbNullify = false) {
          // We didn't find any match in our map information - generate a zero
          // size array section.
          llvm::Value *Ptr;
          if (IsDevAddr && !IEIsAttachPtrForDevAddr) {
            // use_device_addr wants the address of the operand itself.
            if (IE->isGLValue())
              Ptr = CGF.EmitLValue(E: IE).getPointer(CGF);
            else
              Ptr = CGF.EmitScalarExpr(E: IE);
          } else {
            // use_device_ptr (and the attach-ptr form of use_device_addr)
            // wants the pointer value loaded from the operand.
            Ptr = CGF.EmitLoadOfScalar(lvalue: CGF.EmitLValue(E: IE), Loc: IE->getExprLoc());
          }
          bool TreatDevAddrAsDevPtr = IEIsAttachPtrForDevAddr;
          // For the purpose of address-translation, treat something like the
          // following:
          //   int *p;
          //   ... use_device_addr(p[1])
          // equivalent to
          //   ... use_device_ptr(p)
          UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, /*IsDevAddr=*/IsDevAddr &&
                                                         !TreatDevAddrAsDevPtr,
                                       HasUdpFbNullify);
        };

    auto &&IsMapInfoExist =
        [&Info, this](CodeGenFunction &CGF, const ValueDecl *VD, const Expr *IE,
                      const Expr *DesiredAttachPtrExpr, bool IsDevAddr,
                      bool HasUdpFbNullify = false) -> bool {
      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it. If found,
      // return true.
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(Key: isa<MemberExpr>(Val: IE) ? nullptr : VD);
      if (It != Info.end()) {
        bool Found = false;
        for (auto &Data : It->second) {
          MapInfo *CI = nullptr;
          // We potentially have multiple maps for the same decl. We need to
          // only consider those for which the attach-ptr matches the desired
          // attach-ptr.
          auto *It = llvm::find_if(Range&: Data, P: [&](const MapInfo &MI) {
            if (MI.Components.back().getAssociatedDeclaration() != VD)
              return false;

            const Expr *MapAttachPtr = getAttachPtrExpr(Components: MI.Components);
            bool Match = AttachPtrComparator.areEqual(LHS: MapAttachPtr,
                                                      RHS: DesiredAttachPtrExpr);
            return Match;
          });

          if (It != Data.end())
            CI = &*It;

          if (CI) {
            if (IsDevAddr) {
              CI->ForDeviceAddr = true;
              CI->ReturnDevicePointer = true;
              CI->HasUdpFbNullify = HasUdpFbNullify;
              Found = true;
              break;
            } else {
              auto PrevCI = std::next(x: CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(Val: VD);
              const Expr *AttachPtrExpr = getAttachPtrExpr(Components: CI->Components);
              // For use_device_ptr, only reuse the map entry in the cases
              // below (USM, member expressions, non-pointer types, local
              // storage, or when the decl itself is the attach-ptr).
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(Val: IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(Val: PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage() ||
                  (isa_and_nonnull<DeclRefExpr>(Val: AttachPtrExpr) &&
                   VD == cast<DeclRefExpr>(Val: AttachPtrExpr)->getDecl())) {
                CI->ForDeviceAddr = IsDevAddr;
                CI->ReturnDevicePointer = true;
                CI->HasUdpFbNullify = HasUdpFbNullify;
                Found = true;
                break;
              }
            }
          }
        }
        return Found;
      }
      return false;
    };

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Val: Cl);
      if (!C)
        continue;
      bool HasUdpFbNullify =
          C->getFallbackModifier() == OMPC_USE_DEVICE_PTR_FALLBACK_fb_nullify;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(t: L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(Val: VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        // For use_device_ptr, we match an existing map clause if its attach-ptr
        // is same as the use_device_ptr operand. e.g.
        // map expr | use_device_ptr expr | current behavior
        // ---------|---------------------|-----------------
        // p[1]     | p                   | match
        // ps->a    | ps                  | match
        // p        | p                   | no match
        const Expr *UDPOperandExpr =
            Components.front().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE,
                           /*DesiredAttachPtrExpr=*/UDPOperandExpr,
                           /*IsDevAddr=*/false, HasUdpFbNullify))
          continue;
        MapInfoGen(CGF, IE, VD, Components, /*IsDevAddr=*/false,
                   /*IEIsAttachPtrForDevAddr=*/false, HasUdpFbNullify);
      }
    }

    // Each decl is handled at most once across all use_device_addr clauses.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Val: Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(t: L);
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(t: L).back().getAssociatedDeclaration();
        if (!Processed.insert(V: VD).second)
          continue;
        VD = cast<ValueDecl>(Val: VD->getCanonicalDecl());
        // For use_device_addr, we match an existing map clause if the
        // use_device_addr operand's attach-ptr matches the map operand's
        // attach-ptr.
        // We could also restrict to only match cases when there is a full
        // match between the map/use_device_addr clause exprs, but that may be
        // unnecessary.
        //
        // map expr | use_device_addr expr | current   | possible restrictive/
        //          |                      | behavior  | safer behavior
        // ---------|----------------------|-----------|-----------------------
        // p        | p                    | match     | match
        // p[0]     | p[0]                 | match     | match
        // p[0:1]   | p[0]                 | match     | no match
        // p[0:1]   | p[2:1]               | match     | no match
        // p[1]     | p[0]                 | match     | no match
        // ps->a    | ps->b                | match     | no match
        // p        | p[0]                 | no match  | no match
        // pp       | pp[0][0]             | no match  | no match
        const Expr *UDAAttachPtrExpr = getAttachPtrExpr(Components);
        const Expr *IE = std::get<1>(t: L).back().getAssociatedExpression();
        assert((!UDAAttachPtrExpr || UDAAttachPtrExpr == IE) &&
               "use_device_addr operand has an attach-ptr, but does not match "
               "last component's expr.");
        if (IsMapInfoExist(CGF, VD, IE,
                           /*DesiredAttachPtrExpr=*/UDAAttachPtrExpr,
                           /*IsDevAddr=*/true))
          continue;
        MapInfoGen(CGF, IE, VD, Components,
                   /*IsDevAddr=*/true,
                   /*IEIsAttachPtrForDevAddr=*/UDAAttachPtrExpr != nullptr);
      }
    }

    // Emit the collected map information, one declaration at a time.
    for (const auto &Data : Info) {
      MapCombinedInfoTy CurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(Val: D);
      // Group component lists by their AttachPtrExpr and process them in order
      // of increasing complexity (nullptr first, then simple expressions like
      // p, then more complex ones like p[0], etc.)
      //
      // This is similar to how generateInfoForCaptureFromClauseInfo handles
      // grouping for target constructs.
      SmallVector<std::pair<const Expr *, MapInfo>, 16> AttachPtrMapInfoPairs;

      // First, collect all MapData entries with their attach-ptr exprs.
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          const Expr *AttachPtrExpr = getAttachPtrExpr(Components: L.Components);
          AttachPtrMapInfoPairs.emplace_back(Args&: AttachPtrExpr, Args: L);
        }
      }

      // Next, sort by increasing order of their complexity.
      llvm::stable_sort(Range&: AttachPtrMapInfoPairs,
                        C: [this](const auto &LHS, const auto &RHS) {
                          return AttachPtrComparator(LHS.first, RHS.first);
                        });

      // And finally, process them all in order, grouping those with
      // equivalent attach-ptr exprs together.
      auto *It = AttachPtrMapInfoPairs.begin();
      while (It != AttachPtrMapInfoPairs.end()) {
        const Expr *AttachPtrExpr = It->first;

        SmallVector<MapInfo, 8> GroupLists;
        while (It != AttachPtrMapInfoPairs.end() &&
               (It->first == AttachPtrExpr ||
                AttachPtrComparator.areEqual(LHS: It->first, RHS: AttachPtrExpr))) {
          GroupLists.push_back(Elt: It->second);
          ++It;
        }
        assert(!GroupLists.empty() && "GroupLists should not be empty");

        StructRangeInfoTy PartialStruct;
        AttachInfoTy AttachInfo;
        MapCombinedInfoTy GroupCurInfo;
        // Current group's struct base information:
        MapCombinedInfoTy GroupStructBaseCurInfo;
        for (const MapInfo &L : GroupLists) {
          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = GroupCurInfo.BasePointers.size();
          unsigned StructBasePointersIdx =
              GroupStructBaseCurInfo.BasePointers.size();

          GroupCurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              MapType: L.MapType, MapModifiers: L.MapModifiers, MotionModifiers: L.MotionModifiers, Components: L.Components,
              CombinedInfo&: GroupCurInfo, StructBaseCombinedInfo&: GroupStructBaseCurInfo, PartialStruct, AttachInfo,
              /*IsFirstComponentList=*/false, IsImplicit: L.IsImplicit,
              /*GenerateAllInfoForClauses*/ true, Mapper: L.Mapper, ForDeviceAddr: L.ForDeviceAddr, BaseDecl: VD,
              MapExpr: L.VarRef, /*OverlappedElements*/ {});

          // If this entry relates to a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            // Check whether a value was added to either GroupCurInfo or
            // GroupStructBaseCurInfo and error if no value was added to either
            // of them:
            assert((CurrentBasePointersIdx < GroupCurInfo.BasePointers.size() ||
                    StructBasePointersIdx <
                        GroupStructBaseCurInfo.BasePointers.size()) &&
                   "Unexpected number of mapped base pointers.");

            // Choose a base pointer index which is always valid:
            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            // If GroupStructBaseCurInfo has been updated this iteration then
            // work on the first new entry added to it i.e. make sure that when
            // multiple values are added to any of the lists, the first value
            // added is being modified by the assignments below (not the last
            // value added).
            auto SetDevicePointerInfo = [&](MapCombinedInfoTy &Info,
                                            unsigned Idx) {
              Info.DevicePtrDecls[Idx] = RelevantVD;
              Info.DevicePointers[Idx] = L.ForDeviceAddr
                                             ? DeviceInfoTy::Address
                                             : DeviceInfoTy::Pointer;
              Info.Types[Idx] |=
                  OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
              if (L.HasUdpFbNullify)
                Info.Types[Idx] |=
                    OpenMPOffloadMappingFlags::OMP_MAP_FB_NULLIFY;
            };

            if (StructBasePointersIdx <
                GroupStructBaseCurInfo.BasePointers.size())
              SetDevicePointerInfo(GroupStructBaseCurInfo,
                                   StructBasePointersIdx);
            else
              SetDevicePointerInfo(GroupCurInfo, CurrentBasePointersIdx);
          }
        }

        // Unify entries in one list making sure the struct mapping precedes the
        // individual fields:
        MapCombinedInfoTy GroupUnionCurInfo;
        GroupUnionCurInfo.append(CurInfo&: GroupStructBaseCurInfo);
        GroupUnionCurInfo.append(CurInfo&: GroupCurInfo);

        // If there is an entry in PartialStruct it means we have a struct with
        // individual members mapped. Emit an extra combined entry.
        if (PartialStruct.Base.isValid()) {
          // Prepend a synthetic dimension of length 1 to represent the
          // aggregated struct object. Using 1 (not 0, as 0 produced an
          // incorrect non-contiguous descriptor (DimSize==1), causing the
          // non-contiguous motion clause path to be skipped.) is important:
          // * It preserves the correct rank so targetDataUpdate() computes
          //   DimSize == 2 for cases like strided array sections originating
          //   from user-defined mappers (e.g. test with s.data[0:8:2]).
          GroupUnionCurInfo.NonContigInfo.Dims.insert(
              I: GroupUnionCurInfo.NonContigInfo.Dims.begin(), Elt: 1);
          emitCombinedEntry(
              CombinedInfo&: CurInfo, CurTypes&: GroupUnionCurInfo.Types, PartialStruct, AttachInfo,
              /*IsMapThis=*/!VD, OMPBuilder, VD,
              /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size(),
              /*NotTargetParams=*/true);
        }

        // Append this group's results to the overall CurInfo in the correct
        // order: combined-entry -> original-field-entries -> attach-entry
        CurInfo.append(CurInfo&: GroupUnionCurInfo);
        if (AttachInfo.isValid())
          emitAttachEntry(CGF, CombinedInfo&: CurInfo, AttachInfo);
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr/addr clauses.
    CombinedInfo.append(CurInfo&: UseDeviceDataCombinedInfo);
  }
9307
9308public:
  /// Constructor for executable directives. Pre-extracts per-clause
  /// information (firstprivate, defaultmap, device-pointer/addr, lambda maps)
  /// and populates the attach-ptr maps for all map-related clauses.
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlist())
        FirstPrivateDecls.try_emplace(
            Key: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D)->getDecl()), Args: C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(Val: D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(Key: cast<VarDecl>(Val: DRE->getDecl()),
                                        /*Implicit=*/Args: true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     Val: cast<DeclRefExpr>(Val: D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(Key: VD, /*Implicit=*/Args: true);
      }
    }
    // Extract defaultmap clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPDefaultmapClause>())
      if (C->getDefaultmapModifier() == OMPC_DEFAULTMAP_MODIFIER_firstprivate)
        DefaultmapFirstprivateKinds.insert(V: C->getDefaultmapKind());
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(t&: L)].push_back(Elt: std::get<1>(t&: L));
    // Extract device addr clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
      for (auto L : C->component_lists())
        HasDevAddrsMap[std::get<0>(t&: L)].push_back(Elt: std::get<1>(t&: L));
    // Extract map information: record lambda objects mapped with 'to', so
    // their by-reference captures can be mapped later.
    for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
      if (C->getMapType() != OMPC_MAP_to)
        continue;
      for (auto L : C->component_lists()) {
        const ValueDecl *VD = std::get<0>(t&: L);
        const auto *RD = VD ? VD->getType()
                                  .getCanonicalType()
                                  .getNonReferenceType()
                                  ->getAsCXXRecordDecl()
                            : nullptr;
        if (RD && RD->isLambda())
          LambdasMap.try_emplace(Key: std::get<0>(t&: L), Args&: C);
      }
    }

    // Records attach-ptr info for every non-empty component list of a clause.
    auto CollectAttachPtrExprsForClauseComponents = [this](const auto *C) {
      for (auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        if (!Components.empty())
          collectAttachPtrExprInfo(Components, CurDir);
      }
    };

    // Populate the AttachPtrExprMap for all component lists from map-related
    // clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPMapClause>())
      CollectAttachPtrExprsForClauseComponents(C);
    for (const auto *C : Dir.getClausesOfKind<OMPToClause>())
      CollectAttachPtrExprsForClauseComponents(C);
    for (const auto *C : Dir.getClausesOfKind<OMPFromClause>())
      CollectAttachPtrExprsForClauseComponents(C);
    for (const auto *C : Dir.getClausesOfKind<OMPUseDevicePtrClause>())
      CollectAttachPtrExprsForClauseComponents(C);
    for (const auto *C : Dir.getClausesOfKind<OMPUseDeviceAddrClause>())
      CollectAttachPtrExprsForClauseComponents(C);
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      CollectAttachPtrExprsForClauseComponents(C);
    for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
      CollectAttachPtrExprsForClauseComponents(C);
  }
9383
  /// Constructor for the declare mapper directive. Unlike the executable
  /// directive constructor, no clause pre-extraction is done here.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {}
9387
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  /// If a valid \p AttachInfo exists, its pointee addr will be updated to point
  /// to the combined-entry's begin address, if emitted.
  /// \p PartialStruct contains attach base-pointer information.
  /// Returns without emitting anything when no combined entry is needed (a
  /// single non-member, non-array-section entry).
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         AttachInfoTy &AttachInfo, bool IsMapThis,
                         llvm::OpenMPIRBuilder &OMPBuilder, const ValueDecl *VD,
                         unsigned OffsetForMemberOfFlag,
                         bool NotTargetParams) const {
    // A single entry that is neither MEMBER_OF nor an array section needs no
    // combined entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
         OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // When the whole record is mapped, both bounds collapse to its base.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(Elt: VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(Elt: PartialStruct.Base.emitRawPointer(CGF));
    CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
    CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.emitRawPointer(CGF);
    const CXXMethodDecl *MD =
        CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(Val: CGF.CurFuncDecl) : nullptr;
    const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
    bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
    // There should not be a mapper for a combined entry.
    if (HasBaseClass) {
      // OpenMP 5.2 148:21:
      // If the target construct is within a class non-static member function,
      // and a variable is an accessible data member of the object for which the
      // non-static data member function is invoked, the variable is treated as
      // if the this[:1] expression had appeared in a map clause with a map-type
      // of tofrom.
      // Emit this[:1]
      CombinedInfo.Pointers.push_back(Elt: PartialStruct.Base.emitRawPointer(CGF));
      QualType Ty = MD->getFunctionObjectParameterType();
      llvm::Value *Size =
          CGF.Builder.CreateIntCast(V: CGF.getTypeSize(Ty), DestTy: CGF.Int64Ty,
                                    /*isSigned=*/true);
      CombinedInfo.Sizes.push_back(Elt: Size);
    } else {
      CombinedInfo.Pointers.push_back(Elt: LB);
      // Size is (addr of {highest+1} element) - (addr of lowest element)
      llvm::Value *HB = HBAddr.emitRawPointer(CGF);
      llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
          Ty: HBAddr.getElementType(), Ptr: HB, /*Idx0=*/1);
      llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(V: LB, DestTy: CGF.VoidPtrTy);
      llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(V: HAddr, DestTy: CGF.VoidPtrTy);
      llvm::Value *Diff = CGF.Builder.CreatePtrDiff(LHS: CHAddr, RHS: CLAddr);
      llvm::Value *Size = CGF.Builder.CreateIntCast(V: Diff, DestTy: CGF.Int64Ty,
                                                    /*isSigned=*/false);
      CombinedInfo.Sizes.push_back(Elt: Size);
    }
    CombinedInfo.Mappers.push_back(Elt: nullptr);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(
        Elt: NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
        : !PartialStruct.PreliminaryMapData.BasePointers.empty()
            ? OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ
            : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(Range&: CurTypes, P: [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
        }))
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement. Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(Range&: CurTypes, P: [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
        })) {
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field, or ATTACH entries since they are expected
    // to be handled by themselves, after all other maps).
    OpenMPOffloadMappingFlags MemberOfFlag = OMPBuilder.getMemberOfFlag(
        Position: OffsetForMemberOfFlag + CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      OMPBuilder.setCorrectMemberOfFlag(Flags&: M, MemberOfFlag);

    // If there were any pending attachments to be done, do them to the begin
    // address of the combined entry. Note that this means only one attachment
    // per combined-entry will be done. So, for instance, if we have:
    //   S *ps;
    //   ... map(ps->a, ps->b)
    // updating the pointee address to the begin address of the combined entry
    // ensures we still get a single ATTACH entry, like:
    //
    // &ps[0], &ps->a, sizeof(ps->a to ps->b), ALLOC // combined-entry
    // &ps[0], &ps->a, sizeof(ps->a), TO | FROM
    // &ps[0], &ps->b, sizeof(ps->b), TO | FROM
    // &ps, &ps->a, sizeof(void*), ATTACH // Use combined-entry's LB
    if (AttachInfo.isValid())
      AttachInfo.AttachPteeAddr = LBAddr;
  }
9513
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  /// Valid only when this handler wraps an executable directive (see the
  /// declare-mapper variant generateAllInfoForMapper below).
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    assert(isa<const OMPExecutableDirective *>(CurDir) &&
           "Expect a executable directive");
    const auto *CurExecDir = cast<const OMPExecutableDirective *>(Val: CurDir);
    generateAllInfoForClauses(Clauses: CurExecDir->clauses(), CombinedInfo, OMPBuilder,
                              SkipVarSet);
  }
9529
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of user-defined mapper (all included
  /// in \a CombinedInfo).
  /// Valid only when this handler wraps a declare-mapper directive (see the
  /// executable-directive variant generateAllInfo above).
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
                                llvm::OpenMPIRBuilder &OMPBuilder) const {
    assert(isa<const OMPDeclareMapperDecl *>(CurDir) &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = cast<const OMPDeclareMapperDecl *>(Val: CurDir);
    generateAllInfoForClauses(Clauses: CurMapperDir->clauses(), CombinedInfo,
                              OMPBuilder);
  }
9541
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// If \a VD is a lambda object, emit one
  /// PTR_AND_OBJ|LITERAL|MEMBER_OF|IMPLICIT map entry for the captured 'this'
  /// pointer (if any) and one for each variable captured by reference (or of
  /// pointer type), so the fields of the device copy of the lambda get
  /// attached to their pointees. \a LambdaPointers records
  /// field-address -> lambda-address pairs so that
  /// adjustMemberOfForLambdaCaptures can later patch the MEMBER_OF index.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
    const auto *RD = VDType->getAsCXXRecordDecl();
    // Only lambda objects are handled here; anything else is ignored.
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr(Arg, CGF.ConvertTypeForMem(T: VDType),
                   CGF.getContext().getDeclAlign(D: VD));
    LValue VDLVal = CGF.MakeAddrLValue(Addr: VDAddr, T: VDType);
    // Maps each captured variable to the lambda field that stores it.
    llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(Base: VDLVal, Field: ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(Base: VDLVal, Field: ThisCapture);
      // Remember which lambda this field entry belongs to for the later
      // MEMBER_OF fixup.
      LambdaPointers.try_emplace(Key: ThisLVal.getPointer(CGF),
                                 Args: VDLVal.getPointer(CGF));
      // Base pointer is the lambda's field; pointer is the captured 'this'.
      CombinedInfo.Exprs.push_back(Elt: VD);
      CombinedInfo.BasePointers.push_back(Elt: ThisLVal.getPointer(CGF));
      CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
      CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(Elt: ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          Elt: CGF.Builder.CreateIntCast(V: CGF.getTypeSize(Ty: CGF.getContext().VoidPtrTy),
                                     DestTy: CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          Elt: OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(Elt: nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      // NOTE: this VD intentionally shadows the parameter; here it is the
      // captured variable, not the lambda object.
      const VarDecl *VD = cast<VarDecl>(Val: LC.getCapturedVar());
      // Only by-reference captures and pointer-typed by-copy captures need a
      // pointer attachment entry.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(Val: VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(Base: VDLVal, Field: It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: map the referenced object with its full size.
        LValue VarLValVal = CGF.EmitLValueForField(Base: VDLVal, Field: It->second);
        LambdaPointers.try_emplace(Key: VarLVal.getPointer(CGF),
                                   Args: VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(Elt: VD);
        CombinedInfo.BasePointers.push_back(Elt: VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
        CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(Elt: VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
            V: CGF.getTypeSize(
                Ty: VD->getType().getCanonicalType().getNonReferenceType()),
            DestTy: CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointer captured by copy: map the loaded pointer value with a zero
        // size (only the attachment matters, not the pointee's storage).
        RValue VarRVal = CGF.EmitLoadOfLValue(V: VarLVal, Loc: RD->getLocation());
        LambdaPointers.try_emplace(Key: VarLVal.getPointer(CGF),
                                   Args: VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(Elt: VD);
        CombinedInfo.BasePointers.push_back(Elt: VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
        CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(Elt: VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(Elt: llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: 0));
      }
      CombinedInfo.Types.push_back(
          Elt: OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(Elt: nullptr);
    }
  }
9618
9619 /// Set correct indices for lambdas captures.
9620 void adjustMemberOfForLambdaCaptures(
9621 llvm::OpenMPIRBuilder &OMPBuilder,
9622 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9623 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9624 MapFlagsArrayTy &Types) const {
9625 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9626 // Set correct member_of idx for all implicit lambda captures.
9627 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9628 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9629 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9630 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
9631 continue;
9632 llvm::Value *BasePtr = LambdaPointers.lookup(Val: BasePointers[I]);
9633 assert(BasePtr && "Unable to find base lambda address.");
9634 int TgtIdx = -1;
9635 for (unsigned J = I; J > 0; --J) {
9636 unsigned Idx = J - 1;
9637 if (Pointers[Idx] != BasePtr)
9638 continue;
9639 TgtIdx = Idx;
9640 break;
9641 }
9642 assert(TgtIdx != -1 && "Unable to find parent lambda.");
9643 // All other current entries will be MEMBER_OF the combined entry
9644 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9645 // 0xFFFF in the MEMBER_OF field).
9646 OpenMPOffloadMappingFlags MemberOfFlag =
9647 OMPBuilder.getMemberOfFlag(Position: TgtIdx);
9648 OMPBuilder.setCorrectMemberOfFlag(Flags&: Types[I], MemberOfFlag);
9649 }
9650 }
9651
  /// Populate component lists for non-lambda captured variables from map,
  /// is_device_ptr and has_device_addr clause info.
  ///
  /// Gathers into \a DeclComponentLists one MapData tuple per component list
  /// referring to \a VD: first from is_device_ptr (treated as implicit 'to')
  /// and has_device_addr (treated as implicit 'tofrom') clauses, then from
  /// explicit map clauses on the current executable directive. For target
  /// constructs an implicit map for a member attach-pointer may be appended;
  /// storage for such synthesized component lists lives in
  /// \a StorageForImplicitlyAddedComponentLists. Finally the lists are
  /// stably sorted so 'present'/'alloc' entries are processed first.
  void populateComponentListsForNonLambdaCaptureFromClauses(
      const ValueDecl *VD, MapDataArrayTy &DeclComponentLists,
      SmallVectorImpl<
          SmallVector<OMPClauseMappableExprCommon::MappableComponent, 8>>
          &StorageForImplicitlyAddedComponentLists) const {
    // Lambdas are handled by generateDefaultMapInfo, not here.
    if (VD && LambdasMap.count(Val: VD))
      return;

    // For member fields list in is_device_ptr, store it in
    // DeclComponentLists for generating components info.
    static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
    auto It = DevPointersMap.find(Val: VD);
    if (It != DevPointersMap.end())
      for (const auto &MCL : It->second)
        DeclComponentLists.emplace_back(Args: MCL, Args: OMPC_MAP_to, Args: Unknown,
                                        /*IsImpicit = */ Args: true, Args: nullptr,
                                        Args: nullptr);
    auto I = HasDevAddrsMap.find(Val: VD);
    if (I != HasDevAddrsMap.end())
      for (const auto &MCL : I->second)
        DeclComponentLists.emplace_back(Args: MCL, Args: OMPC_MAP_tofrom, Args: Unknown,
                                        /*IsImpicit = */ Args: true, Args: nullptr,
                                        Args: nullptr);
    assert(isa<const OMPExecutableDirective *>(CurDir) &&
           "Expect a executable directive");
    const auto *CurExecDir = cast<const OMPExecutableDirective *>(Val: CurDir);
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      // EI tracks the clause's variable references in lockstep with the
      // decl_component_lists iteration below (one ++EI per list).
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(args&: VDecl, args&: Components, args&: Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Args&: Components, Args: C->getMapType(),
                                        Args: C->getMapTypeModifiers(),
                                        Args: C->isImplicit(), Args&: Mapper, Args&: E);
        ++EI;
      }
    }

    // For the target construct, if there's a map with a base-pointer that's
    // a member of an implicitly captured struct, of the current class,
    // we need to emit an implicit map on the pointer.
    if (isOpenMPTargetExecutionDirective(DKind: CurExecDir->getDirectiveKind()))
      addImplicitMapForAttachPtrBaseIfMemberOfCapturedVD(
          CapturedVD: VD, DeclComponentLists, ComponentVectorStorage&: StorageForImplicitlyAddedComponentLists);

    // Stable sort so that entries carrying a 'present' modifier or an 'alloc'
    // map type sort before the rest.
    // NOTE(review): HasPresent is computed from LHS while HasAllocs is
    // computed from RHS (and vice versa for the *R variants) — the operands
    // look crossed; confirm this matches the intended ordering.
    llvm::stable_sort(Range&: DeclComponentLists, C: [](const MapData &LHS,
                                                const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(t: LHS);
      OpenMPMapClauseKind MapType = std::get<1>(t: RHS);
      bool HasPresent =
          llvm::is_contained(Range&: MapModifiers, Element: clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(t: RHS);
      MapType = std::get<1>(t: LHS);
      bool HasPresentR =
          llvm::is_contained(Range&: MapModifiers, Element: clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });
  }
9720
9721 /// On a target construct, if there's an implicit map on a struct, or that of
9722 /// this[:], and an explicit map with a member of that struct/class as the
9723 /// base-pointer, we need to make sure that base-pointer is implicitly mapped,
9724 /// to make sure we don't map the full struct/class. For example:
9725 ///
9726 /// \code
9727 /// struct S {
9728 /// int dummy[10000];
9729 /// int *p;
9730 /// void f1() {
9731 /// #pragma omp target map(p[0:1])
9732 /// (void)this;
9733 /// }
9734 /// }; S s;
9735 ///
9736 /// void f2() {
9737 /// #pragma omp target map(s.p[0:10])
9738 /// (void)s;
9739 /// }
9740 /// \endcode
9741 ///
9742 /// Only `this-p` and `s.p` should be mapped in the two cases above.
9743 //
9744 // OpenMP 6.0: 7.9.6 map clause, pg 285
9745 // If a list item with an implicitly determined data-mapping attribute does
9746 // not have any corresponding storage in the device data environment prior to
9747 // a task encountering the construct associated with the map clause, and one
9748 // or more contiguous parts of the original storage are either list items or
9749 // base pointers to list items that are explicitly mapped on the construct,
9750 // only those parts of the original storage will have corresponding storage in
9751 // the device data environment as a result of the map clauses on the
9752 // construct.
9753 void addImplicitMapForAttachPtrBaseIfMemberOfCapturedVD(
9754 const ValueDecl *CapturedVD, MapDataArrayTy &DeclComponentLists,
9755 SmallVectorImpl<
9756 SmallVector<OMPClauseMappableExprCommon::MappableComponent, 8>>
9757 &ComponentVectorStorage) const {
9758 bool IsThisCapture = CapturedVD == nullptr;
9759
9760 for (const auto &ComponentsAndAttachPtr : AttachPtrExprMap) {
9761 OMPClauseMappableExprCommon::MappableExprComponentListRef
9762 ComponentsWithAttachPtr = ComponentsAndAttachPtr.first;
9763 const Expr *AttachPtrExpr = ComponentsAndAttachPtr.second;
9764 if (!AttachPtrExpr)
9765 continue;
9766
9767 const auto *ME = dyn_cast<MemberExpr>(Val: AttachPtrExpr);
9768 if (!ME)
9769 continue;
9770
9771 const Expr *Base = ME->getBase()->IgnoreParenImpCasts();
9772
9773 // If we are handling a "this" capture, then we are looking for
9774 // attach-ptrs of form `this->p`, either explicitly or implicitly.
9775 if (IsThisCapture && !ME->isImplicitCXXThis() && !isa<CXXThisExpr>(Val: Base))
9776 continue;
9777
9778 if (!IsThisCapture && (!isa<DeclRefExpr>(Val: Base) ||
9779 cast<DeclRefExpr>(Val: Base)->getDecl() != CapturedVD))
9780 continue;
9781
9782 // For non-this captures, we are looking for attach-ptrs of form
9783 // `s.p`.
9784 // For non-this captures, we are looking for attach-ptrs like `s.p`.
9785 if (!IsThisCapture && (ME->isArrow() || !isa<DeclRefExpr>(Val: Base) ||
9786 cast<DeclRefExpr>(Val: Base)->getDecl() != CapturedVD))
9787 continue;
9788
9789 // Check if we have an existing map on either:
9790 // this[:], s, this->p, or s.p, in which case, we don't need to add
9791 // an implicit one for the attach-ptr s.p/this->p.
9792 bool FoundExistingMap = false;
9793 for (const MapData &ExistingL : DeclComponentLists) {
9794 OMPClauseMappableExprCommon::MappableExprComponentListRef
9795 ExistingComponents = std::get<0>(t: ExistingL);
9796
9797 if (ExistingComponents.empty())
9798 continue;
9799
9800 // First check if we have a map like map(this->p) or map(s.p).
9801 const auto &FirstComponent = ExistingComponents.front();
9802 const Expr *FirstExpr = FirstComponent.getAssociatedExpression();
9803
9804 if (!FirstExpr)
9805 continue;
9806
9807 // First check if we have a map like map(this->p) or map(s.p).
9808 if (AttachPtrComparator.areEqual(LHS: FirstExpr, RHS: AttachPtrExpr)) {
9809 FoundExistingMap = true;
9810 break;
9811 }
9812
9813 // Check if we have a map like this[0:1]
9814 if (IsThisCapture) {
9815 if (const auto *OASE = dyn_cast<ArraySectionExpr>(Val: FirstExpr)) {
9816 if (isa<CXXThisExpr>(Val: OASE->getBase()->IgnoreParenImpCasts())) {
9817 FoundExistingMap = true;
9818 break;
9819 }
9820 }
9821 continue;
9822 }
9823
9824 // When the attach-ptr is something like `s.p`, check if
9825 // `s` itself is mapped explicitly.
9826 if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: FirstExpr)) {
9827 if (DRE->getDecl() == CapturedVD) {
9828 FoundExistingMap = true;
9829 break;
9830 }
9831 }
9832 }
9833
9834 if (FoundExistingMap)
9835 continue;
9836
9837 // If no base map is found, we need to create an implicit map for the
9838 // attach-pointer expr.
9839
9840 ComponentVectorStorage.emplace_back();
9841 auto &AttachPtrComponents = ComponentVectorStorage.back();
9842
9843 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
9844 bool SeenAttachPtrComponent = false;
9845 // For creating a map on the attach-ptr `s.p/this->p`, we copy all
9846 // components from the component-list which has `s.p/this->p`
9847 // as the attach-ptr, starting from the component which matches
9848 // `s.p/this->p`. This way, we'll have component-lists of
9849 // `s.p` -> `s`, and `this->p` -> `this`.
9850 for (size_t i = 0; i < ComponentsWithAttachPtr.size(); ++i) {
9851 const auto &Component = ComponentsWithAttachPtr[i];
9852 const Expr *ComponentExpr = Component.getAssociatedExpression();
9853
9854 if (!SeenAttachPtrComponent && ComponentExpr != AttachPtrExpr)
9855 continue;
9856 SeenAttachPtrComponent = true;
9857
9858 AttachPtrComponents.emplace_back(Args: Component.getAssociatedExpression(),
9859 Args: Component.getAssociatedDeclaration(),
9860 Args: Component.isNonContiguous());
9861 }
9862 assert(!AttachPtrComponents.empty() &&
9863 "Could not populate component-lists for mapping attach-ptr");
9864
9865 DeclComponentLists.emplace_back(
9866 Args&: AttachPtrComponents, Args: OMPC_MAP_tofrom, Args: Unknown,
9867 /*IsImplicit=*/Args: true, /*mapper=*/Args: nullptr, Args&: AttachPtrExpr);
9868 }
9869 }
9870
  /// For a capture that has an associated clause, generate the base pointers,
  /// section pointers, sizes, map types, and mappers (all included in
  /// \a CurCaptureVarInfo).
  ///
  /// Component lists are grouped by their attach-pointer expression and
  /// processed in increasing order of attach-ptr complexity; only the first
  /// group may claim the TARGET_PARAM flag, and only when no default mapping
  /// was already emitted for this capture.
  void generateInfoForCaptureFromClauseInfo(
      const MapDataArrayTy &DeclComponentListsFromClauses,
      const CapturedStmt::Capture *Cap, llvm::Value *Arg,
      MapCombinedInfoTy &CurCaptureVarInfo, llvm::OpenMPIRBuilder &OMPBuilder,
      unsigned OffsetForMemberOfFlag) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we generating information for the first component
    // A 'this' capture is represented by a null decl.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // for map(to: lambda): skip here, processing it in
    // generateDefaultMapInfo
    if (LambdasMap.count(Val: VD))
      return;

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (VD && (DevPointersMap.count(Val: VD) || HasDevAddrsMap.count(Val: VD))) {
      CurCaptureVarInfo.Exprs.push_back(Elt: VD);
      CurCaptureVarInfo.BasePointers.emplace_back(Args&: Arg);
      CurCaptureVarInfo.DevicePtrDecls.emplace_back(Args&: VD);
      CurCaptureVarInfo.DevicePointers.emplace_back(Args: DeviceInfoTy::Pointer);
      CurCaptureVarInfo.Pointers.push_back(Elt: Arg);
      CurCaptureVarInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
          V: CGF.getTypeSize(Ty: CGF.getContext().VoidPtrTy), DestTy: CGF.Int64Ty,
          /*isSigned=*/true));
      CurCaptureVarInfo.Types.push_back(
          Elt: OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
      CurCaptureVarInfo.Mappers.push_back(Elt: nullptr);
      return;
    }

    // Shared worker: emits info for one group of component lists, plus a
    // combined entry when the group maps individual struct members, plus an
    // attach entry when one is required.
    auto GenerateInfoForComponentLists =
        [&](ArrayRef<MapData> DeclComponentListsFromClauses,
            bool IsEligibleForTargetParamFlag) {
          MapCombinedInfoTy CurInfoForComponentLists;
          StructRangeInfoTy PartialStruct;
          AttachInfoTy AttachInfo;

          if (DeclComponentListsFromClauses.empty())
            return;

          generateInfoForCaptureFromComponentLists(
              VD, DeclComponentLists: DeclComponentListsFromClauses, CurComponentListInfo&: CurInfoForComponentLists,
              PartialStruct, AttachInfo, IsListEligibleForTargetParamFlag: IsEligibleForTargetParamFlag);

          // If there is an entry in PartialStruct it means we have a
          // struct with individual members mapped. Emit an extra combined
          // entry.
          if (PartialStruct.Base.isValid()) {
            CurCaptureVarInfo.append(CurInfo&: PartialStruct.PreliminaryMapData);
            emitCombinedEntry(
                CombinedInfo&: CurCaptureVarInfo, CurTypes&: CurInfoForComponentLists.Types,
                PartialStruct, AttachInfo, IsMapThis: Cap->capturesThis(), OMPBuilder,
                /*VD=*/nullptr, OffsetForMemberOfFlag,
                /*NotTargetParams*/ !IsEligibleForTargetParamFlag);
          }

          // We do the appends to get the entries in the following order:
          // combined-entry -> individual-field-entries -> attach-entry,
          CurCaptureVarInfo.append(CurInfo&: CurInfoForComponentLists);
          if (AttachInfo.isValid())
            emitAttachEntry(CGF, CombinedInfo&: CurCaptureVarInfo, AttachInfo);
        };

    // Group component lists by their AttachPtrExpr and process them in order
    // of increasing complexity (nullptr first, then simple expressions like p,
    // then more complex ones like p[0], etc.)
    //
    // This ensure that we:
    // * handle maps that can contribute towards setting the kernel argument,
    //   (e.g. map(ps), or map(ps[0])), before any that cannot (e.g. ps->pt->d).
    // * allocate a single contiguous storage for all exprs with the same
    //   captured var and having the same attach-ptr.
    //
    // Example: The map clauses below should be handled grouped together based
    // on their attachable-base-pointers:
    // map-clause                | attachable-base-pointer
    // --------------------------+------------------------
    // map(p, ps)                | nullptr
    // map(p[0])                 | p
    // map(p[0]->b, p[0]->c)     | p[0]
    // map(ps->d, ps->e, ps->pt) | ps
    // map(ps->pt->d, ps->pt->e) | ps->pt

    // First, collect all MapData entries with their attach-ptr exprs.
    SmallVector<std::pair<const Expr *, MapData>, 16> AttachPtrMapDataPairs;

    for (const MapData &L : DeclComponentListsFromClauses) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
          std::get<0>(t: L);
      const Expr *AttachPtrExpr = getAttachPtrExpr(Components);
      AttachPtrMapDataPairs.emplace_back(Args&: AttachPtrExpr, Args: L);
    }

    // Next, sort by increasing order of their complexity.
    llvm::stable_sort(Range&: AttachPtrMapDataPairs,
                      C: [this](const auto &LHS, const auto &RHS) {
                        return AttachPtrComparator(LHS.first, RHS.first);
                      });

    // Default mapping has already happened if entries exist for this capture.
    bool NoDefaultMappingDoneForVD = CurCaptureVarInfo.BasePointers.empty();
    bool IsFirstGroup = true;

    // And finally, process them all in order, grouping those with
    // equivalent attach-ptr exprs together.
    auto *It = AttachPtrMapDataPairs.begin();
    while (It != AttachPtrMapDataPairs.end()) {
      const Expr *AttachPtrExpr = It->first;

      // Collect the contiguous run of entries whose attach-ptrs compare
      // equal (by pointer or structurally).
      MapDataArrayTy GroupLists;
      while (It != AttachPtrMapDataPairs.end() &&
             (It->first == AttachPtrExpr ||
              AttachPtrComparator.areEqual(LHS: It->first, RHS: AttachPtrExpr))) {
        GroupLists.push_back(Elt: It->second);
        ++It;
      }
      assert(!GroupLists.empty() && "GroupLists should not be empty");

      // Determine if this group of component-lists is eligible for TARGET_PARAM
      // flag. Only the first group processed should be eligible, and only if no
      // default mapping was done.
      bool IsEligibleForTargetParamFlag =
          IsFirstGroup && NoDefaultMappingDoneForVD;

      GenerateInfoForComponentLists(GroupLists, IsEligibleForTargetParamFlag);
      IsFirstGroup = false;
    }
  }
10008
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to \a DeclComponentLists for a given capture
  /// \a VD (all included in \a CurComponentListInfo).
  ///
  /// First detects pairs of component lists where one is a prefix of the
  /// other ("overlapping" lists), sorts each base's overlapped lists by
  /// field layout order, then emits info for overlapped bases and finally
  /// for the remaining non-overlapped lists. At most one emitted list gets
  /// the TARGET_PARAM flag, and only if
  /// \a IsListEligibleForTargetParamFlag is set.
  void generateInfoForCaptureFromComponentLists(
      const ValueDecl *VD, ArrayRef<MapData> DeclComponentLists,
      MapCombinedInfoTy &CurComponentListInfo, StructRangeInfoTy &PartialStruct,
      AttachInfoTy &AttachInfo, bool IsListEligibleForTargetParamFlag) const {
    // Find overlapping elements (including the offset from the base element).
    // Maps a "base" MapData to every component list that shares its head.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(args&: Components, args&: MapType, args&: MapModifiers, args&: IsImplicit, args&: Mapper, args&: VarRef) =
          L;
      ++Count;
      // Compare L against every later list only (each unordered pair once).
      for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(N: Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(args&: Components1, args&: MapType, args&: MapModifiers, args&: IsImplicit, args&: Mapper,
                 args&: VarRef) = L1;
        // Walk both lists from the innermost component outward until they
        // diverge.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(Val: It->getAssociatedExpression()) ||
              (std::prev(x: It)->getAssociatedDeclaration() &&
               std::prev(x: It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(x: It) != CE && std::next(x: It) != SE))
            continue;
          // The shorter list is the base; the longer one is recorded as an
          // overlapped sub-list of that base.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          OverlappedData[&BaseData].push_back(Elt: SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointer/array layers down to the underlying record type.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(RD: CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(in_start: RD->field_begin(), in_end: RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      // Order overlapped lists by field position so members are emitted in
      // declaration/layout order.
      llvm::stable_sort(
          Range&: Pair.getSecond(),
          C: [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Both lists diverge at a field: compare positions.
            const auto *FD1 = cast<FieldDecl>(Val: CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(Val: SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            // Different parents: whichever field appears first in the plain
            // layout is "less".
            const auto *It =
                llvm::find_if(Range&: Layout, P: [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    // Only the very first emitted list may carry the TARGET_PARAM flag.
    bool AddTargetParamFlag = IsListEligibleForTargetParamFlag;
    MapCombinedInfoTy StructBaseCombinedInfo;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(args&: Components, args&: MapType, args&: MapModifiers, args&: IsImplicit, args&: Mapper, args&: VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, MotionModifiers: {}, Components, CombinedInfo&: CurComponentListInfo,
          StructBaseCombinedInfo, PartialStruct, AttachInfo, IsFirstComponentList: AddTargetParamFlag,
          IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
          /*ForDeviceAddr=*/false, BaseDecl: VD, MapExpr: VarRef, OverlappedElements: OverlappedComponents);
      AddTargetParamFlag = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(args&: Components, args&: MapType, args&: MapModifiers, args&: IsImplicit, args&: Mapper, args&: VarRef) =
          L;
      auto It = OverlappedData.find(Val: &L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(
            MapType, MapModifiers, MotionModifiers: {}, Components, CombinedInfo&: CurComponentListInfo,
            StructBaseCombinedInfo, PartialStruct, AttachInfo,
            IsFirstComponentList: AddTargetParamFlag, IsImplicit, /*GenerateAllInfoForClauses*/ false,
            Mapper, /*ForDeviceAddr=*/false, BaseDecl: VD, MapExpr: VarRef,
            /*OverlappedElements*/ {});
      AddTargetParamFlag = false;
    }
  }
10181
10182 /// Check if a variable should be treated as firstprivate due to explicit
10183 /// firstprivate clause or defaultmap(firstprivate:...).
10184 bool isEffectivelyFirstprivate(const VarDecl *VD, QualType Type) const {
10185 // Check explicit firstprivate clauses (not implicit from defaultmap)
10186 auto I = FirstPrivateDecls.find(Val: VD);
10187 if (I != FirstPrivateDecls.end() && !I->getSecond())
10188 return true; // Explicit firstprivate only
10189
10190 // Check defaultmap(firstprivate:scalar) for scalar types
10191 if (DefaultmapFirstprivateKinds.count(V: OMPC_DEFAULTMAP_scalar)) {
10192 if (Type->isScalarType())
10193 return true;
10194 }
10195
10196 // Check defaultmap(firstprivate:pointer) for pointer types
10197 if (DefaultmapFirstprivateKinds.count(V: OMPC_DEFAULTMAP_pointer)) {
10198 if (Type->isAnyPointerType())
10199 return true;
10200 }
10201
10202 // Check defaultmap(firstprivate:aggregate) for aggregate types
10203 if (DefaultmapFirstprivateKinds.count(V: OMPC_DEFAULTMAP_aggregate)) {
10204 if (Type->isAggregateType())
10205 return true;
10206 }
10207
10208 // Check defaultmap(firstprivate:all) for all types
10209 return DefaultmapFirstprivateKinds.count(V: OMPC_DEFAULTMAP_all);
10210 }
10211
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Emits exactly one entry into \a CombinedInfo, always tagged
  /// TARGET_PARAM; the map type, pointer, and size depend on whether the
  /// capture is 'this', by-copy, or by-reference, and on whether the variable
  /// is effectively firstprivate.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this': map the pointed-to object with size of the class, tofrom.
      CombinedInfo.Exprs.push_back(Elt: nullptr);
      CombinedInfo.BasePointers.push_back(Elt: CV);
      CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
      CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(Elt: CV);
      const auto *PtrTy = cast<PointerType>(Val: RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          Elt: CGF.Builder.CreateIntCast(V: CGF.getTypeSize(Ty: PtrTy->getPointeeType()),
                                     DestTy: CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(Elt: OpenMPOffloadMappingFlags::OMP_MAP_TO |
                                   OpenMPOffloadMappingFlags::OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(Elt: VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(Elt: CV);
      CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
      CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(Elt: CV);
      bool IsFirstprivate =
          isEffectivelyFirstprivate(VD, Type: RI.getType().getNonReferenceType());

      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(
            Elt: OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
            V: CGF.getTypeSize(Ty: RI.getType()), DestTy: CGF.Int64Ty, /*isSigned=*/true));
      } else if (IsFirstprivate) {
        // Firstprivate pointers should be passed by value (as literals)
        // without performing a present table lookup at runtime.
        CombinedInfo.Types.push_back(
            Elt: OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
        // Use zero size for pointer literals (just passing the pointer value)
        CombinedInfo.Sizes.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.Int64Ty));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(Elt: OpenMPOffloadMappingFlags::OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.Int64Ty));
      }
      // An entry in FirstPrivateDecls tells us whether this firstprivate was
      // implicit (value is the "implicit" marker).
      auto I = FirstPrivateDecls.find(Val: VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(Val: RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      const VarDecl *VD = CI.getCapturedVar();
      bool IsFirstprivate = isEffectivelyFirstprivate(VD, Type: ElementType);
      CombinedInfo.Exprs.push_back(Elt: VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(Elt: CV);
      CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
      CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);

      // For firstprivate pointers, pass by value instead of dereferencing
      if (IsFirstprivate && ElementType->isAnyPointerType()) {
        // Treat as a literal value (pass the pointer value itself)
        CombinedInfo.Pointers.push_back(Elt: CV);
        // Use zero size for pointer literals
        CombinedInfo.Sizes.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.Int64Ty));
        CombinedInfo.Types.push_back(
            Elt: OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
      } else {
        CombinedInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
            V: CGF.getTypeSize(Ty: ElementType), DestTy: CGF.Int64Ty, /*isSigned=*/true));
        // The default map type for a scalar/complex type is 'to' because by
        // default the value doesn't have to be retrieved. For an aggregate
        // type, the default is 'tofrom'.
        CombinedInfo.Types.push_back(Elt: getMapModifiersForPrivateClauses(Cap: CI));
        CombinedInfo.Pointers.push_back(Elt: CV);
      }
      auto I = FirstPrivateDecls.find(Val: VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |=
        OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(Elt: nullptr);
  }
10308};
10309} // anonymous namespace
10310
10311// Try to extract the base declaration from a `this->x` expression if possible.
10312static ValueDecl *getDeclFromThisExpr(const Expr *E) {
10313 if (!E)
10314 return nullptr;
10315
10316 if (const auto *OASE = dyn_cast<ArraySectionExpr>(Val: E->IgnoreParenCasts()))
10317 if (const MemberExpr *ME =
10318 dyn_cast<MemberExpr>(Val: OASE->getBase()->IgnoreParenImpCasts()))
10319 return ME->getMemberDecl();
10320 return nullptr;
10321}
10322
10323/// Emit a string constant containing the names of the values mapped to the
10324/// offloading runtime library.
10325static llvm::Constant *
10326emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
10327 MappableExprsHandler::MappingExprInfo &MapExprs) {
10328
10329 uint32_t SrcLocStrSize;
10330 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
10331 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
10332
10333 SourceLocation Loc;
10334 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
10335 if (const ValueDecl *VD = getDeclFromThisExpr(E: MapExprs.getMapExpr()))
10336 Loc = VD->getLocation();
10337 else
10338 Loc = MapExprs.getMapExpr()->getExprLoc();
10339 } else {
10340 Loc = MapExprs.getMapDecl()->getLocation();
10341 }
10342
10343 std::string ExprName;
10344 if (MapExprs.getMapExpr()) {
10345 PrintingPolicy P(CGF.getContext().getLangOpts());
10346 llvm::raw_string_ostream OS(ExprName);
10347 MapExprs.getMapExpr()->printPretty(OS, Helper: nullptr, Policy: P);
10348 } else {
10349 ExprName = MapExprs.getMapDecl()->getNameAsString();
10350 }
10351
10352 std::string FileName;
10353 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
10354 if (auto *DbgInfo = CGF.getDebugInfo())
10355 FileName = DbgInfo->remapDIPath(PLoc.getFilename());
10356 else
10357 FileName = PLoc.getFilename();
10358 return OMPBuilder.getOrCreateSrcLocStr(FunctionName: FileName, FileName: ExprName, Line: PLoc.getLine(),
10359 Column: PLoc.getColumn(), SrcLocStrSize);
10360}
10361/// Emit the arrays used to pass the captures and map information to the
10362/// offloading runtime library. If there is no map or capture information,
10363/// return nullptr by reference.
10364static void emitOffloadingArraysAndArgs(
10365 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
10366 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
10367 bool IsNonContiguous = false, bool ForEndCall = false) {
10368 CodeGenModule &CGM = CGF.CGM;
10369
10370 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10371 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
10372 CGF.AllocaInsertPt->getIterator());
10373 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
10374 CGF.Builder.GetInsertPoint());
10375
10376 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
10377 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
10378 Info.CaptureDeviceAddrMap.try_emplace(Key: DevVD, Args&: NewDecl);
10379 }
10380 };
10381
10382 auto CustomMapperCB = [&](unsigned int I) {
10383 llvm::Function *MFunc = nullptr;
10384 if (CombinedInfo.Mappers[I]) {
10385 Info.HasMapper = true;
10386 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
10387 D: cast<OMPDeclareMapperDecl>(Val: CombinedInfo.Mappers[I]));
10388 }
10389 return MFunc;
10390 };
10391 cantFail(Err: OMPBuilder.emitOffloadingArraysAndArgs(
10392 AllocaIP, CodeGenIP, Info, RTArgs&: Info.RTArgs, CombinedInfo, CustomMapperCB,
10393 IsNonContiguous, ForEndCall, DeviceAddrCB));
10394}
10395
/// Check for inner distribute directive.
///
/// Looks through the captured statements of \p D for a single nested
/// OpenMP directive and returns it when it is (or wraps) a distribute
/// directive; returns nullptr otherwise. Callers only pass target-style
/// directives that can legally enclose a distribute region.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  // Peel the captured statement(s) down to the single child statement of the
  // directive body, if there is exactly one.
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(Val: ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // For now, treat 'target' with nested 'teams loop' as if it's
      // distributed (target teams distribute).
      if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
        return NestedDir;
      if (DKind == OMPD_teams) {
        // 'target' + nested 'teams': descend one more level looking for a
        // distribute directive inside the teams region.
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(Val: ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      // A distribute directly nested in 'target teams' is the one we want.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      // These combined target forms cannot contain a nested distribute.
      return nullptr;
    // Every remaining directive kind is either already a combined distribute
    // variant or not a target construct at all; this function must not be
    // called with them.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
10507
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first.
///   if ((size > 1 || (base != begin)) && !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Already emitted and cached for this declaration; nothing to do.
  if (UDMMap.count(Val: D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  // The variable declared by the 'declare mapper' construct; inside the
  // generated element loop it is privatized to the current array element.
  auto *MapperVarDecl =
      cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D->getMapperVarRef())->getDecl());
  CharUnits ElementSize = C.getTypeSizeInChars(T: Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(T: Ty);

  // A dedicated CodeGenFunction is used to emit the mapper body.
  CodeGenFunction MapperCGF(CGM);
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  // Callback invoked by the OpenMPIRBuilder inside the per-element loop:
  // binds the mapper variable to the current element and collects the map
  // information declared by the mapper's clauses.
  auto PrivatizeAndGenMapInfoCB =
      [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, llvm::Value *PtrPHI,
          llvm::Value *BeginArg) -> llvm::OpenMPIRBuilder::MapInfosTy & {
    MapperCGF.Builder.restoreIP(IP: CodeGenIP);

    // Privatize the declared variable of mapper to be the current array
    // element.
    Address PtrCurrent(
        PtrPHI, ElemTy,
        Address(BeginArg, MapperCGF.VoidPtrTy, CGM.getPointerAlign())
            .getAlignment()
            .alignmentOfArrayElement(elementSize: ElementSize));
    CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
    Scope.addPrivate(LocalVD: MapperVarDecl, Addr: PtrCurrent);
    (void)Scope.Privatize();

    // Get map clause information.
    MappableExprsHandler MEHandler(*D, MapperCGF);
    MEHandler.generateAllInfoForMapper(CombinedInfo, OMPBuilder);

    // With debug info enabled, attach a printable name to each map entry.
    auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
      return emitMappingInformation(CGF&: MapperCGF, OMPBuilder, MapExprs&: MapExpr);
    };
    if (CGM.getCodeGenOpts().getDebugInfo() !=
        llvm::codegenoptions::NoDebugInfo) {
      CombinedInfo.Names.resize(N: CombinedInfo.Exprs.size());
      llvm::transform(Range&: CombinedInfo.Exprs, d_first: CombinedInfo.Names.begin(),
                      F: FillInfoMap);
    }

    return CombinedInfo;
  };

  // Callback resolving a nested user-defined mapper referenced by entry I.
  auto CustomMapperCB = [&](unsigned I) {
    llvm::Function *MapperFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      // Call the corresponding mapper function.
      MapperFunc = getOrCreateUserDefinedMapperFunc(
          D: cast<OMPDeclareMapperDecl>(Val: CombinedInfo.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
    }
    return MapperFunc;
  };

  // Build a unique function name from the mangled mapped type and the
  // mapper's declared name.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(T: Ty, Out);
  std::string Name = getName(Parts: {"omp_mapper", TyStr, D->getName()});

  llvm::Function *NewFn = cantFail(ValOrErr: OMPBuilder.emitUserDefinedMapper(
      PrivAndGenMapInfoCB: PrivatizeAndGenMapInfoCB, ElemTy, FuncName: Name, CustomMapperCB));
  // Cache the function and, when emitted on behalf of a function, remember
  // the association for later bookkeeping.
  UDMMap.try_emplace(Key: D, Args&: NewFn);
  if (CGF)
    FunctionUDMMap[CGF->CurFn].push_back(Elt: D);
}
10606
10607llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10608 const OMPDeclareMapperDecl *D) {
10609 auto I = UDMMap.find(Val: D);
10610 if (I != UDMMap.end())
10611 return I->second;
10612 emitUserDefinedMapper(D);
10613 return UDMMap.lookup(Val: D);
10614}
10615
10616llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
10617 CodeGenFunction &CGF, const OMPExecutableDirective &D,
10618 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10619 const OMPLoopDirective &D)>
10620 SizeEmitter) {
10621 OpenMPDirectiveKind Kind = D.getDirectiveKind();
10622 const OMPExecutableDirective *TD = &D;
10623 // Get nested teams distribute kind directive, if any. For now, treat
10624 // 'target_teams_loop' as if it's really a target_teams_distribute.
10625 if ((!isOpenMPDistributeDirective(DKind: Kind) || !isOpenMPTeamsDirective(DKind: Kind)) &&
10626 Kind != OMPD_target_teams_loop)
10627 TD = getNestedDistributeDirective(Ctx&: CGM.getContext(), D);
10628 if (!TD)
10629 return llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: 0);
10630
10631 const auto *LD = cast<OMPLoopDirective>(Val: TD);
10632 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
10633 return NumIterations;
10634 return llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: 0);
10635}
10636
10637static void
10638emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10639 const OMPExecutableDirective &D,
10640 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10641 bool RequiresOuterTask, const CapturedStmt &CS,
10642 bool OffloadingMandatory, CodeGenFunction &CGF) {
10643 if (OffloadingMandatory) {
10644 CGF.Builder.CreateUnreachable();
10645 } else {
10646 if (RequiresOuterTask) {
10647 CapturedVars.clear();
10648 CGF.GenerateOpenMPCapturedVars(S: CS, CapturedVars);
10649 }
10650 llvm::SmallVector<llvm::Value *, 16> Args(CapturedVars.begin(),
10651 CapturedVars.end());
10652 Args.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.Builder.getPtrTy()));
10653 OMPRuntime->emitOutlinedFunctionCall(CGF, Loc: D.getBeginLoc(), OutlinedFn,
10654 Args);
10655 }
10656}
10657
10658static llvm::Value *emitDeviceID(
10659 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10660 CodeGenFunction &CGF) {
10661 // Emit device ID if any.
10662 llvm::Value *DeviceID;
10663 if (Device.getPointer()) {
10664 assert((Device.getInt() == OMPC_DEVICE_unknown ||
10665 Device.getInt() == OMPC_DEVICE_device_num) &&
10666 "Expected device_num modifier.");
10667 llvm::Value *DevVal = CGF.EmitScalarExpr(E: Device.getPointer());
10668 DeviceID =
10669 CGF.Builder.CreateIntCast(V: DevVal, DestTy: CGF.Int64Ty, /*isSigned=*/true);
10670 } else {
10671 DeviceID = CGF.Builder.getInt64(C: OMP_DEVICEID_UNDEF);
10672 }
10673 return DeviceID;
10674}
10675
10676static std::pair<llvm::Value *, OMPDynGroupprivateFallbackType>
10677emitDynCGroupMem(const OMPExecutableDirective &D, CodeGenFunction &CGF) {
10678 llvm::Value *DynGP = CGF.Builder.getInt32(C: 0);
10679 auto DynGPFallback = OMPDynGroupprivateFallbackType::Abort;
10680
10681 if (auto *DynGPClause = D.getSingleClause<OMPDynGroupprivateClause>()) {
10682 CodeGenFunction::RunCleanupsScope DynGPScope(CGF);
10683 llvm::Value *DynGPVal =
10684 CGF.EmitScalarExpr(E: DynGPClause->getSize(), /*IgnoreResultAssign=*/true);
10685 DynGP = CGF.Builder.CreateIntCast(V: DynGPVal, DestTy: CGF.Int32Ty,
10686 /*isSigned=*/false);
10687 auto FallbackModifier = DynGPClause->getDynGroupprivateFallbackModifier();
10688 switch (FallbackModifier) {
10689 case OMPC_DYN_GROUPPRIVATE_FALLBACK_abort:
10690 DynGPFallback = OMPDynGroupprivateFallbackType::Abort;
10691 break;
10692 case OMPC_DYN_GROUPPRIVATE_FALLBACK_null:
10693 DynGPFallback = OMPDynGroupprivateFallbackType::Null;
10694 break;
10695 case OMPC_DYN_GROUPPRIVATE_FALLBACK_default_mem:
10696 case OMPC_DYN_GROUPPRIVATE_FALLBACK_unknown:
10697 // This is the default for dyn_groupprivate.
10698 DynGPFallback = OMPDynGroupprivateFallbackType::DefaultMem;
10699 break;
10700 default:
10701 llvm_unreachable("Unknown fallback modifier for OpenMP dyn_groupprivate");
10702 }
10703 } else if (auto *OMPXDynCGClause =
10704 D.getSingleClause<OMPXDynCGroupMemClause>()) {
10705 CodeGenFunction::RunCleanupsScope DynCGMemScope(CGF);
10706 llvm::Value *DynCGMemVal = CGF.EmitScalarExpr(E: OMPXDynCGClause->getSize(),
10707 /*IgnoreResultAssign=*/true);
10708 DynGP = CGF.Builder.CreateIntCast(V: DynCGMemVal, DestTy: CGF.Int32Ty,
10709 /*isSigned=*/false);
10710 }
10711 return {DynGP, DynGPFallback};
10712}
10713
/// Generate map entries for every variable captured by the target region,
/// appending them to \p CombinedInfo and recording the declarations that were
/// handled in \p MappedVarSet so clause-only items can be mapped separately.
static void genMapInfoForCaptures(
    MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
    const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
    llvm::OpenMPIRBuilder &OMPBuilder,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
    MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {

  // Collects lambda captures so their MEMBER_OF flags can be fixed up once
  // all captures have been processed.
  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
  // Walk the captures, their record fields, and the captured values in
  // lockstep.
  auto RI = CS.getCapturedRecordDecl()->field_begin();
  auto *CV = CapturedVars.begin();
  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                            CE = CS.capture_end();
       CI != CE; ++CI, ++RI, ++CV) {
    MappableExprsHandler::MapCombinedInfoTy CurInfo;

    // VLA sizes are passed to the outlined region by copy and do not have map
    // information associated.
    if (CI->capturesVariableArrayType()) {
      CurInfo.Exprs.push_back(Elt: nullptr);
      CurInfo.BasePointers.push_back(Elt: *CV);
      CurInfo.DevicePtrDecls.push_back(Elt: nullptr);
      CurInfo.DevicePointers.push_back(
          Elt: MappableExprsHandler::DeviceInfoTy::None);
      CurInfo.Pointers.push_back(Elt: *CV);
      CurInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
          V: CGF.getTypeSize(Ty: RI->getType()), DestTy: CGF.Int64Ty, /*isSigned=*/true));
      // Copy to the device as an argument. No need to retrieve it.
      CurInfo.Types.push_back(Elt: OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                              OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
                              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CurInfo.Mappers.push_back(Elt: nullptr);
    } else {
      // 'this' captures have no declaration of their own.
      const ValueDecl *CapturedVD =
          CI->capturesThis() ? nullptr
                             : CI->getCapturedVar()->getCanonicalDecl();
      bool HasEntryWithCVAsAttachPtr = false;
      if (CapturedVD)
        HasEntryWithCVAsAttachPtr =
            MEHandler.hasAttachEntryForCapturedVar(VD: CapturedVD);

      // Populate component lists for the captured variable from clauses.
      MappableExprsHandler::MapDataArrayTy DeclComponentLists;
      SmallVector<
          SmallVector<OMPClauseMappableExprCommon::MappableComponent, 8>, 4>
          StorageForImplicitlyAddedComponentLists;
      MEHandler.populateComponentListsForNonLambdaCaptureFromClauses(
          VD: CapturedVD, DeclComponentLists,
          StorageForImplicitlyAddedComponentLists);

      // OpenMP 6.0, 15.8, target construct, restrictions:
      // * A list item in a map clause that is specified on a target construct
      //   must have a base variable or base pointer.
      //
      // Map clauses on a target construct must either have a base pointer, or a
      // base-variable. So, if we don't have a base-pointer, that means that it
      // must have a base-variable, i.e. we have a map like `map(s)`, `map(s.x)`
      // etc. In such cases, we do not need to handle default map generation
      // for `s`.
      bool HasEntryWithoutAttachPtr =
          llvm::any_of(Range&: DeclComponentLists, P: [&](const auto &MapData) {
            OMPClauseMappableExprCommon::MappableExprComponentListRef
                Components = std::get<0>(MapData);
            return !MEHandler.getAttachPtrExpr(Components);
          });

      // Generate default map info first if there's no direct map with CV as
      // the base-variable, or attach pointer.
      if (DeclComponentLists.empty() ||
          (!HasEntryWithCVAsAttachPtr && !HasEntryWithoutAttachPtr))
        MEHandler.generateDefaultMapInfo(CI: *CI, RI: **RI, CV: *CV, CombinedInfo&: CurInfo);

      // If we have any information in the map clause, we use it, otherwise we
      // just do a default mapping.
      MEHandler.generateInfoForCaptureFromClauseInfo(
          DeclComponentListsFromClauses: DeclComponentLists, Cap: CI, Arg: *CV, CurCaptureVarInfo&: CurInfo, OMPBuilder,
          /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size());

      // Record the declaration as handled ('this' is recorded as nullptr).
      if (!CI->capturesThis())
        MappedVarSet.insert(V: CI->getCapturedVar());
      else
        MappedVarSet.insert(V: nullptr);

      // Generate correct mapping for variables captured by reference in
      // lambdas.
      if (CI->capturesVariable())
        MEHandler.generateInfoForLambdaCaptures(VD: CI->getCapturedVar(), Arg: *CV,
                                                CombinedInfo&: CurInfo, LambdaPointers);
    }
    // We expect to have at least an element of information for this capture.
    assert(!CurInfo.BasePointers.empty() &&
           "Non-existing map pointer for capture!");
    assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
           CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
           CurInfo.BasePointers.size() == CurInfo.Types.size() &&
           CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
           "Inconsistent map information sizes!");

    // We need to append the results of this capture to what we already have.
    CombinedInfo.append(CurInfo);
  }
  // Adjust MEMBER_OF flags for the lambdas captures.
  MEHandler.adjustMemberOfForLambdaCaptures(
      OMPBuilder, LambdaPointers, BasePointers&: CombinedInfo.BasePointers,
      Pointers&: CombinedInfo.Pointers, Types&: CombinedInfo.Types);
}
10819static void
10820genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
10821 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
10822 llvm::OpenMPIRBuilder &OMPBuilder,
10823 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
10824 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
10825
10826 CodeGenModule &CGM = CGF.CGM;
10827 // Map any list items in a map clause that were not captures because they
10828 // weren't referenced within the construct.
10829 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkipVarSet: SkippedVarSet);
10830
10831 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10832 return emitMappingInformation(CGF, OMPBuilder, MapExprs&: MapExpr);
10833 };
10834 if (CGM.getCodeGenOpts().getDebugInfo() !=
10835 llvm::codegenoptions::NoDebugInfo) {
10836 CombinedInfo.Names.resize(N: CombinedInfo.Exprs.size());
10837 llvm::transform(Range&: CombinedInfo.Exprs, d_first: CombinedInfo.Names.begin(),
10838 F: FillInfoMap);
10839 }
10840}
10841
10842static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
10843 const CapturedStmt &CS,
10844 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10845 llvm::OpenMPIRBuilder &OMPBuilder,
10846 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
10847 // Get mappable expression information.
10848 MappableExprsHandler MEHandler(D, CGF);
10849 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10850
10851 genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
10852 MappedVarSet, CombinedInfo);
10853 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, SkippedVarSet: MappedVarSet);
10854}
10855
10856template <typename ClauseTy>
10857static void
10858emitClauseForBareTargetDirective(CodeGenFunction &CGF,
10859 const OMPExecutableDirective &D,
10860 llvm::SmallVectorImpl<llvm::Value *> &Values) {
10861 const auto *C = D.getSingleClause<ClauseTy>();
10862 assert(!C->varlist_empty() &&
10863 "ompx_bare requires explicit num_teams and thread_limit");
10864 CodeGenFunction::RunCleanupsScope Scope(CGF);
10865 for (auto *E : C->varlist()) {
10866 llvm::Value *V = CGF.EmitScalarExpr(E);
10867 Values.push_back(
10868 Elt: CGF.Builder.CreateIntCast(V, DestTy: CGF.Int32Ty, /*isSigned=*/true));
10869 }
10870}
10871
/// Emit the offloading arrays for a target directive and generate the kernel
/// launch (with host fallback) through the OpenMPIRBuilder.
///
/// Fills \p InputInfo, \p MapTypesArray and \p MapNamesArray for use by a
/// possible enclosing task, then emits the launch either inline or inside a
/// task-based directive when \p RequiresOuterTask is set.
static void emitTargetCallKernelLaunch(
    CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
    const OMPExecutableDirective &D,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
    const CapturedStmt &CS, bool OffloadingMandatory,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
    llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter,
    CodeGenFunction &CGF, CodeGenModule &CGM) {
  llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();

  // Fill up the arrays with all the captured variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  CGOpenMPRuntime::TargetDataInfo Info;
  genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);

  // Append a null entry for the implicit dyn_ptr argument.
  using OpenMPOffloadMappingFlags = llvm::omp::OpenMPOffloadMappingFlags;
  auto *NullPtr = llvm::Constant::getNullValue(Ty: CGF.Builder.getPtrTy());
  CombinedInfo.BasePointers.push_back(Elt: NullPtr);
  CombinedInfo.Pointers.push_back(Elt: NullPtr);
  CombinedInfo.DevicePointers.push_back(
      Elt: llvm::OpenMPIRBuilder::DeviceInfoTy::None);
  CombinedInfo.Sizes.push_back(Elt: CGF.Builder.getInt64(C: 0));
  CombinedInfo.Types.push_back(Elt: OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
                               OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
  // Names are only populated when debug info is enabled; keep it in sync.
  if (!CombinedInfo.Names.empty())
    CombinedInfo.Names.push_back(Elt: NullPtr);
  CombinedInfo.Exprs.push_back(Elt: nullptr);
  CombinedInfo.Mappers.push_back(Elt: nullptr);
  CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);

  emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
                              /*IsNonContiguous=*/true, /*ForEndCall=*/false);

  // Publish the emitted runtime arrays to the caller (used when the launch is
  // wrapped in a task-based directive).
  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
  InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                        CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.PointersArray =
      Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.SizesArray =
      Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
  InputInfo.MappersArray =
      Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  MapTypesArray = Info.RTArgs.MapTypesArray;
  MapNamesArray = Info.RTArgs.MapNamesArray;

  // Code emitting the actual kernel launch; may run later inside a task body.
  auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
                    RequiresOuterTask, &CS, OffloadingMandatory, Device,
                    OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;

    if (IsReverseOffloading) {
      // Reverse offloading is not supported, so just execute on the host.
      // FIXME: This fallback solution is incorrect since it ignores the
      // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
      // assert here and ensure SEMA emits an error.
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return;
    }

    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
    unsigned NumTargetItems = InputInfo.NumberOfTargetItems;

    llvm::Value *BasePointersArray =
        InputInfo.BasePointersArray.emitRawPointer(CGF);
    llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
    llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
    llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);

    // Host fallback invoked by the IRBuilder when the device launch fails.
    auto &&EmitTargetCallFallbackCB =
        [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
         OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
        -> llvm::OpenMPIRBuilder::InsertPointTy {
      CGF.Builder.restoreIP(IP);
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return CGF.Builder.saveIP();
    };

    // For 'ompx_bare' kernels the team/thread counts come straight from the
    // explicit num_teams/thread_limit clauses; otherwise compute defaults.
    bool IsBare = D.hasClausesOfKind<OMPXBareClause>();
    SmallVector<llvm::Value *, 3> NumTeams;
    SmallVector<llvm::Value *, 3> NumThreads;
    if (IsBare) {
      emitClauseForBareTargetDirective<OMPNumTeamsClause>(CGF, D, Values&: NumTeams);
      emitClauseForBareTargetDirective<OMPThreadLimitClause>(CGF, D,
                                                             Values&: NumThreads);
    } else {
      NumTeams.push_back(Elt: OMPRuntime->emitNumTeamsForTargetDirective(CGF, D));
      NumThreads.push_back(
          Elt: OMPRuntime->emitNumThreadsForTargetDirective(CGF, D));
    }

    llvm::Value *DeviceID = emitDeviceID(Device, CGF);
    llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, Loc: D.getBeginLoc());
    llvm::Value *NumIterations =
        OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
    auto [DynCGroupMem, DynCGroupMemFallback] = emitDynCGroupMem(D, CGF);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());

    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
        BasePointersArray, PointersArray, SizesArray, MapTypesArray,
        nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);

    llvm::OpenMPIRBuilder::TargetKernelArgs Args(
        NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
        DynCGroupMem, HasNoWait, DynCGroupMemFallback);

    // Emit the __tgt_target_kernel call plus the conditional host fallback.
    llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
        cantFail(ValOrErr: OMPRuntime->getOMPBuilder().emitKernelLaunch(
            Loc: CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
            RTLoc, AllocaIP));
    CGF.Builder.restoreIP(IP: AfterIP);
  };

  // Emit inline, or wrapped in a task when depend/nowait/... require one.
  if (RequiresOuterTask)
    CGF.EmitOMPTargetTaskBasedDirective(S: D, BodyGen: ThenGen, InputInfo);
  else
    OMPRuntime->emitInlinedDirective(CGF, InnerKind: D.getDirectiveKind(), CodeGen: ThenGen);
}
10998
10999static void
11000emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
11001 const OMPExecutableDirective &D,
11002 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
11003 bool RequiresOuterTask, const CapturedStmt &CS,
11004 bool OffloadingMandatory, CodeGenFunction &CGF) {
11005
11006 // Notify that the host version must be executed.
11007 auto &&ElseGen =
11008 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
11009 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
11010 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
11011 RequiresOuterTask, CS, OffloadingMandatory, CGF);
11012 };
11013
11014 if (RequiresOuterTask) {
11015 CodeGenFunction::OMPTargetDataInfo InputInfo;
11016 CGF.EmitOMPTargetTaskBasedDirective(S: D, BodyGen: ElseGen, InputInfo);
11017 } else {
11018 OMPRuntime->emitInlinedDirective(CGF, InnerKind: D.getDirectiveKind(), CodeGen: ElseGen);
11019 }
11020}
11021
/// Emit code for a target-based executable directive: materialize the
/// captured variables, then either launch the device kernel or fall back to
/// host execution depending on the 'if' clause and whether a device function
/// ID exists.
///
/// \param OutlinedFn  Host version of the outlined target region.
/// \param OutlinedFnID Device kernel ID; null when there is no device code
///        (e.g. no target triples were specified).
/// \param IfCond Expression of the 'if' clause, or null.
/// \param Device Device clause expression paired with its modifier.
/// \param SizeEmitter Callback that emits a value for loop directives
///        (presumably the iteration count — confirm at the definition of
///        emitTargetCallKernelLaunch).
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  // Offloading is mandatory only on the host side and only when the user
  // requested it via -fopenmp-offload-mandatory.
  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
                                   CGM.getLangOpts().OpenMPOffloadMandatory;

  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");

  // depend/nowait/in_reduction clauses (and, since OpenMP 5.1, thread_limit
  // on certain directives) force the target call into an outer task.
  const bool RequiresOuterTask =
      D.hasClausesOfKind<OMPDependClause>() ||
      D.hasClausesOfKind<OMPNowaitClause>() ||
      D.hasClausesOfKind<OMPInReductionClause>() ||
      (CGM.getLangOpts().OpenMP >= 51 &&
       needsTaskBasedThreadLimit(DKind: D.getDirectiveKind()) &&
       D.hasClausesOfKind<OMPThreadLimitClause>());
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(RegionKind: OMPD_target);
  // Materialize the captured variables up front so both the device ("then")
  // and host ("else") paths can use them.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(S: CS, CapturedVars);
  };
  emitInlinedDirective(CGF, InnerKind: OMPD_unknown, CodeGen: ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;

  // Device path: emit the kernel launch sequence.
  auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
                          RequiresOuterTask, &CS, OffloadingMandatory, Device,
                          OutlinedFnID, &InputInfo, &MapTypesArray,
                          &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
                                                       PrePostActionTy &) {
    emitTargetCallKernelLaunch(OMPRuntime: this, OutlinedFn, D, CapturedVars,
                               RequiresOuterTask, CS, OffloadingMandatory,
                               Device, OutlinedFnID, InputInfo, MapTypesArray,
                               MapNamesArray, SizeEmitter, CGF, CGM);
  };

  // Host path: run the outlined host version of the region.
  auto &&TargetElseGen =
      [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
        emitTargetCallElse(OMPRuntime: this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
                           CS, OffloadingMandatory, CGF);
      };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, Cond: IfCond, ThenGen: TargetThenGen, ElseGen: TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
11090
/// Recursively scan statement \p S, emitted inside the function named
/// \p ParentName, for OpenMP target execution directives and emit their
/// device variants. Per the comment below, this is only called during device
/// compilation.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Register vtable from device for target data and target directives.
  // Add this block here since scanForTargetRegionsFunctions ignores
  // target data by checking if S is a executable directive (target).
  if (auto *E = dyn_cast<OMPExecutableDirective>(Val: S);
      E && isOpenMPTargetDataManagementDirective(DKind: E->getDirectiveKind())) {
    // Don't need to check if it's device compile
    // since scanForTargetRegionsFunctions currently only called
    // in device compilation.
    registerVTable(D: *E);
  }

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(Val: S) &&
      isOpenMPTargetExecutionDirective(
          DKind: cast<OMPExecutableDirective>(Val: S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(Val: S);

    // Entry info is derived from the directive's presumed source location and
    // the enclosing function's name.
    llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
        CGM, OMPBuilder, BeginLoc: E.getBeginLoc(), ParentName);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
      return;

    // Dispatch to the directive-specific device-function emitter. All
    // non-target kinds fall through to the unreachable below.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   S: cast<OMPTargetDirective>(Val: E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetParallelDirective>(Val: E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetTeamsDirective>(Val: E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetTeamsDistributeDirective>(Val: E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetTeamsDistributeSimdDirective>(Val: E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetParallelForDirective>(Val: E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetParallelForSimdDirective>(Val: E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetSimdDirective>(Val: E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          S: cast<OMPTargetTeamsDistributeParallelForDirective>(Val: E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              S: cast<OMPTargetTeamsDistributeParallelForSimdDirective>(Val: E));
      break;
    case OMPD_target_teams_loop:
      CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetTeamsGenericLoopDirective>(Val: E));
      break;
    case OMPD_target_parallel_loop:
      CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetParallelGenericLoopDirective>(Val: E));
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(Val: S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    // Non-target directive: keep scanning inside its associated statement.
    scanForTargetRegionsFunctions(S: E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(Val: S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(S: II, ParentName);
}
11257
11258static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
11259 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
11260 OMPDeclareTargetDeclAttr::getDeviceType(VD);
11261 if (!DevTy)
11262 return false;
11263 // Do not emit device_type(nohost) functions for the host.
11264 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
11265 return true;
11266 // Do not emit device_type(host) functions for the device.
11267 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
11268 return true;
11269 return false;
11270}
11271
/// Returns true when regular codegen of \p GD must be skipped by OpenMP.
/// During device compilation this also scans function bodies for nested
/// target regions so their device variants get emitted.
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(Val: GD.getDecl()))
      // device_type(nohost) functions must not be emitted for the host.
      if (isAssumedToBeNotEmitted(VD: cast<ValueDecl>(Val: FD),
                                  IsDevice: CGM.getLangOpts().OpenMPIsTargetDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(Val: GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(Val: VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(S: FD->getBody(), ParentName: Name);
    // device_type(host) functions must not be emitted for the device.
    if (isAssumedToBeNotEmitted(VD: cast<ValueDecl>(Val: FD),
                                IsDevice: CGM.getLangOpts().OpenMPIsTargetDevice))
      return true;
  }

  // Do not to emit function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(V: VD) == 0;
}
11297
/// Returns true when regular codegen of the global variable \p GD must be
/// skipped by OpenMP. On the device this scans constructors/destructors for
/// target regions and defers declare-target variables that cannot be emitted
/// directly (link clause, or to/enter/local under unified shared memory).
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  // Skip variables excluded by device_type for this compilation side.
  if (isAssumedToBeNotEmitted(VD: cast<ValueDecl>(Val: GD.getDecl()),
                              IsDevice: CGM.getLangOpts().OpenMPIsTargetDevice))
    return true;

  // On the host, nothing else to decide here.
  if (!CGM.getLangOpts().OpenMPIsTargetDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(Val: GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GD: GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(S: Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GD: GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(S: Dtor->getBody(), ParentName);
    }
  }

  // Do not to emit variable if it is not marked as declare target.
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          VD: cast<VarDecl>(Val: GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
        *Res == OMPDeclareTargetDeclAttr::MT_Enter ||
        *Res == OMPDeclareTargetDeclAttr::MT_Local) &&
       HasRequiresUnifiedSharedMemory)) {
    // Defer: these variables are handled later by emitDeferredTargetDecls.
    DeferredGlobalVariables.insert(V: cast<VarDecl>(Val: GD.getDecl()));
    return true;
  }
  return false;
}
11337
11338void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
11339 llvm::Constant *Addr) {
11340 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
11341 !CGM.getLangOpts().OpenMPIsTargetDevice)
11342 return;
11343
11344 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
11345 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
11346
11347 // If this is an 'extern' declaration we defer to the canonical definition and
11348 // do not emit an offloading entry.
11349 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
11350 VD->hasExternalStorage())
11351 return;
11352
11353 if (!Res) {
11354 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
11355 // Register non-target variables being emitted in device code (debug info
11356 // may cause this).
11357 StringRef VarName = CGM.getMangledName(GD: VD);
11358 EmittedNonTargetVariables.try_emplace(Key: VarName, Args&: Addr);
11359 }
11360 return;
11361 }
11362
11363 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(GD: VD); };
11364 auto LinkageForVariable = [&VD, this]() {
11365 return CGM.getLLVMLinkageVarDefinition(VD);
11366 };
11367
11368 std::vector<llvm::GlobalVariable *> GeneratedRefs;
11369 OMPBuilder.registerTargetGlobalVariable(
11370 CaptureClause: convertCaptureClause(VD), DeviceClause: convertDeviceClause(VD),
11371 IsDeclaration: VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
11372 IsExternallyVisible: VD->isExternallyVisible(),
11373 EntryInfo: getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
11374 BeginLoc: VD->getCanonicalDecl()->getBeginLoc()),
11375 MangledName: CGM.getMangledName(GD: VD), GeneratedRefs, OpenMPSIMD: CGM.getLangOpts().OpenMPSimd,
11376 TargetTriple: CGM.getLangOpts().OMPTargetTriples, GlobalInitializer: AddrOfGlobal, VariableLinkage: LinkageForVariable,
11377 LlvmPtrTy: CGM.getTypes().ConvertTypeForMem(
11378 T: CGM.getContext().getPointerType(T: VD->getType())),
11379 Addr);
11380
11381 for (auto *ref : GeneratedRefs)
11382 CGM.addCompilerUsedGlobal(GV: ref);
11383}
11384
11385bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
11386 if (isa<FunctionDecl>(Val: GD.getDecl()) ||
11387 isa<OMPDeclareReductionDecl>(Val: GD.getDecl()))
11388 return emitTargetFunctions(GD);
11389
11390 return emitTargetGlobalVariable(GD);
11391}
11392
/// Emit the declare-target global variables whose emission was deferred by
/// emitTargetGlobalVariable.
void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
         *Res == OMPDeclareTargetDeclAttr::MT_Enter ||
         *Res == OMPDeclareTargetDeclAttr::MT_Local) &&
        !HasRequiresUnifiedSharedMemory) {
      // Plain to/enter/local mapping: emit the variable itself.
      CGM.EmitGlobal(D: VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
                *Res == OMPDeclareTargetDeclAttr::MT_Enter ||
                *Res == OMPDeclareTargetDeclAttr::MT_Local) &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      // Link clause (or unified shared memory): only materialize the
      // declare-target reference pointer.
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}
11415
11416void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
11417 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
11418 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
11419 " Expected target-based directive.");
11420}
11421
/// Process a '#pragma omp requires' declaration: records the
/// unified_shared_memory requirement and maps atomic_default_mem_order onto
/// the LLVM atomic ordering returned by getDefaultMemoryOrdering().
void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      // Remember the requirement both locally and in the IR builder config.
      HasRequiresUnifiedSharedMemory = true;
      OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Val: Clause)) {
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        // Invalid/unknown ordering: leave the current default untouched.
        break;
      }
    }
  }
}
11445
/// Returns the atomic ordering selected by 'omp requires
/// atomic_default_mem_order' (see processRequiresDirective), or the field's
/// initial default when no such clause was seen.
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
11449
11450bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
11451 LangAS &AS) {
11452 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
11453 return false;
11454 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
11455 switch(A->getAllocatorType()) {
11456 case OMPAllocateDeclAttr::OMPNullMemAlloc:
11457 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11458 // Not supported, fallback to the default mem space.
11459 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11460 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11461 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11462 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11463 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11464 case OMPAllocateDeclAttr::OMPConstMemAlloc:
11465 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11466 AS = LangAS::Default;
11467 return true;
11468 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11469 llvm_unreachable("Expected predefined allocator for the variables with the "
11470 "static storage.");
11471 }
11472 return false;
11473}
11474
/// Returns true when an 'omp requires unified_shared_memory' clause was
/// processed (see processRequiresDirective).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
11478
/// RAII constructor: during device compilation, saves and clears the
/// ShouldMarkAsGlobal flag so globals are not auto-marked while this object
/// is alive. The destructor restores the saved value.
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}
11487
/// RAII destructor: restores the ShouldMarkAsGlobal value saved by the
/// constructor (device compilation only, matching the constructor's guard).
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsTargetDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}
11492
/// Returns true when the function \p GD does not need to be emitted now
/// (already emitted, or marking disabled); returns false when the caller
/// should emit it. Records first-seen non-declare-target functions in
/// AlreadyEmittedTargetDecls.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  // Only meaningful in device compilation with auto-marking enabled.
  if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(Val: GD.getDecl());
  // Do not to emit function if it is marked as declare target as it was already
  // emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD: D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(V: D) == 0) {
      // If the llvm::Function exists but is still only a declaration, the
      // body has not been emitted yet.
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              Val: CGM.GetGlobalValue(Ref: CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // First time we see this decl: insert() returns true, so emission proceeds.
  return !AlreadyEmittedTargetDecls.insert(V: D).second;
}
11512
/// Emits a call to __kmpc_fork_teams spawning the teams region outlined in
/// \p OutlinedFn, forwarding \p CapturedVars as the variadic arguments.
/// \p D is unused in this base implementation.
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  // Run cleanups for the captured-variable temporaries after the call.
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(C: CapturedVars.size()), // Number of captured vars
      OutlinedFn};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(in_start: std::begin(arr&: Args), in_end: std::end(arr&: Args));
  RealArgs.append(in_start: CapturedVars.begin(), in_end: CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      M&: CGM.getModule(), FnID: OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(callee: RTLFn, args: RealArgs);
}
11537
/// Emits a call to __kmpc_push_num_teams with the values of the 'num_teams'
/// and 'thread_limit' clauses; 0 is passed for an absent clause.
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  // Absent clause -> 0 (runtime default).
  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: NumTeams),
                                      DestTy: CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(C: 0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: ThreadLimit),
                                      DestTy: CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(C: 0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                          M&: CGM.getModule(), FnID: OMPRTL___kmpc_push_num_teams),
                      args: PushNumTeamsArgs);
}
11566
/// Emits a call to __kmpc_set_thread_limit with the 'thread_limit' clause
/// value; 0 is passed when the clause is absent.
void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
                                            const Expr *ThreadLimit,
                                            SourceLocation Loc) {
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  // Absent clause -> 0 (runtime default).
  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: ThreadLimit),
                                      DestTy: CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(C: 0);

  // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
  llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
                                    ThreadLimitVal};
  CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                          M&: CGM.getModule(), FnID: OMPRTL___kmpc_set_thread_limit),
                      args: ThreadLimitArgs);
}
11584
/// Emit a '#pragma omp target data' region by delegating to
/// OMPIRBuilder::createTargetData. Handles the optional 'if' and 'device'
/// clauses and populates \p Info with mapping state (captured device
/// addresses, mapper usage) consumed by the region body \p CodeGen.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

  llvm::Value *IfCondVal = nullptr;
  if (IfCond)
    IfCondVal = CGF.EvaluateExprAsBool(E: IfCond);

  // Emit device ID if any.
  llvm::Value *DeviceID = nullptr;
  if (Device) {
    DeviceID = CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: Device),
                                         DestTy: CGF.Int64Ty, /*isSigned=*/true);
  } else {
    DeviceID = CGF.Builder.getInt64(C: OMP_DEVICEID_UNDEF);
  }

  // Fill up the arrays with all the mapped variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  // Callback used by the IR builder to collect map-clause information at the
  // supplied insertion point.
  auto GenMapInfoCB =
      [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
    CGF.Builder.restoreIP(IP: CodeGenIP);
    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);

    auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
      return emitMappingInformation(CGF, OMPBuilder, MapExprs&: MapExpr);
    };
    // Map names are only materialized when debug info is enabled.
    if (CGM.getCodeGenOpts().getDebugInfo() !=
        llvm::codegenoptions::NoDebugInfo) {
      CombinedInfo.Names.resize(N: CombinedInfo.Exprs.size());
      llvm::transform(Range&: CombinedInfo.Exprs, d_first: CombinedInfo.Names.begin(),
                      F: FillInfoMap);
    }

    return CombinedInfo;
  };
  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
  // Body generator: handles the three body-generation phases requested by the
  // IR builder. Whether the privatized or non-privatized variant runs depends
  // on whether any device addresses were captured for this region.
  auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
    CGF.Builder.restoreIP(IP: CodeGenIP);
    switch (BodyGenType) {
    case BodyGenTy::Priv:
      if (!Info.CaptureDeviceAddrMap.empty())
        CodeGen(CGF);
      break;
    case BodyGenTy::DupNoPriv:
      if (!Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    case BodyGenTy::NoPriv:
      if (Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    }
    return InsertPointTy(CGF.Builder.GetInsertBlock(),
                         CGF.Builder.GetInsertPoint());
  };

  // Records the device value produced for the I-th device-pointer capture.
  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(Key: DevVD, Args&: NewDecl);
    }
  };

  // Returns the user-defined mapper function for component I, if any.
  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Function *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          D: cast<OMPDeclareMapperDecl>(Val: CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };

  // Source location for the ident struct
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc: D.getBeginLoc());

  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());
  llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
  llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
      cantFail(ValOrErr: OMPBuilder.createTargetData(
          Loc: OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCond: IfCondVal, Info, GenMapInfoCB,
          CustomMapperCB,
          /*MapperFunc=*/nullptr, BodyGenCB: BodyCB, DeviceAddrCB, SrcLocInfo: RTLoc));
  CGF.Builder.restoreIP(IP: AfterIP);
}
11688
/// Emit a standalone target data directive ('target enter data',
/// 'target exit data', or 'target update') as a call to the matching
/// __tgt_target_data_* runtime entry (nowait variant when a 'nowait' clause
/// is present), honoring the optional 'if' and 'device' clauses.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: Device),
                                           DestTy: CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(C: OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(C: InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc: D.getBeginLoc());

    SmallVector<llvm::Value *, 13> OffloadingArgs(
        {RTLoc, DeviceID, PointerNum,
         InputInfo.BasePointersArray.emitRawPointer(CGF),
         InputInfo.PointersArray.emitRawPointer(CGF),
         InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
         InputInfo.MappersArray.emitRawPointer(CGF)});

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // Any other directive kind is a caller error (see the assert above).
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    // The nowait variants take four extra trailing (null) arguments.
    if (HasNowait) {
      OffloadingArgs.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.Int32Ty));
      OffloadingArgs.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.VoidPtrTy));
      OffloadingArgs.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.Int32Ty));
      OffloadingArgs.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.VoidPtrTy));
    }
    CGF.EmitRuntimeCall(
        callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(), FnID: RTLFn),
        args: OffloadingArgs);
  };

  // Builds the offloading arrays and then emits the runtime call (wrapped in
  // a task when depend/nowait clauses are present).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
    CGOpenMPRuntime::TargetDataInfo Info;
    MappableExprsHandler MEHandler(D, CGF);
    genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
    emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
                                /*IsNonContiguous=*/true, /*ForEndCall=*/false);

    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();

    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                          CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
                                      CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.RTArgs.MapTypesArray;
    MapNamesArray = Info.RTArgs.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(S: D, BodyGen: ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, InnerKind: D.getDirectiveKind(), CodeGen: ThenGen);
  };

  // An 'if' clause that evaluates false turns the directive into a no-op.
  if (IfCond) {
    emitIfClause(CGF, Cond: IfCond, ThenGen: TargetThenGen,
                 ElseGen: [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11866
11867static unsigned
11868evaluateCDTSize(const FunctionDecl *FD,
11869 ArrayRef<llvm::OpenMPIRBuilder::DeclareSimdAttrTy> ParamAttrs) {
11870 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11871 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11872 // of that clause. The VLEN value must be power of 2.
11873 // In other case the notion of the function`s "characteristic data type" (CDT)
11874 // is used to compute the vector length.
11875 // CDT is defined in the following order:
11876 // a) For non-void function, the CDT is the return type.
11877 // b) If the function has any non-uniform, non-linear parameters, then the
11878 // CDT is the type of the first such parameter.
11879 // c) If the CDT determined by a) or b) above is struct, union, or class
11880 // type which is pass-by-value (except for the type that maps to the
11881 // built-in complex data type), the characteristic data type is int.
11882 // d) If none of the above three cases is applicable, the CDT is int.
11883 // The VLEN is then determined based on the CDT and the size of vector
11884 // register of that ISA for which current vector version is generated. The
11885 // VLEN is computed using the formula below:
11886 // VLEN = sizeof(vector_register) / sizeof(CDT),
11887 // where vector register size specified in section 3.2.1 Registers and the
11888 // Stack Frame of original AMD64 ABI document.
11889 QualType RetType = FD->getReturnType();
11890 if (RetType.isNull())
11891 return 0;
11892 ASTContext &C = FD->getASTContext();
11893 QualType CDT;
11894 if (!RetType.isNull() && !RetType->isVoidType()) {
11895 CDT = RetType;
11896 } else {
11897 unsigned Offset = 0;
11898 if (const auto *MD = dyn_cast<CXXMethodDecl>(Val: FD)) {
11899 if (ParamAttrs[Offset].Kind ==
11900 llvm::OpenMPIRBuilder::DeclareSimdKindTy::Vector)
11901 CDT = C.getPointerType(T: C.getCanonicalTagType(TD: MD->getParent()));
11902 ++Offset;
11903 }
11904 if (CDT.isNull()) {
11905 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11906 if (ParamAttrs[I + Offset].Kind ==
11907 llvm::OpenMPIRBuilder::DeclareSimdKindTy::Vector) {
11908 CDT = FD->getParamDecl(i: I)->getType();
11909 break;
11910 }
11911 }
11912 }
11913 }
11914 if (CDT.isNull())
11915 CDT = C.IntTy;
11916 CDT = CDT->getCanonicalTypeUnqualified();
11917 if (CDT->isRecordType() || CDT->isUnionType())
11918 CDT = C.IntTy;
11919 return C.getTypeSize(T: CDT);
11920}
11921
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11927
11928/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
11929static bool getAArch64MTV(QualType QT,
11930 llvm::OpenMPIRBuilder::DeclareSimdKindTy Kind) {
11931 QT = QT.getCanonicalType();
11932
11933 if (QT->isVoidType())
11934 return false;
11935
11936 if (Kind == llvm::OpenMPIRBuilder::DeclareSimdKindTy::Uniform)
11937 return false;
11938
11939 if (Kind == llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearUVal ||
11940 Kind == llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearRef)
11941 return false;
11942
11943 if ((Kind == llvm::OpenMPIRBuilder::DeclareSimdKindTy::Linear ||
11944 Kind == llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearVal) &&
11945 !QT->isReferenceType())
11946 return false;
11947
11948 return true;
11949}
11950
11951/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11952static bool getAArch64PBV(QualType QT, ASTContext &C) {
11953 QT = QT.getCanonicalType();
11954 unsigned Size = C.getTypeSize(T: QT);
11955
11956 // Only scalars and complex within 16 bytes wide set PVB to true.
11957 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11958 return false;
11959
11960 if (QT->isFloatingType())
11961 return true;
11962
11963 if (QT->isIntegerType())
11964 return true;
11965
11966 if (QT->isPointerType())
11967 return true;
11968
11969 // TODO: Add support for complex types (section 3.1.2, item 2).
11970
11971 return false;
11972}
11973
11974/// Computes the lane size (LS) of a return type or of an input parameter,
11975/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11976/// TODO: Add support for references, section 3.2.1, item 1.
11977static unsigned getAArch64LS(QualType QT,
11978 llvm::OpenMPIRBuilder::DeclareSimdKindTy Kind,
11979 ASTContext &C) {
11980 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11981 QualType PTy = QT.getCanonicalType()->getPointeeType();
11982 if (getAArch64PBV(QT: PTy, C))
11983 return C.getTypeSize(T: PTy);
11984 }
11985 if (getAArch64PBV(QT, C))
11986 return C.getTypeSize(T: QT);
11987
11988 return C.getTypeSize(T: C.getUIntPtrType());
11989}
11990
11991// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11992// signature of the scalar function, as defined in 3.2.2 of the
11993// AAVFABI.
11994static std::tuple<unsigned, unsigned, bool>
11995getNDSWDS(const FunctionDecl *FD,
11996 ArrayRef<llvm::OpenMPIRBuilder::DeclareSimdAttrTy> ParamAttrs) {
11997 QualType RetType = FD->getReturnType().getCanonicalType();
11998
11999 ASTContext &C = FD->getASTContext();
12000
12001 bool OutputBecomesInput = false;
12002
12003 llvm::SmallVector<unsigned, 8> Sizes;
12004 if (!RetType->isVoidType()) {
12005 Sizes.push_back(Elt: getAArch64LS(
12006 QT: RetType, Kind: llvm::OpenMPIRBuilder::DeclareSimdKindTy::Vector, C));
12007 if (!getAArch64PBV(QT: RetType, C) && getAArch64MTV(QT: RetType, Kind: {}))
12008 OutputBecomesInput = true;
12009 }
12010 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
12011 QualType QT = FD->getParamDecl(i: I)->getType().getCanonicalType();
12012 Sizes.push_back(Elt: getAArch64LS(QT, Kind: ParamAttrs[I].Kind, C));
12013 }
12014
12015 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
12016 // The LS of a function parameter / return value can only be a power
12017 // of 2, starting from 8 bits, up to 128.
12018 assert(llvm::all_of(Sizes,
12019 [](unsigned Size) {
12020 return Size == 8 || Size == 16 || Size == 32 ||
12021 Size == 64 || Size == 128;
12022 }) &&
12023 "Invalid size");
12024
12025 return std::make_tuple(args&: *llvm::min_element(Range&: Sizes), args&: *llvm::max_element(Range&: Sizes),
12026 args&: OutputBecomesInput);
12027}
12028
12029static llvm::OpenMPIRBuilder::DeclareSimdBranch
12030convertDeclareSimdBranch(OMPDeclareSimdDeclAttr::BranchStateTy State) {
12031 switch (State) {
12032 case OMPDeclareSimdDeclAttr::BS_Undefined:
12033 return llvm::OpenMPIRBuilder::DeclareSimdBranch::Undefined;
12034 case OMPDeclareSimdDeclAttr::BS_Inbranch:
12035 return llvm::OpenMPIRBuilder::DeclareSimdBranch::Inbranch;
12036 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
12037 return llvm::OpenMPIRBuilder::DeclareSimdBranch::Notinbranch;
12038 }
12039 llvm_unreachable("unexpected declare simd branch state");
12040}
12041
12042// Check the values provided via `simdlen` by the user.
12043static bool validateAArch64Simdlen(CodeGenModule &CGM, SourceLocation SLoc,
12044 unsigned UserVLEN, unsigned WDS, char ISA) {
12045 // 1. A `simdlen(1)` doesn't produce vector signatures.
12046 if (UserVLEN == 1) {
12047 CGM.getDiags().Report(Loc: SLoc, DiagID: diag::warn_simdlen_1_no_effect);
12048 return false;
12049 }
12050
12051 // 2. Section 3.3.1, item 1: user input must be a power of 2 for Advanced
12052 // SIMD.
12053 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(Value: UserVLEN)) {
12054 CGM.getDiags().Report(Loc: SLoc, DiagID: diag::warn_simdlen_requires_power_of_2);
12055 return false;
12056 }
12057
12058 // 3. Section 3.4.1: SVE fixed length must obey the architectural limits.
12059 if (ISA == 's' && UserVLEN != 0 &&
12060 ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0))) {
12061 CGM.getDiags().Report(Loc: SLoc, DiagID: diag::warn_simdlen_must_fit_lanes) << WDS;
12062 return false;
12063 }
12064
12065 return true;
12066}
12067
/// Process all '#pragma omp declare simd' attributes attached to \p FD (on
/// every redeclaration) and hand the collected per-parameter information to
/// the OpenMPIRBuilder, which emits the target-specific vector-variant
/// declarations (emitX86DeclareSimdFunction on x86,
/// emitAArch64DeclareSimdFunction on AArch64).
///
/// \param FD the function declaration carrying the attributes.
/// \param Fn the emitted LLVM function to annotate.
void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  // Walk the whole redeclaration chain, newest first; each redeclaration may
  // carry its own set of declare-simd attributes.
  FD = FD->getMostRecentDecl();
  while (FD) {
    // Map params to their positions in function decl.
    // For member functions, position 0 is reserved for the implicit 'this'
    // parameter (keyed by the function decl itself).
    llvm::DenseMap<const Decl *, unsigned> ParamPositions;
    if (isa<CXXMethodDecl>(Val: FD))
      ParamPositions.try_emplace(Key: FD, Args: 0);
    unsigned ParamPos = ParamPositions.size();
    for (const ParmVarDecl *P : FD->parameters()) {
      ParamPositions.try_emplace(Key: P->getCanonicalDecl(), Args&: ParamPos);
      ++ParamPos;
    }
    // One attribute table per declare-simd attribute on this redeclaration.
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      llvm::SmallVector<llvm::OpenMPIRBuilder::DeclareSimdAttrTy, 8> ParamAttrs(
          ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(Val: E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(Val: PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
        }
        ParamAttrs[Pos].Kind =
            llvm::OpenMPIRBuilder::DeclareSimdKindTy::Uniform;
      }
      // Get alignment info.
      // 'aligneds' and 'alignments' are parallel lists: NI walks the
      // (possibly null) alignment expression for each aligned variable.
      auto *NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(Val: E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(Val: PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          ParmTy = PVD->getType();
        }
        // Use the explicit alignment when given; otherwise fall back to the
        // OpenMP default simd alignment for the parameter's type.
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(Ctx: C)
                : llvm::APSInt::getUnsigned(
                      X: C.toCharUnitsFromBits(BitSize: C.getOpenMPDefaultSimdAlign(T: ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      // 'linears', 'steps' and 'modifiers' are parallel lists; SI/MI walk
      // the step expression and the linear modifier for each linear var.
      auto *SI = Attr->steps_begin();
      auto *MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        bool IsReferenceType = false;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(Val: E)) {
          Pos = ParamPositions[FD];
          auto *P = cast<PointerType>(Val: E->getType());
          PtrRescalingFactor = CGM.getContext()
                                   .getTypeSizeInChars(T: P->getPointeeType())
                                   .getQuantity();
        } else {
          const auto *PVD = cast<ParmVarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(Val: PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          if (auto *P = dyn_cast<PointerType>(Val: PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(T: P->getPointeeType())
                                     .getQuantity();
          else if (PVD->getType()->isReferenceType()) {
            IsReferenceType = true;
            PtrRescalingFactor =
                CGM.getContext()
                    .getTypeSizeInChars(T: PVD->getType().getNonReferenceType())
                    .getQuantity();
          }
        }
        llvm::OpenMPIRBuilder::DeclareSimdAttrTy &ParamAttr = ParamAttrs[Pos];
        if (*MI == OMPC_LINEAR_ref)
          ParamAttr.Kind = llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearRef;
        else if (*MI == OMPC_LINEAR_uval)
          ParamAttr.Kind = llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearUVal;
        else if (IsReferenceType)
          ParamAttr.Kind = llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearVal;
        else
          ParamAttr.Kind = llvm::OpenMPIRBuilder::DeclareSimdKindTy::Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(X: 1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, Ctx: C, AllowSideEffects: Expr::SE_AllowSideEffects)) {
            // Non-constant step: if it names another parameter, record that
            // parameter's position instead of a literal stride.
            if (const auto *DRE =
                    cast<DeclRefExpr>(Val: (*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(Val: DRE->getDecl())) {
                ParamAttr.HasVarStride = true;
                auto It = ParamPositions.find(Val: StridePVD->getCanonicalDecl());
                assert(It != ParamPositions.end() &&
                       "Function parameter not found");
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(X: It->second);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (!ParamAttr.HasVarStride &&
            (ParamAttr.Kind ==
                 llvm::OpenMPIRBuilder::DeclareSimdKindTy::Linear ||
             ParamAttr.Kind ==
                 llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearRef))
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      // Evaluate an explicit 'simdlen', if present; VLENVal stays zero-ish
      // otherwise and the builder derives the VLEN itself.
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(Ctx: C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      llvm::OpenMPIRBuilder::DeclareSimdBranch State =
          convertDeclareSimdBranch(State: Attr->getBranchState());
      // Dispatch to the target-specific emitter.
      if (CGM.getTriple().isX86()) {
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        OMPBuilder.emitX86DeclareSimdFunction(Fn, NumElements: NumElts, VLENVal, ParamAttrs,
                                              Branch: State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        // Get basic data for building the vector signature.
        const auto Data = getNDSWDS(FD, ParamAttrs);
        const unsigned NDS = std::get<0>(t: Data);
        const unsigned WDS = std::get<1>(t: Data);
        const bool OutputBecomesInput = std::get<2>(t: Data);
        // SVE takes precedence over NEON when both features are available.
        if (CGM.getTarget().hasFeature(Feature: "sve")) {
          if (validateAArch64Simdlen(CGM, SLoc: ExprLoc, UserVLEN: VLEN, WDS, ISA: 's'))
            OMPBuilder.emitAArch64DeclareSimdFunction(
                Fn, VLENVal: VLEN, ParamAttrs, Branch: State, ISA: 's', NarrowestDataSize: NDS, OutputBecomesInput);
        } else if (CGM.getTarget().hasFeature(Feature: "neon")) {
          if (validateAArch64Simdlen(CGM, SLoc: ExprLoc, UserVLEN: VLEN, WDS, ISA: 'n'))
            OMPBuilder.emitAArch64DeclareSimdFunction(
                Fn, VLENVal: VLEN, ParamAttrs, Branch: State, ISA: 'n', NarrowestDataSize: NDS, OutputBecomesInput);
        }
      }
    }
    FD = FD->getPreviousDecl();
  }
}
12236
12237namespace {
12238/// Cleanup action for doacross support.
12239class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12240public:
12241 static const int DoacrossFinArgs = 2;
12242
12243private:
12244 llvm::FunctionCallee RTLFn;
12245 llvm::Value *Args[DoacrossFinArgs];
12246
12247public:
12248 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12249 ArrayRef<llvm::Value *> CallArgs)
12250 : RTLFn(RTLFn) {
12251 assert(CallArgs.size() == DoacrossFinArgs);
12252 std::copy(first: CallArgs.begin(), last: CallArgs.end(), result: std::begin(arr&: Args));
12253 }
12254 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12255 if (!CGF.HaveInsertPoint())
12256 return;
12257 CGF.EmitRuntimeCall(callee: RTLFn, args: Args);
12258 }
12259};
12260} // namespace
12261
/// Emit initialization for doacross loop nesting support.
///
/// Builds (once per module) the implicit 'kmp_dim' record, materializes an
/// on-stack array of one kmp_dim per collapsed loop dimension, fills in the
/// upper bound and stride for each dimension, calls __kmpc_doacross_init,
/// and pushes a cleanup that calls __kmpc_doacross_fini on scope exit.
///
/// \param D the loop directive carrying the doacross information.
/// \param NumIterations one iteration-count expression per loop dimension.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // KmpDimTy is cached on the runtime object; build the record only once.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim { // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord(Name: "kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, DC: RD, FieldTy: Int64Ty);
    addFieldToRecordDecl(C, DC: RD, FieldTy: Int64Ty);
    addFieldToRecordDecl(C, DC: RD, FieldTy: Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getCanonicalTagType(TD: RD);
  } else {
    RD = KmpDimTy->castAsRecordDecl();
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy = C.getConstantArrayType(EltTy: KmpDimTy, ArySize: Size, SizeExpr: nullptr,
                                            ASM: ArraySizeModifier::Normal, IndexTypeQuals: 0);

  // Zero-initialize the whole array: the 'lo' field of every dimension is
  // intentionally left at 0.
  Address DimsAddr = CGF.CreateMemTemp(T: ArrayTy, Name: "dims");
  CGF.EmitNullInitialization(DestPtr: DimsAddr, Ty: ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        Addr: CGF.Builder.CreateConstArrayGEP(Addr: DimsAddr, Index: I), T: KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        Base: DimsLVal, Field: *std::next(x: RD->field_begin(), n: UpperFD));
    // Widen/convert the iteration count to kmp_int64.
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        Src: CGF.EmitScalarExpr(E: NumIterations[I]), SrcTy: NumIterations[I]->getType(),
        DstTy: Int64Ty, Loc: NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(value: NumIterVal, lvalue: UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        Base: DimsLVal, Field: *std::next(x: RD->field_begin(), n: StrideFD));
    CGF.EmitStoreOfScalar(value: llvm::ConstantInt::getSigned(Ty: CGM.Int64Ty, /*V=*/1),
                          lvalue: StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc: D.getBeginLoc()),
      getThreadID(CGF, Loc: D.getBeginLoc()),
      llvm::ConstantInt::getSigned(Ty: CGM.Int32Ty, V: NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          V: CGF.Builder.CreateConstArrayGEP(Addr: DimsAddr, Index: 0).emitRawPointer(CGF),
          DestTy: CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      M&: CGM.getModule(), FnID: OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(callee: RTLFn, args: Args);
  // Register the matching __kmpc_doacross_fini call as a cleanup so it runs
  // on both the normal and the exceptional exit paths.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, Loc: D.getEndLoc()), getThreadID(CGF, Loc: D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      M&: CGM.getModule(), FnID: OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(Kind: NormalAndEHCleanup, A: FiniRTLFn,
                                             A: llvm::ArrayRef(FiniArgs));
}
12332
/// Shared implementation for the two emitDoacrossOrdered overloads.
///
/// Materializes the loop counter values of clause \p C into an on-stack
/// kmp_int64 array and emits a call to __kmpc_doacross_post (for a source
/// clause) or __kmpc_doacross_wait (for a sink clause).
///
/// \tparam T OMPDependClause or OMPDoacrossClause (dispatched via
///         OMPDoacrossKind<T>).
/// \param ULoc the ident_t location value for the runtime call.
/// \param ThreadID the global thread id value for the runtime call.
template <typename T>
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
                                const T *C, llvm::Value *ULoc,
                                llvm::Value *ThreadID) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      EltTy: Int64Ty, ArySize: Size, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal, IndexTypeQuals: 0);
  Address CntAddr = CGF.CreateMemTemp(T: ArrayTy, Name: ".cnt.addr");
  // Store each loop's counter value, converted to kmp_int64, into the array.
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        Src: CGF.EmitScalarExpr(E: CounterVal), SrcTy: CounterVal->getType(), DstTy: Int64Ty,
        Loc: CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(Value: CntVal, Addr: CGF.Builder.CreateConstArrayGEP(Addr: CntAddr, Index: I),
                          /*Volatile=*/false, Ty: Int64Ty);
  }
  llvm::Value *Args[] = {
      ULoc, ThreadID,
      CGF.Builder.CreateConstArrayGEP(Addr: CntAddr, Index: 0).emitRawPointer(CGF)};
  llvm::FunctionCallee RTLFn;
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  // Source clauses post their iteration vector; sink clauses wait on one.
  OMPDoacrossKind<T> ODK;
  if (ODK.isSource(C)) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(),
                                                  FnID: OMPRTL___kmpc_doacross_post);
  } else {
    assert(ODK.isSink(C) && "Expect sink modifier.");
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(),
                                                  FnID: OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(callee: RTLFn, args: Args);
}
12368
12369void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12370 const OMPDependClause *C) {
12371 return EmitDoacrossOrdered<OMPDependClause>(
12372 CGF, CGM, C, ULoc: emitUpdateLocation(CGF, Loc: C->getBeginLoc()),
12373 ThreadID: getThreadID(CGF, Loc: C->getBeginLoc()));
12374}
12375
12376void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12377 const OMPDoacrossClause *C) {
12378 return EmitDoacrossOrdered<OMPDoacrossClause>(
12379 CGF, CGM, C, ULoc: emitUpdateLocation(CGF, Loc: C->getBeginLoc()),
12380 ThreadID: getThreadID(CGF, Loc: C->getBeginLoc()));
12381}
12382
12383void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12384 llvm::FunctionCallee Callee,
12385 ArrayRef<llvm::Value *> Args) const {
12386 assert(Loc.isValid() && "Outlined function call location must be valid.");
12387 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: Loc);
12388
12389 if (auto *Fn = dyn_cast<llvm::Function>(Val: Callee.getCallee())) {
12390 if (Fn->doesNotThrow()) {
12391 CGF.EmitNounwindRuntimeCall(callee: Fn, args: Args);
12392 return;
12393 }
12394 }
12395 CGF.EmitRuntimeCall(callee: Callee, args: Args);
12396}
12397
12398void CGOpenMPRuntime::emitOutlinedFunctionCall(
12399 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
12400 ArrayRef<llvm::Value *> Args) const {
12401 emitCall(CGF, Loc, Callee: OutlinedFn, Args);
12402}
12403
12404void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12405 if (const auto *FD = dyn_cast<FunctionDecl>(Val: D))
12406 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD: FD))
12407 HasEmittedDeclareTargetRegion = true;
12408}
12409
/// Return the address to use for \p NativeParam inside the outlined function.
///
/// In this (host) implementation the target parameter needs no translation,
/// so TargetParam is ignored and the native parameter's own local address is
/// returned. NOTE(review): presumably device-side runtimes override this to
/// map TargetParam back to NativeParam — confirm against the overriders.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(VD: NativeParam);
}
12415
12416/// Return allocator value from expression, or return a null allocator (default
12417/// when no allocator specified).
12418static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
12419 const Expr *Allocator) {
12420 llvm::Value *AllocVal;
12421 if (Allocator) {
12422 AllocVal = CGF.EmitScalarExpr(E: Allocator);
12423 // According to the standard, the original allocator type is a enum
12424 // (integer). Convert to pointer type, if required.
12425 AllocVal = CGF.EmitScalarConversion(Src: AllocVal, SrcTy: Allocator->getType(),
12426 DstTy: CGF.getContext().VoidPtrTy,
12427 Loc: Allocator->getExprLoc());
12428 } else {
12429 // If no allocator specified, it defaults to the null allocator.
12430 AllocVal = llvm::Constant::getNullValue(
12431 Ty: CGF.CGM.getTypes().ConvertType(T: CGF.getContext().VoidPtrTy));
12432 }
12433 return AllocVal;
12434}
12435
12436/// Return the alignment from an allocate directive if present.
12437static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
12438 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
12439
12440 if (!AllocateAlignment)
12441 return nullptr;
12442
12443 return llvm::ConstantInt::get(Ty: CGM.SizeTy, V: AllocateAlignment->getQuantity());
12444}
12445
/// Return the address to use for local variable \p VD, honoring both
/// untied-task local storage and '#pragma omp allocate'.
///
/// If \p VD has an OMPAllocateDeclAttr and is allocatable, emits a call to
/// __kmpc_alloc / __kmpc_aligned_alloc with the requested size, alignment
/// and allocator, registers a cleanup that calls __kmpc_free, and returns
/// the resulting address. Otherwise returns the untied-task address for the
/// variable (which may be Address::invalid() when none is recorded).
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  // Look up addresses recorded for this variable in the current untied task,
  // if any: 'UntiedAddr' is the slot to store into, 'UntiedRealAddr' the
  // actual data address.
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(Val: CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(Key: VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(D: CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA: the size is only known at runtime.
      Size = CGF.getTypeSize(Ty: CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          LHS: Size, RHS: CGM.getSize(numChars: Align - CharUnits::fromQuantity(Quantity: 1)));
      Size = CGF.Builder.CreateUDiv(LHS: Size, RHS: CGM.getSize(numChars: Align));
      Size = CGF.Builder.CreateNUWMul(LHS: Size, RHS: CGM.getSize(numChars: Align));
    } else {
      // Constant-sized type: round the size up to the alignment at
      // compile time.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(T: CVD->getType());
      Size = CGM.getSize(numChars: Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, Loc: CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment = getAlignmentValue(CGM, VD: CVD);
    // __kmpc_aligned_alloc takes an extra alignment argument; pick the
    // runtime entry point accordingly.
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(Elt: ThreadID);
    if (Alignment)
      Args.push_back(Elt: Alignment);
    Args.push_back(Elt: Size);
    Args.push_back(Elt: AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(), FnID), args: Args,
        name: getName(Parts: {CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        M&: CGM.getModule(), FnID: OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(T: CVD->getType());
    // Cast the raw allocation to a pointer to the variable's type.
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        V: Addr, DestTy: CGF.ConvertTypeForMem(T: Ty), Name: getName(Parts: {CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Value: Addr, Addr: UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    // Calls __kmpc_free(thread_id, addr, allocator) when the scope is left.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, Loc: SourceLocation::getFromRawEncoding(Encoding: LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            V: Addr.emitRawPointer(CGF), DestTy: CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator: AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(callee: RTLFn, args: Args);
      }
    };
    // Prefer the recorded untied-task data address when present.
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(T: CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        Kind: NormalAndEHCleanup, A: FiniRTLFn, A: CVD->getLocation().getRawEncoding(),
        A: VDAddr, A: Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
12545
12546bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12547 const VarDecl *VD) const {
12548 auto It = FunctionToUntiedTaskStackMap.find(Val: CGF.CurFn);
12549 if (It == FunctionToUntiedTaskStackMap.end())
12550 return false;
12551 return UntiedLocalVarsStack[It->second].count(Key: VD) > 0;
12552}
12553
12554CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12555 CodeGenModule &CGM, const OMPLoopDirective &S)
12556 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12557 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12558 if (!NeedToPush)
12559 return;
12560 NontemporalDeclsSet &DS =
12561 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12562 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12563 for (const Stmt *Ref : C->private_refs()) {
12564 const auto *SimpleRefExpr = cast<Expr>(Val: Ref)->IgnoreParenImpCasts();
12565 const ValueDecl *VD;
12566 if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: SimpleRefExpr)) {
12567 VD = DRE->getDecl();
12568 } else {
12569 const auto *ME = cast<MemberExpr>(Val: SimpleRefExpr);
12570 assert((ME->isImplicitCXXThis() ||
12571 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12572 "Expected member of current class.");
12573 VD = ME->getMemberDecl();
12574 }
12575 DS.insert(V: VD);
12576 }
12577 }
12578}
12579
12580CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12581 if (!NeedToPush)
12582 return;
12583 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12584}
12585
12586CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12587 CodeGenFunction &CGF,
12588 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12589 std::pair<Address, Address>> &LocalVars)
12590 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12591 if (!NeedToPush)
12592 return;
12593 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12594 Key: CGF.CurFn, Args: CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12595 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(Elt: LocalVars);
12596}
12597
12598CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12599 if (!NeedToPush)
12600 return;
12601 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12602}
12603
12604bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12605 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12606
12607 return llvm::any_of(
12608 Range&: CGM.getOpenMPRuntime().NontemporalDeclsStack,
12609 P: [VD](const NontemporalDeclsSet &Set) { return Set.contains(V: VD); });
12610}
12611
12612void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12613 const OMPExecutableDirective &S,
12614 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12615 const {
12616 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12617 // Vars in target/task regions must be excluded completely.
12618 if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()) ||
12619 isOpenMPTaskingDirective(Kind: S.getDirectiveKind())) {
12620 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12621 getOpenMPCaptureRegions(CaptureRegions, DKind: S.getDirectiveKind());
12622 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: CaptureRegions.front());
12623 for (const CapturedStmt::Capture &Cap : CS->captures()) {
12624 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12625 NeedToCheckForLPCs.insert(V: Cap.getCapturedVar());
12626 }
12627 }
12628 // Exclude vars in private clauses.
12629 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12630 for (const Expr *Ref : C->varlist()) {
12631 if (!Ref->getType()->isScalarType())
12632 continue;
12633 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
12634 if (!DRE)
12635 continue;
12636 NeedToCheckForLPCs.insert(V: DRE->getDecl());
12637 }
12638 }
12639 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12640 for (const Expr *Ref : C->varlist()) {
12641 if (!Ref->getType()->isScalarType())
12642 continue;
12643 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
12644 if (!DRE)
12645 continue;
12646 NeedToCheckForLPCs.insert(V: DRE->getDecl());
12647 }
12648 }
12649 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12650 for (const Expr *Ref : C->varlist()) {
12651 if (!Ref->getType()->isScalarType())
12652 continue;
12653 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
12654 if (!DRE)
12655 continue;
12656 NeedToCheckForLPCs.insert(V: DRE->getDecl());
12657 }
12658 }
12659 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12660 for (const Expr *Ref : C->varlist()) {
12661 if (!Ref->getType()->isScalarType())
12662 continue;
12663 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
12664 if (!DRE)
12665 continue;
12666 NeedToCheckForLPCs.insert(V: DRE->getDecl());
12667 }
12668 }
12669 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12670 for (const Expr *Ref : C->varlist()) {
12671 if (!Ref->getType()->isScalarType())
12672 continue;
12673 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
12674 if (!DRE)
12675 continue;
12676 NeedToCheckForLPCs.insert(V: DRE->getDecl());
12677 }
12678 }
12679 for (const Decl *VD : NeedToCheckForLPCs) {
12680 for (const LastprivateConditionalData &Data :
12681 llvm::reverse(C&: CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12682 if (Data.DeclToUniqueName.count(Key: VD) > 0) {
12683 if (!Data.Disabled)
12684 NeedToAddForLPCsAsDisabled.insert(V: VD);
12685 break;
12686 }
12687 }
12688 }
12689}
12690
/// Pushes a lastprivate-conditional region for \p S if OpenMP >= 5.0 and any
/// 'lastprivate' clause on the directive uses the 'conditional' modifier.
/// Records, per variable, a unique global name used later for the shared
/// "last value" variable, together with the loop IV lvalue and the function
/// that owns the region.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      // Decide in the init-list whether this RAII pushes anything at all.
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(Range: S.getClausesOfKind<OMPLastprivateClause>(),
                           P: [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    // Only 'lastprivate(conditional: ...)' clauses participate.
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlist()) {
      // Map each decl to a unique "pl_cond..." name for its global storage.
      Data.DeclToUniqueName.insert(KV: std::make_pair(
          x: cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts())->getDecl(),
          y: SmallString<16>(generateUniqueName(CGM, Prefix: "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
12722
/// "Disable" constructor (used via disable()): starts as DoNotPush, and only
/// if inner analysis must be suppressed for some decls pushes a Disabled
/// entry onto the lastprivate-conditional stack listing those decls.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    // Only the keys matter for a disabled entry; no unique names are needed.
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.try_emplace(Key: VD);
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}
12741
/// Named factory for the "disable" constructor above: returns an RAII object
/// that suppresses lastprivate-conditional analysis inside region \p S.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12747
12748CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12749 if (CGM.getLangOpts().OpenMP < 50)
12750 return;
12751 if (Action == ActionToDo::DisableLastprivateConditional) {
12752 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12753 "Expected list of disabled private vars.");
12754 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12755 }
12756 if (Action == ActionToDo::PushAsLastprivateConditional) {
12757 assert(
12758 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12759 "Expected list of lastprivate conditional vars.");
12760 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12761 }
12762}
12763
/// Creates (or reuses) the private storage for a lastprivate conditional
/// variable \p VD in function \p CGF. The storage is an implicit record
/// { <VD's type> value; char Fired; }; the Fired flag is zero-initialized
/// here and later set by inner parallel regions that update the value.
/// Returns the address of the value field.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  // Per-function cache: (type, value field, fired field, base lvalue) per VD.
  auto I = LastprivateConditionalToTypes.try_emplace(Key: CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(Val: VD);
  if (VI == I->getSecond().end()) {
    // First use in this function: build the implicit record and a temporary.
    // NOTE(review): "lasprivate" spelling is long-standing; the name is only
    // an internal record identifier.
    RecordDecl *RD = C.buildImplicitRecord(Name: "lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, DC: RD, FieldTy: VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, DC: RD, FieldTy: C.CharTy);
    RD->completeDefinition();
    NewType = C.getCanonicalTagType(TD: RD);
    Address Addr = CGF.CreateMemTemp(T: NewType, Align: C.getDeclAlign(D: VD), Name: VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, T: NewType, Source: AlignmentSource::Decl);
    I->getSecond().try_emplace(Key: VD, Args&: NewType, Args&: VDField, Args&: FiredField, Args&: BaseLVal);
  } else {
    // Reuse the record/temporary created earlier for this VD.
    NewType = std::get<0>(t&: VI->getSecond());
    VDField = std::get<1>(t&: VI->getSecond());
    FiredField = std::get<2>(t&: VI->getSecond());
    BaseLVal = std::get<3>(t&: VI->getSecond());
  }
  // Fired = 0; the flag records whether an inner region updated the value.
  LValue FiredLVal =
      CGF.EmitLValueForField(Base: BaseLVal, Field: FiredField);
  CGF.EmitStoreOfScalar(
      value: llvm::ConstantInt::getNullValue(Ty: CGF.ConvertTypeForMem(T: C.CharTy)),
      lvalue: FiredLVal);
  return CGF.EmitLValueForField(Base: BaseLVal, Field: VDField).getAddress();
}
12796
namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
/// Visits the glvalue subexpressions of an assignment LHS looking for a
/// reference to a variable (or member of the current class) that was
/// registered on the lastprivate-conditional stack; records the match so the
/// caller can retrieve it via getFoundData().
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  /// Stack of lastprivate-conditional regions, searched innermost-first.
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  /// Expression that referenced the registered declaration, if found.
  const Expr *FoundE = nullptr;
  /// Canonical declaration that matched.
  const Decl *FoundD = nullptr;
  /// Unique global-variable name registered for the declaration.
  StringRef UniqueDeclName;
  /// Loop IV lvalue of the region that registered the declaration.
  LValue IVLVal;
  /// Function that owns the registering region.
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc; // NOTE(review): appears unused in this class — confirm.

public:
  /// Matches a direct variable reference against the stack. A hit in a
  /// Disabled entry suppresses the match entirely.
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(C&: LPM)) {
      auto It = D.DeclToUniqueName.find(Key: E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  /// Same as above for members of the current class ('this->field').
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E: E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(C&: LPM)) {
      auto It = D.DeclToUniqueName.find(Key: E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  /// Recurses into children, but only through glvalue expressions — an
  /// rvalue use of the variable cannot be the assignment target.
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Val: Child))
        if (!E->isGLValue())
          continue;
      if (Visit(S: Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  /// Returns (expr, decl, unique name, IV lvalue, owning function) of the
  /// match; members are null/empty when nothing matched.
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(args: FoundE, args: FoundD, args: UniqueDeclName, args: IVLVal, args: FoundFn);
  }
};
} // namespace
12867
/// Emits the conditional update of the shared "last value" for a lastprivate
/// conditional variable: inside a critical section (named after the unique
/// decl name), if the global last-iteration counter is <= the current IV,
/// both the counter and the last value are overwritten. This makes the
/// lexically-last iteration's value win across threads.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(T: IVLVal.getType());
  llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
      Ty: LLIVTy, Name: getName(Parts: {UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(Val: LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal =
      CGF.MakeNaturalAlignRawAddrLValue(V: LastIV, T: IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
      Ty: CGF.ConvertTypeForMem(T: LVal.getType()), Name: UniqueDeclName);
  cast<llvm::GlobalVariable>(Val: Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeRawAddrLValue(V: Last, T: LVal.getType(), Alignment: LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(lvalue: IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(lvalue: LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var. Signedness of the compare follows the IV's type.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LHS: LastIVVal, RHS: IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LHS: LastIVVal, RHS: IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock(name: "lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: "lp_cond_exit");
    CGF.Builder.CreateCondBr(Cond: CmpRes, True: ThenBB, False: ExitBB);
    // {
    CGF.EmitBlock(BB: ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(value: IVVal, lvalue: LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(T: LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(lvalue: LVal, Loc);
      CGF.EmitStoreOfScalar(value: PrivVal, lvalue: LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(src: LVal, loc: Loc);
      CGF.EmitStoreOfComplex(V: PrivVal, dest: LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(Block: ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(BB: ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    // Serialize updates across threads; the unique decl name doubles as the
    // critical-section name.
    emitCriticalRegion(CGF, CriticalName: UniqueDeclName, CriticalOpGen: CodeGen, Loc);
  }
}
12955
/// Called when \p LHS is the target of an assignment: if it references a
/// registered lastprivate conditional variable, emits the bookkeeping for
/// that update. In the registering function itself the shared last value is
/// conditionally updated; in an inner (outlined) region only the Fired flag
/// of the variable's private record is set, atomically.
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(S: LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(args&: FoundE, args&: FoundD, args&: UniqueDeclName, args&: IVLVal, args&: FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct lasprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(Val: FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(t&: It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(t&: It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(E: FoundE);
    // Reinterpret the private copy's address as the wrapper record created by
    // emitLastprivateConditionalInit to reach the Fired flag next to it.
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr: PrivLVal.getAddress(),
        Ty: CGF.ConvertTypeForMem(T: CGF.getContext().getPointerType(T: StructTy)),
        ElementTy: CGF.ConvertTypeForMem(T: StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(Addr: StructAddr, T: StructTy, Source: AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(Base: BaseLVal, Field: FiredDecl);
    // Atomic store: the flag may be read by the outer region on another path.
    CGF.EmitAtomicStore(rvalue: RValue::get(V: llvm::ConstantInt::get(
                            Ty: CGF.ConvertTypeForMem(T: FiredDecl->getType()), V: 1)),
                        lvalue: FiredLVal, AO: llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(E: FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   Loc: FoundE->getExprLoc());
}
12999
13000void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
13001 CodeGenFunction &CGF, const OMPExecutableDirective &D,
13002 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
13003 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
13004 return;
13005 auto Range = llvm::reverse(C&: LastprivateConditionalStack);
13006 auto It = llvm::find_if(
13007 Range, P: [](const LastprivateConditionalData &D) { return !D.Disabled; });
13008 if (It == Range.end() || It->Fn != CGF.CurFn)
13009 return;
13010 auto LPCI = LastprivateConditionalToTypes.find(Val: It->Fn);
13011 assert(LPCI != LastprivateConditionalToTypes.end() &&
13012 "Lastprivates must be registered already.");
13013 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
13014 getOpenMPCaptureRegions(CaptureRegions, DKind: D.getDirectiveKind());
13015 const CapturedStmt *CS = D.getCapturedStmt(RegionKind: CaptureRegions.back());
13016 for (const auto &Pair : It->DeclToUniqueName) {
13017 const auto *VD = cast<VarDecl>(Val: Pair.first->getCanonicalDecl());
13018 if (!CS->capturesVariable(Var: VD) || IgnoredDecls.contains(V: VD))
13019 continue;
13020 auto I = LPCI->getSecond().find(Val: Pair.first);
13021 assert(I != LPCI->getSecond().end() &&
13022 "Lastprivate must be rehistered already.");
13023 // bool Cmp = priv_a.Fired != 0;
13024 LValue BaseLVal = std::get<3>(t&: I->getSecond());
13025 LValue FiredLVal =
13026 CGF.EmitLValueForField(Base: BaseLVal, Field: std::get<2>(t&: I->getSecond()));
13027 llvm::Value *Res = CGF.EmitLoadOfScalar(lvalue: FiredLVal, Loc: D.getBeginLoc());
13028 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Arg: Res);
13029 llvm::BasicBlock *ThenBB = CGF.createBasicBlock(name: "lpc.then");
13030 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(name: "lpc.done");
13031 // if (Cmp) {
13032 CGF.Builder.CreateCondBr(Cond: Cmp, True: ThenBB, False: DoneBB);
13033 CGF.EmitBlock(BB: ThenBB);
13034 Address Addr = CGF.GetAddrOfLocalVar(VD);
13035 LValue LVal;
13036 if (VD->getType()->isReferenceType())
13037 LVal = CGF.EmitLoadOfReferenceLValue(RefAddr: Addr, RefTy: VD->getType(),
13038 Source: AlignmentSource::Decl);
13039 else
13040 LVal = CGF.MakeAddrLValue(Addr, T: VD->getType().getNonReferenceType(),
13041 Source: AlignmentSource::Decl);
13042 emitLastprivateConditionalUpdate(CGF, IVLVal: It->IVLVal, UniqueDeclName: Pair.second, LVal,
13043 Loc: D.getBeginLoc());
13044 auto AL = ApplyDebugLocation::CreateArtificial(CGF);
13045 CGF.EmitBlock(BB: DoneBB, /*IsFinal=*/IsFinished: true);
13046 // }
13047 }
13048}
13049
/// Final step of lastprivate conditional codegen: copies the accumulated
/// "last value" from the unique global (if it was ever created, i.e. the
/// variable was actually updated somewhere in the region) back into the
/// original variable \p PrivLVal.
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(Key: VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(Name: UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeRawAddrLValue(
      V: GV, T: PrivLVal.getType().getNonReferenceType(), Alignment: PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(lvalue: LPLVal, Loc);
  CGF.EmitStoreOfScalar(value: Res, lvalue: PrivLVal);
}
13068
// The CGOpenMPSIMDRuntime overrides below are stubs for -fopenmp-simd mode,
// where only simd-related constructs are honored. The directives they handle
// are not expected to reach codegen in that mode, so each override aborts
// with llvm_unreachable; hitting one indicates a front-end bug.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
    ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
    llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
    OpenMPSeverityClauseKind Severity, const Expr *Message) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
                                                SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13164
// More SIMD-only-mode stubs: worksharing-loop init/finish, clause emission,
// threadprivate handling and task calls are never generated in -fopenmp-simd
// mode, so each override aborts with llvm_unreachable.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(
    CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
    OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity,
    SourceLocation SeverityLoc, const Expr *Message,
    SourceLocation MessageLoc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13252
/// Reductions are the one construct this SIMD-only runtime supports: only
/// the simple (no runtime calls needed) form is valid here, and it is
/// delegated to the base-class implementation.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, OrgPrivates: Privates, OrgLHSExprs: LHSExprs, OrgRHSExprs: RHSExprs,
                                 OrgReductionOps: ReductionOps, Options);
}
13261
// Task-reduction, cancellation and target-offload entry points: none of
// these are generated in -fopenmp-simd mode, so each aborts with
// llvm_unreachable. The only exceptions are the emitTargetGlobal query at
// the end of this group, which simply reports that no global was handled.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  // No device codegen in SIMD-only mode: report the global as not handled so
  // the caller emits it normally.
  return false;
}
13334
// Teams, target-data, doacross and parameter-translation entry points are
// likewise never generated in -fopenmp-simd mode; each aborts with
// llvm_unreachable.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDoacrossClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13391