1//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This provides a class for OpenMP runtime code generation.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGOpenMPRuntime.h"
14#include "ABIInfoImpl.h"
15#include "CGCXXABI.h"
16#include "CGCleanup.h"
17#include "CGDebugInfo.h"
18#include "CGRecordLayout.h"
19#include "CodeGenFunction.h"
20#include "TargetInfo.h"
21#include "clang/AST/APValue.h"
22#include "clang/AST/Attr.h"
23#include "clang/AST/Decl.h"
24#include "clang/AST/OpenMPClause.h"
25#include "clang/AST/StmtOpenMP.h"
26#include "clang/AST/StmtVisitor.h"
27#include "clang/Basic/DiagnosticFrontend.h"
28#include "clang/Basic/OpenMPKinds.h"
29#include "clang/Basic/SourceManager.h"
30#include "clang/CodeGen/ConstantInitBuilder.h"
31#include "llvm/ADT/ArrayRef.h"
32#include "llvm/ADT/SmallSet.h"
33#include "llvm/ADT/SmallVector.h"
34#include "llvm/ADT/StringExtras.h"
35#include "llvm/Bitcode/BitcodeReader.h"
36#include "llvm/IR/Constants.h"
37#include "llvm/IR/DerivedTypes.h"
38#include "llvm/IR/GlobalValue.h"
39#include "llvm/IR/InstrTypes.h"
40#include "llvm/IR/Value.h"
41#include "llvm/Support/AtomicOrdering.h"
42#include "llvm/Support/raw_ostream.h"
43#include <cassert>
44#include <cstdint>
45#include <numeric>
46#include <optional>
47
48using namespace clang;
49using namespace CodeGen;
50using namespace llvm::omp;
51
52namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Create region info backed by a CapturedStmt.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Create region info with no associated CapturedStmt (used by the
  /// inlined-region subclass).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task switching point for untied tasks; no-op by default.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Kind of this region (outlined / task / inlined / target).
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// OpenMP directive this region was created for.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Whether the region was marked as having a 'cancel' at construction.
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI: every CGOpenMPRegionInfo uses capture kind CR_OpenMP.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  /// Callback that emits the body of the region.
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
114
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// RTTI: matches only ParallelOutlinedRegion region infos.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Val: Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Non-owning name of the helper; the caller must keep the underlying
  /// string alive for the lifetime of this object.
  StringRef HelperName;
};
147
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action implementing "task switching" for untied tasks:
  /// the task body is split into parts selected by a switch over the
  /// part id stored behind PartIDVar.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True when the task is NOT tied (note the negation in the ctor).
    bool Untied;
    /// Variable holding a pointer to the current part id of the task.
    const VarDecl *PartIDVar;
    /// Extra code emitted at every switching point before jumping out.
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            Ptr: CGF.GetAddrOfLocalVar(VD: PartIDVar),
            PtrTy: PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(lvalue: PartIdLVal, Loc: PartIDVar->getLocation());
        // Part ids with no matching case fall through to the "done"
        // block, which returns from the outlined function.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(name: ".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(V: Res, Dest: DoneBB);
        CGF.EmitBlock(BB: DoneBB);
        CGF.EmitBranchThroughCleanup(Dest: CGF.ReturnBlock);
        // Case 0 resumes at the block immediately after the switch.
        CGF.EmitBlock(BB: CGF.createBasicBlock(name: ".untied.jmp."));
        UntiedSwitch->addCase(OnVal: CGF.Builder.getInt32(C: 0),
                              Dest: CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one switching point: store the next part id, run
    /// UntiedCodeGen, return from the function, and add a new switch
    /// case that resumes execution here on re-entry.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            Ptr: CGF.GetAddrOfLocalVar(VD: PartIDVar),
            PtrTy: PartIDVar->getType()->castAs<PointerType>());
        // The next part id equals the number of cases emitted so far.
        CGF.EmitStoreOfScalar(value: CGF.Builder.getInt32(C: UntiedSwitch->getNumCases()),
                              lvalue: PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(Name: ".untied.next.");
        CGF.EmitBranch(Block: CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(BB: CGF.createBasicBlock(name: ".untied.jmp."));
        UntiedSwitch->addCase(OnVal: CGF.Builder.getInt32(C: UntiedSwitch->getNumCases()),
                              Dest: CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(Dest: CurPoint);
        CGF.EmitBlock(BB: CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (== number of switch cases).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Forward task switching to the shared untied-task action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  /// RTTI: matches only TaskOutlinedRegion region infos.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Val: Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
236
/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  /// \param OldCSI Captured statement info that was active before this
  /// inlined region was entered; most queries forward to it when it is
  /// itself an OpenMP region info.
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(Val: OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region,no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    // NOTE(review): unlike the other forwarders above, this one falls
    // back to the raw OldCSI (the local shadows the OuterRegionInfo
    // member), so it works even when OldCSI is not an OpenMP region
    // info — confirm this asymmetry is intentional.
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  /// RTTI: matches only InlinedRegion region infos.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Val: Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
319
/// API for captured statement code generation in OpenMP target
/// constructs. For this captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// RTTI: matches only TargetRegion region infos.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Val: Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Non-owning, client-provided unique name for the target region.
  StringRef HelperName;
};
348
/// Code-generation callback for regions that only capture expressions;
/// it must never actually be invoked.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      // Locals and parameters are already function-local; only non-local
      // variables need privatization here.
      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(LocalVD: VD, Addr: CGF.EmitLValue(E: &DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    // Forward to the enclosing region; a null result means the original
    // variable is used directly.
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
410
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Saved lambda/block capture state; swapped back on destruction when
  /// NoInheritance is set.
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  /// \param NoInheritance If true, lambda/block capture state of the
  /// enclosing context is stashed away for the lifetime of this object.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(a&: CGF.LambdaCaptureFields, b&: LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(Val: CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(a&: CGF.LambdaCaptureFields, b&: LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
453
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  /// NOTE(review): kmp.h spells the corresponding flag KMP_IDENT_IMB;
  /// confirm the 'IMD' spelling here is intentional.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
482
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
523
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  /// These are high bits OR-ed into the base schedule value.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
555
556/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
557/// region.
558class CleanupTy final : public EHScopeStack::Cleanup {
559 PrePostActionTy *Action;
560
561public:
562 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
563 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
564 if (!CGF.HaveInsertPoint())
565 return;
566 Action->Exit(CGF);
567 }
568};
569
570} // anonymous namespace
571
/// Run the stored code-generation callback, wrapping it in a cleanups
/// scope. If a pre/post action is attached, its Exit hook is registered
/// as a normal-and-EH cleanup so it fires on every exit path.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(Kind: NormalAndEHCleanup, A: PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    // No attached action: pass a locally-constructed default action.
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
582
583/// Check if the combiner is a call to UDR combiner and if it is so return the
584/// UDR decl used for reduction.
585static const OMPDeclareReductionDecl *
586getReductionInit(const Expr *ReductionOp) {
587 if (const auto *CE = dyn_cast<CallExpr>(Val: ReductionOp))
588 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(Val: CE->getCallee()))
589 if (const auto *DRE =
590 dyn_cast<DeclRefExpr>(Val: OVE->getSourceExpr()->IgnoreImpCasts()))
591 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(Val: DRE->getDecl()))
592 return DRD;
593 return nullptr;
594}
595
/// Initialize a private reduction copy using a user-defined reduction.
/// If \p DRD has an explicit initializer, emit the initializer call with
/// the UDR's LHS bound to \p Private and RHS bound to \p Original;
/// otherwise copy a zero-initialized constant of type \p Ty into
/// \p Private.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(D: DRD);
    // InitOp is a call whose two arguments reference the UDR's LHS/RHS
    // placeholder variables; map them onto the private/original storage.
    const auto *CE = cast<CallExpr>(Val: InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(Val: CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(Val: cast<UnaryOperator>(Val: LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(Val: cast<UnaryOperator>(Val: RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(LocalVD: cast<VarDecl>(Val: LHSDRE->getDecl()), Addr: Private);
    PrivateScope.addPrivate(LocalVD: cast<VarDecl>(Val: RHSDRE->getDecl()), Addr: Original);
    (void)PrivateScope.Privatize();
    // Substitute the initializer function for the opaque callee, then
    // emit the call for its side effects only.
    RValue Func = RValue::get(V: Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(E: InitOp);
  } else {
    // No UDR initializer: build a private constant global holding the
    // null value of Ty and copy it into the private storage.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(T: Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName(Parts: {"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(V: GV, T: Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(T: Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(V: LV, Loc: DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(C: CGF.EmitLoadOfComplex(src: LV, loc: DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      // Aggregates are stored straight from the global lvalue; no rvalue
      // load is needed.
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(E: &OVE, Location: Private, Quals: Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(E: &OVE, Location: Private, Quals: Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
649
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(arrayType: ArrayTy, baseType&: ElementTy, addr&: DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(ElemTy: DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(Ty: DestAddr.getElementType(), Ptr: DestBegin, IdxList: NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock(name: "omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock(name: "omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHS: DestBegin, RHS: DestEnd, Name: "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(Cond: IsEmpty, True: DoneBB, False: BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BB: BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(T: ElementTy);

  // When a UDR drives the init, walk a source-element pointer in
  // lockstep with the destination pointer via a parallel PHI.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(Ty: SrcBegin->getType(), NumReservedValues: 2,
                                          Name: "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(V: SrcBegin, BB: EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      Ty: DestBegin->getType(), NumReservedValues: 2, Name: "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(V: DestBegin, BB: EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, InitOp: Init, Private: DestElementCurrent,
                                       Original: SrcElementCurrent, Ty: ElementTy);
    } else
      CGF.EmitAnyExprToMem(E: Init, Location: DestElementCurrent, Quals: ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        Ty: SrcAddr.getElementType(), Ptr: SrcElementPHI, /*Idx0=*/1,
        Name: "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(V: SrcElementNext, BB: CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      Ty: DestAddr.getElementType(), Ptr: DestElementPHI, /*Idx0=*/1,
      Name: "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHS: DestElementNext, RHS: DestEnd, Name: "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Cond: Done, True: DoneBB, False: BodyBB);
  DestElementPHI->addIncoming(V: DestElementNext, BB: CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(BB: DoneBB, /*IsFinished=*/true);
}
738
/// Emit an lvalue for the shared copy of a reduction expression by
/// delegating to CodeGenFunction's OpenMP shared-lvalue emission.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
742
743LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
744 const Expr *E) {
745 if (const auto *OASE = dyn_cast<ArraySectionExpr>(Val: E))
746 return CGF.EmitArraySectionExpr(E: OASE, /*IsLowerBound=*/false);
747 return LValue();
748}
749
/// Emit initialization of the private aggregate copy of reduction item
/// \p N, either from the declare-reduction initializer or from the
/// private variable's own initializer.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(Val: cast<DeclRefExpr>(Val: ClausesData[N].Private)->getDecl());
  // Prefer the UDR path when the DRD has an initializer or the private
  // declaration has no init of its own.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, DestAddr: PrivateAddr, Type: PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       Init: EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SrcAddr: SharedAddr);
}
766
767ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
768 ArrayRef<const Expr *> Origs,
769 ArrayRef<const Expr *> Privates,
770 ArrayRef<const Expr *> ReductionOps) {
771 ClausesData.reserve(N: Shareds.size());
772 SharedAddresses.reserve(N: Shareds.size());
773 Sizes.reserve(N: Shareds.size());
774 BaseDecls.reserve(N: Shareds.size());
775 const auto *IOrig = Origs.begin();
776 const auto *IPriv = Privates.begin();
777 const auto *IRed = ReductionOps.begin();
778 for (const Expr *Ref : Shareds) {
779 ClausesData.emplace_back(Args&: Ref, Args: *IOrig, Args: *IPriv, Args: *IRed);
780 std::advance(i&: IOrig, n: 1);
781 std::advance(i&: IPriv, n: 1);
782 std::advance(i&: IRed, n: 1);
783 }
784}
785
786void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
787 assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
788 "Number of generated lvalues must be exactly N.");
789 LValue First = emitSharedLValue(CGF, E: ClausesData[N].Shared);
790 LValue Second = emitSharedLValueUB(CGF, E: ClausesData[N].Shared);
791 SharedAddresses.emplace_back(Args&: First, Args&: Second);
792 if (ClausesData[N].Shared == ClausesData[N].Ref) {
793 OrigAddresses.emplace_back(Args&: First, Args&: Second);
794 } else {
795 LValue First = emitSharedLValue(CGF, E: ClausesData[N].Ref);
796 LValue Second = emitSharedLValueUB(CGF, E: ClausesData[N].Ref);
797 OrigAddresses.emplace_back(Args&: First, Args&: Second);
798 }
799}
800
/// Compute and record the size of reduction item \p N, handling
/// variably modified (VLA) private types.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(Val: ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-size item: record the byte size; no element count needed.
    Sizes.emplace_back(
        Args: CGF.getTypeSize(Ty: OrigAddresses[N].first.getType().getNonReferenceType()),
        Args: nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(Ty: ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1, computed as a pointer difference;
    // byte size = count * sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(ElemTy: ElemType,
                                     LHS: OrigAddresses[N].second.getPointer(CGF),
                                     RHS: OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateZExtOrTrunc(V: Size, DestTy: ElemSizeOf->getType());
    Size = CGF.Builder.CreateNUWAdd(
        LHS: Size, RHS: llvm::ConstantInt::get(Ty: Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(LHS: Size, RHS: ElemSizeOf);
  } else {
    // Whole-object case: derive the element count from the byte size.
    SizeInChars =
        CGF.getTypeSize(Ty: OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(LHS: SizeInChars, RHS: ElemSizeOf);
  }
  Sizes.emplace_back(Args&: SizeInChars, Args&: Size);
  // Bind the VLA size expression to the computed element count, then
  // emit the variably modified type so its size is materialized.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          Val: CGF.getContext().getAsVariableArrayType(T: PrivateType)->getSizeExpr()),
      RValue::get(V: Size));
  CGF.EmitVariablyModifiedType(Ty: PrivateType);
}
835
// Re-emits the variably-modified private type of item N with a caller
// supplied element count Size (previously produced by the other
// emitAggregateType overload). No-op for non-variably-modified types.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Map the VLA size expression to Size while the type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          Val: CGF.getContext().getAsVariableArrayType(T: PrivateType)->getSizeExpr()),
      RValue::get(V: Size));
  CGF.EmitVariablyModifiedType(Ty: PrivateType);
}
852
// Emits the initializer for the private copy of reduction item N at
// PrivateAddr. Array items go through aggregate (element-wise)
// initialization; scalars use the user-defined reduction (UDR) initializer
// when one exists, otherwise fall back to the private variable's own
// initializer. DefaultInit returns true when it already performed the
// initialization itself.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(Val: cast<DeclRefExpr>(Val: ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ReductionOp: ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(T: PrivateVD->getType())) {
    // Array item: run DefaultInit first when a UDR initializer exists, then
    // initialize each element.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar item with a user-defined reduction initializer.
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, InitOp: ClausesData[N].ReductionOp,
                                     Private: PrivateAddr, Original: SharedAddr, Ty: SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(Init: PrivateVD->getInit())) {
    // DefaultInit did not handle it: emit the private variable's own
    // non-trivial initializer into the private storage.
    CGF.EmitAnyExprToMem(E: PrivateVD->getInit(), Location: PrivateAddr,
                         Quals: PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
877
878bool ReductionCodeGen::needCleanups(unsigned N) {
879 QualType PrivateType = getPrivateType(N);
880 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
881 return DTorKind != QualType::DK_none;
882}
883
884void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
885 Address PrivateAddr) {
886 QualType PrivateType = getPrivateType(N);
887 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
888 if (needCleanups(N)) {
889 PrivateAddr =
890 PrivateAddr.withElementType(ElemTy: CGF.ConvertTypeForMem(T: PrivateType));
891 CGF.pushDestroy(dtorKind: DTorKind, addr: PrivateAddr, type: PrivateType);
892 }
893}
894
// Follows the chain of pointer/reference indirections starting at BaseLV,
// loading through each level, until the current type matches ElTy. Returns
// an lvalue at the final address whose element type is ElTy's memory
// representation, preserving base and TBAA information.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(T1: BaseTy, T2: ElTy)) {
    // Load one level of indirection; pointers and references need different
    // load helpers.
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(Ptr: BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(Addr: BaseLV.getAddress(), T: BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // Rebuild the lvalue with ElTy's memory type at the reached address.
  return CGF.MakeAddrLValue(
      Addr: BaseLV.getAddress().withElementType(ElemTy: CGF.ConvertTypeForMem(T: ElTy)),
      T: BaseLV.getType(), BaseInfo: BaseLV.getBaseInfo(),
      TBAAInfo: CGF.CGM.getTBAAInfoForSubobject(Base: BaseLV, AccessType: BaseLV.getType()));
}
913
// Inverse of loadToBegin: rebuilds the pointer/reference indirection chain
// described by BaseTy so that dereferencing it ultimately yields Addr. One
// memory temporary is created per level of indirection, each storing the
// address of the next; the outermost temporary is returned. If BaseTy has
// no extra indirection, Addr is cast back onto the original base address.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  RawAddress Tmp = RawAddress::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(T1: BaseTy, T2: ElTy)) {
    // Allocate a temporary for this level and link it into the chain built
    // so far; remember the first (outermost) temporary separately.
    Tmp = CGF.CreateMemTemp(T: BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Val: Tmp.getPointer(), Addr: TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    // Store the adjusted pointer into the innermost temporary and return
    // the head of the chain.
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        V: Addr, DestTy: Tmp.getElementType());
    CGF.Builder.CreateStore(Val: Addr, Addr: Tmp);
    return MostTopTmp;
  }

  // No indirection needed: reuse the original base address with the new
  // pointer value (which may no longer be provably non-null).
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      V: Addr, DestTy: OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(NewPointer: Addr, IsKnownNonNull: NotKnownNonNull);
}
942
943static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
944 const VarDecl *OrigVD = nullptr;
945 if (const auto *OASE = dyn_cast<ArraySectionExpr>(Val: Ref)) {
946 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
947 while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Val: Base))
948 Base = TempOASE->getBase()->IgnoreParenImpCasts();
949 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Val: Base))
950 Base = TempASE->getBase()->IgnoreParenImpCasts();
951 DE = cast<DeclRefExpr>(Val: Base);
952 OrigVD = cast<VarDecl>(Val: DE->getDecl());
953 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Val: Ref)) {
954 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
955 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Val: Base))
956 Base = TempASE->getBase()->IgnoreParenImpCasts();
957 DE = cast<DeclRefExpr>(Val: Base);
958 OrigVD = cast<VarDecl>(Val: DE->getDecl());
959 }
960 return OrigVD;
961}
962
// Adjusts PrivateAddr so that accesses expressed through the original base
// variable of an array section/subscript land inside the private copy: the
// element offset between the base and the shared item start is applied to
// the private pointer, and the original indirection chain is rebuilt on
// top of it. Also records the base VarDecl for item N in BaseDecls.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(Ref: ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(Args&: OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(E: DE);
    // Walk pointer/reference levels down to the shared item's element type.
    LValue BaseLValue =
        loadToBegin(CGF, BaseTy: OrigVD->getType(), ElTy: SharedAddresses[N].first.getType(),
                    BaseLV: OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    // Offset (in elements) of the base relative to the shared item start.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        ElemTy: SharedAddr.getElementType(), LHS: BaseLValue.getPointer(CGF),
        RHS: SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            V: PrivateAddr.emitRawPointer(CGF), DestTy: SharedAddr.getType());
    // Apply the same offset within the private storage.
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        Ty: SharedAddr.getElementType(), Ptr: PrivatePointer, IdxList: Adjustment);
    // Recreate the base's indirection chain over the adjusted pointer.
    return castToBase(CGF, BaseTy: OrigVD->getType(),
                      ElTy: SharedAddresses[N].first.getType(),
                      OriginalBaseAddress: OriginalBaseLValue.getAddress(), Addr: Ptr);
  }
  // Plain variable reference: no adjustment required.
  BaseDecls.emplace_back(
      Args: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
989
990bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
991 const OMPDeclareReductionDecl *DRD =
992 getReductionInit(ReductionOp: ClausesData[N].ReductionOp);
993 return DRD && DRD->getInitializer();
994}
995
996LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
997 return CGF.EmitLoadOfPointerLValue(
998 Ptr: CGF.GetAddrOfLocalVar(VD: getThreadIDVariable()),
999 PtrTy: getThreadIDVariable()->getType()->castAs<PointerType>());
1000}
1001
// Emits the body of an OpenMP region inside a terminate scope, so that an
// exception escaping the region aborts instead of unwinding across the
// region boundary.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1016
1017LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1018 CodeGenFunction &CGF) {
1019 return CGF.MakeAddrLValue(Addr: CGF.GetAddrOfLocalVar(VD: getThreadIDVariable()),
1020 T: getThreadIDVariable()->getType(),
1021 Source: AlignmentSource::Decl);
1022}
1023
1024static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1025 QualType FieldTy) {
1026 auto *Field = FieldDecl::Create(
1027 C, DC, StartLoc: SourceLocation(), IdLoc: SourceLocation(), /*Id=*/nullptr, T: FieldTy,
1028 TInfo: C.getTrivialTypeSourceInfo(T: FieldTy, Loc: SourceLocation()),
1029 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1030 Field->setAccess(AS_public);
1031 DC->addDecl(D: Field);
1032 return Field;
1033}
1034
// Sets up the OpenMPIRBuilder with a configuration derived from the current
// language options, then loads any host-side offload metadata when
// compiling for a target device.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  // kmp_critical_name is modeled as an array of 8 i32 values.
  KmpCriticalNameTy = llvm::ArrayType::get(ElementType: CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  Config.setDefaultTargetAS(
      CGM.getContext().getTargetInfo().getTargetAddressSpace(AS: LangAS::Default));
  Config.setRuntimeCC(CGM.getRuntimeCC());

  // Install the config before initialize() so the builder starts with the
  // final settings.
  OMPBuilder.setConfig(Config);
  OMPBuilder.initialize();
  // Device compilation reads offload entry info from the host IR file; an
  // empty path means there is nothing to load.
  OMPBuilder.loadOffloadInfoMetadata(VFS&: *CGM.getFileSystem(),
                                     HostFilePath: CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}
1061
1062void CGOpenMPRuntime::clear() {
1063 InternalVars.clear();
1064 // Clean non-target variable declarations possibly used only in debug info.
1065 for (const auto &Data : EmittedNonTargetVariables) {
1066 if (!Data.getValue().pointsToAliveValue())
1067 continue;
1068 auto *GV = dyn_cast<llvm::GlobalVariable>(Val: Data.getValue());
1069 if (!GV)
1070 continue;
1071 if (!GV->isDeclaration() || GV->getNumUses() > 0)
1072 continue;
1073 GV->eraseFromParent();
1074 }
1075}
1076
// Builds a platform-specific mangled name from Parts by delegating to the
// OpenMPIRBuilder.
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}
1080
// Emits an internal helper for a user-defined reduction: either the
// combiner 'void .omp_combiner.(Ty *in, Ty *out)' or the initializer with
// the same prototype (selected by IsCombiner). 'In'/'Out' are the
// omp_in/omp_out (or omp_orig/omp_priv) variables of the
// declare-reduction construct; they are privatized to point at the
// dereferenced parameters before CombinerInitializer is emitted.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(T: Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(Elt: &OmpOutParm);
  Args.push_back(Elt: &OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(Info: FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      Parts: {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(Ty: FnTy, Linkage: llvm::GlobalValue::InternalLinkage,
                                    N: Name, M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: FnInfo);
  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
    Fn->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
  // When optimizing, make the helper always-inline so the reduction
  // combiner/initializer disappears into its callers.
  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
    Fn->removeFnAttr(Kind: llvm::Attribute::NoInline);
    Fn->removeFnAttr(Kind: llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(Kind: llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo, Args, Loc: In->getLocation(),
                    StartLoc: Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(VD: &OmpInParm);
  Scope.addPrivate(
      LocalVD: In, Addr: CGF.EmitLoadOfPointerLValue(Ptr: AddrIn, PtrTy: PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(VD: &OmpOutParm);
  Scope.addPrivate(
      LocalVD: Out, Addr: CGF.EmitLoadOfPointerLValue(Ptr: AddrOut, PtrTy: PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  // For initializers without an explicit init expression, emit the 'Out'
  // variable's own non-trivial initializer first.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Init: Out->getInit())) {
    CGF.EmitAnyExprToMem(E: Out->getInit(), Location: CGF.GetAddrOfLocalVar(VD: Out),
                         Quals: Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(E: CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1137
// Emits (at most once per declaration) the combiner and, if present, the
// initializer function for a user-defined reduction, caching both in
// UDRMap. When emitted inside a function, the declaration is also recorded
// in FunctionUDRMap so the cache entry can be dropped when that function
// finishes.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(Val: D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, Ty: D->getType(), CombinerInitializer: D->getCombiner(),
      In: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D->getCombinerIn())->getDecl()),
      Out: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only 'Call'-style initializers carry an expression to emit; direct
    // initialization is handled via the 'Out' variable's own initializer.
    Initializer = emitCombinerOrInitializer(
        CGM, Ty: D->getType(),
        CombinerInitializer: D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                   : nullptr,
        In: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D->getInitOrig())->getDecl()),
        Out: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(Key: D, Args&: Combiner, Args&: Initializer);
  if (CGF)
    FunctionUDRMap[CGF->CurFn].push_back(Elt: D);
}
1161
1162std::pair<llvm::Function *, llvm::Function *>
1163CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1164 auto I = UDRMap.find(Val: D);
1165 if (I != UDRMap.end())
1166 return I->second;
1167 emitUserDefinedReduction(/*CGF=*/nullptr, D);
1168 return UDRMap.lookup(Val: D);
1169}
1170
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  // Pushes a finalization callback for the given directive kind onto the
  // OpenMPIRBuilder's stack; a null builder makes this a no-op.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(Kind: OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
      return llvm::Error::success();
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(FI: std::move(FI));
  }
  // Pops the callback pushed by the constructor (if any).
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1216
// Shared implementation for emitting the outlined function of a 'parallel'
// or 'teams' region. Determines whether the directive (or a combined form
// of it) can be cancelled, registers a temporary finalization callback with
// the OpenMPIRBuilder, and generates the captured-statement function.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Each cancellable combined directive has its own class; probe them all.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(Val: &D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(Val: &D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(Val: &D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(Val: &D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(Val: &D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(Val: &D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(Val: &D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(Val: &D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(S: *CS, D);
}
1253
1254std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
1255 std::string Suffix = getName(Parts: {"omp_outlined"});
1256 return (Name + Suffix).str();
1257}
1258
1259std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
1260 return getOutlinedHelperName(Name: CGF.CurFn->getName());
1261}
1262
1263std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
1264 std::string Suffix = getName(Parts: {"omp", "reduction", "reduction_func"});
1265 return (Name + Suffix).str();
1266}
1267
1268llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1269 CodeGenFunction &CGF, const OMPExecutableDirective &D,
1270 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1271 const RegionCodeGenTy &CodeGen) {
1272 const CapturedStmt *CS = D.getCapturedStmt(RegionKind: OMPD_parallel);
1273 return emitParallelOrTeamsOutlinedFunction(
1274 CGM, D, CS, ThreadIDVar, InnermostKind, OutlinedHelperName: getOutlinedHelperName(CGF),
1275 CodeGen);
1276}
1277
1278llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1279 CodeGenFunction &CGF, const OMPExecutableDirective &D,
1280 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1281 const RegionCodeGenTy &CodeGen) {
1282 const CapturedStmt *CS = D.getCapturedStmt(RegionKind: OMPD_teams);
1283 return emitParallelOrTeamsOutlinedFunction(
1284 CGM, D, CS, ThreadIDVar, InnermostKind, OutlinedHelperName: getOutlinedHelperName(CGF),
1285 CodeGen);
1286}
1287
// Emits the outlined function for a 'task' or 'taskloop' region. For
// untied tasks, installs an action that re-schedules the remaining task
// parts via __kmpc_omp_task and reports the number of generated parts
// through NumberOfParts.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // For untied tasks: emit a call that re-enqueues the task descriptor so
  // execution can resume at the next part.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, Loc: D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc: D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(Ptr: CGF.GetAddrOfLocalVar(VD: TaskTVar),
                                    PtrTy: TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_task),
                        args: TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(DKind: D.getDirectiveKind()) ? OMPD_taskloop
                                                        : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(RegionKind: Region);
  // Probe the cancellable task-directive classes for a 'cancel' construct.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(Val: &D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(Val: &D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(Val: &D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(Val: &D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(S: *CS);
  // The part count is only meaningful for untied tasks, which may be split
  // into multiple parts.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1334
// Creates a dummy instruction (a no-op bitcast of undef) that marks where
// the cached __kmpc_global_thread_num call and related runtime setup should
// be inserted for the current function: either at the current insertion
// point or right after the alloca insertion point.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  assert(!Elem.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(T: CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt",
                                                 CGF.Builder.GetInsertBlock());
  } else {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.ServiceInsertPt->insertAfter(InsertPos: CGF.AllocaInsertPt->getIterator());
  }
}
1349
1350void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1351 auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1352 if (Elem.ServiceInsertPt) {
1353 llvm::Instruction *Ptr = Elem.ServiceInsertPt;
1354 Elem.ServiceInsertPt = nullptr;
1355 Ptr->eraseFromParent();
1356 }
1357}
1358
// Formats Loc into the ";file;function;line;column;;" ident string expected
// by the OpenMP runtime, writing into Buffer and returning a view of it.
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";";
  // Prefer the debug-info remapped path when debug info is enabled.
  if (auto *DbgInfo = CGF.getDebugInfo())
    OS << DbgInfo->remapDIPath(PLoc.getFilename());
  else
    OS << PLoc.getFilename();
  OS << ";";
  // The function part is empty when not inside a FunctionDecl.
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(Val: CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
1376
// Returns an ident_t* describing Loc for OpenMP runtime calls. Without
// debug info (unless EmitLoc forces emission) or with an invalid location,
// a default source-location string is used; otherwise the presumed
// file/function/line/column are encoded.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    std::string FileName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(Val: CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    // Prefer the debug-info remapped path when debug info is enabled.
    if (auto *DbgInfo = CGF.getDebugInfo())
      FileName = DbgInfo->remapDIPath(PLoc.getFilename());
    else
      FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, Flags: llvm::omp::IdentFlag(Flags), Reserve2Flags: Reserved2Flags);
}
1405
// Returns the OpenMP global thread id for the current function, preferring
// (in order): the OpenMPIRBuilder's own mechanism, a previously cached
// value, the thread-id parameter of an enclosing outlined region, and
// finally an emitted __kmpc_global_thread_num call cached at the service
// insertion point.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(Loc: CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        LocStr: getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        Ident: OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(Val: CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Loading the parameter is only safe when either exceptions cannot
      // unwind past this point, or the load happens in the entry block (or
      // the same block that materialized the pointer).
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(Val: LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(Val: LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(Val: LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(lvalue: LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock)
          OpenMPLocThreadIDMap[CGF.CurFn].ThreadID = ThreadID;
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (!Elem.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.ServiceInsertPt);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: Loc);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      Callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(),
                                            FnID: OMPRTL___kmpc_global_thread_num),
      Args: emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.ThreadID = Call;
  return Call;
}
1473
1474void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1475 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1476 if (OpenMPLocThreadIDMap.count(Val: CGF.CurFn)) {
1477 clearLocThreadIdInsertPt(CGF);
1478 OpenMPLocThreadIDMap.erase(Val: CGF.CurFn);
1479 }
1480 if (auto I = FunctionUDRMap.find(Val: CGF.CurFn); I != FunctionUDRMap.end()) {
1481 for (const auto *D : I->second)
1482 UDRMap.erase(Val: D);
1483 FunctionUDRMap.erase(I);
1484 }
1485 if (auto I = FunctionUDMMap.find(Val: CGF.CurFn); I != FunctionUDMMap.end()) {
1486 for (const auto *D : I->second)
1487 UDMMap.erase(Val: D);
1488 FunctionUDMMap.erase(I);
1489 }
1490 LastprivateConditionalToTypes.erase(Val: CGF.CurFn);
1491 FunctionToUntiedTaskStackMap.erase(Val: CGF.CurFn);
1492}
1493
// Returns the LLVM type used for 'ident_t *' arguments of runtime calls.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1497
1498static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1499convertDeviceClause(const VarDecl *VD) {
1500 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1501 OMPDeclareTargetDeclAttr::getDeviceType(VD);
1502 if (!DevTy)
1503 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1504
1505 switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1506 case OMPDeclareTargetDeclAttr::DT_Host:
1507 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1508 break;
1509 case OMPDeclareTargetDeclAttr::DT_NoHost:
1510 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1511 break;
1512 case OMPDeclareTargetDeclAttr::DT_Any:
1513 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1514 break;
1515 default:
1516 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1517 break;
1518 }
1519}
1520
1521static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1522convertCaptureClause(const VarDecl *VD) {
1523 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1524 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1525 if (!MapType)
1526 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1527 switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1528 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1529 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1530 break;
1531 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1532 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1533 break;
1534 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1535 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1536 break;
1537 default:
1538 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1539 break;
1540 }
1541}
1542
// Builds a unique target-region entry descriptor from the presumed source
// location of BeginLoc. The file/line lookup is deferred to a callback so
// the OpenMPIRBuilder only computes it when actually required.
static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
    CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
    SourceLocation BeginLoc, llvm::StringRef ParentName = "") {

  auto FileInfoCallBack = [&]() {
    SourceManager &SM = CGM.getContext().getSourceManager();
    PresumedLoc PLoc = SM.getPresumedLoc(Loc: BeginLoc);

    // If a #line directive points at a file that does not exist, fall back
    // to the physical location so the entry stays resolvable.
    if (!CGM.getFileSystem()->exists(Path: PLoc.getFilename()))
      PLoc = SM.getPresumedLoc(Loc: BeginLoc, /*UseLineDirectives=*/false);

    return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
  };

  return OMPBuilder.getTargetEntryUniqueInfo(CallBack: FileInfoCallBack,
                                             VFS&: *CGM.getFileSystem(), ParentName);
}
1560
/// Return the address through which a 'declare target' variable should be
/// accessed (e.g. the runtime-managed reference for 'link' variables), or an
/// invalid address when the OMPBuilder decides no special handling is needed.
ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // Deferred so the global is only materialized if actually required.
  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(GD: VD); };

  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  std::vector<llvm::GlobalVariable *> GeneratedRefs;

  llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
      T: CGM.getContext().getPointerType(T: VD->getType()));
  // Delegate the heavy lifting (entry registration, ref-var creation) to the
  // OpenMPIRBuilder; the entry is keyed on the decl's presumed location.
  llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
      CaptureClause: convertCaptureClause(VD), DeviceClause: convertDeviceClause(VD),
      IsDeclaration: VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      IsExternallyVisible: VD->isExternallyVisible(),
      EntryInfo: getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  BeginLoc: VD->getCanonicalDecl()->getBeginLoc()),
      MangledName: CGM.getMangledName(GD: VD), GeneratedRefs, OpenMPSIMD: CGM.getLangOpts().OpenMPSimd,
      TargetTriple: CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, GlobalInitializer: AddrOfGlobal,
      VariableLinkage: LinkageForVariable);

  if (!addr)
    return ConstantAddress::invalid();
  return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(D: VD));
}
1586
1587llvm::Constant *
1588CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1589 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1590 !CGM.getContext().getTargetInfo().isTLSSupported());
1591 // Lookup the entry, lazily creating it if necessary.
1592 std::string Suffix = getName(Parts: {"cache", ""});
1593 return OMPBuilder.getOrCreateInternalVariable(
1594 Ty: CGM.Int8PtrPtrTy, Name: Twine(CGM.getMangledName(GD: VD)).concat(Suffix).str());
1595}
1596
/// Return the address of the current thread's copy of threadprivate \p VD.
/// With native TLS the original address is already per-thread; otherwise emit
/// a call to __kmpc_threadprivate_cached, which lazily allocates/caches it.
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  // Args: (ident_t *loc, gtid, void *original, size_t size, void ***cache).
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreatePointerCast(V: VDAddr.emitRawPointer(CGF), DestTy: CGM.Int8PtrTy),
      CGM.getSize(numChars: CGM.GetTargetTypeStoreSize(Ty: VarTy)),
      getOrCreateThreadPrivateCache(VD)};
  // The call returns the per-thread copy; original alignment is preserved.
  return Address(
      CGF.EmitRuntimeCall(
          callee: OMPBuilder.getOrCreateRuntimeFunction(
              M&: CGM.getModule(), FnID: OMPRTL___kmpc_threadprivate_cached),
          args: Args),
      CGF.Int8Ty, VDAddr.getAlignment());
}
1618
/// Emit the runtime registration of a threadprivate variable's constructor,
/// copy-constructor and destructor. Any of \p Ctor / \p CopyCtor / \p Dtor
/// may be a null constant when the corresponding operation is trivial.
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                          M&: CGM.getModule(), FnID: OMPRTL___kmpc_global_thread_num),
                      args: OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc,
      CGF.Builder.CreatePointerCast(V: VDAddr.emitRawPointer(CGF), DestTy: CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      callee: OMPBuilder.getOrCreateRuntimeFunction(
          M&: CGM.getModule(), FnID: OMPRTL___kmpc_threadprivate_register),
      args: Args);
}
1639
/// Emit (once per mangled name) the ctor/dtor machinery for a threadprivate
/// variable when native TLS cannot be used, and register it with the runtime
/// via emitThreadPrivateVarInit. When called outside a function context
/// (CGF == nullptr) a fresh __omp_threadprivate_init_ function containing the
/// registration is synthesized and returned; otherwise the registration is
/// emitted inline into *CGF and nullptr is returned.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With native TLS the variable is an ordinary thread_local global; no
  // runtime registration is required.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  // ThreadPrivateWithDefinition de-duplicates emission per mangled name.
  VD = VD->getDefinition(C&: CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(key: CGM.getMangledName(GD: VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // Signature: void *__kmpc_global_ctor_(void *dst) — receives the
      // thread's copy and returns it.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(Elt: &Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          resultType: CGM.getContext().VoidPtrTy, args: Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(Info: FI);
      std::string Name = getName(Parts: {"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(ty: FTy, name: Name, FI, Loc);
      CtorCGF.StartFunction(GD: GlobalDecl(), RetTy: CGM.getContext().VoidPtrTy, Fn, FnInfo: FI,
                            Args, Loc, StartLoc: Loc);
      // Run the declaration's initializer expression on the incoming copy.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          Addr: CtorCGF.GetAddrOfLocalVar(VD: &Dst), /*Volatile=*/false,
          Ty: CGM.getContext().VoidPtrTy, Loc: Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(T: ASTTy),
                  VDAddr.getAlignment());
      CtorCGF.EmitAnyExprToMem(E: Init, Location: Arg, Quals: Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Return the same pointer that was passed in.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          Addr: CtorCGF.GetAddrOfLocalVar(VD: &Dst), /*Volatile=*/false,
          Ty: CGM.getContext().VoidPtrTy, Loc: Dst.getLocation());
      CtorCGF.Builder.CreateStore(Val: ArgVal, Addr: CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // Signature: void __kmpc_global_dtor_(void *dst).
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(Elt: &Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          resultType: CGM.getContext().VoidTy, args: Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(Info: FI);
      std::string Name = getName(Parts: {"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(ty: FTy, name: Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CGF&: DtorCGF);
      DtorCGF.StartFunction(GD: GlobalDecl(), RetTy: CGM.getContext().VoidTy, Fn, FnInfo: FI, Args,
                            Loc, StartLoc: Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(CGF&: DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          Addr: DtorCGF.GetAddrOfLocalVar(VD: &Dst),
          /*Volatile=*/false, Ty: CGM.getContext().VoidPtrTy, Loc: Dst.getLocation());
      DtorCGF.emitDestroy(
          addr: Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), type: ASTTy,
          destroyer: DtorCGF.getDestroyer(destructionKind: ASTTy.isDestructedType()),
          useEHCleanupForArray: DtorCGF.needsEHCleanup(kind: ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(Ty: CGM.DefaultPtrTy);
    if (Ctor == nullptr) {
      Ctor = llvm::Constant::getNullValue(Ty: CGM.DefaultPtrTy);
    }
    if (Dtor == nullptr) {
      Dtor = llvm::Constant::getNullValue(Ty: CGM.DefaultPtrTy);
    }
    // No enclosing function: wrap the registration in a standalone init
    // function so it can be run as a global initializer.
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(Result: CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName(Parts: {"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          ty: InitFunctionTy, name: Name, FI: CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GD: GlobalDecl(), RetTy: CGM.getContext().VoidTy, Fn: InitFunction,
                            FnInfo: CGM.getTypes().arrangeNullaryFunction(), Args: ArgList,
                            Loc, StartLoc: Loc);
      emitThreadPrivateVarInit(CGF&: InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(CGF&: *CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1749
/// Register an offload entry for a function marked with an active
/// 'declare target indirect' attribute so the runtime can resolve indirect
/// calls to it on the device.
void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
                                                llvm::GlobalValue *GV) {
  std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
      OMPDeclareTargetDeclAttr::getActiveAttr(VD: FD);

  // We only need to handle active 'indirect' declare target functions.
  if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
    return;

  // Get a mangled name to store the new device global in.
  llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
      CGM, OMPBuilder, BeginLoc: FD->getCanonicalDecl()->getBeginLoc(), ParentName: FD->getName());
  SmallString<128> Name;
  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);

  // We need to generate a new global to hold the address of the indirectly
  // called device function. Doing this allows us to keep the visibility and
  // linkage of the associated function unchanged while allowing the runtime to
  // access its value.
  llvm::GlobalValue *Addr = GV;
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    // The function pointer lives in the program address space; the new
    // global holding it is placed in the default globals address space.
    llvm::PointerType *FnPtrTy = llvm::PointerType::get(
        C&: CGM.getLLVMContext(),
        AddressSpace: CGM.getModule().getDataLayout().getProgramAddressSpace());
    Addr = new llvm::GlobalVariable(
        CGM.getModule(), FnPtrTy,
        /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
        nullptr, llvm::GlobalValue::NotThreadLocal,
        CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
    // Protected visibility keeps the symbol resolvable by the runtime while
    // preventing preemption.
    Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
  }

  // Register the indirect Vtable:
  // This is similar to OMPTargetGlobalVarEntryIndirect, except that the
  // size field refers to the size of memory pointed to, not the size of
  // the pointer symbol itself (which is implicitly the size of a pointer).
  OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName: Name, Addr, VarSize: CGM.GetTargetTypeStoreSize(Ty: CGM.VoidPtrTy).getQuantity(),
      Flags: llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
      Linkage: llvm::GlobalValue::WeakODRLinkage);
}
1791
/// Register an offload entry for a C++ vtable so device code can use
/// virtual dispatch on objects mapped from the host.
void CGOpenMPRuntime::registerVTableOffloadEntry(llvm::GlobalVariable *VTable,
                                                 const VarDecl *VD) {
  // TODO: add logic to avoid duplicate vtable registrations per
  // translation unit; though for external linkage, this should no
  // longer be an issue - or at least we can avoid the issue by
  // checking for an existing offloading entry. But, perhaps the
  // better approach is to defer emission of the vtables and offload
  // entries until later (by tracking a list of items that need to be
  // emitted).

  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

  // Generate a new externally visible global to point to the
  // internally visible vtable. Doing this allows us to keep the
  // visibility and linkage of the associated vtable unchanged while
  // allowing the runtime to access its value. The externally
  // visible global var needs to be emitted with a unique mangled
  // name that won't conflict with similarly named (internal)
  // vtables in other translation units.

  // Register vtable with source location of dynamic object in map
  // clause.
  llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
      CGM, OMPBuilder, BeginLoc: VD->getCanonicalDecl()->getBeginLoc(),
      ParentName: VTable->getName());

  llvm::GlobalVariable *Addr = VTable;
  SmallString<128> AddrName;
  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name&: AddrName, EntryInfo);
  // Suffix distinguishes the indirection global from the entry name itself.
  AddrName.append(RHS: "addr");

  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    Addr = new llvm::GlobalVariable(
        CGM.getModule(), VTable->getType(),
        /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, VTable,
        AddrName,
        /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
        CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
    Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
  }
  // NOTE(review): unlike emitDeclareTargetFunction, the registration below
  // passes the VTable itself rather than the device-side 'Addr' indirection
  // global created above — confirm this asymmetry is intentional.
  OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName: AddrName, Addr: VTable,
      VarSize: CGM.getDataLayout().getTypeAllocSize(Ty: VTable->getInitializer()->getType()),
      Flags: llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable,
      Linkage: llvm::GlobalValue::WeakODRLinkage);
}
1838
1839void CGOpenMPRuntime::emitAndRegisterVTable(CodeGenModule &CGM,
1840 CXXRecordDecl *CXXRecord,
1841 const VarDecl *VD) {
1842 // Register C++ VTable to OpenMP Offload Entry if it's a new
1843 // CXXRecordDecl.
1844 if (CXXRecord && CXXRecord->isDynamicClass() &&
1845 !CGM.getOpenMPRuntime().VTableDeclMap.contains(Val: CXXRecord)) {
1846 auto Res = CGM.getOpenMPRuntime().VTableDeclMap.try_emplace(Key: CXXRecord, Args&: VD);
1847 if (Res.second) {
1848 CGM.EmitVTable(Class: CXXRecord);
1849 CodeGenVTables VTables = CGM.getVTables();
1850 llvm::GlobalVariable *VTablesAddr = VTables.GetAddrOfVTable(RD: CXXRecord);
1851 assert(VTablesAddr && "Expected non-null VTable address");
1852 CGM.getOpenMPRuntime().registerVTableOffloadEntry(VTable: VTablesAddr, VD);
1853 // Emit VTable for all the fields containing dynamic CXXRecord
1854 for (const FieldDecl *Field : CXXRecord->fields()) {
1855 if (CXXRecordDecl *RecordDecl = Field->getType()->getAsCXXRecordDecl())
1856 emitAndRegisterVTable(CGM, CXXRecord: RecordDecl, VD);
1857 }
1858 // Emit VTable for all dynamic parent class
1859 for (CXXBaseSpecifier &Base : CXXRecord->bases()) {
1860 if (CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl())
1861 emitAndRegisterVTable(CGM, CXXRecord: BaseDecl, VD);
1862 }
1863 }
1864 }
1865}
1866
/// Scan the map clauses of a target directive and register vtables for every
/// mapped variable whose type (directly, or through a reference/pointer) is a
/// dynamic C++ class.
void CGOpenMPRuntime::registerVTable(const OMPExecutableDirective &D) {
  // Register VTable by scanning through the map clause of OpenMP target region.
  // Get CXXRecordDecl and VarDecl from Expr.
  auto GetVTableDecl = [](const Expr *E) {
    QualType VDTy = E->getType();
    CXXRecordDecl *CXXRecord = nullptr;
    // Peel an lvalue reference, then a pointer, to reach the record type.
    if (const auto *RefType = VDTy->getAs<LValueReferenceType>())
      VDTy = RefType->getPointeeType();
    if (VDTy->isPointerType())
      CXXRecord = VDTy->getPointeeType()->getAsCXXRecordDecl();
    else
      CXXRecord = VDTy->getAsCXXRecordDecl();

    // Recover the underlying VarDecl: either a direct reference or the base
    // of a member access. Other expression forms yield a null VarDecl.
    const VarDecl *VD = nullptr;
    if (auto *DRE = dyn_cast<DeclRefExpr>(Val: E)) {
      VD = cast<VarDecl>(Val: DRE->getDecl());
    } else if (auto *MRE = dyn_cast<MemberExpr>(Val: E)) {
      if (auto *BaseDRE = dyn_cast<DeclRefExpr>(Val: MRE->getBase())) {
        if (auto *BaseVD = dyn_cast<VarDecl>(Val: BaseDRE->getDecl()))
          VD = BaseVD;
      }
    }
    return std::pair<CXXRecordDecl *, const VarDecl *>(CXXRecord, VD);
  };
  // Collect VTable from OpenMP map clause.
  for (const auto *C : D.getClausesOfKind<OMPMapClause>()) {
    for (const auto *E : C->varlist()) {
      auto DeclPair = GetVTableDecl(E);
      // Ensure VD is not null
      if (DeclPair.second)
        emitAndRegisterVTable(CGM, CXXRecord: DeclPair.first, VD: DeclPair.second);
    }
  }
}
1901
/// Return the address of a compiler-generated ("artificial") threadprivate
/// variable identified by \p Name. Uses a real TLS global when supported;
/// otherwise falls back to __kmpc_threadprivate_cached with a per-name cache.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName(Parts: {"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(T: VarType);
  llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
      Ty: VarLVType, Name: Twine(Name).concat(Suffix).str());
  // Fast path: native TLS — just mark the global thread-local.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(T: VarType));
  }
  // Slow path: runtime-managed copy, keyed by an internal cache variable.
  std::string CacheSuffix = getName(Parts: {"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc: SourceLocation()),
      getThreadID(CGF, Loc: SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(V: GAddr, DestTy: CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(V: CGF.getTypeSize(Ty: VarType), DestTy: CGM.SizeTy,
                                /*isSigned=*/false),
      OMPBuilder.getOrCreateInternalVariable(
          Ty: CGM.VoidPtrPtrTy,
          Name: Twine(Name).concat(Suffix).concat(Suffix: CacheSuffix).str())};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          V: CGF.EmitRuntimeCall(
              callee: OMPBuilder.getOrCreateRuntimeFunction(
                  M&: CGM.getModule(), FnID: OMPRTL___kmpc_threadprivate_cached),
              args: Args),
          DestTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(T: VarType));
}
1934
1935void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1936 const RegionCodeGenTy &ThenGen,
1937 const RegionCodeGenTy &ElseGen) {
1938 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1939
1940 // If the condition constant folds and can be elided, try to avoid emitting
1941 // the condition and the dead arm of the if/else.
1942 bool CondConstant;
1943 if (CGF.ConstantFoldsToSimpleInteger(Cond, Result&: CondConstant)) {
1944 if (CondConstant)
1945 ThenGen(CGF);
1946 else
1947 ElseGen(CGF);
1948 return;
1949 }
1950
1951 // Otherwise, the condition did not fold, or we couldn't elide it. Just
1952 // emit the conditional branch.
1953 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock(name: "omp_if.then");
1954 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock(name: "omp_if.else");
1955 llvm::BasicBlock *ContBlock = CGF.createBasicBlock(name: "omp_if.end");
1956 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock: ThenBlock, FalseBlock: ElseBlock, /*TrueCount=*/0);
1957
1958 // Emit the 'then' code.
1959 CGF.EmitBlock(BB: ThenBlock);
1960 ThenGen(CGF);
1961 CGF.EmitBranch(Block: ContBlock);
1962 // Emit the 'else' code if present.
1963 // There is no need to emit line number for unconditional branch.
1964 (void)ApplyDebugLocation::CreateEmpty(CGF);
1965 CGF.EmitBlock(BB: ElseBlock);
1966 ElseGen(CGF);
1967 // There is no need to emit line number for unconditional branch.
1968 (void)ApplyDebugLocation::CreateEmpty(CGF);
1969 CGF.EmitBranch(Block: ContBlock);
1970 // Emit the continuation block for code after the if.
1971 CGF.EmitBlock(BB: ContBlock, /*IsFinished=*/true);
1972}
1973
/// Emit a call launching \p OutlinedFn as an OpenMP parallel region. When an
/// 'if' clause is present, the false branch runs the region serialized on the
/// encountering thread; otherwise __kmpc_fork_call is emitted unconditionally.
void CGOpenMPRuntime::emitParallelCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
    ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
    llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
    OpenMPSeverityClauseKind Severity, const Expr *Message) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(C: CapturedVars.size()), // Number of captured vars
        OutlinedFn};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(in_start: std::begin(arr&: Args), in_end: std::end(arr&: Args));
    RealArgs.append(in_start: CapturedVars.begin(), in_end: CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, FnID: OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(callee: RTLFn, args: RealArgs);
  };
  // Serialized fallback: the region body runs on the encountering thread,
  // bracketed by __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                            M, FnID: OMPRTL___kmpc_serialized_parallel),
                        args: Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    RawAddress ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(Ty: CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(Val: CGF.Builder.getInt32(/*C*/ 0), Addr: ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(Elt: ThreadIDAddr.emitRawPointer(CGF));
    OutlinedFnArgs.push_back(Elt: ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(in_start: CapturedVars.begin(), in_end: CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    // handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(Kind: llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(Kind: llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, Args: OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                            M, FnID: OMPRTL___kmpc_end_serialized_parallel),
                        args: EndArgs);
  };
  // With no 'if' clause the fork path is emitted unconditionally.
  if (IfCond) {
    emitIfClause(CGF, Cond: IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2044
2045// If we're inside an (outlined) parallel region, use the region info's
2046// thread-ID variable (it is passed in a first argument of the outlined function
2047// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2048// regular serial code region, get thread ID by calling kmp_int32
2049// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2050// return the address of that temp.
2051Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2052 SourceLocation Loc) {
2053 if (auto *OMPRegionInfo =
2054 dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo))
2055 if (OMPRegionInfo->getThreadIDVariable())
2056 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2057
2058 llvm::Value *ThreadID = getThreadID(CGF, Loc);
2059 QualType Int32Ty =
2060 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2061 Address ThreadIDTemp = CGF.CreateMemTemp(T: Int32Ty, /*Name*/ ".threadid_temp.");
2062 CGF.EmitStoreOfScalar(value: ThreadID,
2063 lvalue: CGF.MakeAddrLValue(Addr: ThreadIDTemp, T: Int32Ty));
2064
2065 return ThreadIDTemp;
2066}
2067
2068llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2069 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2070 std::string Name = getName(Parts: {Prefix, "var"});
2071 return OMPBuilder.getOrCreateInternalVariable(Ty: KmpCriticalNameTy, Name);
2072}
2073
namespace {
/// Common pre(post)-action for different OpenMP constructs.
/// Wraps a region with an "enter" runtime call before it and an "exit" call
/// after it. When Conditional is set, the enter call's result gates the
/// region: a zero result skips straight to the continuation block, and the
/// caller must invoke Done() after the region to close the diamond.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  // Continuation block; only created (in Enter) when Conditional is true.
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(callee: EnterCallee, args: EnterArgs);
    if (Conditional) {
      // Branch on the runtime call's result: non-zero enters the region.
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(Arg: EnterRes);
      auto *ThenBlock = CGF.createBasicBlock(name: "omp_if.then");
      ContBlock = CGF.createBasicBlock(name: "omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(Cond: CallBool, True: ThenBlock, False: ContBlock);
      CGF.EmitBlock(BB: ThenBlock);
    }
  }
  // Must only be called after Enter() with Conditional == true, since it
  // emits the branch into ContBlock created there.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(Block: ContBlock);
    CGF.EmitBlock(BB: ContBlock, IsFinished: true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(callee: ExitCallee, args: ExitArgs);
  }
};
} // anonymous namespace
2112
/// Emit a named 'critical' region: the body is bracketed by __kmpc_critical
/// (or __kmpc_critical_with_hint when a hint expression is present) and
/// __kmpc_end_critical, both taking the region's named lock variable.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::FunctionCallee RuntimeFcn = OMPBuilder.getOrCreateRuntimeFunction(
      M&: CGM.getModule(),
      FnID: Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical);
  llvm::Value *LockVar = getCriticalRegionLock(CriticalName);
  // The lock global may live in a different address space than the runtime
  // function's parameter (e.g. on GPU targets); cast if they disagree.
  unsigned LockVarArgIdx = 2;
  if (cast<llvm::GlobalVariable>(Val: LockVar)->getAddressSpace() !=
      RuntimeFcn.getFunctionType()
          ->getParamType(i: LockVarArgIdx)
          ->getPointerAddressSpace())
    LockVar = CGF.Builder.CreateAddrSpaceCast(
        V: LockVar, DestTy: RuntimeFcn.getFunctionType()->getParamType(i: LockVarArgIdx));
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         LockVar};
  // The enter call optionally appends the hint value; the exit call does not.
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(arr&: Args),
                                                std::end(arr&: Args));
  if (Hint) {
    EnterArgs.push_back(Elt: CGF.Builder.CreateIntCast(
        V: CGF.EmitScalarExpr(E: Hint), DestTy: CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(RuntimeFcn, EnterArgs,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_critical),
                        Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, InnermostKind: OMPD_critical, CodeGen: CriticalOpGen);
}
2149
/// Emit a 'master' region: the body runs only when __kmpc_master returns
/// non-zero (i.e. on the master thread), followed by __kmpc_end_master.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Conditional: only the master thread executes the region body.
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, InnermostKind: OMPD_master, CodeGen: MasterOpGen);
  // Close the conditional diamond opened by the action's Enter().
  Action.Done(CGF);
}
2172
/// Emit a 'masked' region: the body runs only on the thread selected by the
/// filter expression (thread 0 when no filter is given), gated by
/// __kmpc_masked / __kmpc_end_masked.
void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(iden_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(E: Filter, IgnoreResultAssign: CGF.Int32Ty)
                               : llvm::ConstantInt::get(Ty: CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  // The end call takes no filter argument, hence the separate array.
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, InnermostKind: OMPD_masked, CodeGen: MaskedOpGen);
  // Close the conditional diamond opened by the action's Enter().
  Action.Done(CGF);
}
2201
/// Emit a 'taskyield' construct, either via the OpenMPIRBuilder (when
/// enabled) or as a direct __kmpc_omp_taskyield call.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(Loc: CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(Ty: CGM.IntTy, /*V=*/0, /*isSigned=*/IsSigned: true)};
    CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_taskyield),
                        args: Args);
  }

  // A yield is a scheduling point: untied tasks may resume on a different
  // part, so emit the untied-task switch if we are in such a region.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
2221
/// Emit a 'taskgroup' region, bracketing the body with __kmpc_taskgroup and
/// __kmpc_end_taskgroup (which waits for all descendant tasks).
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, InnermostKind: OMPD_taskgroup, CodeGen: TaskgroupOpGen);
}
2241
2242/// Given an array of pointers to variables, project the address of a
2243/// given variable.
2244static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2245 unsigned Index, const VarDecl *Var) {
2246 // Pull out the pointer to the variable.
2247 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Addr: Array, Index);
2248 llvm::Value *Ptr = CGF.Builder.CreateLoad(Addr: PtrAddr);
2249
2250 llvm::Type *ElemTy = CGF.ConvertTypeForMem(T: Var->getType());
2251 return Address(Ptr, ElemTy, CGF.getContext().getDeclAlign(D: Var));
2252}
2253
/// Build the internal helper passed to __kmpc_copyprivate:
///   void copy_func(void *LHSArg, void *RHSArg);
/// Both arguments point to arrays of void* — one pointer per copyprivate
/// variable — and the helper performs one assignment per variable via the
/// AST-provided assignment expressions, so user-defined copy assignment is
/// honored. Returns the new internal-linkage llvm::Function.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(Elt: &LHSArg);
  Args.push_back(Elt: &RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
  // Unique, mangled helper name, e.g. ".omp.copyprivate.copy_func".
  std::string Name =
      CGM.getOpenMPRuntime().getName(Parts: {"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(Ty: CGM.getTypes().GetFunctionType(Info: CGFI),
                                    Linkage: llvm::GlobalValue::InternalLinkage, N: Name,
                                    M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: CGFI);
  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
    Fn->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo: CGFI, Args, Loc, StartLoc: Loc);
  // Reinterpret the raw void* parameters as arrays of pointers:
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  V: CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(VD: &LHSArg)),
                  DestTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  V: CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(VD: &RHSArg)),
                  DestTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
              ArgsElemType, CGF.getPointerAlign());
  // Emit one element-wise copy per copyprivate variable:
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(Val: cast<DeclRefExpr>(Val: DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, Array: LHS, Index: I, Var: DestVar);

    const auto *SrcVar =
        cast<VarDecl>(Val: cast<DeclRefExpr>(Val: SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, Array: RHS, Index: I, Var: SrcVar);

    // The copyprivate variable itself supplies the type used for the copy.
    const auto *VD = cast<DeclRefExpr>(Val: CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(OriginalType: Type, DestAddr, SrcAddr, DestVD: DestVar, SrcVD: SrcVar, Copy: AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2311
/// Emit a 'single' region: only one thread executes SingleOpGen, and if
/// copyprivate variables are present, their values are broadcast to the other
/// threads of the team via __kmpc_copyprivate.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // All four copyprivate-related arrays are parallel: one entry per variable.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // Generated shape:
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // 'did_it' records whether this thread was the one that ran the region; the
  // copyprivate runtime call uses it to pick the broadcast source.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(T: KmpInt32Ty, Name: ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(Val: CGF.Builder.getInt32(C: 0), Addr: DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Conditional action: the region body runs only when __kmpc_single returns
  // non-zero for this thread.
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, InnermostKind: OMPD_single, CodeGen: SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;  (still inside the single-executing branch)
    CGF.Builder.CreateStore(Val: CGF.Builder.getInt32(C: 1), Addr: DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        EltTy: C.VoidPtrTy, ArySize: ArraySize, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(T: CopyprivateArrayTy, Name: ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(Addr: CopyprivateList, Index: I);
      CGF.Builder.CreateStore(
          Val: CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              V: CGF.EmitLValue(E: CopyprivateVars[I]).getPointer(CGF),
              DestTy: CGF.VoidPtrTy),
          Addr: Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): this call site passes SrcExprs into the helper's DestExprs
    // slot and DstExprs into its SrcExprs slot; the naming is swapped relative
    // to the helper's parameters — verify intent before "fixing" either side.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, ArgsElemType: CGF.ConvertTypeForMem(T: CopyprivateArrayTy), CopyprivateVars,
        DestExprs: SrcExprs, SrcExprs: DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(Ty: CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr: CopyprivateList, Ty: CGF.VoidPtrTy, ElementTy: CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(Addr: DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.emitRawPointer(CGF),       // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_copyprivate),
                        args: Args);
  }
}
2398
2399void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2400 const RegionCodeGenTy &OrderedOpGen,
2401 SourceLocation Loc, bool IsThreads) {
2402 if (!CGF.HaveInsertPoint())
2403 return;
2404 // __kmpc_ordered(ident_t *, gtid);
2405 // OrderedOpGen();
2406 // __kmpc_end_ordered(ident_t *, gtid);
2407 // Prepare arguments and build a call to __kmpc_ordered
2408 if (IsThreads) {
2409 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2410 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2411 M&: CGM.getModule(), FnID: OMPRTL___kmpc_ordered),
2412 Args,
2413 OMPBuilder.getOrCreateRuntimeFunction(
2414 M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_ordered),
2415 Args);
2416 OrderedOpGen.setAction(Action);
2417 emitInlinedDirective(CGF, InnermostKind: OMPD_ordered, CodeGen: OrderedOpGen);
2418 return;
2419 }
2420 emitInlinedDirective(CGF, InnermostKind: OMPD_ordered, CodeGen: OrderedOpGen);
2421}
2422
2423unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2424 unsigned Flags;
2425 if (Kind == OMPD_for)
2426 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2427 else if (Kind == OMPD_sections)
2428 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2429 else if (Kind == OMPD_single)
2430 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2431 else if (Kind == OMPD_barrier)
2432 Flags = OMP_IDENT_BARRIER_EXPL;
2433 else
2434 Flags = OMP_IDENT_BARRIER_IMPL;
2435 return Flags;
2436}
2437
2438void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2439 CodeGenFunction &CGF, const OMPLoopDirective &S,
2440 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2441 // Check if the loop directive is actually a doacross loop directive. In this
2442 // case choose static, 1 schedule.
2443 if (llvm::any_of(
2444 Range: S.getClausesOfKind<OMPOrderedClause>(),
2445 P: [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2446 ScheduleKind = OMPC_SCHEDULE_static;
2447 // Chunk size is 1 in this case.
2448 llvm::APInt ChunkSize(32, 1);
2449 ChunkExpr = IntegerLiteral::Create(
2450 C: CGF.getContext(), V: ChunkSize,
2451 type: CGF.getContext().getIntTypeForBitwidth(DestWidth: 32, /*Signed=*/0),
2452 l: SourceLocation());
2453 }
2454}
2455
/// Emit a barrier for the given directive kind. Inside a cancellable region
/// (unless ForceSimpleCall) the cancellation-aware __kmpc_cancel_barrier is
/// used and, with EmitChecks, a non-zero result branches out of the construct.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    // Delegate barrier emission (including cancel handling) to the builder.
    llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
        cantFail(ValOrErr: OMPBuilder.createBarrier(Loc: CGF.Builder, Kind, ForceSimpleCall,
                                         CheckCancelFlag: EmitChecks));
    CGF.Builder.restoreIP(IP: AfterIP);
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      // Cancellation-aware barrier: non-zero result means the construct was
      // cancelled by some thread.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(),
                                              FnID: OMPRTL___kmpc_cancel_barrier),
          args: Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: ".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(name: ".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Arg: Result);
        CGF.Builder.CreateCondBr(Cond: Cmp, True: ExitBB, False: ContBB);
        CGF.EmitBlock(BB: ExitBB);
        // exit from construct; branch through cleanups to the cancel target.
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(Kind: OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(Dest: CancelDestination);
        CGF.EmitBlock(BB: ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Plain barrier for the non-cancellable case.
  CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                          M&: CGM.getModule(), FnID: OMPRTL___kmpc_barrier),
                      args: Args);
}
2507
2508void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2509 Expr *ME, bool IsFatal) {
2510 llvm::Value *MVL = ME ? CGF.EmitScalarExpr(E: ME)
2511 : llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
2512 // Build call void __kmpc_error(ident_t *loc, int severity, const char
2513 // *message)
2514 llvm::Value *Args[] = {
2515 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/EmitLoc: true),
2516 llvm::ConstantInt::get(Ty: CGM.Int32Ty, V: IsFatal ? 2 : 1),
2517 CGF.Builder.CreatePointerCast(V: MVL, DestTy: CGM.Int8PtrTy)};
2518 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
2519 M&: CGM.getModule(), FnID: OMPRTL___kmpc_error),
2520 args: Args);
2521}
2522
2523/// Map the OpenMP loop schedule to the runtime enumeration.
2524static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2525 bool Chunked, bool Ordered) {
2526 switch (ScheduleKind) {
2527 case OMPC_SCHEDULE_static:
2528 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2529 : (Ordered ? OMP_ord_static : OMP_sch_static);
2530 case OMPC_SCHEDULE_dynamic:
2531 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2532 case OMPC_SCHEDULE_guided:
2533 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2534 case OMPC_SCHEDULE_runtime:
2535 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2536 case OMPC_SCHEDULE_auto:
2537 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2538 case OMPC_SCHEDULE_unknown:
2539 assert(!Chunked && "chunk was specified but schedule kind not known");
2540 return Ordered ? OMP_ord_static : OMP_sch_static;
2541 }
2542 llvm_unreachable("Unexpected runtime schedule");
2543}
2544
2545/// Map the OpenMP distribute schedule to the runtime enumeration.
2546static OpenMPSchedType
2547getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2548 // only static is allowed for dist_schedule
2549 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2550}
2551
2552bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2553 bool Chunked) const {
2554 OpenMPSchedType Schedule =
2555 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2556 return Schedule == OMP_sch_static;
2557}
2558
2559bool CGOpenMPRuntime::isStaticNonchunked(
2560 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2561 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2562 return Schedule == OMP_dist_sch_static;
2563}
2564
2565bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2566 bool Chunked) const {
2567 OpenMPSchedType Schedule =
2568 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2569 return Schedule == OMP_sch_static_chunked;
2570}
2571
2572bool CGOpenMPRuntime::isStaticChunked(
2573 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2574 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2575 return Schedule == OMP_dist_sch_static_chunked;
2576}
2577
2578bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2579 OpenMPSchedType Schedule =
2580 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2581 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2582 return Schedule != OMP_sch_static;
2583}
2584
2585static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2586 OpenMPScheduleClauseModifier M1,
2587 OpenMPScheduleClauseModifier M2) {
2588 int Modifier = 0;
2589 switch (M1) {
2590 case OMPC_SCHEDULE_MODIFIER_monotonic:
2591 Modifier = OMP_sch_modifier_monotonic;
2592 break;
2593 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2594 Modifier = OMP_sch_modifier_nonmonotonic;
2595 break;
2596 case OMPC_SCHEDULE_MODIFIER_simd:
2597 if (Schedule == OMP_sch_static_chunked)
2598 Schedule = OMP_sch_static_balanced_chunked;
2599 break;
2600 case OMPC_SCHEDULE_MODIFIER_last:
2601 case OMPC_SCHEDULE_MODIFIER_unknown:
2602 break;
2603 }
2604 switch (M2) {
2605 case OMPC_SCHEDULE_MODIFIER_monotonic:
2606 Modifier = OMP_sch_modifier_monotonic;
2607 break;
2608 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2609 Modifier = OMP_sch_modifier_nonmonotonic;
2610 break;
2611 case OMPC_SCHEDULE_MODIFIER_simd:
2612 if (Schedule == OMP_sch_static_chunked)
2613 Schedule = OMP_sch_static_balanced_chunked;
2614 break;
2615 case OMPC_SCHEDULE_MODIFIER_last:
2616 case OMPC_SCHEDULE_MODIFIER_unknown:
2617 break;
2618 }
2619 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2620 // If the static schedule kind is specified or if the ordered clause is
2621 // specified, and if the nonmonotonic modifier is not specified, the effect is
2622 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2623 // modifier is specified, the effect is as if the nonmonotonic modifier is
2624 // specified.
2625 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2626 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2627 Schedule == OMP_sch_static_balanced_chunked ||
2628 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2629 Schedule == OMP_dist_sch_static_chunked ||
2630 Schedule == OMP_dist_sch_static))
2631 Modifier = OMP_sch_modifier_nonmonotonic;
2632 }
2633 return Schedule | Modifier;
2634}
2635
/// Emit the __kmpc_dispatch_init_* call that starts a dynamically scheduled
/// worksharing loop. IVSize/IVSigned select the 4/8-byte (un)signed runtime
/// entry point; DispatchValues supplies bounds and optional chunk.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind: ScheduleKind.Schedule, Chunked: DispatchValues.Chunk != nullptr, Ordered);
  // Non-ordered static schedules go through the static-init path, not here.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(N: IVSize, C: 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(C: addMonoNonMonoModifier(
          CGM, Schedule, M1: ScheduleKind.M1, M2: ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                  // Lower
      DispatchValues.UB,                                  // Upper
      CGF.Builder.getIntN(N: IVSize, C: 1),                     // Stride
      Chunk                                               // Chunk
  };
  CGF.EmitRuntimeCall(callee: OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
                      args: Args);
}
2669
2670void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
2671 SourceLocation Loc) {
2672 if (!CGF.HaveInsertPoint())
2673 return;
2674 // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
2675 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2676 CGF.EmitRuntimeCall(callee: OMPBuilder.createDispatchDeinitFunction(), args: Args);
2677}
2678
/// Shared helper that emits the __kmpc_for_static_init_* call for both
/// worksharing loops and distribute loops. Validates that only static
/// schedule kinds reach this path and fills in the default chunk of 1 for
/// non-chunked schedules.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  // Ordered loops use the dispatch path, never static init.
  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(N: Values.IVSize, C: 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  // The runtime writes lastiter/LB/UB/stride through the passed pointers.
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(C: addMonoNonMonoModifier(CGM&: CGF.CGM, Schedule, M1,
                                                 M2)), // Schedule type
      Values.IL.emitRawPointer(CGF),                   // &isLastIter
      Values.LB.emitRawPointer(CGF),                   // &LB
      Values.UB.emitRawPointer(CGF),                   // &UB
      Values.ST.emitRawPointer(CGF),                   // &Stride
      CGF.Builder.getIntN(N: Values.IVSize, C: 1),           // Incr
      Chunk                                            // Chunk
  };
  CGF.EmitRuntimeCall(callee: ForStaticInitFunction, args: Args);
}
2727
/// Emit static-init for a worksharing (loop/sections) directive: computes the
/// runtime schedule from the clause, picks loop vs. sections location flags,
/// and delegates the actual call to emitForStaticInitCall.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind: ScheduleKind.Schedule, Chunked: Values.Chunk != nullptr, Ordered: Values.Ordered);
  assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
         "Expected loop-based or sections-based directive.");
  // ident_t flags tell the runtime whether this is a loop or sections region.
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                                    Flags: isOpenMPLoopDirective(DKind)
                                                        ? OMP_IDENT_WORK_LOOP
                                                        : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      OMPBuilder.createForStaticInitFunction(IVSize: Values.IVSize, IVSigned: Values.IVSigned,
                                             IsGPUDistribute: false);
  // Attach an artificial debug location to the runtime call.
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: Loc);
  emitForStaticInitCall(CGF, UpdateLocation: UpdatedLocation, ThreadId, ForStaticInitFunction: StaticInitFunction,
                        Schedule: ScheduleNum, M1: ScheduleKind.M1, M2: ScheduleKind.M2, Values);
}
2749
2750void CGOpenMPRuntime::emitDistributeStaticInit(
2751 CodeGenFunction &CGF, SourceLocation Loc,
2752 OpenMPDistScheduleClauseKind SchedKind,
2753 const CGOpenMPRuntime::StaticRTInput &Values) {
2754 OpenMPSchedType ScheduleNum =
2755 getRuntimeSchedule(ScheduleKind: SchedKind, Chunked: Values.Chunk != nullptr);
2756 llvm::Value *UpdatedLocation =
2757 emitUpdateLocation(CGF, Loc, Flags: OMP_IDENT_WORK_DISTRIBUTE);
2758 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2759 llvm::FunctionCallee StaticInitFunction;
2760 bool isGPUDistribute =
2761 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU();
2762 StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2763 IVSize: Values.IVSize, IVSigned: Values.IVSigned, IsGPUDistribute: isGPUDistribute);
2764
2765 emitForStaticInitCall(CGF, UpdateLocation: UpdatedLocation, ThreadId, ForStaticInitFunction: StaticInitFunction,
2766 Schedule: ScheduleNum, M1: OMPC_SCHEDULE_MODIFIER_unknown,
2767 M2: OMPC_SCHEDULE_MODIFIER_unknown, Values);
2768}
2769
/// Emit the static-fini runtime call that closes a statically scheduled
/// region. Picks the distribute/loop/sections location flag and, on GPU
/// target devices, the dedicated distribute fini entry point.
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  assert((DKind == OMPD_distribute || DKind == OMPD_for ||
          DKind == OMPD_sections) &&
         "Expected distribute, for, or sections directive kind");
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  // Flag selection: distribute (incl. target_teams_loop) > loop > sections.
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         Flags: isOpenMPDistributeDirective(DKind) ||
                                 (DKind == OMPD_target_teams_loop)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  // Attach an artificial debug location to the runtime call.
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: Loc);
  if (isOpenMPDistributeDirective(DKind) &&
      CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU())
    CGF.EmitRuntimeCall(
        callee: OMPBuilder.getOrCreateRuntimeFunction(
            M&: CGM.getModule(), FnID: OMPRTL___kmpc_distribute_static_fini),
        args: Args);
  else
    CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_for_static_fini),
                        args: Args);
}
2800
2801void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2802 SourceLocation Loc,
2803 unsigned IVSize,
2804 bool IVSigned) {
2805 if (!CGF.HaveInsertPoint())
2806 return;
2807 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2808 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2809 CGF.EmitRuntimeCall(callee: OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2810 args: Args);
2811}
2812
2813llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2814 SourceLocation Loc, unsigned IVSize,
2815 bool IVSigned, Address IL,
2816 Address LB, Address UB,
2817 Address ST) {
2818 // Call __kmpc_dispatch_next(
2819 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2820 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2821 // kmp_int[32|64] *p_stride);
2822 llvm::Value *Args[] = {
2823 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2824 IL.emitRawPointer(CGF), // &isLastIter
2825 LB.emitRawPointer(CGF), // &Lower
2826 UB.emitRawPointer(CGF), // &Upper
2827 ST.emitRawPointer(CGF) // &Stride
2828 };
2829 llvm::Value *Call = CGF.EmitRuntimeCall(
2830 callee: OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), args: Args);
2831 return CGF.EmitScalarConversion(
2832 Src: Call, SrcTy: CGF.getContext().getIntTypeForBitwidth(DestWidth: 32, /*Signed=*/1),
2833 DstTy: CGF.getContext().BoolTy, Loc);
2834}
2835
2836llvm::Value *CGOpenMPRuntime::emitMessageClause(CodeGenFunction &CGF,
2837 const Expr *Message,
2838 SourceLocation Loc) {
2839 if (!Message)
2840 return llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
2841 return CGF.EmitScalarExpr(E: Message);
2842}
2843
2844llvm::Value *
2845CGOpenMPRuntime::emitSeverityClause(OpenMPSeverityClauseKind Severity,
2846 SourceLocation Loc) {
2847 // OpenMP 6.0, 10.4: "If no severity clause is specified then the effect is
2848 // as if sev-level is fatal."
2849 return llvm::ConstantInt::get(Ty: CGM.Int32Ty,
2850 V: Severity == OMPC_SEVERITY_warning ? 1 : 2);
2851}
2852
/// Emit the runtime call that records the 'num_threads' clause value for the
/// following parallel region. With the 'strict' modifier, the strict variant
/// is called and the severity/message clause values are appended.
void CGOpenMPRuntime::emitNumThreadsClause(
    CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
    OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity,
    SourceLocation SeverityLoc, const Expr *Message,
    SourceLocation MessageLoc) {
  if (!CGF.HaveInsertPoint())
    return;
  // The runtime takes the thread count as a signed i32.
  llvm::SmallVector<llvm::Value *, 4> Args(
      {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
       CGF.Builder.CreateIntCast(V: NumThreads, DestTy: CGF.Int32Ty, /*isSigned*/ true)});
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  // or __kmpc_push_num_threads_strict(&loc, global_tid, num_threads, severity,
  // message) if strict modifier is used.
  RuntimeFunction FnID = OMPRTL___kmpc_push_num_threads;
  if (Modifier == OMPC_NUMTHREADS_strict) {
    FnID = OMPRTL___kmpc_push_num_threads_strict;
    Args.push_back(Elt: emitSeverityClause(Severity, Loc: SeverityLoc));
    Args.push_back(Elt: emitMessageClause(CGF, Message, Loc: MessageLoc));
  }
  CGF.EmitRuntimeCall(
      callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(), FnID), args: Args);
}
2875
2876void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2877 ProcBindKind ProcBind,
2878 SourceLocation Loc) {
2879 if (!CGF.HaveInsertPoint())
2880 return;
2881 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2882 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2883 llvm::Value *Args[] = {
2884 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2885 llvm::ConstantInt::get(Ty: CGM.IntTy, V: unsigned(ProcBind), /*isSigned=*/IsSigned: true)};
2886 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
2887 M&: CGM.getModule(), FnID: OMPRTL___kmpc_push_proc_bind),
2888 args: Args);
2889}
2890
2891void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2892 SourceLocation Loc, llvm::AtomicOrdering AO) {
2893 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2894 OMPBuilder.createFlush(Loc: CGF.Builder);
2895 } else {
2896 if (!CGF.HaveInsertPoint())
2897 return;
2898 // Build call void __kmpc_flush(ident_t *loc)
2899 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
2900 M&: CGM.getModule(), FnID: OMPRTL___kmpc_flush),
2901 args: emitUpdateLocation(CGF, Loc));
2902 }
2903}
2904
namespace {
/// Indexes of fields for type kmp_task_t. The enumerator order is the field
/// order used when this file indexes the task record, so it must not be
/// reordered independently of the record construction code.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2930
/// Emit the offloading entries and the associated module metadata via the
/// OpenMPIRBuilder, wiring its error callback to Clang diagnostics.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
    return;

  // Translate builder-reported entry errors into Clang diagnostics, first
  // mapping the entry's (DeviceID, FileID, Line) back to a SourceLocation by
  // scanning the SourceManager's file table.
  llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
      [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
             const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
    SourceLocation Loc;
    if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
      for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                E = CGM.getContext().getSourceManager().fileinfo_end();
           I != E; ++I) {
        if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
            I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
          Loc = CGM.getContext().getSourceManager().translateFileLineCol(
              SourceFile: I->getFirst(), Line: EntryInfo.Line, Col: 1);
          break;
        }
      }
    }
    switch (Kind) {
    case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
      CGM.getDiags().Report(Loc,
                            DiagID: diag::err_target_region_offloading_entry_incorrect)
          << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
      CGM.getDiags().Report(
          Loc, DiagID: diag::err_target_var_offloading_entry_incorrect_with_parent)
          << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
      CGM.getDiags().Report(DiagID: diag::err_target_var_offloading_entry_incorrect);
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR: {
      // No tablegen'd diagnostic exists for this case; build one ad hoc.
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          L: DiagnosticsEngine::Error, FormatString: "Offloading entry for indirect declare "
                                    "target variable is incorrect: the "
                                    "address is invalid.");
      CGM.getDiags().Report(DiagID);
    } break;
    }
  };

  OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFunction&: ErrorReportFn);
}
2979
2980void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2981 if (!KmpRoutineEntryPtrTy) {
2982 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2983 ASTContext &C = CGM.getContext();
2984 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2985 FunctionProtoType::ExtProtoInfo EPI;
2986 KmpRoutineEntryPtrQTy = C.getPointerType(
2987 T: C.getFunctionType(ResultTy: KmpInt32Ty, Args: KmpRoutineEntryTyArgs, EPI));
2988 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(T: KmpRoutineEntryPtrQTy);
2989 }
2990}
2991
2992namespace {
2993struct PrivateHelpersTy {
2994 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
2995 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
2996 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
2997 PrivateElemInit(PrivateElemInit) {}
2998 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
2999 const Expr *OriginalRef = nullptr;
3000 const VarDecl *Original = nullptr;
3001 const VarDecl *PrivateCopy = nullptr;
3002 const VarDecl *PrivateElemInit = nullptr;
3003 bool isLocalPrivate() const {
3004 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3005 }
3006};
3007typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3008} // anonymous namespace
3009
3010static bool isAllocatableDecl(const VarDecl *VD) {
3011 const VarDecl *CVD = VD->getCanonicalDecl();
3012 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3013 return false;
3014 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3015 // Use the default allocation.
3016 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3017 !AA->getAllocator());
3018}
3019
3020static RecordDecl *
3021createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3022 if (!Privates.empty()) {
3023 ASTContext &C = CGM.getContext();
3024 // Build struct .kmp_privates_t. {
3025 // /* private vars */
3026 // };
3027 RecordDecl *RD = C.buildImplicitRecord(Name: ".kmp_privates.t");
3028 RD->startDefinition();
3029 for (const auto &Pair : Privates) {
3030 const VarDecl *VD = Pair.second.Original;
3031 QualType Type = VD->getType().getNonReferenceType();
3032 // If the private variable is a local variable with lvalue ref type,
3033 // allocate the pointer instead of the pointee type.
3034 if (Pair.second.isLocalPrivate()) {
3035 if (VD->getType()->isLValueReferenceType())
3036 Type = C.getPointerType(T: Type);
3037 if (isAllocatableDecl(VD))
3038 Type = C.getPointerType(T: Type);
3039 }
3040 FieldDecl *FD = addFieldToRecordDecl(C, DC: RD, FieldTy: Type);
3041 if (VD->hasAttrs()) {
3042 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3043 E(VD->getAttrs().end());
3044 I != E; ++I)
3045 FD->addAttr(A: *I);
3046 }
3047 }
3048 RD->completeDefinition();
3049 return RD;
3050 }
3051 return nullptr;
3052}
3053
3054static RecordDecl *
3055createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3056 QualType KmpInt32Ty,
3057 QualType KmpRoutineEntryPointerQTy) {
3058 ASTContext &C = CGM.getContext();
3059 // Build struct kmp_task_t {
3060 // void * shareds;
3061 // kmp_routine_entry_t routine;
3062 // kmp_int32 part_id;
3063 // kmp_cmplrdata_t data1;
3064 // kmp_cmplrdata_t data2;
3065 // For taskloops additional fields:
3066 // kmp_uint64 lb;
3067 // kmp_uint64 ub;
3068 // kmp_int64 st;
3069 // kmp_int32 liter;
3070 // void * reductions;
3071 // };
3072 RecordDecl *UD = C.buildImplicitRecord(Name: "kmp_cmplrdata_t", TK: TagTypeKind::Union);
3073 UD->startDefinition();
3074 addFieldToRecordDecl(C, DC: UD, FieldTy: KmpInt32Ty);
3075 addFieldToRecordDecl(C, DC: UD, FieldTy: KmpRoutineEntryPointerQTy);
3076 UD->completeDefinition();
3077 CanQualType KmpCmplrdataTy = C.getCanonicalTagType(TD: UD);
3078 RecordDecl *RD = C.buildImplicitRecord(Name: "kmp_task_t");
3079 RD->startDefinition();
3080 addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
3081 addFieldToRecordDecl(C, DC: RD, FieldTy: KmpRoutineEntryPointerQTy);
3082 addFieldToRecordDecl(C, DC: RD, FieldTy: KmpInt32Ty);
3083 addFieldToRecordDecl(C, DC: RD, FieldTy: KmpCmplrdataTy);
3084 addFieldToRecordDecl(C, DC: RD, FieldTy: KmpCmplrdataTy);
3085 if (isOpenMPTaskLoopDirective(DKind: Kind)) {
3086 QualType KmpUInt64Ty =
3087 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3088 QualType KmpInt64Ty =
3089 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3090 addFieldToRecordDecl(C, DC: RD, FieldTy: KmpUInt64Ty);
3091 addFieldToRecordDecl(C, DC: RD, FieldTy: KmpUInt64Ty);
3092 addFieldToRecordDecl(C, DC: RD, FieldTy: KmpInt64Ty);
3093 addFieldToRecordDecl(C, DC: RD, FieldTy: KmpInt32Ty);
3094 addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
3095 }
3096 RD->completeDefinition();
3097 return RD;
3098}
3099
3100static RecordDecl *
3101createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3102 ArrayRef<PrivateDataTy> Privates) {
3103 ASTContext &C = CGM.getContext();
3104 // Build struct kmp_task_t_with_privates {
3105 // kmp_task_t task_data;
3106 // .kmp_privates_t. privates;
3107 // };
3108 RecordDecl *RD = C.buildImplicitRecord(Name: "kmp_task_t_with_privates");
3109 RD->startDefinition();
3110 addFieldToRecordDecl(C, DC: RD, FieldTy: KmpTaskTQTy);
3111 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3112 addFieldToRecordDecl(C, DC: RD, FieldTy: C.getCanonicalTagType(TD: PrivateRD));
3113 RD->completeDefinition();
3114 return RD;
3115}
3116
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
/// The proxy unpacks the fields of the task descriptor and forwards them to
/// the outlined \p TaskFunction; the returned function has internal linkage.
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Build the two implicit parameters: the global thread id and the pointer
  // to the kmp_task_t_with_privates descriptor (marked restrict).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(Elt: &GtidArg);
  Args.push_back(Elt: &TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: KmpInt32Ty, args: Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(Info: TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(Parts: {"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      Ty: TaskEntryTy, Linkage: llvm::GlobalValue::InternalLinkage, N: Name, M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: TaskEntry, FI: TaskEntryFnInfo);
  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
    TaskEntry->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: KmpInt32Ty, Fn: TaskEntry, FnInfo: TaskEntryFnInfo, Args,
                    Loc, StartLoc: Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  // Load the gtid parameter and dereference the task descriptor pointer.
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      Addr: CGF.GetAddrOfLocalVar(VD: &GtidArg), /*Volatile=*/false, Ty: KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      Ptr: CGF.GetAddrOfLocalVar(VD: &TaskTypeArg),
      PtrTy: KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      KmpTaskTWithPrivatesQTy->castAsRecordDecl();
  // Field 0 of the wrapper record is the embedded kmp_task_t header.
  LValue Base =
      CGF.EmitLValueForField(Base: TDBase, Field: *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, Field: *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load the shareds pointer and cast it to the expected shareds record type.
  auto SharedsFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, Field: *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      V: CGF.EmitLoadOfScalar(lvalue: SharedsLVal, Loc),
      DestTy: CGF.ConvertTypeForMem(T: SharedsPtrTy));

  // Field 1 of the wrapper record, when present, holds the privates; pass a
  // null pointer when there are no privatized variables.
  auto PrivatesFI = std::next(x: KmpTaskTWithPrivatesQTyRD->field_begin(), n: 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(Base: TDBase, Field: *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        V: PrivatesLVal.getPointer(CGF), DestTy: CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
  }

  // Arguments common to task and taskloop entries, ending with the task
  // descriptor itself cast to void*.
  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(Addr: TDBase.getAddress(),
                                               Ty: CGF.VoidPtrTy, ElementTy: CGF.Int8Ty)
          .emitRawPointer(CGF)};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(arr&: CommonArgs),
                                          std::end(arr&: CommonArgs));
  if (isOpenMPTaskLoopDirective(DKind: Kind)) {
    // Taskloops additionally forward lb, ub, st, liter and reductions loaded
    // from the descriptor.
    auto LBFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, Field: *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(lvalue: LBLVal, Loc);
    auto UBFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, Field: *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(lvalue: UBLVal, Loc);
    auto StFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, Field: *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(lvalue: StLVal, Loc);
    auto LIFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, Field: *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(lvalue: LILVal, Loc);
    auto RFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, Field: *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(lvalue: RLVal, Loc);
    CallArgs.push_back(Elt: LBParam);
    CallArgs.push_back(Elt: UBParam);
    CallArgs.push_back(Elt: StParam);
    CallArgs.push_back(Elt: LIParam);
    CallArgs.push_back(Elt: RParam);
  }
  CallArgs.push_back(Elt: SharedsParam);

  // Call the outlined task body, then return 0 as the runtime expects.
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, OutlinedFn: TaskFunction,
                                                  Args: CallArgs);
  CGF.EmitStoreThroughLValue(Src: RValue::get(V: CGF.Builder.getInt32(/*C=*/0)),
                             Dst: CGF.MakeAddrLValue(Addr: CGF.ReturnValue, T: KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3233
/// Emit the internal task destructor thunk invoked by the runtime: it takes
/// (gtid, kmp_task_t_with_privates *) and pushes a destroy cleanup for every
/// field of the privates record whose type requires destruction.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  // Same implicit signature as the task entry: (gtid, restrict task pointer).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(Elt: &GtidArg);
  Args.push_back(Elt: &TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: KmpInt32Ty, args: Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(Info: DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName(Parts: {"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(Ty: DestructorFnTy, Linkage: llvm::GlobalValue::InternalLinkage,
                             N: Name, M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: DestructorFn,
                                    FI: DestructorFnInfo);
  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
    DestructorFn->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: KmpInt32Ty, Fn: DestructorFn, FnInfo: DestructorFnInfo,
                    Args, Loc, StartLoc: Loc);

  // Dereference the task pointer and step to field 1 of the wrapper record,
  // which is the privates record.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      Ptr: CGF.GetAddrOfLocalVar(VD: &TaskTypeArg),
      PtrTy: KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      KmpTaskTWithPrivatesQTy->castAsRecordDecl();
  auto FI = std::next(x: KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, Field: *FI);
  // Push a destroy cleanup for every private field that needs destruction;
  // FinishFunction emits the accumulated cleanups.
  for (const auto *Field : FI->getType()->castAsRecordDecl()->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(dtorKind: DtorKind, addr: FieldLValue.getAddress(), type: Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3283
3284/// Emit a privates mapping function for correct handling of private and
3285/// firstprivate variables.
3286/// \code
3287/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3288/// **noalias priv1,..., <tyn> **noalias privn) {
3289/// *priv1 = &.privates.priv1;
3290/// ...;
3291/// *privn = &.privates.privn;
3292/// }
3293/// \endcode
3294static llvm::Value *
3295emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3296 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3297 ArrayRef<PrivateDataTy> Privates) {
3298 ASTContext &C = CGM.getContext();
3299 FunctionArgList Args;
3300 ImplicitParamDecl TaskPrivatesArg(
3301 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3302 C.getPointerType(T: PrivatesQTy).withConst().withRestrict(),
3303 ImplicitParamKind::Other);
3304 Args.push_back(Elt: &TaskPrivatesArg);
3305 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3306 unsigned Counter = 1;
3307 for (const Expr *E : Data.PrivateVars) {
3308 Args.push_back(Elt: ImplicitParamDecl::Create(
3309 C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
3310 T: C.getPointerType(T: C.getPointerType(T: E->getType()))
3311 .withConst()
3312 .withRestrict(),
3313 ParamKind: ImplicitParamKind::Other));
3314 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
3315 PrivateVarsPos[VD] = Counter;
3316 ++Counter;
3317 }
3318 for (const Expr *E : Data.FirstprivateVars) {
3319 Args.push_back(Elt: ImplicitParamDecl::Create(
3320 C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
3321 T: C.getPointerType(T: C.getPointerType(T: E->getType()))
3322 .withConst()
3323 .withRestrict(),
3324 ParamKind: ImplicitParamKind::Other));
3325 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
3326 PrivateVarsPos[VD] = Counter;
3327 ++Counter;
3328 }
3329 for (const Expr *E : Data.LastprivateVars) {
3330 Args.push_back(Elt: ImplicitParamDecl::Create(
3331 C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
3332 T: C.getPointerType(T: C.getPointerType(T: E->getType()))
3333 .withConst()
3334 .withRestrict(),
3335 ParamKind: ImplicitParamKind::Other));
3336 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
3337 PrivateVarsPos[VD] = Counter;
3338 ++Counter;
3339 }
3340 for (const VarDecl *VD : Data.PrivateLocals) {
3341 QualType Ty = VD->getType().getNonReferenceType();
3342 if (VD->getType()->isLValueReferenceType())
3343 Ty = C.getPointerType(T: Ty);
3344 if (isAllocatableDecl(VD))
3345 Ty = C.getPointerType(T: Ty);
3346 Args.push_back(Elt: ImplicitParamDecl::Create(
3347 C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
3348 T: C.getPointerType(T: C.getPointerType(T: Ty)).withConst().withRestrict(),
3349 ParamKind: ImplicitParamKind::Other));
3350 PrivateVarsPos[VD] = Counter;
3351 ++Counter;
3352 }
3353 const auto &TaskPrivatesMapFnInfo =
3354 CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
3355 llvm::FunctionType *TaskPrivatesMapTy =
3356 CGM.getTypes().GetFunctionType(Info: TaskPrivatesMapFnInfo);
3357 std::string Name =
3358 CGM.getOpenMPRuntime().getName(Parts: {"omp_task_privates_map", ""});
3359 auto *TaskPrivatesMap = llvm::Function::Create(
3360 Ty: TaskPrivatesMapTy, Linkage: llvm::GlobalValue::InternalLinkage, N: Name,
3361 M: &CGM.getModule());
3362 CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: TaskPrivatesMap,
3363 FI: TaskPrivatesMapFnInfo);
3364 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
3365 TaskPrivatesMap->addFnAttr(Kind: "sample-profile-suffix-elision-policy",
3366 Val: "selected");
3367 if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
3368 TaskPrivatesMap->removeFnAttr(Kind: llvm::Attribute::NoInline);
3369 TaskPrivatesMap->removeFnAttr(Kind: llvm::Attribute::OptimizeNone);
3370 TaskPrivatesMap->addFnAttr(Kind: llvm::Attribute::AlwaysInline);
3371 }
3372 CodeGenFunction CGF(CGM);
3373 CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn: TaskPrivatesMap,
3374 FnInfo: TaskPrivatesMapFnInfo, Args, Loc, StartLoc: Loc);
3375
3376 // *privi = &.privates.privi;
3377 LValue Base = CGF.EmitLoadOfPointerLValue(
3378 Ptr: CGF.GetAddrOfLocalVar(VD: &TaskPrivatesArg),
3379 PtrTy: TaskPrivatesArg.getType()->castAs<PointerType>());
3380 const auto *PrivatesQTyRD = PrivatesQTy->castAsRecordDecl();
3381 Counter = 0;
3382 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3383 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3384 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3385 LValue RefLVal =
3386 CGF.MakeAddrLValue(Addr: CGF.GetAddrOfLocalVar(VD), T: VD->getType());
3387 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3388 Ptr: RefLVal.getAddress(), PtrTy: RefLVal.getType()->castAs<PointerType>());
3389 CGF.EmitStoreOfScalar(value: FieldLVal.getPointer(CGF), lvalue: RefLoadLVal);
3390 ++Counter;
3391 }
3392 CGF.FinishFunction();
3393 return TaskPrivatesMap;
3394}
3395
/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Address of the shareds block of the source task
///        (used as the copy source; may be invalid when unused).
/// \param TDBase LValue of the destination kmp_task_t_with_privates object.
/// \param ForDup true when emitting the body of the task duplication function
///        (taskloops); restricts work to non-trivial constructor inits.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // Field 1 of the wrapper record is the privates record.
  auto FI = std::next(x: KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(Base: TDBase, Field: *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(DKind: D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(RegionKind: Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(DKind: D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(DKind: D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    // SrcBase: the shareds block viewed as the shareds record type.
    SrcBase = CGF.MakeAddrLValue(
        Addr: CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr: KmpTaskSharedsPtr, Ty: CGF.ConvertTypeForMem(T: SharedsPtrTy),
            ElementTy: CGF.ConvertTypeForMem(T: SharedsTy)),
        T: SharedsTy);
  }
  // Walk the privates record fields in lock step with the Privates list.
  FI = FI->getType()->castAsRecordDecl()->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the duplication function (ForDup) only non-trivial constructor
    // initializers need to be re-run.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Val: Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(Base: PrivatesBase, Field: *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: initialize the copy from the original (shared) value.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(VD: OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(Addr: CGF.GetAddrOfLocalVar(VD: OriginalVD), T: Type);
        } else if (ForDup) {
          // Source lives in the captured shareds block; re-align to the
          // original variable's declared alignment.
          SharedRefLValue = CGF.EmitLValueForField(Base: SrcBase, Field: SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Addr: SharedRefLValue.getAddress().withAlignment(
                  NewAlignment: C.getDeclAlign(D: OriginalVD)),
              T: SharedRefLValue.getType(), BaseInfo: LValueBaseInfo(AlignmentSource::Decl),
              TBAAInfo: SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Val: Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(Val: CGF.CurCodeDecl)) {
          // Lambda/block captures: emit the original reference directly.
          SharedRefLValue = CGF.EmitLValue(E: Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(E: Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Val: Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(Dest: PrivateLValue, Src: SharedRefLValue, EltTy: Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                DestAddr: PrivateLValue.getAddress(), SrcAddr: SharedRefLValue.getAddress(), OriginalType: Type,
                CopyGen: [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                 Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(LocalVD: Elem, Addr: SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(E: Init, Location: DestElement,
                                       Quals: Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/aggregate element: privatize Elem to the shared source and
          // run the initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(LocalVD: Elem, Addr: SharedRefLValue.getAddress());
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(init: Init, D: VD, lvalue: PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private: just run the default initializer.
        CGF.EmitExprAsInit(init: Init, D: VD, lvalue: PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3514
3515/// Check if duplication function is required for taskloops.
3516static bool checkInitIsRequired(CodeGenFunction &CGF,
3517 ArrayRef<PrivateDataTy> Privates) {
3518 bool InitRequired = false;
3519 for (const PrivateDataTy &Pair : Privates) {
3520 if (Pair.second.isLocalPrivate())
3521 continue;
3522 const VarDecl *VD = Pair.second.PrivateCopy;
3523 const Expr *Init = VD->getAnyInitializer();
3524 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Val: Init) &&
3525 !CGF.isTrivialInitializer(Init));
3526 if (InitRequired)
3527 break;
3528 }
3529 return InitRequired;
3530}
3531
3532
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
///   // setup lastprivate flag
///   task_dst->last = lastpriv;
///   // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Implicit parameters: destination task, source task, lastprivate flag.
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamKind::Other);
  Args.push_back(Elt: &DstArg);
  Args.push_back(Elt: &SrcArg);
  Args.push_back(Elt: &LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(Info: TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(Parts: {"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      Ty: TaskDupTy, Linkage: llvm::GlobalValue::InternalLinkage, N: Name, M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: TaskDup, FI: TaskDupFnInfo);
  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
    TaskDup->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn: TaskDup, FnInfo: TaskDupFnInfo, Args, Loc,
                    StartLoc: Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      Ptr: CGF.GetAddrOfLocalVar(VD: &DstArg),
      PtrTy: KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        Base: TDBase, Field: *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, Field: *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        Addr: CGF.GetAddrOfLocalVar(VD: &LastprivArg), /*Volatile=*/false, Ty: C.IntTy, Loc);
    CGF.EmitStoreOfScalar(value: Lastpriv, lvalue: LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Load the shareds pointer from the *source* task so firstprivates are
    // copied from the original task's data.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        Ptr: CGF.GetAddrOfLocalVar(VD: &SrcArg),
        PtrTy: KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        Base: TDBase, Field: *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(lvalue: CGF.EmitLValueForField(
                                 Base, Field: *std::next(x: KmpTaskTQTyRD->field_begin(),
                                                    n: KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(T: SharedsTy));
  }
  // Re-run the (non-trivial) private initializers into the destination task.
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
3613
3614/// Checks if destructor function is required to be generated.
3615/// \return true if cleanups are required, false otherwise.
3616static bool
3617checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3618 ArrayRef<PrivateDataTy> Privates) {
3619 for (const PrivateDataTy &P : Privates) {
3620 if (P.second.isLocalPrivate())
3621 continue;
3622 QualType Ty = P.second.Original->getType().getNonReferenceType();
3623 if (Ty.isDestructedType())
3624 return true;
3625 }
3626 return false;
3627}
3628
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// RAII helper: the constructor privatizes the iterator and counter variables
/// and opens one counted loop per iterator (emitting the loop header blocks);
/// the destructor emits the matching counter increments, back-edges and exit
/// blocks in reverse order, so the user's code emitted between construction
/// and destruction runs inside the innermost loop body.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator continue/exit jump destinations, filled by the constructor
  // and consumed (in reverse) by the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    // A null expression makes this scope a no-op (both ctor and dtor).
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      // Evaluate the upper bound once, up front.
      Uppers.push_back(Elt: CGF.EmitScalarExpr(E: E->getHelper(I).Upper));
      // Allocate private storage for the iterator variable and its counter.
      const auto *VD = cast<VarDecl>(Val: E->getIteratorDecl(I));
      addPrivate(LocalVD: VD, Addr: CGF.CreateMemTemp(T: VD->getType(), Name: VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          LocalVD: HelperData.CounterVD,
          Addr: CGF.CreateMemTemp(T: HelperData.CounterVD->getType(), Name: "counter.addr"));
    }
    Privatize();

    // Emit the nested loop headers, outermost first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(Addr: CGF.GetAddrOfLocalVar(VD: HelperData.CounterVD),
                             T: HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          value: llvm::ConstantInt::get(Ty: CLVal.getAddress().getElementType(), V: 0),
          lvalue: CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(Args: CGF.getJumpDestInCurrentScope(Name: "iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(Args: CGF.getJumpDestInCurrentScope(Name: "iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(BB: ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(lvalue: CLVal, Loc: HelperData.CounterVD->getLocation());
      // Use a signed or unsigned comparison to match the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(LHS: CVal, RHS: N)
              : CGF.Builder.CreateICmpULT(LHS: CVal, RHS: N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock(name: "iter.body");
      CGF.Builder.CreateCondBr(Cond: Cmp, True: BodyBB, False: ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BB: BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(E: HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops innermost-first, mirroring the constructor.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I: I - 1);
      CGF.EmitIgnoredExpr(E: HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(Dest: ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(BB: ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
3704
3705static std::pair<llvm::Value *, llvm::Value *>
3706getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
3707 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(Val: E);
3708 llvm::Value *Addr;
3709 if (OASE) {
3710 const Expr *Base = OASE->getBase();
3711 Addr = CGF.EmitScalarExpr(E: Base);
3712 } else {
3713 Addr = CGF.EmitLValue(E).getPointer(CGF);
3714 }
3715 llvm::Value *SizeVal;
3716 QualType Ty = E->getType();
3717 if (OASE) {
3718 SizeVal = CGF.getTypeSize(Ty: OASE->getBase()->getType()->getPointeeType());
3719 for (const Expr *SE : OASE->getDimensions()) {
3720 llvm::Value *Sz = CGF.EmitScalarExpr(E: SE);
3721 Sz = CGF.EmitScalarConversion(
3722 Src: Sz, SrcTy: SE->getType(), DstTy: CGF.getContext().getSizeType(), Loc: SE->getExprLoc());
3723 SizeVal = CGF.Builder.CreateNUWMul(LHS: SizeVal, RHS: Sz);
3724 }
3725 } else if (const auto *ASE =
3726 dyn_cast<ArraySectionExpr>(Val: E->IgnoreParenImpCasts())) {
3727 LValue UpAddrLVal = CGF.EmitArraySectionExpr(E: ASE, /*IsLowerBound=*/false);
3728 Address UpAddrAddress = UpAddrLVal.getAddress();
3729 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3730 Ty: UpAddrAddress.getElementType(), Ptr: UpAddrAddress.emitRawPointer(CGF),
3731 /*Idx0=*/1);
3732 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(V: Addr, DestTy: CGF.SizeTy);
3733 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(V: UpAddr, DestTy: CGF.SizeTy);
3734 SizeVal = CGF.Builder.CreateNUWSub(LHS: UpIntPtr, RHS: LowIntPtr);
3735 } else {
3736 SizeVal = CGF.getTypeSize(Ty);
3737 }
3738 return std::make_pair(x&: Addr, y&: SizeVal);
3739}
3740
/// Builds kmp_task_affinity_info_t, if it is not built yet, and builds the
/// flags type. Layout mirrors the runtime's record:
///   { intptr_t base_addr; size_t len; uint32 flags; }
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(DestWidth: 32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord(Name: "kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, DC: KmpAffinityInfoRD, FieldTy: C.getIntPtrType());
    addFieldToRecordDecl(C, DC: KmpAffinityInfoRD, FieldTy: C.getSizeType());
    addFieldToRecordDecl(C, DC: KmpAffinityInfoRD, FieldTy: FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getCanonicalTagType(TD: KmpAffinityInfoRD);
  }
}
3755
/// Emits the task-object setup for a task-generating directive: allocates
/// the kmp_task_t via the runtime (__kmpc_omp_task_alloc, or
/// __kmpc_omp_target_task_alloc when a 'nowait' clause is present), copies
/// the captured shareds into it, initializes private copies, and registers
/// destructors, priority, detach-event and affinity data per the clauses.
///
/// \param CGF          Current code generation function.
/// \param Loc          Directive source location (used for ident_t/gtid).
/// \param D            The task-generating executable directive.
/// \param TaskFunction Outlined function containing the task body.
/// \param SharedsTy    Record type of the captured shared variables.
/// \param Shareds      Address of the captured shareds in the caller.
/// \param Data         Collected clause data (privates, final, priority...).
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
    Privates.emplace_back(
        Args: C.getDeclAlign(D: VD),
        Args: PrivateHelpersTy(E, VD, cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates carry, in addition, the original init expression decl.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
    Privates.emplace_back(
        Args: C.getDeclAlign(D: VD),
        Args: PrivateHelpersTy(
            E, VD, cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *I)->getDecl()),
            cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
    Privates.emplace_back(
        Args: C.getDeclAlign(D: VD),
        Args: PrivateHelpersTy(E, VD, cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(Args: CGM.getPointerAlign(), Args: PrivateHelpersTy(VD));
    else
      Privates.emplace_back(Args: C.getDeclAlign(D: VD), Args: PrivateHelpersTy(VD));
  }
  // Sort in descending alignment order (L.first > R.first); stable to keep
  // the relative order of equally-aligned privates deterministic.
  llvm::stable_sort(Range&: Privates,
                    C: [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use a
  // separate cached record type from plain task/target directives.
  if (isOpenMPTaskLoopDirective(DKind: D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getCanonicalTagType(TD: createKmpTaskTRecordDecl(
          CGM, Kind: D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPointerQTy: KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getCanonicalTagType(TD: createKmpTaskTRecordDecl(
          CGM, Kind: D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPointerQTy: KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  CanQualType KmpTaskTWithPrivatesQTy =
      C.getCanonicalTagType(TD: KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(T: KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy = CGF.Builder.getPtrTy(AddrSpace: 0);
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(Ty: KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(T: SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map parameter is the 4th argument of the task function.
  llvm::Type *TaskPrivatesMapTy =
      std::next(x: TaskFunction->arg_begin(), n: 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(x: KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, PrivatesQTy: FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        V: TaskPrivatesMap, DestTy: TaskPrivatesMapTy);
  } else {
    // No privates: pass a null map pointer.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        T: cast<llvm::PointerType>(Val: TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, Kind: D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
    FreeAgentFlag = 0x80,
    TransparentFlag = 0x100,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    // Destructors flag is only set if some private actually requires
    // destruction at the end of the task.
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (const auto *Clause = D.getSingleClause<OMPThreadsetClause>()) {
    OpenMPThreadsetKind Kind = Clause->getThreadsetKind();
    if (Kind == OMPC_THREADSET_omp_pool)
      Flags = Flags | FreeAgentFlag;
  }
  if (D.getSingleClause<OMPTransparentClause>())
    Flags |= TransparentFlag;

  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // 'final' may be a runtime expression (pointer set) or a compile-time
  // constant (int in the pointer/int pair).
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(C: Data.Final.getPointer(),
                                     True: CGF.Builder.getInt32(C: FinalFlag),
                                     False: CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(C: Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(LHS: TaskFlags, RHS: CGF.Builder.getInt32(C: Flags));
  llvm::Value *SharedsSize = CGM.getSize(numChars: C.getTypeSizeInChars(T: SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          V: TaskEntry, DestTy: KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: Device),
                                           DestTy: CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(C: OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(Elt: DeviceID);
    // Nowait: use the target-aware allocation entry point.
    NewTask = CGF.EmitRuntimeCall(
        callee: OMPBuilder.getOrCreateRuntimeFunction(
            M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_target_task_alloc),
        args: AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                                M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_task_alloc),
                            args: AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(E: Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, Loc: DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, Loc: DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(V: Tid, DestTy: CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        callee: OMPBuilder.getOrCreateRuntimeFunction(
            M&: CGM.getModule(), FnID: OMPRTL___kmpc_task_allow_completion_event),
        args: {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(Src: EvtVal, SrcTy: C.VoidPtrTy, DstTy: Evt->getType(),
                                      Loc: Evt->getExprLoc());
    CGF.EmitStoreOfScalar(value: EvtVal, lvalue: EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Items under an iterator modifier contribute a runtime-computed count;
    // plain items contribute a compile-time count.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Val: Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(E: IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(V: Sz, DestTy: CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(LHS: NumOfElements, RHS: Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(C&: CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime count present: emit a VLA sized to the total element count.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          LHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: NumAffinities), RHS: NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(DestWidth: C.getTypeSize(T: C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(V: NumOfElements));
      KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
          EltTy: KmpTaskAffinityInfoTy, NumElts: OVE, ASM: ArraySizeModifier::Normal,
          /*IndexTypeQuals=*/0);
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, T: KmpTaskAffinityInfoArrayTy,
                                           ParamKind: ImplicitParamKind::Other);
      CGF.EmitVarDecl(D: *PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(VD: PD);
      NumOfElements = CGF.Builder.CreateIntCast(V: NumOfElements, DestTy: CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Fully constant count: use a plain constant array temporary.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          EltTy: KmpTaskAffinityInfoTy,
          ArySize: llvm::APInt(C.getTypeSize(T: C.getSizeType()), NumAffinities), SizeExpr: nullptr,
          ASM: ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(T: KmpTaskAffinityInfoArrayTy, Name: ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(Addr: AffinitiesArray, Index: 0);
      NumOfElements = llvm::ConstantInt::get(Ty: CGM.Int32Ty, V: NumAffinities,
                                             /*isSigned=*/IsSigned: false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlist()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(args&: Addr, args&: Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(Addr: CGF.Builder.CreateConstGEP(Addr: AffinitiesArray, Index: Pos),
                               T: KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, Field: *std::next(x: KmpAffinityInfoRD->field_begin(), n: BaseAddr));
        CGF.EmitStoreOfScalar(value: CGF.Builder.CreatePtrToInt(V: Addr, DestTy: CGF.IntPtrTy),
                              lvalue: BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, Field: *std::next(x: KmpAffinityInfoRD->field_begin(), n: Len));
        CGF.EmitStoreOfScalar(value: Size, lvalue: LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      // Iterator items are appended at runtime; keep the running index in a
      // stack slot initialized past the statically-filled prefix.
      PosLVal = CGF.MakeAddrLValue(
          Addr: CGF.CreateMemTemp(T: C.getSizeType(), Name: "affs.counter.addr"),
          T: C.getSizeType());
      CGF.EmitStoreOfScalar(value: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: Pos), lvalue: PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Val: Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlist()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(args&: Addr, args&: Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(lvalue: PosLVal, Loc: E->getExprLoc());
        LValue Base =
            CGF.MakeAddrLValue(Addr: CGF.Builder.CreateGEP(CGF, Addr: AffinitiesArray, Index: Idx),
                               T: KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, Field: *std::next(x: KmpAffinityInfoRD->field_begin(), n: BaseAddr));
        CGF.EmitStoreOfScalar(value: CGF.Builder.CreatePtrToInt(V: Addr, DestTy: CGF.IntPtrTy),
                              lvalue: BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, Field: *std::next(x: KmpAffinityInfoRD->field_begin(), n: Len));
        CGF.EmitStoreOfScalar(value: Size, lvalue: LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            LHS: Idx, RHS: llvm::ConstantInt::get(Ty: Idx->getType(), V: 1));
        CGF.EmitStoreOfScalar(value: Idx, lvalue: PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        V: AffinitiesArray.emitRawPointer(CGF), DestTy: CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        callee: OMPBuilder.getOrCreateRuntimeFunction(
            M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_reg_task_with_affinity),
        args: {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          V: NewTask, DestTy: KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignRawAddrLValue(V: NewTaskNewTaskTTy,
                                                  T: KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, Field: *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->castAsRecordDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            lvalue: CGF.EmitLValueForField(
                Base: TDBase,
                Field: *std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(T: SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(Addr: KmpTaskSharedsPtr, T: SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Addr: Shareds, T: SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, EltTy: SharedsTy, MayOverlap: AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase: Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloop may need a task-dup routine to copy privates/lastprivates
    // into the per-iteration task copies.
    if (isOpenMPTaskLoopDirective(DKind: D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(x: KmpTaskTQTyRD->field_begin(), n: Data1);
  const auto *KmpCmplrdataUD = (*FI)->getType()->castAsRecordDecl();
  assert(KmpCmplrdataUD->isUnion());
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(Base: TDBase, Field: *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Base: Data1LV, Field: *std::next(x: KmpCmplrdataUD->field_begin(), n: Destructors));
    CGF.EmitStoreOfScalar(value: CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              V: DestructorFn, DestTy: KmpRoutineEntryPtrTy),
                          lvalue: DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        Base: TDBase, Field: *std::next(x: KmpTaskTQTyRD->field_begin(), n: Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Base: Data2LV, Field: *std::next(x: KmpCmplrdataUD->field_begin(), n: Priority));
    CGF.EmitStoreOfScalar(value: Data.Priority.getPointer(), lvalue: PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4153
4154/// Translates internal dependency kind into the runtime kind.
4155static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4156 RTLDependenceKindTy DepKind;
4157 switch (K) {
4158 case OMPC_DEPEND_in:
4159 DepKind = RTLDependenceKindTy::DepIn;
4160 break;
4161 // Out and InOut dependencies must use the same code.
4162 case OMPC_DEPEND_out:
4163 case OMPC_DEPEND_inout:
4164 DepKind = RTLDependenceKindTy::DepInOut;
4165 break;
4166 case OMPC_DEPEND_mutexinoutset:
4167 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4168 break;
4169 case OMPC_DEPEND_inoutset:
4170 DepKind = RTLDependenceKindTy::DepInOutSet;
4171 break;
4172 case OMPC_DEPEND_outallmemory:
4173 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4174 break;
4175 case OMPC_DEPEND_source:
4176 case OMPC_DEPEND_sink:
4177 case OMPC_DEPEND_depobj:
4178 case OMPC_DEPEND_inoutallmemory:
4179 case OMPC_DEPEND_unknown:
4180 llvm_unreachable("Unknown task dependence type");
4181 }
4182 return DepKind;
4183}
4184
4185/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4186static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4187 QualType &FlagsTy) {
4188 FlagsTy = C.getIntTypeForBitwidth(DestWidth: C.getTypeSize(T: C.BoolTy), /*Signed=*/false);
4189 if (KmpDependInfoTy.isNull()) {
4190 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord(Name: "kmp_depend_info");
4191 KmpDependInfoRD->startDefinition();
4192 addFieldToRecordDecl(C, DC: KmpDependInfoRD, FieldTy: C.getIntPtrType());
4193 addFieldToRecordDecl(C, DC: KmpDependInfoRD, FieldTy: C.getSizeType());
4194 addFieldToRecordDecl(C, DC: KmpDependInfoRD, FieldTy: FlagsTy);
4195 KmpDependInfoRD->completeDefinition();
4196 KmpDependInfoTy = C.getCanonicalTagType(TD: KmpDependInfoRD);
4197 }
4198}
4199
/// Returns, for a depobj variable, the number of dependency records it
/// holds and an LValue for the first record.
/// The depobj stores a pointer to a kmp_depend_info array; the slot at
/// index -1 (immediately before the first record) uses its base_addr field
/// to hold the record count.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
  QualType KmpDependInfoPtrTy = C.getPointerType(T: KmpDependInfoTy);
  // Load the pointer to the first dependency record out of the depobj.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      Ptr: DepobjLVal.getAddress().withElementType(
          ElemTy: CGF.ConvertTypeForMem(T: KmpDependInfoPtrTy)),
      PtrTy: KmpDependInfoPtrTy->castAs<PointerType>());
  // Step back one record to reach the bookkeeping slot.
  Address DepObjAddr = CGF.Builder.CreateGEP(
      CGF, Addr: Base.getAddress(),
      Index: llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: -1, /*isSigned=*/IsSigned: true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Addr: DepObjAddr, T: KmpDependInfoTy, BaseInfo: Base.getBaseInfo(), TBAAInfo: Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base: NumDepsBase,
      Field: *std::next(x: KmpDependInfoRD->field_begin(),
                   n: static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(lvalue: BaseAddrLVal, Loc);
  return std::make_pair(x&: NumDeps, y&: Base);
}
4225
/// Fills kmp_depend_info records in \p DependenciesArray for one 'depend'
/// clause's list items, emitting base_addr/len/flags for each.
///
/// \param Pos Either an unsigned* (compile-time slot index, used when no
///        iterator modifier is present; incremented in place) or an LValue*
///        holding a runtime position that is loaded, used and stored back
///        per emitted element.
/// \param Data One clause's dependency data; Data.IteratorExpr, when
///        non-null, wraps the whole loop body in iterator loops via
///        OMPIteratorGeneratorScope.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(T: FlagsTy);

  // If an iterator modifier is present, everything below runs inside the
  // generated iterator loops.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Val: Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;

    // The expression will be a nullptr in the 'omp_all_memory' case.
    if (E) {
      std::tie(args&: Addr, args&: Size) = getPointerAndSize(CGF, E);
      Addr = CGF.Builder.CreatePtrToInt(V: Addr, DestTy: CGF.IntPtrTy);
    } else {
      // 'omp_all_memory': null address, zero length.
      Addr = llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: 0);
      Size = llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 0);
    }
    LValue Base;
    if (unsigned *P = dyn_cast<unsigned *>(Val&: Pos)) {
      Base = CGF.MakeAddrLValue(
          Addr: CGF.Builder.CreateConstGEP(Addr: DependenciesArray, Index: *P), T: KmpDependInfoTy);
    } else {
      assert(E && "Expected a non-null expression");
      LValue &PosLVal = *cast<LValue *>(Val&: Pos);
      llvm::Value *Idx = CGF.EmitLoadOfScalar(lvalue: PosLVal, Loc: E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Addr: CGF.Builder.CreateGEP(CGF, Addr: DependenciesArray, Index: Idx), T: KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base,
        Field: *std::next(x: KmpDependInfoRD->field_begin(),
                     n: static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
    CGF.EmitStoreOfScalar(value: Addr, lvalue: BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, Field: *std::next(x: KmpDependInfoRD->field_begin(),
                            n: static_cast<unsigned int>(RTLDependInfoFields::Len)));
    CGF.EmitStoreOfScalar(value: Size, lvalue: LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(K: Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base,
        Field: *std::next(x: KmpDependInfoRD->field_begin(),
                     n: static_cast<unsigned int>(RTLDependInfoFields::Flags)));
    CGF.EmitStoreOfScalar(
        value: llvm::ConstantInt::get(Ty: LLVMFlagsTy, V: static_cast<unsigned int>(DepKind)),
        lvalue: FlagsLVal);
    // Advance the position: bump the compile-time counter, or load/add/store
    // the runtime index.
    if (unsigned *P = dyn_cast<unsigned *>(Val&: Pos)) {
      ++(*P);
    } else {
      LValue &PosLVal = *cast<LValue *>(Val&: Pos);
      llvm::Value *Idx = CGF.EmitLoadOfScalar(lvalue: PosLVal, Loc: E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(LHS: Idx,
                                     RHS: llvm::ConstantInt::get(Ty: Idx->getType(), V: 1));
      CGF.EmitStoreOfScalar(value: Idx, lvalue: PosLVal);
    }
  }
}
4295
/// Emits, for each depobj list item of a 'depend(depobj:...)' clause, the
/// runtime number of dependency records stored in that depobj.
/// Each count is spilled into a stack temporary while still inside the
/// iterator scope (if any) and re-loaded after the scope closes, so the
/// returned values are usable past the iterator loops' exit blocks.
SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
    CodeGenFunction &CGF, QualType &KmpDependInfoTy,
    const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Val: Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E: E->IgnoreParenImpCasts());
      std::tie(args&: NumDeps, args&: Base) =
          getDepobjElements(CGF, DepobjLVal, Loc: E->getExprLoc());
      // depobj.size = 0; depobj.size += NumDeps;  (via a stack temporary)
      LValue NumLVal = CGF.MakeAddrLValue(
          Addr: CGF.CreateMemTemp(T: C.getUIntPtrType(), Name: "depobj.size.addr"),
          T: C.getUIntPtrType());
      CGF.Builder.CreateStore(Val: llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: 0),
                              Addr: NumLVal.getAddress());
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(lvalue: NumLVal, Loc: E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(LHS: PrevVal, RHS: NumDeps);
      CGF.EmitStoreOfScalar(value: Add, lvalue: NumLVal);
      SizeLVals.push_back(Elt: NumLVal);
    }
  }
  // Iterator scope has been closed above; load the spilled counts here.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(lvalue: SizeLVals[I], Loc: Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Elt: Size);
  }
  return Sizes;
}
4333
/// Copies the dependency records out of every depobj item of a
/// 'depend(depobj:...)' clause into \p DependenciesArray, starting at the
/// running index stored in \p PosLVal and advancing it by the number of
/// copied records per item.
void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
                                         QualType &KmpDependInfoTy,
                                         LValue PosLVal,
                                         const OMPTaskDataTy::DependData &Data,
                                         Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  llvm::Value *ElSize = CGF.getTypeSize(Ty: KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Val: Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E: E->IgnoreParenImpCasts());
      std::tie(args&: NumDeps, args&: Base) =
          getDepobjElements(CGF, DepobjLVal, Loc: E->getExprLoc());

      // memcopy dependency data: byte size = sizeof(kmp_depend_info) * count.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          LHS: ElSize,
          RHS: CGF.Builder.CreateIntCast(V: NumDeps, DestTy: CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(lvalue: PosLVal, Loc: E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(CGF, Addr: DependenciesArray, Index: Pos);
      CGF.Builder.CreateMemCpy(Dest: DepAddr, Src: Base.getAddress(), Size);

      // Increase pos by the element count (the array index, not bytes).
      // Pos += NumDeps;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(LHS: Pos, RHS: NumDeps);
      CGF.EmitStoreOfScalar(value: Add, lvalue: PosLVal);
    }
  }
}
4369
// Emits the combined kmp_depend_info array for all 'depend' clauses of a
// task-generating construct. Returns the total number of dependencies (as an
// i32) together with the array's address cast to void*, or
// {nullptr, invalid} when every clause has an empty expression list.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  // Nothing to emit if no clause carries any dependency expression.
  if (llvm::all_of(Range&: Dependencies, P: [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(x: nullptr, y: Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Statically countable deps: regular (non-depobj) clauses without an
  // iterator modifier. depobj and iterator clauses contribute only
  // runtime-computed counts below.
  unsigned NumDependencies = std::accumulate(
      first: Dependencies.begin(), last: Dependencies.end(), init: 0,
      binary_op: [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      // Each depobj argument carries its own element count; accumulate them.
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, Data: D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(LHS: NumOfDepobjElements, RHS: Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(Val: D.IteratorExpr)) {
      // Total iteration space for the clause is the product of the sizes of
      // all iterator dimensions.
      llvm::Value *ClauseIteratorSpace =
          llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: 1);
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(E: IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(V: Sz, DestTy: CGF.IntPtrTy, /*isSigned=*/false);
        ClauseIteratorSpace = CGF.Builder.CreateNUWMul(LHS: Sz, RHS: ClauseIteratorSpace);
      }
      // One record per dependency expression per iteration.
      llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
          LHS: ClauseIteratorSpace,
          RHS: llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: D.DepExprs.size()));
      NumOfRegularWithIterators =
          CGF.Builder.CreateNUWAdd(LHS: NumOfRegularWithIterators, RHS: NumClauseDeps);
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total size is only known at runtime: emit a variable-length array.
    NumOfElements = llvm::ConstantInt::get(Ty: CGM.IntPtrTy, V: NumDependencies,
                                           /*isSigned=*/IsSigned: false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(LHS: NumOfDepobjElements, RHS: NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(LHS: NumOfRegularWithIterators, RHS: NumOfElements);
    }
    // Wrap the runtime count in an OpaqueValueExpr so it can serve as the VLA
    // size expression.
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(V: NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(EltTy: KmpDependInfoTy, NumElts: OVE, ASM: ArraySizeModifier::Normal,
                               /*IndexTypeQuals=*/0);
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, T: KmpDependInfoArrayTy,
                                         ParamKind: ImplicitParamKind::Other);
    CGF.EmitVarDecl(D: *PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(VD: PD);
    NumOfElements = CGF.Builder.CreateIntCast(V: NumOfElements, DestTy: CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Fully static dependency count: a constant-sized local temporary is
    // sufficient.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        EltTy: KmpDependInfoTy, ArySize: llvm::APInt(/*numBits=*/64, NumDependencies), SizeExpr: nullptr,
        ASM: ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(T: KmpDependInfoArrayTy, Name: ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(Addr: DependenciesArray, Index: 0);
    NumOfElements = llvm::ConstantInt::get(Ty: CGM.Int32Ty, V: NumDependencies,
                                           /*isSigned=*/IsSigned: false);
  }
  // First pass: regular deps without iterators; their slots are known at
  // compile time, so a plain unsigned counter suffices.
  unsigned Pos = 0;
  for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
    if (Dep.DepKind == OMPC_DEPEND_depobj || Dep.IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, Pos: &Pos, Data: Dep, DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  // From here on the write position must be tracked in memory, since the
  // remaining counts are runtime values.
  LValue PosLVal = CGF.MakeAddrLValue(
      Addr: CGF.CreateMemTemp(T: C.getSizeType(), Name: "dep.counter.addr"), T: C.getSizeType());
  CGF.EmitStoreOfScalar(value: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: Pos), lvalue: PosLVal);
  for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
    if (Dep.DepKind == OMPC_DEPEND_depobj || !Dep.IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, Pos: &PosLVal, Data: Dep, DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
      if (Dep.DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Data: Dep, DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr: DependenciesArray, Ty: CGF.VoidPtrTy, ElementTy: CGF.Int8Ty);
  return std::make_pair(x&: NumOfElements, y&: DependenciesArray);
}
4493
// Emits the dependency array for an 'omp depobj' construct. The array is
// heap-allocated via __kmpc_alloc and carries one extra leading record whose
// base_addr field stores the element count (so later 'update'/'destroy'
// handling can recover the size). Returns the address of the first real
// dependency record (one past the header), cast to void*.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(T: KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Val: Dependencies.IteratorExpr)) {
    // With an iterator modifier the element count is the product of all
    // iterator dimension sizes, computed at runtime.
    NumDepsVal = llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(E: IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(V: Sz, DestTy: CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(LHS: NumDepsVal, RHS: Sz);
    }
    // +1 for the leading size-header record.
    Size = CGF.Builder.CreateNUWAdd(LHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1),
                                    RHS: NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(T: KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(numChars: SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(LHS: Size, RHS: RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(V: NumDepsVal, DestTy: CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Static count: size the array as NumDependencies + 1 header record.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        EltTy: KmpDependInfoTy, ArySize: llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        SizeExpr: nullptr, ASM: ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(T: KmpDependInfoArrayTy);
    Size = CGM.getSize(numChars: Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                              M&: CGM.getModule(), FnID: OMPRTL___kmpc_alloc),
                          args: Args, name: ".dep.arr.addr");
  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(T: KmpDependInfoTy);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      V: Addr, DestTy: CGF.Builder.getPtrTy(AddrSpace: 0));
  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(Addr: DependenciesArray, T: KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base,
      Field: *std::next(x: KmpDependInfoRD->field_begin(),
                   n: static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  CGF.EmitStoreOfScalar(value: NumDepsVal, lvalue: BaseAddrLVal);
  // Position starts at 1 to skip the header record. With an iterator
  // modifier the position must live in memory (runtime count); otherwise a
  // plain unsigned counter is used.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        Addr: CGF.CreateMemTemp(T: C.getSizeType(), Name: "iterator.counter.addr"),
        T: C.getSizeType());
    CGF.EmitStoreOfScalar(value: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: Idx), lvalue: PosLVal,
                          /*IsInit=*/isInit: true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Data: Dependencies, DependenciesArray);
  // Return a pointer past the header record.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr: CGF.Builder.CreateConstGEP(Addr: DependenciesArray, Index: 1), Ty: CGF.VoidPtrTy,
      ElementTy: CGF.Int8Ty);
  return DependenciesArray;
}
4579
4580void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4581 SourceLocation Loc) {
4582 ASTContext &C = CGM.getContext();
4583 QualType FlagsTy;
4584 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4585 LValue Base = CGF.EmitLoadOfPointerLValue(Ptr: DepobjLVal.getAddress(),
4586 PtrTy: C.VoidPtrTy.castAs<PointerType>());
4587 QualType KmpDependInfoPtrTy = C.getPointerType(T: KmpDependInfoTy);
4588 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4589 Addr: Base.getAddress(), Ty: CGF.ConvertTypeForMem(T: KmpDependInfoPtrTy),
4590 ElementTy: CGF.ConvertTypeForMem(T: KmpDependInfoTy));
4591 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4592 Ty: Addr.getElementType(), Ptr: Addr.emitRawPointer(CGF),
4593 IdxList: llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: -1, /*isSigned=*/IsSigned: true));
4594 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(V: DepObjAddr,
4595 DestTy: CGF.VoidPtrTy);
4596 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4597 // Use default allocator.
4598 llvm::Value *Allocator = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
4599 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4600
4601 // _kmpc_free(gtid, addr, nullptr);
4602 (void)CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
4603 M&: CGM.getModule(), FnID: OMPRTL___kmpc_free),
4604 args: Args);
4605}
4606
// Implements 'omp depobj(x) update(kind)': walks every kmp_depend_info
// record stored in the depobj and overwrites its 'flags' field with the
// runtime encoding of the new dependence kind.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(T: FlagsTy);
  // Recover the element count and the base of the depobj array.
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(args&: NumDeps, args&: Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Ty: Begin.getElementType(),
                                           Ptr: Begin.emitRawPointer(CGF), IdxList: NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock(name: "omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock(name: "omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BB: BodyBB);
  // Loop-carried current-element pointer; second incoming edge is added
  // after the increment below.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Ty: Begin.getType(), NumReservedValues: 2, Name: "omp.elementPast");
  ElementPHI->addIncoming(V: Begin.emitRawPointer(CGF), BB: EntryBB);
  Begin = Begin.withPointer(NewPointer: ElementPHI, IsKnownNonNull: KnownNonNull);
  Base = CGF.MakeAddrLValue(Addr: Begin, T: KmpDependInfoTy, BaseInfo: Base.getBaseInfo(),
                            TBAAInfo: Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(K: NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, Field: *std::next(x: KmpDependInfoRD->field_begin(),
                         n: static_cast<unsigned int>(RTLDependInfoFields::Flags)));
  CGF.EmitStoreOfScalar(
      value: llvm::ConstantInt::get(Ty: LLVMFlagsTy, V: static_cast<unsigned int>(DepKind)),
      lvalue: FlagsLVal);

  // Shift the address forward by one element.
  llvm::Value *ElementNext =
      CGF.Builder.CreateConstGEP(Addr: Begin, /*Index=*/1, Name: "omp.elementNext")
          .emitRawPointer(CGF);
  ElementPHI->addIncoming(V: ElementNext, BB: CGF.Builder.GetInsertBlock());
  // Loop back until the incremented pointer reaches the end.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHS: ElementNext, RHS: End, Name: "omp.isempty");
  CGF.Builder.CreateCondBr(Cond: IsEmpty, True: DoneBB, False: BodyBB);
  // Done.
  CGF.EmitBlock(BB: DoneBB, /*IsFinished=*/true);
}
4654
// Emits the runtime calls that launch a task created for an OpenMP task
// directive: allocates/initializes the task object, materializes its depend
// clauses, and then either enqueues it (__kmpc_omp_task[_with_deps]) or, under
// a false 'if' clause, executes it immediately between
// __kmpc_omp_task_begin_if0/__kmpc_omp_task_complete_if0 after waiting on its
// dependencies.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  // Allocate and initialize the kmp_task_t object.
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(args&: NumOfElements, args&: DependenciesArray) =
      emitDependClause(CGF, Dependencies: Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
    // No separate noalias dependency list is emitted.
    DepTaskArgs[5] = CGF.Builder.getInt32(C: 0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
  }
  // 'then' path: the task is actually enqueued with the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks restart from part 0 on each resume.
      auto PartIdFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(Base: TDBase, Field: *PartIdFI);
      CGF.EmitStoreOfScalar(value: CGF.Builder.getInt32(C: 0), lvalue: PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          callee: OMPBuilder.getOrCreateRuntimeFunction(
              M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_task_with_deps),
          args: DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                              M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_task),
                          args: TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(C: 0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
    DepWaitTaskArgs[6] =
        llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: Data.HasNowaitClause);
  }
  auto &M = CGM.getModule();
  // 'else' path (if-clause evaluated to false): run the task body inline in
  // the current thread.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                              M, FnID: OMPRTL___kmpc_omp_taskwait_deps_51),
                          args: DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, OutlinedFn: TaskEntry,
                                                          Args: OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, FnID: OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, FnID: OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, Cond: IfCond, ThenGen: ThenCodeGen, ElseGen: ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
4774
// Emits the __kmpc_taskloop (or __kmpc_taskloop_5 when a grainsize/num_tasks
// modifier is present) runtime call for a taskloop directive, after storing
// the loop bounds, stride, and reduction data into the task object.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  // Allocate and initialize the kmp_task_t object.
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // Unlike emitTaskCall, the 'if' clause is passed to the runtime as an
  // integer argument rather than branching in generated code.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(V: CGF.EvaluateExprAsBool(E: IfCond), DestTy: CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(Ty: CGF.IntTy, /*V=*/1);
  }

  // Initialize the task's lower bound, upper bound, and stride fields from
  // the loop directive's helper variables.
  LValue LBLVal = CGF.EmitLValueForField(
      Base: Result.TDBase,
      Field: *std::next(x: Result.KmpTaskTQTyRD->field_begin(), n: KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(E: LBVar->getInit(), Location: LBLVal.getAddress(), Quals: LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Base: Result.TDBase,
      Field: *std::next(x: Result.KmpTaskTQTyRD->field_begin(), n: KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(E: UBVar->getInit(), Location: UBLVal.getAddress(), Quals: UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Base: Result.TDBase,
      Field: *std::next(x: Result.KmpTaskTQTyRD->field_begin(), n: KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(E: StVar->getInit(), Location: StLVal.getAddress(), Quals: StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Base: Result.TDBase,
      Field: *std::next(x: Result.KmpTaskTQTyRD->field_begin(), n: KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(value: Data.Reductions, lvalue: RedLVal);
  } else {
    CGF.EmitNullInitialization(DestPtr: RedLVal.getAddress(),
                               Ty: CGF.getContext().VoidPtrTy);
  }
  // 'sched' argument encoding for __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::SmallVector<llvm::Value *, 12> TaskArgs{
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(lvalue: StLVal, Loc),
      llvm::ConstantInt::getSigned(
          Ty: CGF.IntTy, V: 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          Ty: CGF.IntTy, V: Data.Schedule.getPointer()
                          ? Data.Schedule.getInt() ? NumTasks : Grainsize
                          : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(V: Data.Schedule.getPointer(), DestTy: CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(Ty: CGF.Int64Ty, /*V=*/0)};
  // __kmpc_taskloop_5 takes an extra 'modifier' argument before task_dup.
  if (Data.HasModifier)
    TaskArgs.push_back(Elt: llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: 1));

  TaskArgs.push_back(Elt: Result.TaskDupFn
                         ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                               V: Result.TaskDupFn, DestTy: CGF.VoidPtrTy)
                         : llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy));
  CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                          M&: CGM.getModule(), FnID: Data.HasModifier
                                                 ? OMPRTL___kmpc_taskloop_5
                                                 : OMPRTL___kmpc_taskloop),
                      args: TaskArgs);
}
4863
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional expressions forwarded verbatim to
/// \p RedOpGen for each element.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(VD: LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(VD: RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(arrayType: ArrayTy, baseType&: ElementTy, addr&: LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
  llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(Ty: LHSAddr.getElementType(), Ptr: LHSBegin, IdxList: NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock(name: "omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock(name: "omp.arraycpy.done");
  // Guard against a zero-length array: skip the body entirely when
  // begin == end.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHS: LHSBegin, RHS: LHSEnd, Name: "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(Cond: IsEmpty, True: DoneBB, False: BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BB: BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(T: ElementTy);

  // Loop-carried element pointers for both source (RHS) and destination
  // (LHS); the back-edge incoming values are added after the increments.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      Ty: RHSBegin->getType(), NumReservedValues: 2, Name: "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(V: RHSBegin, BB: EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      Ty: LHSBegin->getType(), NumReservedValues: 2, Name: "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(V: LHSBegin, BB: EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current element so RedOpGen's
  // expressions operate element-wise.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LocalVD: LHSVar, Addr: LHSElementCurrent);
  Scope.addPrivate(LocalVD: RHSVar, Addr: RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      Ty: LHSAddr.getElementType(), Ptr: LHSElementPHI, /*Idx0=*/1,
      Name: "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      Ty: RHSAddr.getElementType(), Ptr: RHSElementPHI, /*Idx0=*/1,
      Name: "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHS: LHSElementNext, RHS: LHSEnd, Name: "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Cond: Done, True: DoneBB, False: BodyBB);
  LHSElementPHI->addIncoming(V: LHSElementNext, BB: CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(V: RHSElementNext, BB: CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(BB: DoneBB, /*IsFinished=*/true);
}
4946
4947/// Emit reduction combiner. If the combiner is a simple expression emit it as
4948/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4949/// UDR combiner function.
4950static void emitReductionCombiner(CodeGenFunction &CGF,
4951 const Expr *ReductionOp) {
4952 if (const auto *CE = dyn_cast<CallExpr>(Val: ReductionOp))
4953 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(Val: CE->getCallee()))
4954 if (const auto *DRE =
4955 dyn_cast<DeclRefExpr>(Val: OVE->getSourceExpr()->IgnoreImpCasts()))
4956 if (const auto *DRD =
4957 dyn_cast<OMPDeclareReductionDecl>(Val: DRE->getDecl())) {
4958 std::pair<llvm::Function *, llvm::Function *> Reduction =
4959 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(D: DRD);
4960 RValue Func = RValue::get(V: Reduction.first);
4961 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4962 CGF.EmitIgnoredExpr(E: ReductionOp);
4963 return;
4964 }
4965 CGF.EmitIgnoredExpr(E: ReductionOp);
4966}
4967
// Builds the outlined reduction function
//   void <ReducerName>.red(void *lhs, void *rhs)
// which applies each reduction operation element-wise: both arguments are
// arrays of pointers to the per-variable reduction data, and for each
// reduction i it performs lhs[i] = RedOp_i(lhs[i], rhs[i]).
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(Elt: &LHSArg);
  Args.push_back(Elt: &RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
  std::string Name = getReductionFuncName(Name: ReducerName);
  auto *Fn = llvm::Function::Create(Ty: CGM.getTypes().GetFunctionType(Info: CGFI),
                                    Linkage: llvm::GlobalValue::InternalLinkage, N: Name,
                                    M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: CGFI);
  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
    Fn->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo: CGFI, Args, Loc, StartLoc: Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  V: CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(VD: &LHSArg)),
                  DestTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  V: CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(VD: &RHSArg)),
                  DestTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
              ArgsElemType, CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  // Map each LHS/RHS variable to its slot in the argument arrays. Idx is
  // tracked separately from I because a variably-modified private consumes
  // an extra slot holding its size.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSExprs[I])->getDecl());
    Scope.addPrivate(LocalVD: RHSVar, Addr: emitAddrOfVarFromArray(CGF, Array: RHS, Index: Idx, Var: RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSExprs[I])->getDecl());
    Scope.addPrivate(LocalVD: LHSVar, Addr: emitAddrOfVarFromArray(CGF, Array: LHS, Index: Idx, Var: LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(Addr: LHS, Index: Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Addr: Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(T: PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(Val: VLA->getSizeExpr());
      // The size was smuggled through the array as a pointer-sized integer.
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(V: CGF.Builder.CreatePtrToInt(V: Ptr, DestTy: CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(Ty: PrivTy);
    }
  }
  Scope.Privatize();
  // Emit each combiner; arrays get the element-wise helper, everything else
  // is emitted directly.
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, Type: (*IPriv)->getType(), LHSVar, RHSVar,
          RedOpGen: [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, ReductionOp: E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, ReductionOp: E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5059
5060void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5061 const Expr *ReductionOp,
5062 const Expr *PrivateRef,
5063 const DeclRefExpr *LHS,
5064 const DeclRefExpr *RHS) {
5065 if (PrivateRef->getType()->isArrayType()) {
5066 // Emit reduction for array section.
5067 const auto *LHSVar = cast<VarDecl>(Val: LHS->getDecl());
5068 const auto *RHSVar = cast<VarDecl>(Val: RHS->getDecl());
5069 EmitOMPAggregateReduction(
5070 CGF, Type: PrivateRef->getType(), LHSVar, RHSVar,
5071 RedOpGen: [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5072 emitReductionCombiner(CGF, ReductionOp);
5073 });
5074 } else {
5075 // Emit reduction for array subscript or single variable.
5076 emitReductionCombiner(CGF, ReductionOp);
5077 }
5078}
5079
// Forward declaration; the definition appears later in this file. Used below
// to derive a stable name for the shared reduction variable from \p Prefix
// and \p Ref.
static std::string generateUniqueName(CodeGenModule &CGM,
                                      llvm::StringRef Prefix, const Expr *Ref);
5082
/// Emit the reduction of a single privately-reduced variable through an
/// internal shared global: the master thread initializes the shared slot,
/// every thread folds its private value into it inside a critical region,
/// the final value is broadcast back to each thread's RHS copy, and finally
/// the result is combined with the original list item. Barriers separate
/// each phase.
void CGOpenMPRuntime::emitPrivateReduction(
    CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates,
    const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps) {

  // Create a shared global variable (__shared_reduction_var) to accumulate the
  // final result.
  //
  // Call __kmpc_barrier to synchronize threads before initialization.
  //
  // The master thread (thread_id == 0) initializes __shared_reduction_var
  // with the identity value or initializer.
  //
  // Call __kmpc_barrier to synchronize before combining.
  // For each i:
  // - Thread enters critical section.
  // - Reads its private value from LHSExprs[i].
  // - Updates __shared_reduction_var[i] = RedOp_i(__shared_reduction_var[i],
  //   Privates[i]).
  // - Exits critical section.
  //
  // Call __kmpc_barrier after combining.
  //
  // Each thread copies __shared_reduction_var[i] back to RHSExprs[i].
  //
  // Final __kmpc_barrier to synchronize after broadcasting
  QualType PrivateType = Privates->getType();
  llvm::Type *LLVMType = CGF.ConvertTypeForMem(T: PrivateType);

  const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOp: ReductionOps);
  // Derive a stable, human-readable suffix for the shared slot from the
  // private variable's declaration when one is visible.
  std::string ReductionVarNameStr;
  if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: Privates->IgnoreParenCasts()))
    ReductionVarNameStr =
        generateUniqueName(CGM, Prefix: DRE->getDecl()->getNameAsString(), Ref: Privates);
  else
    ReductionVarNameStr = "unnamed_priv_var";

  // Create an internal shared variable
  // NOTE(review): "internal_pivate_" looks like a typo for
  // "internal_private_", but this string feeds emitted IR symbol names;
  // renaming it would change generated symbols (and any tests that match
  // them) — confirm before fixing.
  std::string SharedName =
      CGM.getOpenMPRuntime().getName(Parts: {"internal_pivate_", ReductionVarNameStr});
  llvm::GlobalVariable *SharedVar = OMPBuilder.getOrCreateInternalVariable(
      Ty: LLVMType, Name: ".omp.reduction." + SharedName);

  // getTypeAlign() is in bits; the global's alignment is in bytes.
  SharedVar->setAlignment(
      llvm::MaybeAlign(CGF.getContext().getTypeAlign(T: PrivateType) / 8));

  Address SharedResult =
      CGF.MakeNaturalAlignRawAddrLValue(V: SharedVar, T: PrivateType).getAddress();

  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, Flags: OMP_ATOMIC_REDUCE);
  llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};

  // Only thread 0 runs the initialization block; everyone else falls through
  // to init.end and waits at the barrier below.
  llvm::BasicBlock *InitBB = CGF.createBasicBlock(name: "init");
  llvm::BasicBlock *InitEndBB = CGF.createBasicBlock(name: "init.end");

  llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
      LHS: ThreadId, RHS: llvm::ConstantInt::get(Ty: ThreadId->getType(), V: 0));
  CGF.Builder.CreateCondBr(Cond: IsWorker, True: InitBB, False: InitEndBB);

  CGF.EmitBlock(BB: InitBB);

  // Initialize the shared accumulator. Preference order:
  //   1. declare-reduction initializer (call form or expression form),
  //   2. the private variable's own initializer,
  //   3. null/default initialization as a fallback.
  auto EmitSharedInit = [&]() {
    if (UDR) { // Check if it's a User-Defined Reduction
      if (const Expr *UDRInitExpr = UDR->getInitializer()) {
        std::pair<llvm::Function *, llvm::Function *> FnPair =
            getUserDefinedReduction(D: UDR);
        llvm::Function *InitializerFn = FnPair.second;
        if (InitializerFn) {
          if (const auto *CE =
                  dyn_cast<CallExpr>(Val: UDRInitExpr->IgnoreParenImpCasts())) {
            // Call-form initializer: redirect its "omp_priv" out-argument to
            // the shared slot, then emit the call.
            const auto *OutDRE = cast<DeclRefExpr>(
                Val: cast<UnaryOperator>(Val: CE->getArg(Arg: 0)->IgnoreParenImpCasts())
                    ->getSubExpr());
            const VarDecl *OutVD = cast<VarDecl>(Val: OutDRE->getDecl());

            CodeGenFunction::OMPPrivateScope LocalScope(CGF);
            LocalScope.addPrivate(LocalVD: OutVD, Addr: SharedResult);

            (void)LocalScope.Privatize();
            if (const auto *OVE = dyn_cast<OpaqueValueExpr>(
                    Val: CE->getCallee()->IgnoreParenImpCasts())) {
              // The callee is an opaque value: bind it to the initializer
              // function before emitting the call.
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  CGF, OVE, RValue::get(V: InitializerFn));
              CGF.EmitIgnoredExpr(E: CE);
            } else {
              CGF.EmitAnyExprToMem(E: UDRInitExpr, Location: SharedResult,
                                   Quals: PrivateType.getQualifiers(),
                                   /*IsInitializer=*/true);
            }
          } else {
            // Expression-form initializer: evaluate it straight into the
            // shared slot.
            CGF.EmitAnyExprToMem(E: UDRInitExpr, Location: SharedResult,
                                 Quals: PrivateType.getQualifiers(),
                                 /*IsInitializer=*/true);
          }
        } else {
          CGF.EmitAnyExprToMem(E: UDRInitExpr, Location: SharedResult,
                               Quals: PrivateType.getQualifiers(),
                               /*IsInitializer=*/true);
        }
      } else {
        // EmitNullInitialization handles default construction for C++ classes
        // and zeroing for scalars, which is a reasonable default.
        CGF.EmitNullInitialization(DestPtr: SharedResult, Ty: PrivateType);
      }
      return; // UDR initialization handled
    }
    if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: Privates)) {
      if (const auto *VD = dyn_cast<VarDecl>(Val: DRE->getDecl())) {
        if (const Expr *InitExpr = VD->getInit()) {
          CGF.EmitAnyExprToMem(E: InitExpr, Location: SharedResult,
                               Quals: PrivateType.getQualifiers(), IsInitializer: true);
          return;
        }
      }
    }
    CGF.EmitNullInitialization(DestPtr: SharedResult, Ty: PrivateType);
  };
  EmitSharedInit();
  CGF.Builder.CreateBr(Dest: InitEndBB);
  CGF.EmitBlock(BB: InitEndBB);

  // Barrier: all threads wait until the shared slot is initialized.
  CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                          M&: CGM.getModule(), FnID: OMPRTL___kmpc_barrier),
                      args: BarrierArgs);

  const Expr *ReductionOp = ReductionOps;
  const OMPDeclareReductionDecl *CurrentUDR = getReductionInit(ReductionOp);
  LValue SharedLV = CGF.MakeAddrLValue(Addr: SharedResult, T: PrivateType);
  // LHSLV is this thread's private copy of the reduction variable.
  LValue LHSLV = CGF.EmitLValue(E: Privates);

  // Wrap a combine step in a named critical region so threads fold into the
  // shared slot one at a time.
  auto EmitCriticalReduction = [&](auto ReductionGen) {
    std::string CriticalName = getName(Parts: {"reduction_critical"});
    emitCriticalRegion(CGF, CriticalName, CriticalOpGen: ReductionGen, Loc);
  };

  if (CurrentUDR) {
    // Handle user-defined reduction.
    auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      std::pair<llvm::Function *, llvm::Function *> FnPair =
          getUserDefinedReduction(D: CurrentUDR);
      if (FnPair.first) {
        // Remap the combiner's omp_out/omp_in placeholders to the shared
        // accumulator and this thread's private copy, respectively.
        if (const auto *CE = dyn_cast<CallExpr>(Val: ReductionOp)) {
          const auto *OutDRE = cast<DeclRefExpr>(
              Val: cast<UnaryOperator>(Val: CE->getArg(Arg: 0)->IgnoreParenImpCasts())
                  ->getSubExpr());
          const auto *InDRE = cast<DeclRefExpr>(
              Val: cast<UnaryOperator>(Val: CE->getArg(Arg: 1)->IgnoreParenImpCasts())
                  ->getSubExpr());
          CodeGenFunction::OMPPrivateScope LocalScope(CGF);
          LocalScope.addPrivate(LocalVD: cast<VarDecl>(Val: OutDRE->getDecl()),
                                Addr: SharedLV.getAddress());
          LocalScope.addPrivate(LocalVD: cast<VarDecl>(Val: InDRE->getDecl()),
                                Addr: LHSLV.getAddress());
          (void)LocalScope.Privatize();
          emitReductionCombiner(CGF, ReductionOp);
        }
      }
    };
    EmitCriticalReduction(ReductionGen);
  } else {
    // Handle built-in reduction operations.
#ifndef NDEBUG
    // Debug-only sanity check: the reduction op must be an assignment
    // (either "lhs = <combine>" or "operator=(lhs, <combine>)").
    const Expr *ReductionClauseExpr = ReductionOp->IgnoreParenCasts();
    if (const auto *Cleanup = dyn_cast<ExprWithCleanups>(ReductionClauseExpr))
      ReductionClauseExpr = Cleanup->getSubExpr()->IgnoreParenCasts();

    const Expr *AssignRHS = nullptr;
    if (const auto *BinOp = dyn_cast<BinaryOperator>(ReductionClauseExpr)) {
      if (BinOp->getOpcode() == BO_Assign)
        AssignRHS = BinOp->getRHS();
    } else if (const auto *OpCall =
                   dyn_cast<CXXOperatorCallExpr>(ReductionClauseExpr)) {
      if (OpCall->getOperator() == OO_Equal)
        AssignRHS = OpCall->getArg(1);
    }

    assert(AssignRHS &&
           "Private Variable Reduction : Invalid ReductionOp expression");
#endif

    auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      // Remap the clause's LHS/RHS placeholder decls onto the shared slot
      // and this thread's private copy, then emit "shared = shared op priv".
      const auto *OmpOutDRE =
          dyn_cast<DeclRefExpr>(Val: LHSExprs->IgnoreParenImpCasts());
      const auto *OmpInDRE =
          dyn_cast<DeclRefExpr>(Val: RHSExprs->IgnoreParenImpCasts());
      assert(
          OmpOutDRE && OmpInDRE &&
          "Private Variable Reduction : LHSExpr/RHSExpr must be DeclRefExprs");
      const VarDecl *OmpOutVD = cast<VarDecl>(Val: OmpOutDRE->getDecl());
      const VarDecl *OmpInVD = cast<VarDecl>(Val: OmpInDRE->getDecl());
      CodeGenFunction::OMPPrivateScope LocalScope(CGF);
      LocalScope.addPrivate(LocalVD: OmpOutVD, Addr: SharedLV.getAddress());
      LocalScope.addPrivate(LocalVD: OmpInVD, Addr: LHSLV.getAddress());
      (void)LocalScope.Privatize();
      // Emit the actual reduction operation
      CGF.EmitIgnoredExpr(E: ReductionOp);
    };
    EmitCriticalReduction(ReductionGen);
  }

  // Barrier: all threads have folded their private values into the slot.
  CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                          M&: CGM.getModule(), FnID: OMPRTL___kmpc_barrier),
                      args: BarrierArgs);

  // Broadcast final result
  // Aggregates are copied address-to-address; scalars are loaded once and
  // stored to the per-thread destination.
  bool IsAggregate = PrivateType->isAggregateType();
  LValue SharedLV1 = CGF.MakeAddrLValue(Addr: SharedResult, T: PrivateType);
  llvm::Value *FinalResultVal = nullptr;
  Address FinalResultAddr = Address::invalid();

  if (IsAggregate)
    FinalResultAddr = SharedResult;
  else
    FinalResultVal = CGF.EmitLoadOfScalar(lvalue: SharedLV1, Loc);

  LValue TargetLHSLV = CGF.EmitLValue(E: RHSExprs);
  if (IsAggregate) {
    CGF.EmitAggregateCopy(Dest: TargetLHSLV,
                          Src: CGF.MakeAddrLValue(Addr: FinalResultAddr, T: PrivateType),
                          EltTy: PrivateType, MayOverlap: AggValueSlot::DoesNotOverlap, isVolatile: false);
  } else {
    CGF.EmitStoreOfScalar(value: FinalResultVal, lvalue: TargetLHSLV);
  }
  // Final synchronization barrier
  CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                          M&: CGM.getModule(), FnID: OMPRTL___kmpc_barrier),
                      args: BarrierArgs);

  // Combiner with original list item
  auto OriginalListCombiner = [&](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitSingleReductionCombiner(CGF, ReductionOp: ReductionOps, PrivateRef: Privates,
                                LHS: cast<DeclRefExpr>(Val: LHSExprs),
                                RHS: cast<DeclRefExpr>(Val: RHSExprs));
  };
  EmitCriticalReduction(OriginalListCombiner);
}
5323
/// Emit the end-of-region combination for an OpenMP 'reduction' clause:
/// either inline combiners (SimpleReduction), or the full
/// __kmpc_reduce{_nowait} protocol with a tree-reduce function (case 1) and
/// an atomic/critical fallback (case 2). Items flagged IsPrivateVarReduction
/// are excluded from the runtime protocol and handled afterwards by
/// emitPrivateReduction.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> OrgPrivates,
                                    ArrayRef<const Expr *> OrgLHSExprs,
                                    ArrayRef<const Expr *> OrgRHSExprs,
                                    ArrayRef<const Expr *> OrgReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime calls needed: just emit each combiner in-place.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = OrgPrivates.begin();
    const auto *ILHS = OrgLHSExprs.begin();
    const auto *IRHS = OrgRHSExprs.begin();
    for (const Expr *E : OrgReductionOps) {
      emitSingleReductionCombiner(CGF, ReductionOp: E, PrivateRef: *IPriv, LHS: cast<DeclRefExpr>(Val: *ILHS),
                                  RHS: cast<DeclRefExpr>(Val: *IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // Filter out shared reduction variables based on IsPrivateVarReduction flag.
  // Only keep entries where the corresponding variable is not private.
  // (std::min guards the indexing; the asserts near the end of this function
  // check that the four arrays are actually the same length.)
  SmallVector<const Expr *> FilteredPrivates, FilteredLHSExprs,
      FilteredRHSExprs, FilteredReductionOps;
  for (unsigned I : llvm::seq<unsigned>(
           Size: std::min(a: OrgReductionOps.size(), b: OrgLHSExprs.size()))) {
    if (!Options.IsPrivateVarReduction[I]) {
      FilteredPrivates.emplace_back(Args: OrgPrivates[I]);
      FilteredLHSExprs.emplace_back(Args: OrgLHSExprs[I]);
      FilteredRHSExprs.emplace_back(Args: OrgRHSExprs[I]);
      FilteredReductionOps.emplace_back(Args: OrgReductionOps[I]);
    }
  }
  // Wrap filtered vectors in ArrayRef for downstream shared reduction
  // processing.
  ArrayRef<const Expr *> Privates = FilteredPrivates;
  ArrayRef<const Expr *> LHSExprs = FilteredLHSExprs;
  ArrayRef<const Expr *> RHSExprs = FilteredRHSExprs;
  ArrayRef<const Expr *> ReductionOps = FilteredReductionOps;

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy = C.getConstantArrayType(
      EltTy: C.VoidPtrTy, ArySize: ArraySize, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal,
      /*IndexTypeQuals=*/0);
  RawAddress ReductionList =
      CGF.CreateMemTemp(T: ReductionArrayTy, Name: ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(Addr: ReductionList, Index: Idx);
    CGF.Builder.CreateStore(
        Val: CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            V: CGF.EmitLValue(E: RHSExprs[I]).getPointer(CGF), DestTy: CGF.VoidPtrTy),
        Addr: Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // VLA items occupy two slots: the pointer, then the element count
      // smuggled through a void* so reduce_func can recover the length.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(Addr: ReductionList, Index: Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          V: CGF.getVLASize(
                 vla: CGF.getContext().getAsVariableArrayType(T: (*IPriv)->getType()))
                 .NumElts,
          DestTy: CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(Val: CGF.Builder.CreateIntToPtr(V: Size, DestTy: CGF.VoidPtrTy),
                              Addr: Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      ReducerName: CGF.CurFn->getName(), Loc, ArgsElemType: CGF.ConvertTypeForMem(T: ReductionArrayTy),
      Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName(Parts: {"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(CriticalName: Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, Flags: OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(Ty: ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      V: ReductionList.getPointer(), DestTy: CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(C: RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      callee: OMPBuilder.getOrCreateRuntimeFunction(
          M&: CGM.getModule(),
          FnID: WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      args: Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(name: ".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(V: Res, Dest: DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(name: ".omp.reduction.case1");
  SwInst->addCase(OnVal: CGF.Builder.getInt32(C: 1), Dest: Case1BB);
  CGF.EmitBlock(BB: Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, ReductionOp: E, PrivateRef: *IPriv, LHS: cast<DeclRefExpr>(Val: *ILHS),
                                     RHS: cast<DeclRefExpr>(Val: *IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // The CommonActionTy wraps CodeGen so __kmpc_end_reduce{_nowait} is called
  // on exit (including exceptional exit paths).
  CommonActionTy Action(
      nullptr, {},
      OMPBuilder.getOrCreateRuntimeFunction(
          M&: CGM.getModule(), FnID: WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                        : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(Block: DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(name: ".omp.reduction.case2");
  SwInst->addCase(OnVal: CGF.Builder.getInt32(C: 2), Dest: Case2BB);
  CGF.EmitBlock(BB: Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      // Note: the enum-typed BO below is deliberately shadowed by the
      // BinaryOperator* BO inside the if — the inner one only exists to
      // pattern-match the assignment.
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(Val: E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                Val: RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(Val: RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(E: XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(E: EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              AO: llvm::AtomicOrdering::Monotonic, Loc,
              CommonGen: [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Non-native atomic update: stash the loaded X value in a
                // temp, privatize the LHS var to it, and re-evaluate UpExpr.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(T: VD->getType());
                CGF.emitOMPSimpleStore(
                    LVal: CGF.MakeAddrLValue(Addr: LHSTemp, T: VD->getType()), RVal: XRValue,
                    RValTy: VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(LocalVD: VD, Addr: LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(E: UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, Type: (*IPriv)->getType(), LHSVar: VD, RHSVar,
                                    RedOpGen: AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName(Parts: {"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, CriticalName: Name,
              CriticalOpGen: [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, ReductionOp: E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, Type: (*IPriv)->getType(), LHSVar, RHSVar,
                                    RedOpGen: CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, {},
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(Block: DefaultBB);
  CGF.EmitBlock(BB: DefaultBB, /*IsFinished=*/true);
  // Finally, handle the items that were filtered out above as private-var
  // reductions (each goes through the shared-slot/critical protocol).
  assert(OrgLHSExprs.size() == OrgPrivates.size() &&
         "PrivateVarReduction: Privates size mismatch");
  assert(OrgLHSExprs.size() == OrgReductionOps.size() &&
         "PrivateVarReduction: ReductionOps size mismatch");
  for (unsigned I : llvm::seq<unsigned>(
           Size: std::min(a: OrgReductionOps.size(), b: OrgLHSExprs.size()))) {
    if (Options.IsPrivateVarReduction[I])
      emitPrivateReduction(CGF, Loc, Privates: OrgPrivates[I], LHSExprs: OrgLHSExprs[I],
                           RHSExprs: OrgRHSExprs[I], ReductionOps: OrgReductionOps[I]);
  }
}
5657
5658/// Generates unique name for artificial threadprivate variables.
5659/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5660static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5661 const Expr *Ref) {
5662 SmallString<256> Buffer;
5663 llvm::raw_svector_ostream Out(Buffer);
5664 const clang::DeclRefExpr *DE;
5665 const VarDecl *D = ::getBaseDecl(Ref, DE);
5666 if (!D)
5667 D = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: Ref)->getDecl());
5668 D = D->getCanonicalDecl();
5669 std::string Name = CGM.getOpenMPRuntime().getName(
5670 Parts: {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(GD: D)});
5671 Out << Prefix << Name << "_"
5672 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5673 return std::string(Out.str());
5674}
5675
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are restrict-qualified void*: %arg (private copy) and
  // %orig (original item, only read for declare-reduction initializers).
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamKind::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamKind::Other);
  Args.emplace_back(Args: &Param);
  Args.emplace_back(Args: &ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(Info: FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(Parts: {"red_init", ""});
  auto *Fn = llvm::Function::Create(Ty: FnTy, Linkage: llvm::GlobalValue::InternalLinkage,
                                    N: Name, M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: FnInfo);
  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
    Fn->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo, Args, Loc, StartLoc: Loc);
  QualType PrivateType = RCG.getPrivateType(N);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      Ptr: CGF.GetAddrOfLocalVar(VD: &Param).withElementType(ElemTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
      PtrTy: C.getPointerType(T: PrivateType)->castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, VarType: CGM.getContext().getSizeType(),
        Name: generateUniqueName(CGM, Prefix: "reduction_size", Ref: RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(Addr: SizeAddr, /*Volatile=*/false,
                                Ty: CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(VD: &ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        Ptr: SharedAddr,
        PtrTy: CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedAddr: OrigAddr,
                         DefaultInit: [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5742
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  // LHS/RHS are the clause's placeholder decls; they get remapped below onto
  // the two void* arguments of the generated function.
  const auto *LHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  Args.emplace_back(Args: &ParamInOut);
  Args.emplace_back(Args: &ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(Info: FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(Parts: {"red_comb", ""});
  auto *Fn = llvm::Function::Create(Ty: FnTy, Linkage: llvm::GlobalValue::InternalLinkage,
                                    N: Name, M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: FnInfo);
  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
    Fn->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo, Args, Loc, StartLoc: Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, VarType: CGM.getContext().getSizeType(),
        Name: generateUniqueName(CGM, Prefix: "reduction_size", Ref: RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(Addr: SizeAddr, /*Volatile=*/false,
                                Ty: CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(
      LocalVD: LHSVD,
      // Pull out the pointer to the variable.
      Addr: CGF.EmitLoadOfPointer(
          Ptr: CGF.GetAddrOfLocalVar(VD: &ParamInOut)
              .withElementType(ElemTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
          PtrTy: C.getPointerType(T: LHSVD->getType())->castAs<PointerType>()));
  PrivateScope.addPrivate(
      LocalVD: RHSVD,
      // Pull out the pointer to the variable.
      Addr: CGF.EmitLoadOfPointer(
          Ptr: CGF.GetAddrOfLocalVar(VD: &ParamIn).withElementType(
              ElemTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
          PtrTy: C.getPointerType(T: RHSVD->getType())->castAs<PointerType>()));
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, LHS: cast<DeclRefExpr>(Val: LHS),
      RHS: cast<DeclRefExpr>(Val: RHS));
  CGF.FinishFunction();
  return Fn;
}
5820
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// Returns nullptr when the reduction item needs no cleanups (so the runtime
/// gets a null reduce_fini pointer).
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamKind::Other);
  Args.emplace_back(Args: &Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(Info: FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(Parts: {"red_fini", ""});
  auto *Fn = llvm::Function::Create(Ty: FnTy, Linkage: llvm::GlobalValue::InternalLinkage,
                                    N: Name, M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: FnInfo);
  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
    Fn->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo, Args, Loc, StartLoc: Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      Ptr: CGF.GetAddrOfLocalVar(VD: &Param), PtrTy: C.VoidPtrTy.castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, VarType: CGM.getContext().getSizeType(),
        Name: generateUniqueName(CGM, Prefix: "reduction_size", Ref: RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(Addr: SizeAddr, /*Volatile=*/false,
                                Ty: CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(EndLoc: Loc);
  return Fn;
}
5870
/// Emits the task-reduction initialization for the given directive data:
/// builds an array of kmp_taskred_input_t records (one per reduction item)
/// and passes it to __kmpc_taskred_init or, for reductions with a task
/// modifier, __kmpc_taskred_modifier_init. Returns the taskgroup data
/// pointer produced by the runtime, or nullptr if there is nothing to do.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  // void *reduce_shar; // shared reduction item
  // void *reduce_orig; // original reduction item used for initialization
  // size_t reduce_size; // size of data item
  // void *reduce_init; // data initialization routine
  // void *reduce_fini; // data finalization routine
  // void *reduce_comb; // data combiner routine
  // kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord(Name: "kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, DC: RD, FieldTy: C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  CanQualType RDType = C.getCanonicalTagType(TD: RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType =
      C.getConstantArrayType(EltTy: RDType, ArySize: ArraySize, SizeExpr: nullptr,
                             ASM: ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  RawAddress TaskRedInput = CGF.CreateMemTemp(T: ArrayRDType, Name: ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one kmp_taskred_input_t record per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(Ty: CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(Ty: CGM.SizeTy, V: Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        ElemTy: TaskRedInput.getElementType(), Ptr: TaskRedInput.getPointer(), IdxList: Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        Name: ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(V: GEP, T: RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: SharedFD);
    RCG.emitSharedOrigLValue(CGF, N: Cnt);
    llvm::Value *Shared = RCG.getSharedLValue(N: Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(value: Shared, lvalue: SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: OrigFD);
    llvm::Value *Orig = RCG.getOrigLValue(N: Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(value: Orig, lvalue: OrigLVal);
    RCG.emitAggregateType(CGF, N: Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(args&: SizeValInChars, args&: SizeVal) = RCG.getSizes(N: Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(V: SizeValInChars, DestTy: CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: SizeFD);
    CGF.EmitStoreOfScalar(value: SizeValInChars, lvalue: SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: InitFD);
    llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, N: Cnt);
    CGF.EmitStoreOfScalar(value: InitAddr, lvalue: InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, N: Cnt);
    // No finalizer is emitted for items without cleanups; store null instead.
    llvm::Value *FiniAddr =
        Fini ? Fini : llvm::ConstantPointerNull::get(T: CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(value: FiniAddr, lvalue: FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: CombFD);
    llvm::Value *CombAddr = emitReduceCombFunction(
        CGM, Loc, RCG, N: Cnt, ReductionOp: Data.ReductionOps[Cnt], LHS: LHSExprs[Cnt],
        RHS: RHSExprs[Cnt], PrivateRef: Data.ReductionCopies[Cnt]);
    CGF.EmitStoreOfScalar(value: CombAddr, lvalue: CombLVal);
    // ElemLVal.flags = 0;
    // Flag value 1 requests delayed creation (VLA/array-section items).
    LValue FlagsLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          value: llvm::ConstantInt::get(Ty: CGM.Int32Ty, /*V=*/1, /*isSigned=*/IsSigned: true),
          lvalue: FlagsLVal);
    } else
      CGF.EmitNullInitialization(DestPtr: FlagsLVal.getAddress(), Ty: FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(V: getThreadID(CGF, Loc),
                                                  DestTy: CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(Ty: CGM.IntTy, V: Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/IsSigned: true),
        llvm::ConstantInt::get(Ty: CGM.IntTy, V: Size, /*isSigned=*/IsSigned: true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            V: TaskRedInput.getPointer(), DestTy: CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        callee: OMPBuilder.getOrCreateRuntimeFunction(
            M&: CGM.getModule(), FnID: OMPRTL___kmpc_taskred_modifier_init),
        args: Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(V: getThreadID(CGF, Loc), DestTy: CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(Ty: CGM.IntTy, V: Size, /*isSigned=*/IsSigned: true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(V: TaskRedInput.getPointer(),
                                                      DestTy: CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                                 M&: CGM.getModule(), FnID: OMPRTL___kmpc_taskred_init),
                             args: Args);
}
5995
/// Finishes a reduction-with-task-modifier region by calling the runtime's
/// modifier finalization entry point.
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
  // int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(V: getThreadID(CGF, Loc),
                                                DestTy: CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(Ty: CGM.IntTy,
                                                V: IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/IsSigned: true)};
  (void)CGF.EmitRuntimeCall(
      callee: OMPBuilder.getOrCreateRuntimeFunction(
          M&: CGM.getModule(), FnID: OMPRTL___kmpc_task_reduction_modifier_fini),
      args: Args);
}
6013
/// For reduction items whose size is not a compile-time constant (VLAs/array
/// sections), stores the runtime-computed size into the artificial
/// threadprivate variable read back by the init/fini helper functions.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr); for constant-sized items nothing is needed.
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(V: Sizes.second, DestTy: CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, VarType: CGM.getContext().getSizeType(),
        Name: generateUniqueName(CGM, Prefix: "reduction_size", Ref: RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(Val: SizeVal, Addr: SizeAddr, /*IsVolatile=*/false);
  }
}
6030
/// Returns the address of the current thread's private copy of the given
/// shared reduction item by querying the runtime.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(V: getThreadID(CGF, Loc),
                                                   DestTy: CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             V: SharedLVal.getPointer(CGF), DestTy: CGM.VoidPtrTy)};
  // Wrap the returned void* in an Address, reusing the shared item's
  // alignment for the thread-specific copy.
  return Address(
      CGF.EmitRuntimeCall(
          callee: OMPBuilder.getOrCreateRuntimeFunction(
              M&: CGM.getModule(), FnID: OMPRTL___kmpc_task_reduction_get_th_data),
          args: Args),
      CGF.Int8Ty, SharedLVal.getAlignment());
}
6050
/// Emits code for a 'taskwait' directive: either via the OpenMPIRBuilder
/// (no dependences) or via the libomp entry points, with or without a
/// dependence list.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(Loc: CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    // Materialize the depend clause (if any) into a runtime dependence array.
    std::tie(args&: NumOfElements, args&: DependenciesArray) =
        emitDependClause(CGF, Dependencies: Data.Dependences, Loc);
    if (!Data.Dependences.empty()) {
      llvm::Value *DepWaitTaskArgs[7];
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
      // No noalias dependences are passed: count 0 and a null list.
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(C: 0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
      DepWaitTaskArgs[6] =
          llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: Data.HasNowaitClause);

      // Run any cleanups from the dependence array emission before leaving.
      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
      // kmp_int32 has_no_wait); if dependence info is specified.
      CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                              M, FnID: OMPRTL___kmpc_omp_taskwait_deps_51),
                          args: DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          callee: OMPBuilder.getOrCreateRuntimeFunction(M, FnID: OMPRTL___kmpc_omp_taskwait),
          args: Args);
    }
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
6103
/// Emits the body of an OpenMP directive inlined into the current function,
/// wrapping it in an inlined-region RAII scope.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  // The last constructor argument is false only for critical/master/masked
  // regions; see InlinedOpenMPRegionRAII for its exact effect.
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
                                 InnerKind != OMPD_critical &&
                                     InnerKind != OMPD_master &&
                                     InnerKind != OMPD_masked);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}
6116
namespace {
/// Cancellation kind constants passed as the 'cncl_kind' argument to the
/// __kmpc_cancel / __kmpc_cancellationpoint runtime calls below.
enum RTCancelKind {
  CancelNoreq = 0,     // no cancellation requested
  CancelParallel = 1,  // cancel 'parallel' region
  CancelLoop = 2,      // cancel worksharing loop ('for')
  CancelSections = 3,  // cancel 'sections' region
  CancelTaskgroup = 4  // cancel 'taskgroup' region
};
} // anonymous namespace
6126
6127static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6128 RTCancelKind CancelKind = CancelNoreq;
6129 if (CancelRegion == OMPD_parallel)
6130 CancelKind = CancelParallel;
6131 else if (CancelRegion == OMPD_for)
6132 CancelKind = CancelLoop;
6133 else if (CancelRegion == OMPD_sections)
6134 CancelKind = CancelSections;
6135 else {
6136 assert(CancelRegion == OMPD_taskgroup);
6137 CancelKind = CancelTaskgroup;
6138 }
6139 return CancelKind;
6140}
6141
/// Emits a '#pragma omp cancellation point' for the given region kind:
/// calls __kmpc_cancellationpoint and, if it signals cancellation, exits
/// the construct (after a cancel barrier for parallel regions).
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(C: getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          callee: OMPBuilder.getOrCreateRuntimeFunction(
              M&: CGM.getModule(), FnID: OMPRTL___kmpc_cancellationpoint),
          args: Args);
      // if (__kmpc_cancellationpoint()) {
      // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      // exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: ".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(name: ".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Arg: Result);
      CGF.Builder.CreateCondBr(Cond: Cmp, True: ExitBB, False: ContBB);
      CGF.EmitBlock(BB: ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, Kind: OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(Kind: OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(Dest: CancelDest);
      CGF.EmitBlock(BB: ContBB, /*IsFinished=*/true);
    }
  }
}
6181
/// Emits a '#pragma omp cancel' for the given region kind, guarded by the
/// optional 'if' clause condition: calls __kmpc_cancel and, on a non-zero
/// result, exits the construct (after a cancel barrier for parallel).
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo)) {
    // The 'then' side of the optional 'if' clause: perform the actual cancel.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(C: getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          callee: OMPBuilder.getOrCreateRuntimeFunction(M, FnID: OMPRTL___kmpc_cancel), args: Args);
      // if (__kmpc_cancel()) {
      // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      // exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: ".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(name: ".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Arg: Result);
      CGF.Builder.CreateCondBr(Cond: Cmp, True: ExitBB, False: ContBB);
      CGF.EmitBlock(BB: ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, Kind: OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(Kind: OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(Dest: CancelDest);
      CGF.EmitBlock(BB: ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // 'else' side is a no-op: cancel is simply skipped.
      emitIfClause(CGF, Cond: IfCond, ThenGen,
                   ElseGen: [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6227
namespace {
/// Cleanup action for uses_allocators support: initializes each listed
/// allocator on region entry and destroys it on region exit.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  /// (allocator expr, allocator-traits expr) pairs from the clause.
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  /// Emits __kmpc_init_allocator calls for every (allocator, traits) pair.
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, Allocator: AllocatorData.first, AllocatorTraits: AllocatorData.second);
    }
  }
  /// Emits matching __kmpc_destroy_allocator calls on region exit.
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        Allocator: AllocatorData.first);
    }
  }
};
} // namespace
6255
6256void CGOpenMPRuntime::emitTargetOutlinedFunction(
6257 const OMPExecutableDirective &D, StringRef ParentName,
6258 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6259 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6260 assert(!ParentName.empty() && "Invalid target entry parent name!");
6261 HasEmittedTargetRegion = true;
6262 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6263 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6264 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6265 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6266 if (!D.AllocatorTraits)
6267 continue;
6268 Allocators.emplace_back(Args: D.Allocator, Args: D.AllocatorTraits);
6269 }
6270 }
6271 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6272 CodeGen.setAction(UsesAllocatorAction);
6273 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6274 IsOffloadEntry, CodeGen);
6275}
6276
/// Emits initialization of a uses_allocators allocator: calls
/// __kmpc_init_allocator with the traits array and stores the resulting
/// handle into the allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Loc: Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(V: ThreadId, DestTy: CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
  // Number of traits is the constant array bound of the traits expression.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      Ty: CGF.IntTy, V: cast<ConstantArrayType>(
                     Val: AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(E: AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr: AllocatorTraitsLVal.getAddress(), Ty: CGF.VoidPtrPtrTy, ElementTy: CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, T: CGF.getContext().VoidPtrTy,
                                           BaseInfo: AllocatorTraitsLVal.getBaseInfo(),
                                           TBAAInfo: AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits = Addr.emitRawPointer(CGF);

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                              M&: CGM.getModule(), FnID: OMPRTL___kmpc_init_allocator),
                          args: {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  // Ensure the allocator variable has storage before writing the handle.
  CGF.EmitAutoVarAlloca(var: *cast<VarDecl>(
      Val: cast<DeclRefExpr>(Val: Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(E: Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(Src: AllocatorVal, SrcTy: CGF.getContext().VoidPtrTy,
                               DstTy: Allocator->getType(), Loc: Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(value: AllocatorVal, lvalue: AllocatorLVal);
}
6310
/// Emits destruction of a uses_allocators allocator: loads the stored handle
/// and passes it to __kmpc_destroy_allocator.
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Loc: Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(V: ThreadId, DestTy: CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(E: Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(lvalue: AllocatorLVal, Loc: Allocator->getExprLoc());
  // Convert the handle back to void* as expected by the runtime.
  AllocatorVal = CGF.EmitScalarConversion(Src: AllocatorVal, SrcTy: Allocator->getType(),
                                          DstTy: CGF.getContext().VoidPtrTy,
                                          Loc: Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(),
                                            FnID: OMPRTL___kmpc_destroy_allocator),
      args: {ThreadId, AllocatorVal});
}
6326
/// Computes default min/max teams and threads attributes for a target kernel
/// from the directive's clauses and any ompx_attribute launch-bounds /
/// flat-work-group-size attributes, tightening bounds where both apply.
void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
    const OMPExecutableDirective &D, CodeGenFunction &CGF,
    llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
  assert(Attrs.MaxTeams.size() == 1 && Attrs.MaxThreads.size() == 1 &&
         "invalid default attrs structure");
  int32_t &MaxTeamsVal = Attrs.MaxTeams.front();
  int32_t &MaxThreadsVal = Attrs.MaxThreads.front();

  // Start from the clause-derived values (num_teams / thread limits).
  getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal&: Attrs.MinTeams, MaxTeamsVal);
  getNumThreadsExprForTargetDirective(CGF, D, UpperBound&: MaxThreadsVal,
                                      /*UpperBoundOnly=*/true);

  // Then fold in any ompx_attribute-provided bounds.
  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
      int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
      if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(Val: A))
        CGM.handleCUDALaunchBoundsAttr(F: nullptr, A: Attr, MaxThreadsVal: &AttrMaxThreadsVal,
                                       MinBlocksVal: &AttrMinBlocksVal, MaxClusterRankVal: &AttrMaxBlocksVal);
      else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(Val: A))
        CGM.handleAMDGPUFlatWorkGroupSizeAttr(
            F: nullptr, A: Attr, /*ReqdWGS=*/nullptr, MinThreadsVal: &AttrMinThreadsVal,
            MaxThreadsVal: &AttrMaxThreadsVal);
      else
        continue;

      // Raise minimums; lower maximums (a value <= 0 means "unbounded").
      Attrs.MinThreads = std::max(a: Attrs.MinThreads, b: AttrMinThreadsVal);
      if (AttrMaxThreadsVal > 0)
        MaxThreadsVal = MaxThreadsVal > 0
                            ? std::min(a: MaxThreadsVal, b: AttrMaxThreadsVal)
                            : AttrMaxThreadsVal;
      Attrs.MinTeams = std::max(a: Attrs.MinTeams, b: AttrMinBlocksVal);
      if (AttrMaxBlocksVal > 0)
        MaxTeamsVal = MaxTeamsVal > 0 ? std::min(a: MaxTeamsVal, b: AttrMaxBlocksVal)
                                      : AttrMaxBlocksVal;
    }
  }
}
6365
/// Generates the outlined function for a target region via the
/// OpenMPIRBuilder and applies target-specific attributes to the result.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {

  llvm::TargetRegionEntryInfo EntryInfo =
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder, BeginLoc: D.getBeginLoc(), ParentName);

  CodeGenFunction CGF(CGM, true);
  // Callback the IRBuilder invokes with the chosen entry-point name to
  // actually emit the captured statement as a function.
  llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
      [&CGF, &D, &CodeGen](StringRef EntryFnName) {
        const CapturedStmt &CS = *D.getCapturedStmt(RegionKind: OMPD_target);

        CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        return CGF.GenerateOpenMPCapturedStmtFunction(S: CS, D);
      };

  cantFail(Err: OMPBuilder.emitTargetRegionFunction(
      EntryInfo, GenerateFunctionCallback&: GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
      OutlinedFnID));

  // The builder may decline to produce a function (e.g. nothing to offload).
  if (!OutlinedFn)
    return;

  CGM.getTargetCodeGenInfo().setTargetAttributes(D: nullptr, GV: OutlinedFn, M&: CGM);

  // Propagate ompx_attribute waves-per-EU hints onto the outlined function.
  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(Val: A))
        CGM.handleAMDGPUWavesPerEUAttr(F: OutlinedFn, A: Attr);
    }
  }
  registerVTable(D);
}
6401
6402/// Checks if the expression is constant or does not have non-trivial function
6403/// calls.
6404static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6405 // We can skip constant expressions.
6406 // We can skip expressions with trivial calls or simple expressions.
6407 return (E->isEvaluatable(Ctx, AllowSideEffects: Expr::SE_AllowUndefinedBehavior) ||
6408 !E->hasNonTrivialCall(Ctx)) &&
6409 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6410}
6411
/// Returns the single meaningful child statement of \p Body, descending
/// through compound statements and ignoring trivial expressions, no-op
/// directives, and harmless declarations; returns nullptr if there is more
/// than one such child.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Val: Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(Val: S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(Val: S) || isa<NullStmt>(Val: S) || isa<OMPFlushDirective>(Val: S) ||
          isa<OMPBarrierDirective>(Val: S) || isa<OMPTaskyieldDirective>(Val: S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(Val: S)) {
        // A DeclStmt is ignorable when every declaration in it is either a
        // non-variable entity or an unused/global variable.
        if (llvm::all_of(Range: DS->decls(), P: [](const Decl *D) {
              if (isa<EmptyDecl>(Val: D) || isa<DeclContext>(Val: D) ||
                  isa<TypeDecl>(Val: D) || isa<PragmaCommentDecl>(Val: D) ||
                  isa<PragmaDetectMismatchDecl>(Val: D) || isa<UsingDecl>(Val: D) ||
                  isa<UsingDirectiveDecl>(Val: D) ||
                  isa<OMPDeclareReductionDecl>(Val: D) ||
                  isa<OMPThreadPrivateDecl>(Val: D) || isa<OMPAllocateDecl>(Val: D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(Val: D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Descend into the child in case it is itself a container.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6453
/// Returns the num_teams expression for a target-based directive, if any,
/// and sets \p MinTeamsVal / \p MaxTeamsVal to a known constant range:
/// a constant value when it can be evaluated, 1 for directives implying a
/// single team, 0 when a runtime expression must be used, and -1 for plain
/// 'target' with no recognizable nested directive.
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    // For a plain 'target', look for a nested 'teams' directive carrying the
    // num_teams clause.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(Ctx&: CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(Val: ChildStmt)) {
      if (isOpenMPTeamsDirective(DKind: NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()
                                     ->getNumTeams()
                                     .front();
          if (NumTeams->isIntegerConstantExpr(Ctx: CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(Ctx: CGF.getContext()))
              MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
          return NumTeams;
        }
        MinTeamsVal = MaxTeamsVal = 0;
        return nullptr;
      }
      MinTeamsVal = MaxTeamsVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region
    MinTeamsVal = MaxTeamsVal = -1;
    return nullptr;
  }
  case OMPD_target_teams_loop:
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams directives carry num_teams directly.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams().front();
      if (NumTeams->isIntegerConstantExpr(Ctx: CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(Ctx: CGF.getContext()))
          MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
      return NumTeams;
    }
    MinTeamsVal = MaxTeamsVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_parallel_loop:
  case OMPD_target_simd:
    // These directives always execute with exactly one team.
    MinTeamsVal = MaxTeamsVal = 1;
    return nullptr;
  // Every remaining kind is not a target execution directive and is
  // rejected by the assert above; listed to keep the switch exhaustive.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_loop:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6581
// Emit a host-side i32 value for the number of teams of a target directive:
// either the evaluated num_teams clause expression, or the constant bound
// returned by getNumTeamsExprForTargetDirective when no expression applies.
llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  // MinNT/MaxNT of -1 mean "no constant bound determined".
  int32_t MinNT = -1, MaxNT = -1;
  const Expr *NumTeams =
      getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal&: MinNT, MaxTeamsVal&: MaxNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      // For a plain 'target', the num_teams expression comes from a nested
      // teams directive, so it must be emitted in the inner captured scope.
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(E: NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(V: NumTeamsVal, DestTy: CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      // Combined target+teams: the clause is on this directive; emit it in a
      // fresh cleanup scope of the current function.
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(E: NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(V: NumTeamsVal, DestTy: CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    default:
      break;
    }
  }

  // No expression: fall back to the constant value computed above.
  // NOTE(review): the message says "Num threads" but this is the num-teams
  // range; consider rewording if ranges ever become expressible here.
  assert(MinNT == MaxNT && "Num threads ranges require handling here.");
  return llvm::ConstantInt::getSigned(Ty: CGF.Int32Ty, V: MinNT);
}
6623
6624/// Check for a num threads constant value (stored in \p DefaultVal), or
6625/// expression (stored in \p E). If the value is conditional (via an if-clause),
6626/// store the condition in \p CondVal. If \p E, and \p CondVal respectively, are
6627/// nullptr, no expression evaluation is perfomed.
6628static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6629 const Expr **E, int32_t &UpperBound,
6630 bool UpperBoundOnly, llvm::Value **CondVal) {
6631 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6632 Ctx&: CGF.getContext(), Body: CS->getCapturedStmt());
6633 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child);
6634 if (!Dir)
6635 return;
6636
6637 if (isOpenMPParallelDirective(DKind: Dir->getDirectiveKind())) {
6638 // Handle if clause. If if clause present, the number of threads is
6639 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6640 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6641 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6642 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6643 const OMPIfClause *IfClause = nullptr;
6644 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6645 if (C->getNameModifier() == OMPD_unknown ||
6646 C->getNameModifier() == OMPD_parallel) {
6647 IfClause = C;
6648 break;
6649 }
6650 }
6651 if (IfClause) {
6652 const Expr *CondExpr = IfClause->getCondition();
6653 bool Result;
6654 if (CondExpr->EvaluateAsBooleanCondition(Result, Ctx: CGF.getContext())) {
6655 if (!Result) {
6656 UpperBound = 1;
6657 return;
6658 }
6659 } else {
6660 CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
6661 if (const auto *PreInit =
6662 cast_or_null<DeclStmt>(Val: IfClause->getPreInitStmt())) {
6663 for (const auto *I : PreInit->decls()) {
6664 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6665 CGF.EmitVarDecl(D: cast<VarDecl>(Val: *I));
6666 } else {
6667 CodeGenFunction::AutoVarEmission Emission =
6668 CGF.EmitAutoVarAlloca(var: cast<VarDecl>(Val: *I));
6669 CGF.EmitAutoVarCleanups(emission: Emission);
6670 }
6671 }
6672 *CondVal = CGF.EvaluateExprAsBool(E: CondExpr);
6673 }
6674 }
6675 }
6676 }
6677 // Check the value of num_threads clause iff if clause was not specified
6678 // or is not evaluated to false.
6679 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6680 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6681 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6682 const auto *NumThreadsClause =
6683 Dir->getSingleClause<OMPNumThreadsClause>();
6684 const Expr *NTExpr = NumThreadsClause->getNumThreads();
6685 if (NTExpr->isIntegerConstantExpr(Ctx: CGF.getContext()))
6686 if (auto Constant = NTExpr->getIntegerConstantExpr(Ctx: CGF.getContext()))
6687 UpperBound =
6688 UpperBound
6689 ? Constant->getZExtValue()
6690 : std::min(a: UpperBound,
6691 b: static_cast<int32_t>(Constant->getZExtValue()));
6692 // If we haven't found a upper bound, remember we saw a thread limiting
6693 // clause.
6694 if (UpperBound == -1)
6695 UpperBound = 0;
6696 if (!E)
6697 return;
6698 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6699 if (const auto *PreInit =
6700 cast_or_null<DeclStmt>(Val: NumThreadsClause->getPreInitStmt())) {
6701 for (const auto *I : PreInit->decls()) {
6702 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6703 CGF.EmitVarDecl(D: cast<VarDecl>(Val: *I));
6704 } else {
6705 CodeGenFunction::AutoVarEmission Emission =
6706 CGF.EmitAutoVarAlloca(var: cast<VarDecl>(Val: *I));
6707 CGF.EmitAutoVarCleanups(emission: Emission);
6708 }
6709 }
6710 }
6711 *E = NTExpr;
6712 }
6713 return;
6714 }
6715 if (isOpenMPSimdDirective(DKind: Dir->getDirectiveKind()))
6716 UpperBound = 1;
6717}
6718
// Determine the number-of-threads expression (and/or a constant upper bound)
// for a target-based executable directive, consulting thread_limit,
// num_threads and if clauses on the directive and its nested regions.
// \p UpperBound: in/out; -1 = unknown, 0 = limited but no constant bound,
// otherwise a constant bound. \p CondVal receives a runtime if-clause
// condition, \p ThreadLimitExpr the chosen thread_limit expression.
const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
    bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
  assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  // NT collects the num_threads expression; it is only tracked when the
  // caller wants more than the constant bound.
  const Expr *NT = nullptr;
  const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;

  // Fold a clause expression into UpperBound if it is an integer constant,
  // and optionally hand the expression back through EPtr.
  auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
    if (E->isIntegerConstantExpr(Ctx: CGF.getContext())) {
      // NOTE(review): the ternary arms look inverted w.r.t. the min-combining
      // intent (-1 = unset, so any nonzero prior bound is overwritten instead
      // of min-combined, and the min branch only runs when UpperBound == 0).
      // Confirm against the intended semantics before relying on the bound.
      if (auto Constant = E->getIntegerConstantExpr(Ctx: CGF.getContext()))
        UpperBound = UpperBound ? Constant->getZExtValue()
                                : std::min(a: UpperBound,
                                           b: int32_t(Constant->getZExtValue()));
    }
    // If we haven't found a upper bound, remember we saw a thread limiting
    // clause.
    if (UpperBound == -1)
      UpperBound = 0;
    if (EPtr)
      *EPtr = E;
  };

  // Record that the region runs with a single thread.
  auto ReturnSequential = [&]() {
    UpperBound = 1;
    return NT;
  };

  switch (DirectiveKind) {
  case OMPD_target: {
    // Thread count may come from a directive nested inside the target region.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, E: NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        Ctx&: CGF.getContext(), Body: CS->getCapturedStmt());
    // TODO: The standard is not clear how to resolve two thread limit clauses,
    // let's pick the teams one if it's present, otherwise the target one.
    const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child)) {
      if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
        ThreadLimitClause = TLC;
        if (ThreadLimitExpr) {
          // The inner clause's expression must be emitted in the inner
          // captured scope, with its pre-init declarations materialized.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          CodeGenFunction::LexicalScope Scope(
              CGF,
              ThreadLimitClause->getThreadLimit().front()->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(Val: ThreadLimitClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(D: cast<VarDecl>(Val: *I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(var: cast<VarDecl>(Val: *I));
                CGF.EmitAutoVarCleanups(emission: Emission);
              }
            }
          }
        }
      }
    }
    if (ThreadLimitClause)
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child)) {
      // Look through a teams (non-distribute) directive to the region nested
      // inside it, which may be a parallel or simd construct.
      if (isOpenMPTeamsDirective(DKind: Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(DKind: Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            Ctx&: CGF.getContext(), Body: CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child);
      }
      if (Dir && isOpenMPParallelDirective(DKind: Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, E: NTPtr, UpperBound, UpperBoundOnly, CondVal);
      } else if (Dir && isOpenMPSimdDirective(DKind: Dir->getDirectiveKind()))
        return ReturnSequential();
    }
    return NT;
  }
  case OMPD_target_teams: {
    // thread_limit sits on this directive; num_threads may sit on a nested
    // parallel region (possibly below a nested distribute).
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, E: NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        Ctx&: CGF.getContext(), Body: CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, E: NTPtr, UpperBound, UpperBoundOnly, CondVal);
      }
    }
    return NT;
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    }
    getNumThreads(CGF, CS: D.getInnermostCapturedStmt(), E: NTPtr, UpperBound,
                  UpperBoundOnly, CondVal);
    return NT;
  case OMPD_target_teams_loop:
  case OMPD_target_parallel_loop:
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+parallel forms: if, thread_limit and num_threads all
    // live on this directive.
    if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, Ctx: CGF.getContext())) {
          // Constant-false if clause: sequential execution.
          if (!Result)
            return ReturnSequential();
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          *CondVal = CGF.EvaluateExprAsBool(E: Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
      return NumThreadsClause->getNumThreads();
    }
    return NT;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return ReturnSequential();
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
6883
// Emit a host-side i32 value for the number of threads of a target directive,
// combining the num_threads expression, thread_limit, and any if-clause
// condition: <cond> ? (<numthreads> ? <numthreads> : 0) : 1, clamped by
// thread_limit where both are present. 0 means "runtime choice".
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  llvm::Value *NumThreadsVal = nullptr;
  llvm::Value *CondVal = nullptr;
  llvm::Value *ThreadLimitVal = nullptr;
  const Expr *ThreadLimitExpr = nullptr;
  int32_t UpperBound = -1;

  const Expr *NT = getNumThreadsExprForTargetDirective(
      CGF, D, UpperBound, /* UpperBoundOnly */ false, CondVal: &CondVal,
      ThreadLimitExpr: &ThreadLimitExpr);

  // Thread limit expressions are used below, emit them.
  if (ThreadLimitExpr) {
    ThreadLimitVal =
        CGF.EmitScalarExpr(E: ThreadLimitExpr, /*IgnoreResultAssign=*/true);
    ThreadLimitVal = CGF.Builder.CreateIntCast(V: ThreadLimitVal, DestTy: CGF.Int32Ty,
                                               /*isSigned=*/false);
  }

  // Generate the num threads expression.
  if (UpperBound == 1) {
    // A constant bound of 1 (sequential region) wins outright.
    NumThreadsVal = CGF.Builder.getInt32(C: UpperBound);
  } else if (NT) {
    NumThreadsVal = CGF.EmitScalarExpr(E: NT, /*IgnoreResultAssign=*/true);
    NumThreadsVal = CGF.Builder.CreateIntCast(V: NumThreadsVal, DestTy: CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else if (ThreadLimitVal) {
    // If we do not have a num threads value but a thread limit, replace the
    // former with the latter. We already handled the thread limit expression.
    NumThreadsVal = ThreadLimitVal;
    ThreadLimitVal = nullptr;
  } else {
    // Default to "0" which means runtime choice.
    assert(!ThreadLimitVal && "Default not applicable with thread limit value");
    NumThreadsVal = CGF.Builder.getInt32(C: 0);
  }

  // Handle if clause. If if clause present, the number of threads is
  // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
  if (CondVal) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    NumThreadsVal = CGF.Builder.CreateSelect(C: CondVal, True: NumThreadsVal,
                                             False: CGF.Builder.getInt32(C: 1));
  }

  // If the thread limit and num teams expression were present, take the
  // minimum.
  if (ThreadLimitVal) {
    NumThreadsVal = CGF.Builder.CreateSelect(
        C: CGF.Builder.CreateICmpULT(LHS: ThreadLimitVal, RHS: NumThreadsVal),
        True: ThreadLimitVal, False: NumThreadsVal);
  }

  return NumThreadsVal;
}
6940
namespace {
// Enable bitwise operators on the scoped offload-mapping flag enums used in
// this namespace.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
6948class MappableExprsHandler {
6949public:
  /// Custom comparator for attach-pointer expressions that compares them by
  /// complexity (i.e. their component-depth) first, then by the order in which
  /// they were computed by collectAttachPtrExprInfo(), if they are semantically
  /// different.
  struct AttachPtrExprComparator {
    // Owning handler; provides the depth and computation-order maps.
    const MappableExprsHandler &Handler;
    // Cache of previous equality comparison results.
    mutable llvm::DenseMap<std::pair<const Expr *, const Expr *>, bool>
        CachedEqualityComparisons;

    AttachPtrExprComparator(const MappableExprsHandler &H) : Handler(H) {}
    AttachPtrExprComparator() = delete;

    // Return true iff LHS is "less than" RHS.
    bool operator()(const Expr *LHS, const Expr *RHS) const {
      if (LHS == RHS)
        return false;

      // First, compare by complexity (depth)
      const auto ItLHS = Handler.AttachPtrComponentDepthMap.find(Val: LHS);
      const auto ItRHS = Handler.AttachPtrComponentDepthMap.find(Val: RHS);

      // An expression missing from the depth map gets std::nullopt depth.
      std::optional<size_t> DepthLHS =
          (ItLHS != Handler.AttachPtrComponentDepthMap.end()) ? ItLHS->second
                                                              : std::nullopt;
      std::optional<size_t> DepthRHS =
          (ItRHS != Handler.AttachPtrComponentDepthMap.end()) ? ItRHS->second
                                                              : std::nullopt;

      // std::nullopt (no attach pointer) has lowest complexity
      if (!DepthLHS.has_value() && !DepthRHS.has_value()) {
        // Both have same complexity, now check semantic equality
        if (areEqual(LHS, RHS))
          return false;
        // Different semantically, compare by computation order
        return wasComputedBefore(LHS, RHS);
      }
      if (!DepthLHS.has_value())
        return true; // LHS has lower complexity
      if (!DepthRHS.has_value())
        return false; // RHS has lower complexity

      // Both have values, compare by depth (lower depth = lower complexity)
      if (DepthLHS.value() != DepthRHS.value())
        return DepthLHS.value() < DepthRHS.value();

      // Same complexity, now check semantic equality
      if (areEqual(LHS, RHS))
        return false;
      // Different semantically, compare by computation order
      return wasComputedBefore(LHS, RHS);
    }

  public:
    /// Return true if \p LHS and \p RHS are semantically equal. Uses pre-cached
    /// results, if available, otherwise does a recursive semantic comparison.
    bool areEqual(const Expr *LHS, const Expr *RHS) const {
      // Check cache first for faster lookup
      const auto CachedResultIt = CachedEqualityComparisons.find(Val: {LHS, RHS});
      if (CachedResultIt != CachedEqualityComparisons.end())
        return CachedResultIt->second;

      bool ComparisonResult = areSemanticallyEqual(LHS, RHS);

      // Cache the result for future lookups (both orders since semantic
      // equality is commutative)
      CachedEqualityComparisons[{LHS, RHS}] = ComparisonResult;
      CachedEqualityComparisons[{RHS, LHS}] = ComparisonResult;
      return ComparisonResult;
    }

    /// Compare the two attach-ptr expressions by their computation order.
    /// Returns true iff LHS was computed before RHS by
    /// collectAttachPtrExprInfo().
    /// NOTE(review): uses DenseMap::at(), so both expressions must already be
    /// registered in AttachPtrComputationOrderMap — confirm callers guarantee
    /// this.
    bool wasComputedBefore(const Expr *LHS, const Expr *RHS) const {
      const size_t &OrderLHS = Handler.AttachPtrComputationOrderMap.at(Val: LHS);
      const size_t &OrderRHS = Handler.AttachPtrComputationOrderMap.at(Val: RHS);

      return OrderLHS < OrderRHS;
    }

  private:
    /// Helper function to compare attach-pointer expressions semantically.
    /// This function handles various expression types that can be part of an
    /// attach-pointer.
    /// TODO: Not urgent, but we should ideally return true when comparing
    /// `p[10]`, `*(p + 10)`, `*(p + 5 + 5)`, `p[10:1]` etc.
    bool areSemanticallyEqual(const Expr *LHS, const Expr *RHS) const {
      if (LHS == RHS)
        return true;

      // If only one is null, they aren't equal
      if (!LHS || !RHS)
        return false;

      ASTContext &Ctx = Handler.CGF.getContext();
      // Strip away parentheses and no-op casts to get to the core expression
      LHS = LHS->IgnoreParenNoopCasts(Ctx);
      RHS = RHS->IgnoreParenNoopCasts(Ctx);

      // Direct pointer comparison of the underlying expressions
      if (LHS == RHS)
        return true;

      // Check if the expression classes match
      if (LHS->getStmtClass() != RHS->getStmtClass())
        return false;

      // From here on both sides have the same statement class, so a failed
      // dyn_cast on RHS mirrors LHS's class checks.

      // Handle DeclRefExpr (variable references)
      if (const auto *LD = dyn_cast<DeclRefExpr>(Val: LHS)) {
        const auto *RD = dyn_cast<DeclRefExpr>(Val: RHS);
        if (!RD)
          return false;
        return LD->getDecl()->getCanonicalDecl() ==
               RD->getDecl()->getCanonicalDecl();
      }

      // Handle ArraySubscriptExpr (array indexing like a[i])
      if (const auto *LA = dyn_cast<ArraySubscriptExpr>(Val: LHS)) {
        const auto *RA = dyn_cast<ArraySubscriptExpr>(Val: RHS);
        if (!RA)
          return false;
        return areSemanticallyEqual(LHS: LA->getBase(), RHS: RA->getBase()) &&
               areSemanticallyEqual(LHS: LA->getIdx(), RHS: RA->getIdx());
      }

      // Handle MemberExpr (member access like s.m or p->m)
      if (const auto *LM = dyn_cast<MemberExpr>(Val: LHS)) {
        const auto *RM = dyn_cast<MemberExpr>(Val: RHS);
        if (!RM)
          return false;
        if (LM->getMemberDecl()->getCanonicalDecl() !=
            RM->getMemberDecl()->getCanonicalDecl())
          return false;
        return areSemanticallyEqual(LHS: LM->getBase(), RHS: RM->getBase());
      }

      // Handle UnaryOperator (unary operations like *p, &x, etc.)
      if (const auto *LU = dyn_cast<UnaryOperator>(Val: LHS)) {
        const auto *RU = dyn_cast<UnaryOperator>(Val: RHS);
        if (!RU)
          return false;
        if (LU->getOpcode() != RU->getOpcode())
          return false;
        return areSemanticallyEqual(LHS: LU->getSubExpr(), RHS: RU->getSubExpr());
      }

      // Handle BinaryOperator (binary operations like p + offset)
      if (const auto *LB = dyn_cast<BinaryOperator>(Val: LHS)) {
        const auto *RB = dyn_cast<BinaryOperator>(Val: RHS);
        if (!RB)
          return false;
        if (LB->getOpcode() != RB->getOpcode())
          return false;
        return areSemanticallyEqual(LHS: LB->getLHS(), RHS: RB->getLHS()) &&
               areSemanticallyEqual(LHS: LB->getRHS(), RHS: RB->getRHS());
      }

      // Handle ArraySectionExpr (array sections like a[0:1])
      // Attach pointers should not contain array-sections, but currently we
      // don't emit an error.
      if (const auto *LAS = dyn_cast<ArraySectionExpr>(Val: LHS)) {
        const auto *RAS = dyn_cast<ArraySectionExpr>(Val: RHS);
        if (!RAS)
          return false;
        return areSemanticallyEqual(LHS: LAS->getBase(), RHS: RAS->getBase()) &&
               areSemanticallyEqual(LHS: LAS->getLowerBound(),
                                    RHS: RAS->getLowerBound()) &&
               areSemanticallyEqual(LHS: LAS->getLength(), RHS: RAS->getLength());
      }

      // Handle CastExpr (explicit casts)
      if (const auto *LC = dyn_cast<CastExpr>(Val: LHS)) {
        const auto *RC = dyn_cast<CastExpr>(Val: RHS);
        if (!RC)
          return false;
        if (LC->getCastKind() != RC->getCastKind())
          return false;
        return areSemanticallyEqual(LHS: LC->getSubExpr(), RHS: RC->getSubExpr());
      }

      // Handle CXXThisExpr (this pointer)
      if (isa<CXXThisExpr>(Val: LHS) && isa<CXXThisExpr>(Val: RHS))
        return true;

      // Handle IntegerLiteral (integer constants)
      if (const auto *LI = dyn_cast<IntegerLiteral>(Val: LHS)) {
        const auto *RI = dyn_cast<IntegerLiteral>(Val: RHS);
        if (!RI)
          return false;
        return LI->getValue() == RI->getValue();
      }

      // Handle CharacterLiteral (character constants)
      if (const auto *LC = dyn_cast<CharacterLiteral>(Val: LHS)) {
        const auto *RC = dyn_cast<CharacterLiteral>(Val: RHS);
        if (!RC)
          return false;
        return LC->getValue() == RC->getValue();
      }

      // Handle FloatingLiteral (floating point constants)
      if (const auto *LF = dyn_cast<FloatingLiteral>(Val: LHS)) {
        const auto *RF = dyn_cast<FloatingLiteral>(Val: RHS);
        if (!RF)
          return false;
        // Use bitwise comparison for floating point literals
        return LF->getValue().bitwiseIsEqual(RHS: RF->getValue());
      }

      // Handle StringLiteral (string constants)
      if (const auto *LS = dyn_cast<StringLiteral>(Val: LHS)) {
        const auto *RS = dyn_cast<StringLiteral>(Val: RHS);
        if (!RS)
          return false;
        return LS->getString() == RS->getString();
      }

      // Handle CXXNullPtrLiteralExpr (nullptr)
      if (isa<CXXNullPtrLiteralExpr>(Val: LHS) && isa<CXXNullPtrLiteralExpr>(Val: RHS))
        return true;

      // Handle CXXBoolLiteralExpr (true/false)
      if (const auto *LB = dyn_cast<CXXBoolLiteralExpr>(Val: LHS)) {
        const auto *RB = dyn_cast<CXXBoolLiteralExpr>(Val: RHS);
        if (!RB)
          return false;
        return LB->getValue() == RB->getValue();
      }

      // Fallback for other forms - use the existing comparison method
      return Expr::isSameComparisonOperand(E1: LHS, E2: RHS);
    }
  };
7184
7185 /// Get the offset of the OMP_MAP_MEMBER_OF field.
7186 static unsigned getFlagMemberOffset() {
7187 unsigned Offset = 0;
7188 for (uint64_t Remain =
7189 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
7190 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
7191 !(Remain & 1); Remain = Remain >> 1)
7192 Offset++;
7193 return Offset;
7194 }
7195
  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    /// \p MapExpr may be omitted when the mapping has no associated clause
    /// expression (e.g. an implicit map of a captured variable).
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };
7212
  // Aliases shared with the OpenMPIRBuilder so the generated arrays can be
  // handed to it directly.
  using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
  using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
  using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
  using MapNonContiguousArrayTy =
      llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
  // Per-entry debug/mapper bookkeeping kept alongside the IRBuilder arrays.
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
  // One map clause occurrence: component list, map kind, modifiers,
  // implicit flag, user-defined mapper, and the originating expression.
  using MapData =
      std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>,
                 bool /*IsImplicit*/, const ValueDecl *, const Expr *>;
  using MapDataArrayTy = SmallVector<MapData, 4>;
7227
7228 /// This structure contains combined information generated for mappable
7229 /// clauses, including base pointers, pointers, sizes, map types, user-defined
7230 /// mappers, and non-contiguous information.
7231 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
7232 MapExprsArrayTy Exprs;
7233 MapValueDeclsArrayTy Mappers;
7234 MapValueDeclsArrayTy DevicePtrDecls;
7235
7236 /// Append arrays in \a CurInfo.
7237 void append(MapCombinedInfoTy &CurInfo) {
7238 Exprs.append(in_start: CurInfo.Exprs.begin(), in_end: CurInfo.Exprs.end());
7239 DevicePtrDecls.append(in_start: CurInfo.DevicePtrDecls.begin(),
7240 in_end: CurInfo.DevicePtrDecls.end());
7241 Mappers.append(in_start: CurInfo.Mappers.begin(), in_end: CurInfo.Mappers.end());
7242 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
7243 }
7244 };
7245
  /// Map between a struct and the its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Mappings computed before the struct range itself was finalized.
    MapCombinedInfoTy PreliminaryMapData;
    /// Lowest mapped field: index within the record and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped field: index within the record and its address.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Address of the whole record.
    Address Base = Address::invalid();
    /// Lower bound address of the mapped range.
    Address LB = Address::invalid();
    /// True when the range came from an array-section mapping.
    bool IsArraySection = false;
    /// True when the entire record is mapped.
    bool HasCompleteRecord = false;
  };
7261
  /// A struct to store the attach pointer and pointee information, to be used
  /// when emitting an attach entry.
  struct AttachInfoTy {
    /// Address of the attach (base) pointer.
    Address AttachPtrAddr = Address::invalid();
    /// Address of the pointee the attach pointer should refer to.
    Address AttachPteeAddr = Address::invalid();
    /// Declaration of the attach pointer, if known.
    const ValueDecl *AttachPtrDecl = nullptr;
    /// Map-clause expression the attach entry originates from.
    const Expr *AttachMapExpr = nullptr;

    /// An entry is usable only once both addresses have been computed.
    bool isValid() const {
      return AttachPtrAddr.isValid() && AttachPteeAddr.isValid();
    }
  };
7274
7275 /// Check if there's any component list where the attach pointer expression
7276 /// matches the given captured variable.
7277 bool hasAttachEntryForCapturedVar(const ValueDecl *VD) const {
7278 for (const auto &AttachEntry : AttachPtrExprMap) {
7279 if (AttachEntry.second) {
7280 // Check if the attach pointer expression is a DeclRefExpr that
7281 // references the captured variable
7282 if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: AttachEntry.second))
7283 if (DRE->getDecl() == VD)
7284 return true;
7285 }
7286 }
7287 return false;
7288 }
7289
7290 /// Get the previously-cached attach pointer for a component list, if-any.
7291 const Expr *getAttachPtrExpr(
7292 OMPClauseMappableExprCommon::MappableExprComponentListRef Components)
7293 const {
7294 const auto It = AttachPtrExprMap.find(Val: Components);
7295 if (It != AttachPtrExprMap.end())
7296 return It->second;
7297
7298 return nullptr;
7299 }
7300
private:
  /// Kind that defines how a device pointer has to be returned.
  struct MapInfo {
    /// Component list describing the mapped expression.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    /// Whether the device pointer must be returned to the caller.
    bool ReturnDevicePointer = false;
    /// Whether the mapping was generated implicitly.
    bool IsImplicit = false;
    /// User-defined mapper associated with the mapping, if any.
    const ValueDecl *Mapper = nullptr;
    /// Original clause expression, for debug information.
    const Expr *VarRef = nullptr;
    /// Whether the entry stems from has_device_addr rather than
    /// use_device_ptr.
    bool ForDeviceAddr = false;
    bool HasUdpFbNullify = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false, bool HasUdpFbNullify = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr),
          HasUdpFbNullify(HasUdpFbNullify) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either a executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Set of defaultmap clause kinds that use firstprivate behavior.
  llvm::SmallSet<OpenMPDefaultmapClauseKind, 4> DefaultmapFirstprivateKinds;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between device addr declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      HasDevAddrsMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;

  /// Map from component lists to their attach pointer expressions.
  llvm::DenseMap<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                 const Expr *>
      AttachPtrExprMap;

  /// Map from attach pointer expressions to their component depth.
  /// nullptr key has std::nullopt depth. This can be used to order attach-ptr
  /// expressions with increasing/decreasing depth.
  /// The component-depth of `nullptr` (i.e. no attach-ptr) is `std::nullopt`.
  /// TODO: Not urgent, but we should ideally use the number of pointer
  /// dereferences in an expr as an indicator of its complexity, instead of the
  /// component-depth. That would be needed for us to treat `p[1]`, `*(p + 10)`,
  /// `*(p + 5 + 5)` together.
  llvm::DenseMap<const Expr *, std::optional<size_t>>
      AttachPtrComponentDepthMap = {{nullptr, std::nullopt}};

  /// Map from attach pointer expressions to the order they were computed in, in
  /// collectAttachPtrExprInfo().
  llvm::DenseMap<const Expr *, size_t> AttachPtrComputationOrderMap = {
      {nullptr, 0}};

  /// An instance of attach-ptr-expr comparator that can be used throughout the
  /// lifetime of this handler.
  AttachPtrExprComparator AttachPtrComparator;
7389
  /// Compute the number of bytes to map for expression \p E, returned as a
  /// size_t-typed llvm::Value.
  ///
  /// Array shaping expressions and array sections need special handling: the
  /// size to map is derived from the shape/section bounds rather than from
  /// the expression's static type. Everything else maps the size of the
  /// expression's (non-reference) canonical type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression: start with the
    // pointee element size and multiply by each dimension's extent.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(Val: E)) {
      llvm::Value *Size =
          CGF.getTypeSize(Ty: OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(E: SE);
        // Dimension expressions may have arbitrary integer type; normalize
        // to size_t before multiplying.
        Sz = CGF.EmitScalarConversion(Src: Sz, SrcTy: SE->getType(),
                                      DstTy: CGF.getContext().getSizeType(),
                                      Loc: SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(LHS: Size, RHS: Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<ArraySectionExpr>(Val: E)) {
      QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
                            Base: OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(Ty: BaseTy);

      // Determine the element size from the pointee type (pointer base) or
      // the element type (array base).
      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(Ty: PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(Val: BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(Ty: ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: size = length * element size.
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(E: LenExpr);
        LengthVal = CGF.EmitScalarConversion(Src: LengthVal, SrcTy: LenExpr->getType(),
                                             DstTy: CGF.getContext().getSizeType(),
                                             Loc: LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LHS: LengthVal, RHS: ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(Ty: BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(E: OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(Src: LBVal, SrcTy: OAE->getLowerBound()->getType(),
                                       DstTy: CGF.getContext().getSizeType(),
                                       Loc: OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LHS: LBVal, RHS: ElemSize);
      // Clamp the result to zero so a lower bound past the end of the base
      // cannot produce a wrapped-around (huge unsigned) size.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LHS: LengthVal, RHS: LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LHS: LengthVal, RHS: LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          C: Cmp, True: TrueVal, False: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 0));
      return LengthVal;
    }
    return CGF.getTypeSize(Ty: ExprTy);
  }
7464
7465 /// Return the corresponding bits for a given map clause modifier. Add
7466 /// a flag marking the map as a pointer if requested. Add a flag marking the
7467 /// map as the first one of a series of maps that relate to the same map
7468 /// expression.
7469 OpenMPOffloadMappingFlags getMapTypeBits(
7470 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7471 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7472 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7473 OpenMPOffloadMappingFlags Bits =
7474 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
7475 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7476 switch (MapType) {
7477 case OMPC_MAP_alloc:
7478 case OMPC_MAP_release:
7479 // alloc and release is the default behavior in the runtime library, i.e.
7480 // if we don't pass any bits alloc/release that is what the runtime is
7481 // going to do. Therefore, we don't need to signal anything for these two
7482 // type modifiers.
7483 break;
7484 case OMPC_MAP_to:
7485 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
7486 break;
7487 case OMPC_MAP_from:
7488 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7489 break;
7490 case OMPC_MAP_tofrom:
7491 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
7492 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7493 break;
7494 case OMPC_MAP_delete:
7495 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
7496 break;
7497 case OMPC_MAP_unknown:
7498 llvm_unreachable("Unexpected map type!");
7499 }
7500 if (AddPtrFlag)
7501 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7502 if (AddIsTargetParamFlag)
7503 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
7504 if (llvm::is_contained(Range&: MapModifiers, Element: OMPC_MAP_MODIFIER_always))
7505 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
7506 if (llvm::is_contained(Range&: MapModifiers, Element: OMPC_MAP_MODIFIER_close))
7507 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
7508 if (llvm::is_contained(Range&: MapModifiers, Element: OMPC_MAP_MODIFIER_present) ||
7509 llvm::is_contained(Range&: MotionModifiers, Element: OMPC_MOTION_MODIFIER_present))
7510 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
7511 if (llvm::is_contained(Range&: MapModifiers, Element: OMPC_MAP_MODIFIER_ompx_hold))
7512 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
7513 if (IsNonContiguous)
7514 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
7515 return Bits;
7516 }
7517
  /// Return true if the provided expression is a final array section. A
  /// final array section, is one whose length can't be proved to be one.
  ///
  /// A final array section has to be mapped as a block of elements; only
  /// sections provably of unit length can be folded into the enclosing
  /// component's mapping.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<ArraySectionExpr>(Val: E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refer to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
                             Base: OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(Val: BaseQTy.getTypePtr()))
        return ATy->getSExtSize() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happen to be unity size, that's user fault.
      return true;
    }

    // Check if the length evaluates to 1 at compile time; a non-constant
    // length is conservatively treated as "may be more than one element".
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, Ctx: CGF.getContext()))
      return true; // Can have more that size 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }
7556
  /// Emit an attach entry into \p CombinedInfo, using the information from \p
  /// AttachInfo. For example, for a map of form `int *p; ... map(p[1:10])`,
  /// an attach entry has the following form:
  /// &p, &p[1], sizeof(void*), ATTACH
  ///
  /// The entry's base is the address of the attach pointer, its pointer is
  /// the address of the mapped pointee, and its size is sizeof(void*) —
  /// the size of the pointer being updated, not of the pointee data.
  void emitAttachEntry(CodeGenFunction &CGF, MapCombinedInfoTy &CombinedInfo,
                       const AttachInfoTy &AttachInfo) const {
    assert(AttachInfo.isValid() &&
           "Expected valid attach pointer/pointee information!");

    // Size is the size of the pointer itself - use pointer size, not BaseDecl
    // size.
    llvm::Value *PointerSize = CGF.Builder.CreateIntCast(
        V: llvm::ConstantInt::get(
            Ty: CGF.CGM.SizeTy, V: CGF.getContext()
                                  .getTypeSizeInChars(T: CGF.getContext().VoidPtrTy)
                                  .getQuantity()),
        DestTy: CGF.Int64Ty, /*isSigned=*/true);

    // CombinedInfo holds parallel arrays; push exactly one element onto each
    // so they stay in lockstep.
    CombinedInfo.Exprs.emplace_back(Args: AttachInfo.AttachPtrDecl,
                                    Args: AttachInfo.AttachMapExpr);
    CombinedInfo.BasePointers.push_back(
        Elt: AttachInfo.AttachPtrAddr.emitRawPointer(CGF));
    CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
    CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
    CombinedInfo.Pointers.push_back(
        Elt: AttachInfo.AttachPteeAddr.emitRawPointer(CGF));
    CombinedInfo.Sizes.push_back(Elt: PointerSize);
    CombinedInfo.Types.push_back(Elt: OpenMPOffloadMappingFlags::OMP_MAP_ATTACH);
    CombinedInfo.Mappers.push_back(Elt: nullptr);
    CombinedInfo.NonContigInfo.Dims.push_back(Elt: 1);
  }
7588
  /// A helper class to copy structures with overlapped elements, i.e. those
  /// which have mappings of both "s" and "s.mem". Consecutive elements that
  /// are not explicitly copied have mapping nodes synthesized for them,
  /// taking care to avoid generating zero-sized copies.
  ///
  /// Usage: call processField() once per explicitly-mapped field, in order
  /// of increasing field address, then copyUntilEnd() to cover the tail of
  /// the struct.
  class CopyOverlappedEntryGaps {
    CodeGenFunction &CGF;
    /// Destination for the synthesized mapping entries.
    MapCombinedInfoTy &CombinedInfo;
    /// Map-type flags applied to every synthesized entry.
    OpenMPOffloadMappingFlags Flags = OpenMPOffloadMappingFlags::OMP_MAP_NONE;
    /// Declaration/expression recorded with each synthesized entry.
    const ValueDecl *MapDecl = nullptr;
    const Expr *MapExpr = nullptr;
    /// Base pointer for every synthesized entry.
    Address BP = Address::invalid();
    bool IsNonContiguous = false;
    uint64_t DimSize = 0;
    // These elements track the position as the struct is iterated over
    // (in order of increasing element address).
    const RecordDecl *LastParent = nullptr;
    /// Bit offset just past the last field processed.
    uint64_t Cursor = 0;
    /// Index of the last field processed; -1u means "none yet", so the first
    /// field at index 0 is seen as consecutive.
    unsigned LastIndex = -1u;
    /// Address one past the end of the last processed field — the start of
    /// the next potential gap.
    Address LB = Address::invalid();

  public:
    CopyOverlappedEntryGaps(CodeGenFunction &CGF,
                            MapCombinedInfoTy &CombinedInfo,
                            OpenMPOffloadMappingFlags Flags,
                            const ValueDecl *MapDecl, const Expr *MapExpr,
                            Address BP, Address LB, bool IsNonContiguous,
                            uint64_t DimSize)
        : CGF(CGF), CombinedInfo(CombinedInfo), Flags(Flags), MapDecl(MapDecl),
          MapExpr(MapExpr), BP(BP), IsNonContiguous(IsNonContiguous),
          DimSize(DimSize), LB(LB) {}

    /// Process one explicitly-mapped field: if there is a gap between the
    /// previous field and this one, synthesize a copy for the gap, then
    /// advance the cursor past this field.
    void processField(
        const OMPClauseMappableExprCommon::MappableComponent &MC,
        const FieldDecl *FD,
        llvm::function_ref<LValue(CodeGenFunction &, const MemberExpr *)>
            EmitMemberExprBase) {
      const RecordDecl *RD = FD->getParent();
      const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(D: RD);
      uint64_t FieldOffset = RL.getFieldOffset(FieldNo: FD->getFieldIndex());
      uint64_t FieldSize =
          CGF.getContext().getTypeSize(T: FD->getType().getCanonicalType());
      Address ComponentLB = Address::invalid();

      // For an lvalue-reference field, the mapped storage is the referring
      // pointer inside the struct, not the referenced object.
      if (FD->getType()->isLValueReferenceType()) {
        const auto *ME = cast<MemberExpr>(Val: MC.getAssociatedExpression());
        LValue BaseLVal = EmitMemberExprBase(CGF, ME);
        ComponentLB =
            CGF.EmitLValueForFieldInitialization(Base: BaseLVal, Field: FD).getAddress();
      } else {
        ComponentLB =
            CGF.EmitOMPSharedLValue(E: MC.getAssociatedExpression()).getAddress();
      }

      if (!LastParent)
        LastParent = RD;
      if (FD->getParent() == LastParent) {
        // Same record as before: a gap exists iff field indices are not
        // consecutive.
        if (FD->getFieldIndex() != LastIndex + 1)
          copyUntilField(FD, ComponentLB);
      } else {
        // Crossed into a different record; compare bit offsets instead.
        LastParent = FD->getParent();
        if (((int64_t)FieldOffset - (int64_t)Cursor) > 0)
          copyUntilField(FD, ComponentLB);
      }
      Cursor = FieldOffset + FieldSize;
      LastIndex = FD->getFieldIndex();
      LB = CGF.Builder.CreateConstGEP(Addr: ComponentLB, Index: 1);
    }

    /// Synthesize a copy for the gap [LB, start-of-FD).
    void copyUntilField(const FieldDecl *FD, Address ComponentLB) {
      llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
      llvm::Value *LBPtr = LB.emitRawPointer(CGF);
      llvm::Value *Size = CGF.Builder.CreatePtrDiff(LHS: ComponentLBPtr, RHS: LBPtr);
      copySizedChunk(Base: LBPtr, Size);
    }

    /// Synthesize a copy for the tail gap [LB, end-of-HB]; no-op if the
    /// cursor already reached (or passed) the end of the record.
    void copyUntilEnd(Address HB) {
      if (LastParent) {
        const ASTRecordLayout &RL =
            CGF.getContext().getASTRecordLayout(D: LastParent);
        if ((uint64_t)CGF.getContext().toBits(CharSize: RL.getSize()) <= Cursor)
          return;
      }
      llvm::Value *LBPtr = LB.emitRawPointer(CGF);
      llvm::Value *Size = CGF.Builder.CreatePtrDiff(
          LHS: CGF.Builder.CreateConstGEP(Addr: HB, Index: 1).emitRawPointer(CGF), RHS: LBPtr);
      copySizedChunk(Base: LBPtr, Size);
    }

    /// Append one mapping entry of [Base, Base+Size) to CombinedInfo,
    /// keeping all its parallel arrays in lockstep.
    void copySizedChunk(llvm::Value *Base, llvm::Value *Size) {
      CombinedInfo.Exprs.emplace_back(Args&: MapDecl, Args&: MapExpr);
      CombinedInfo.BasePointers.push_back(Elt: BP.emitRawPointer(CGF));
      CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
      CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(Elt: Base);
      CombinedInfo.Sizes.push_back(
          Elt: CGF.Builder.CreateIntCast(V: Size, DestTy: CGF.Int64Ty, /*isSigned=*/false));
      CombinedInfo.Types.push_back(Elt: Flags);
      CombinedInfo.Mappers.push_back(Elt: nullptr);
      CombinedInfo.NonContigInfo.Dims.push_back(Elt: IsNonContiguous ? DimSize : 1);
    }
  };
7690
7691 /// Generate the base pointers, section pointers, sizes, map type bits, and
7692 /// user-defined mappers (all included in \a CombinedInfo) for the provided
7693 /// map type, map or motion modifiers, and expression components.
7694 /// \a IsFirstComponent should be set to true if the provided set of
7695 /// components is the first associated with a capture.
7696 void generateInfoForComponentList(
7697 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7698 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7699 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7700 MapCombinedInfoTy &CombinedInfo,
7701 MapCombinedInfoTy &StructBaseCombinedInfo,
7702 StructRangeInfoTy &PartialStruct, AttachInfoTy &AttachInfo,
7703 bool IsFirstComponentList, bool IsImplicit,
7704 bool GenerateAllInfoForClauses, const ValueDecl *Mapper = nullptr,
7705 bool ForDeviceAddr = false, const ValueDecl *BaseDecl = nullptr,
7706 const Expr *MapExpr = nullptr,
7707 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7708 OverlappedElements = {}) const {
7709
7710 // The following summarizes what has to be generated for each map and the
7711 // types below. The generated information is expressed in this order:
7712 // base pointer, section pointer, size, flags
7713 // (to add to the ones that come from the map type and modifier).
7714 // Entries annotated with (+) are only generated for "target" constructs,
7715 // and only if the variable at the beginning of the expression is used in
7716 // the region.
7717 //
7718 // double d;
7719 // int i[100];
7720 // float *p;
7721 // int **a = &i;
7722 //
7723 // struct S1 {
7724 // int i;
7725 // float f[50];
7726 // }
7727 // struct S2 {
7728 // int i;
7729 // float f[50];
7730 // S1 s;
7731 // double *p;
7732 // double *&pref;
7733 // struct S2 *ps;
7734 // int &ref;
7735 // }
7736 // S2 s;
7737 // S2 *ps;
7738 //
7739 // map(d)
7740 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7741 //
7742 // map(i)
7743 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7744 //
7745 // map(i[1:23])
7746 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7747 //
7748 // map(p)
7749 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7750 //
7751 // map(p[1:24])
7752 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM // map pointee
7753 // &p, &p[1], sizeof(void*), ATTACH // attach pointer/pointee, if both
7754 // // are present, and either is new
7755 //
7756 // map(([22])p)
7757 // p, p, 22*sizeof(float), TARGET_PARAM | TO | FROM
7758 // &p, p, sizeof(void*), ATTACH
7759 //
7760 // map((*a)[0:3])
7761 // a, a, 0, TARGET_PARAM | IMPLICIT // (+)
7762 // (*a)[0], &(*a)[0], 3 * sizeof(int), TO | FROM
7763 // &(*a), &(*a)[0], sizeof(void*), ATTACH
7764 // (+) Only on target, if a is used in the region
7765 // Note: Since the attach base-pointer is `*a`, which is not a scalar
7766 // variable, it doesn't determine the clause on `a`. `a` is mapped using
7767 // a zero-length-array-section map by generateDefaultMapInfo, if it is
7768 // referenced in the target region, because it is a pointer.
7769 //
7770 // map(**a)
7771 // a, a, 0, TARGET_PARAM | IMPLICIT // (+)
7772 // &(*a)[0], &(*a)[0], sizeof(int), TO | FROM
7773 // &(*a), &(*a)[0], sizeof(void*), ATTACH
7774 // (+) Only on target, if a is used in the region
7775 //
7776 // map(s)
7777 // FIXME: This needs to also imply map(ref_ptr_ptee: s.ref), since the
7778 // effect is supposed to be same as if the user had a map for every element
7779 // of the struct. We currently do a shallow-map of s.
7780 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7781 //
7782 // map(s.i)
7783 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7784 //
7785 // map(s.s.f)
7786 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7787 //
7788 // map(s.p)
7789 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7790 //
7791 // map(to: s.p[:22])
7792 // &s, &(s.p), sizeof(double*), TARGET_PARAM | IMPLICIT // (+)
7793 // &(s.p[0]), &(s.p[0]), 22 * sizeof(double*), TO | FROM
7794 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7795 //
7796 // map(to: s.ref)
7797 // &s, &(ptr(s.ref)), sizeof(int*), TARGET_PARAM (*)
7798 // &s, &(ptee(s.ref)), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7799 // (*) alloc space for struct members, only this is a target parameter.
7800 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7801 // optimizes this entry out, same in the examples below)
7802 // (***) map the pointee (map: to)
7803 // Note: ptr(s.ref) represents the referring pointer of s.ref
7804 // ptee(s.ref) represents the referenced pointee of s.ref
7805 //
7806 // map(to: s.pref)
7807 // &s, &(ptr(s.pref)), sizeof(double**), TARGET_PARAM
7808 // &s, &(ptee(s.pref)), sizeof(double*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7809 //
7810 // map(to: s.pref[:22])
7811 // &s, &(ptr(s.pref)), sizeof(double**), TARGET_PARAM | IMPLICIT // (+)
7812 // &s, &(ptee(s.pref)), sizeof(double*), MEMBER_OF(1) | PTR_AND_OBJ | TO |
7813 // FROM | IMPLICIT // (+)
7814 // &(ptee(s.pref)[0]), &(ptee(s.pref)[0]), 22 * sizeof(double), TO
7815 // &(ptee(s.pref)), &(ptee(s.pref)[0]), sizeof(void*), ATTACH
7816 //
7817 // map(s.ps)
7818 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7819 //
7820 // map(from: s.ps->s.i)
7821 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7822 // &(s.ps[0]), &(s.ps->s.i), sizeof(int), FROM
7823 // &(s.ps), &(s.ps->s.i), sizeof(void*), ATTACH
7824 //
7825 // map(to: s.ps->ps)
7826 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7827 // &(s.ps[0]), &(s.ps->ps), sizeof(S2*), TO
7828 // &(s.ps), &(s.ps->ps), sizeof(void*), ATTACH
7829 //
7830 // map(s.ps->ps->ps)
7831 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7832 // &(s.ps->ps[0]), &(s.ps->ps->ps), sizeof(S2*), TO
7833 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(void*), ATTACH
7834 //
7835 // map(to: s.ps->ps->s.f[:22])
7836 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7837 // &(s.ps->ps[0]), &(s.ps->ps->s.f[0]), 22*sizeof(float), TO
7838 // &(s.ps->ps), &(s.ps->ps->s.f[0]), sizeof(void*), ATTACH
7839 //
7840 // map(ps)
7841 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7842 //
7843 // map(ps->i)
7844 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7845 // &ps, &(ps->i), sizeof(void*), ATTACH
7846 //
7847 // map(ps->s.f)
7848 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7849 // &ps, &(ps->s.f[0]), sizeof(ps), ATTACH
7850 //
7851 // map(from: ps->p)
7852 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7853 // &ps, &(ps->p), sizeof(ps), ATTACH
7854 //
7855 // map(to: ps->p[:22])
7856 // ps, &(ps[0]), 0, TARGET_PARAM | IMPLICIT // (+)
7857 // &(ps->p[0]), &(ps->p[0]), 22*sizeof(double), TO
7858 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7859 //
7860 // map(ps->ps)
7861 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7862 // &ps, &(ps->ps), sizeof(ps), ATTACH
7863 //
7864 // map(from: ps->ps->s.i)
7865 // ps, &(ps[0]), 0, TARGET_PARAM | IMPLICIT // (+)
7866 // &(ps->ps[0]), &(ps->ps->s.i), sizeof(int), FROM
7867 // &(ps->ps), &(ps->ps->s.i), sizeof(void*), ATTACH
7868 //
7869 // map(from: ps->ps->ps)
7870 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7871 // &(ps->ps[0]), &(ps->ps->ps), sizeof(S2*), FROM
7872 // &(ps->ps), &(ps->ps->ps), sizeof(void*), ATTACH
7873 //
7874 // map(ps->ps->ps->ps)
7875 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7876 // &(ps->ps->ps[0]), &(ps->ps->ps->ps), sizeof(S2*), FROM
7877 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(void*), ATTACH
7878 //
7879 // map(to: ps->ps->ps->s.f[:22])
7880 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7881 // &(ps->ps->ps[0]), &(ps->ps->ps->s.f[0]), 22*sizeof(float), TO
7882 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), sizeof(void*), ATTACH
7883 //
7884 // map(to: s.f[:22]) map(from: s.p[:33])
7885 // On target, and if s is used in the region:
7886 //
7887 // &s, &(s.f[0]), 50*sizeof(float) +
7888 // sizeof(struct S1) +
7889 // sizeof(double*) (**), TARGET_PARAM
7890 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7891 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) | TO |
7892 // FROM | IMPLICIT
7893 // &(s.p[0]), &(s.p[0]), 33*sizeof(double), FROM
7894 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7895 // (**) allocate contiguous space needed to fit all mapped members even if
7896 // we allocate space for members not mapped (in this example,
7897 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7898 // them as well because they fall between &s.f[0] and &s.p)
7899 //
7900 // On other constructs, and, if s is not used in the region, on target:
7901 // &s, &(s.f[0]), 22*sizeof(float), TO
7902 // &(s.p[0]), &(s.p[0]), 33*sizeof(double), FROM
7903 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7904 //
7905 // map(from: s.f[:22]) map(to: ps->p[:33])
7906 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7907 // &ps[0], &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7908 // &(ps->p[0]), &(ps->p[0]), 33*sizeof(double), TO
7909 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7910 //
7911 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7912 // &s, &(s.f[0]), 50*sizeof(float) +
7913 // sizeof(struct S1), TARGET_PARAM
7914 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7915 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7916 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7917 // &(ps->p[0]), &(ps->p[0]), 33*sizeof(double), TO
7918 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7919 //
7920 // map(p[:100], p)
7921 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7922 // p, &p[0], 100*sizeof(float), TO | FROM
7923 // &p, &p[0], sizeof(float*), ATTACH
7924
7925 // Track if the map information being generated is the first for a capture.
7926 bool IsCaptureFirstInfo = IsFirstComponentList;
7927 // When the variable is on a declare target link or in a to clause with
7928 // unified memory, a reference is needed to hold the host/device address
7929 // of the variable.
7930 bool RequiresReference = false;
7931
7932 // Scan the components from the base to the complete expression.
7933 auto CI = Components.rbegin();
7934 auto CE = Components.rend();
7935 auto I = CI;
7936
7937 // Track if the map information being generated is the first for a list of
7938 // components.
7939 bool IsExpressionFirstInfo = true;
7940 bool FirstPointerInComplexData = false;
7941 Address BP = Address::invalid();
7942 Address FinalLowestElem = Address::invalid();
7943 const Expr *AssocExpr = I->getAssociatedExpression();
7944 const auto *AE = dyn_cast<ArraySubscriptExpr>(Val: AssocExpr);
7945 const auto *OASE = dyn_cast<ArraySectionExpr>(Val: AssocExpr);
7946 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(Val: AssocExpr);
7947
7948 // Get the pointer-attachment base-pointer for the given list, if any.
7949 const Expr *AttachPtrExpr = getAttachPtrExpr(Components);
7950 auto [AttachPtrAddr, AttachPteeBaseAddr] =
7951 getAttachPtrAddrAndPteeBaseAddr(AttachPtrExpr, CGF);
7952
7953 bool HasAttachPtr = AttachPtrExpr != nullptr;
7954 bool FirstComponentIsForAttachPtr = AssocExpr == AttachPtrExpr;
7955 bool SeenAttachPtr = FirstComponentIsForAttachPtr;
7956
7957 if (FirstComponentIsForAttachPtr) {
7958 // No need to process AttachPtr here. It will be processed at the end
7959 // after we have computed the pointee's address.
7960 ++I;
7961 } else if (isa<MemberExpr>(Val: AssocExpr)) {
7962 // The base is the 'this' pointer. The content of the pointer is going
7963 // to be the base of the field being mapped.
7964 BP = CGF.LoadCXXThisAddress();
7965 } else if ((AE && isa<CXXThisExpr>(Val: AE->getBase()->IgnoreParenImpCasts())) ||
7966 (OASE &&
7967 isa<CXXThisExpr>(Val: OASE->getBase()->IgnoreParenImpCasts()))) {
7968 BP = CGF.EmitOMPSharedLValue(E: AssocExpr).getAddress();
7969 } else if (OAShE &&
7970 isa<CXXThisExpr>(Val: OAShE->getBase()->IgnoreParenCasts())) {
7971 BP = Address(
7972 CGF.EmitScalarExpr(E: OAShE->getBase()),
7973 CGF.ConvertTypeForMem(T: OAShE->getBase()->getType()->getPointeeType()),
7974 CGF.getContext().getTypeAlignInChars(T: OAShE->getBase()->getType()));
7975 } else {
7976 // The base is the reference to the variable.
7977 // BP = &Var.
7978 BP = CGF.EmitOMPSharedLValue(E: AssocExpr).getAddress();
7979 if (const auto *VD =
7980 dyn_cast_or_null<VarDecl>(Val: I->getAssociatedDeclaration())) {
7981 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7982 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7983 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7984 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7985 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7986 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7987 RequiresReference = true;
7988 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7989 }
7990 }
7991 }
7992
7993 // If the variable is a pointer and is being dereferenced (i.e. is not
7994 // the last component), the base has to be the pointer itself, not its
7995 // reference. References are ignored for mapping purposes.
7996 QualType Ty =
7997 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7998 if (Ty->isAnyPointerType() && std::next(x: I) != CE) {
7999 // No need to generate individual map information for the pointer, it
8000 // can be associated with the combined storage if shared memory mode is
8001 // active or the base declaration is not global variable.
8002 const auto *VD = dyn_cast<VarDecl>(Val: I->getAssociatedDeclaration());
8003 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8004 !VD || VD->hasLocalStorage() || HasAttachPtr)
8005 BP = CGF.EmitLoadOfPointer(Ptr: BP, PtrTy: Ty->castAs<PointerType>());
8006 else
8007 FirstPointerInComplexData = true;
8008 ++I;
8009 }
8010 }
8011
8012 // Track whether a component of the list should be marked as MEMBER_OF some
8013 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
8014 // in a component list should be marked as MEMBER_OF, all subsequent entries
8015 // do not belong to the base struct. E.g.
8016 // struct S2 s;
8017 // s.ps->ps->ps->f[:]
8018 // (1) (2) (3) (4)
8019 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
8020 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
8021 // is the pointee of ps(2) which is not member of struct s, so it should not
8022 // be marked as such (it is still PTR_AND_OBJ).
8023 // The variable is initialized to false so that PTR_AND_OBJ entries which
8024 // are not struct members are not considered (e.g. array of pointers to
8025 // data).
8026 bool ShouldBeMemberOf = false;
8027
8028 // Variable keeping track of whether or not we have encountered a component
8029 // in the component list which is a member expression. Useful when we have a
8030 // pointer or a final array section, in which case it is the previous
8031 // component in the list which tells us whether we have a member expression.
8032 // E.g. X.f[:]
8033 // While processing the final array section "[:]" it is "f" which tells us
8034 // whether we are dealing with a member of a declared struct.
8035 const MemberExpr *EncounteredME = nullptr;
8036
8037 // Track for the total number of dimension. Start from one for the dummy
8038 // dimension.
8039 uint64_t DimSize = 1;
8040
8041 // Detects non-contiguous updates due to strided accesses.
8042 // Sets the 'IsNonContiguous' flag so that the 'MapType' bits are set
8043 // correctly when generating information to be passed to the runtime. The
8044 // flag is set to true if any array section has a stride not equal to 1, or
8045 // if the stride is not a constant expression (conservatively assumed
8046 // non-contiguous).
8047 bool IsNonContiguous =
8048 CombinedInfo.NonContigInfo.IsNonContiguous ||
8049 any_of(Range&: Components, P: [&](const auto &Component) {
8050 const auto *OASE =
8051 dyn_cast<ArraySectionExpr>(Component.getAssociatedExpression());
8052 if (!OASE)
8053 return false;
8054
8055 const Expr *StrideExpr = OASE->getStride();
8056 if (!StrideExpr)
8057 return false;
8058
8059 const auto Constant =
8060 StrideExpr->getIntegerConstantExpr(Ctx: CGF.getContext());
8061 if (!Constant)
8062 return false;
8063
8064 return !Constant->isOne();
8065 });
8066
8067 bool IsPrevMemberReference = false;
8068
8069 bool IsPartialMapped =
8070 !PartialStruct.PreliminaryMapData.BasePointers.empty();
8071
8072 // We need to check if we will be encountering any MEs. If we do not
8073 // encounter any ME expression it means we will be mapping the whole struct.
8074 // In that case we need to skip adding an entry for the struct to the
8075 // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
8076 // list only when generating all info for clauses.
8077 bool IsMappingWholeStruct = true;
8078 if (!GenerateAllInfoForClauses) {
8079 IsMappingWholeStruct = false;
8080 } else {
8081 for (auto TempI = I; TempI != CE; ++TempI) {
8082 const MemberExpr *PossibleME =
8083 dyn_cast<MemberExpr>(Val: TempI->getAssociatedExpression());
8084 if (PossibleME) {
8085 IsMappingWholeStruct = false;
8086 break;
8087 }
8088 }
8089 }
8090
8091 bool SeenFirstNonBinOpExprAfterAttachPtr = false;
8092 for (; I != CE; ++I) {
8093 // If we have a valid attach-ptr, we skip processing all components until
8094 // after the attach-ptr.
8095 if (HasAttachPtr && !SeenAttachPtr) {
8096 SeenAttachPtr = I->getAssociatedExpression() == AttachPtrExpr;
8097 continue;
8098 }
8099
8100 // After finding the attach pointer, skip binary-ops, to skip past
8101 // expressions like (p + 10), for a map like map(*(p + 10)), where p is
8102 // the attach-ptr.
8103 if (HasAttachPtr && !SeenFirstNonBinOpExprAfterAttachPtr) {
8104 const auto *BO = dyn_cast<BinaryOperator>(Val: I->getAssociatedExpression());
8105 if (BO)
8106 continue;
8107
8108 // Found the first non-binary-operator component after attach
8109 SeenFirstNonBinOpExprAfterAttachPtr = true;
8110 BP = AttachPteeBaseAddr;
8111 }
8112
8113 // If the current component is member of a struct (parent struct) mark it.
8114 if (!EncounteredME) {
8115 EncounteredME = dyn_cast<MemberExpr>(Val: I->getAssociatedExpression());
8116 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
8117 // as MEMBER_OF the parent struct.
8118 if (EncounteredME) {
8119 ShouldBeMemberOf = true;
8120 // Do not emit as complex pointer if this is actually not array-like
8121 // expression.
8122 if (FirstPointerInComplexData) {
8123 QualType Ty = std::prev(x: I)
8124 ->getAssociatedDeclaration()
8125 ->getType()
8126 .getNonReferenceType();
8127 BP = CGF.EmitLoadOfPointer(Ptr: BP, PtrTy: Ty->castAs<PointerType>());
8128 FirstPointerInComplexData = false;
8129 }
8130 }
8131 }
8132
8133 auto Next = std::next(x: I);
8134
8135 // We need to generate the addresses and sizes if this is the last
8136 // component, if the component is a pointer or if it is an array section
8137 // whose length can't be proved to be one. If this is a pointer, it
8138 // becomes the base address for the following components.
8139
8140 // A final array section, is one whose length can't be proved to be one.
8141 // If the map item is non-contiguous then we don't treat any array section
8142 // as final array section.
8143 bool IsFinalArraySection =
8144 !IsNonContiguous &&
8145 isFinalArraySectionExpression(E: I->getAssociatedExpression());
8146
8147 // If we have a declaration for the mapping use that, otherwise use
8148 // the base declaration of the map clause.
8149 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
8150 ? I->getAssociatedDeclaration()
8151 : BaseDecl;
8152 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
8153 : MapExpr;
8154
8155 // Get information on whether the element is a pointer. Have to do a
8156 // special treatment for array sections given that they are built-in
8157 // types.
8158 const auto *OASE =
8159 dyn_cast<ArraySectionExpr>(Val: I->getAssociatedExpression());
8160 const auto *OAShE =
8161 dyn_cast<OMPArrayShapingExpr>(Val: I->getAssociatedExpression());
8162 const auto *UO = dyn_cast<UnaryOperator>(Val: I->getAssociatedExpression());
8163 const auto *BO = dyn_cast<BinaryOperator>(Val: I->getAssociatedExpression());
8164 bool IsPointer =
8165 OAShE ||
8166 (OASE && ArraySectionExpr::getBaseOriginalType(Base: OASE)
8167 .getCanonicalType()
8168 ->isAnyPointerType()) ||
8169 I->getAssociatedExpression()->getType()->isAnyPointerType();
8170 bool IsMemberReference = isa<MemberExpr>(Val: I->getAssociatedExpression()) &&
8171 MapDecl &&
8172 MapDecl->getType()->isLValueReferenceType();
8173 bool IsNonDerefPointer = IsPointer &&
8174 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
8175 !IsNonContiguous;
8176
8177 if (OASE)
8178 ++DimSize;
8179
8180 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
8181 IsFinalArraySection) {
8182 // If this is not the last component, we expect the pointer to be
8183 // associated with an array expression or member expression.
8184 assert((Next == CE ||
8185 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8186 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8187 isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
8188 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
8189 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8190 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8191 "Unexpected expression");
8192
8193 Address LB = Address::invalid();
8194 Address LowestElem = Address::invalid();
8195 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8196 const MemberExpr *E) {
8197 const Expr *BaseExpr = E->getBase();
8198 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
8199 // scalar.
8200 LValue BaseLV;
8201 if (E->isArrow()) {
8202 LValueBaseInfo BaseInfo;
8203 TBAAAccessInfo TBAAInfo;
8204 Address Addr =
8205 CGF.EmitPointerWithAlignment(Addr: BaseExpr, BaseInfo: &BaseInfo, TBAAInfo: &TBAAInfo);
8206 QualType PtrTy = BaseExpr->getType()->getPointeeType();
8207 BaseLV = CGF.MakeAddrLValue(Addr, T: PtrTy, BaseInfo, TBAAInfo);
8208 } else {
8209 BaseLV = CGF.EmitOMPSharedLValue(E: BaseExpr);
8210 }
8211 return BaseLV;
8212 };
8213 if (OAShE) {
8214 LowestElem = LB =
8215 Address(CGF.EmitScalarExpr(E: OAShE->getBase()),
8216 CGF.ConvertTypeForMem(
8217 T: OAShE->getBase()->getType()->getPointeeType()),
8218 CGF.getContext().getTypeAlignInChars(
8219 T: OAShE->getBase()->getType()));
8220 } else if (IsMemberReference) {
8221 const auto *ME = cast<MemberExpr>(Val: I->getAssociatedExpression());
8222 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8223 LowestElem = CGF.EmitLValueForFieldInitialization(
8224 Base: BaseLVal, Field: cast<FieldDecl>(Val: MapDecl))
8225 .getAddress();
8226 LB = CGF.EmitLoadOfReferenceLValue(RefAddr: LowestElem, RefTy: MapDecl->getType())
8227 .getAddress();
8228 } else {
8229 LowestElem = LB =
8230 CGF.EmitOMPSharedLValue(E: I->getAssociatedExpression())
8231 .getAddress();
8232 }
8233
8234 // Save the final LowestElem, to use it as the pointee in attach maps,
8235 // if emitted.
8236 if (Next == CE)
8237 FinalLowestElem = LowestElem;
8238
8239 // If this component is a pointer inside the base struct then we don't
8240 // need to create any entry for it - it will be combined with the object
8241 // it is pointing to into a single PTR_AND_OBJ entry.
8242 bool IsMemberPointerOrAddr =
8243 EncounteredME &&
8244 (((IsPointer || ForDeviceAddr) &&
8245 I->getAssociatedExpression() == EncounteredME) ||
8246 (IsPrevMemberReference && !IsPointer) ||
8247 (IsMemberReference && Next != CE &&
8248 !Next->getAssociatedExpression()->getType()->isPointerType()));
8249 if (!OverlappedElements.empty() && Next == CE) {
8250 // Handle base element with the info for overlapped elements.
8251 assert(!PartialStruct.Base.isValid() && "The base element is set.");
8252 assert(!IsPointer &&
8253 "Unexpected base element with the pointer type.");
8254 // Mark the whole struct as the struct that requires allocation on the
8255 // device.
8256 PartialStruct.LowestElem = {0, LowestElem};
8257 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8258 T: I->getAssociatedExpression()->getType());
8259 Address HB = CGF.Builder.CreateConstGEP(
8260 Addr: CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8261 Addr: LowestElem, Ty: CGF.VoidPtrTy, ElementTy: CGF.Int8Ty),
8262 Index: TypeSize.getQuantity() - 1);
8263 PartialStruct.HighestElem = {
8264 std::numeric_limits<decltype(
8265 PartialStruct.HighestElem.first)>::max(),
8266 HB};
8267 PartialStruct.Base = BP;
8268 PartialStruct.LB = LB;
8269 assert(
8270 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8271 "Overlapped elements must be used only once for the variable.");
8272 std::swap(a&: PartialStruct.PreliminaryMapData, b&: CombinedInfo);
8273 // Emit data for non-overlapped data.
8274 OpenMPOffloadMappingFlags Flags =
8275 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8276 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8277 /*AddPtrFlag=*/false,
8278 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8279 CopyOverlappedEntryGaps CopyGaps(CGF, CombinedInfo, Flags, MapDecl,
8280 MapExpr, BP, LB, IsNonContiguous,
8281 DimSize);
8282 // Do bitcopy of all non-overlapped structure elements.
8283 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8284 Component : OverlappedElements) {
8285 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8286 Component) {
8287 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8288 if (const auto *FD = dyn_cast<FieldDecl>(Val: VD)) {
8289 CopyGaps.processField(MC, FD, EmitMemberExprBase);
8290 }
8291 }
8292 }
8293 }
8294 CopyGaps.copyUntilEnd(HB);
8295 break;
8296 }
8297 llvm::Value *Size = getExprTypeSize(E: I->getAssociatedExpression());
8298 // Skip adding an entry in the CurInfo of this combined entry if the
8299 // whole struct is currently being mapped. The struct needs to be added
8300 // in the first position before any data internal to the struct is being
8301 // mapped.
8302 // Skip adding an entry in the CurInfo of this combined entry if the
8303 // PartialStruct.PreliminaryMapData.BasePointers has been mapped.
8304 if ((!IsMemberPointerOrAddr && !IsPartialMapped) ||
8305 (Next == CE && MapType != OMPC_MAP_unknown)) {
8306 if (!IsMappingWholeStruct) {
8307 CombinedInfo.Exprs.emplace_back(Args&: MapDecl, Args&: MapExpr);
8308 CombinedInfo.BasePointers.push_back(Elt: BP.emitRawPointer(CGF));
8309 CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
8310 CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
8311 CombinedInfo.Pointers.push_back(Elt: LB.emitRawPointer(CGF));
8312 CombinedInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
8313 V: Size, DestTy: CGF.Int64Ty, /*isSigned=*/true));
8314 CombinedInfo.NonContigInfo.Dims.push_back(Elt: IsNonContiguous ? DimSize
8315 : 1);
8316 } else {
8317 StructBaseCombinedInfo.Exprs.emplace_back(Args&: MapDecl, Args&: MapExpr);
8318 StructBaseCombinedInfo.BasePointers.push_back(
8319 Elt: BP.emitRawPointer(CGF));
8320 StructBaseCombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
8321 StructBaseCombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
8322 StructBaseCombinedInfo.Pointers.push_back(Elt: LB.emitRawPointer(CGF));
8323 StructBaseCombinedInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
8324 V: Size, DestTy: CGF.Int64Ty, /*isSigned=*/true));
8325 StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
8326 Elt: IsNonContiguous ? DimSize : 1);
8327 }
8328
8329 // If Mapper is valid, the last component inherits the mapper.
8330 bool HasMapper = Mapper && Next == CE;
8331 if (!IsMappingWholeStruct)
8332 CombinedInfo.Mappers.push_back(Elt: HasMapper ? Mapper : nullptr);
8333 else
8334 StructBaseCombinedInfo.Mappers.push_back(Elt: HasMapper ? Mapper
8335 : nullptr);
8336
8337 // We need to add a pointer flag for each map that comes from the
8338 // same expression except for the first one. We also need to signal
8339 // this map is the first one that relates with the current capture
8340 // (there is a set of entries for each capture).
8341 OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8342 MapType, MapModifiers, MotionModifiers, IsImplicit,
8343 AddPtrFlag: !IsExpressionFirstInfo || RequiresReference ||
8344 FirstPointerInComplexData || IsMemberReference,
8345 AddIsTargetParamFlag: IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8346
8347 if (!IsExpressionFirstInfo || IsMemberReference) {
8348 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8349 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8350 if (IsPointer || (IsMemberReference && Next != CE))
8351 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8352 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
8353 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
8354 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
8355 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
8356
8357 if (ShouldBeMemberOf) {
8358 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8359 // should be later updated with the correct value of MEMBER_OF.
8360 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
8361 // From now on, all subsequent PTR_AND_OBJ entries should not be
8362 // marked as MEMBER_OF.
8363 ShouldBeMemberOf = false;
8364 }
8365 }
8366
8367 if (!IsMappingWholeStruct)
8368 CombinedInfo.Types.push_back(Elt: Flags);
8369 else
8370 StructBaseCombinedInfo.Types.push_back(Elt: Flags);
8371 }
8372
8373 // If we have encountered a member expression so far, keep track of the
8374 // mapped member. If the parent is "*this", then the value declaration
8375 // is nullptr.
8376 if (EncounteredME) {
8377 const auto *FD = cast<FieldDecl>(Val: EncounteredME->getMemberDecl());
8378 unsigned FieldIndex = FD->getFieldIndex();
8379
8380 // Update info about the lowest and highest elements for this struct
8381 if (!PartialStruct.Base.isValid()) {
8382 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8383 if (IsFinalArraySection && OASE) {
8384 Address HB =
8385 CGF.EmitArraySectionExpr(E: OASE, /*IsLowerBound=*/false)
8386 .getAddress();
8387 PartialStruct.HighestElem = {FieldIndex, HB};
8388 } else {
8389 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8390 }
8391 PartialStruct.Base = BP;
8392 PartialStruct.LB = BP;
8393 } else if (FieldIndex < PartialStruct.LowestElem.first) {
8394 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8395 } else if (FieldIndex > PartialStruct.HighestElem.first) {
8396 if (IsFinalArraySection && OASE) {
8397 Address HB =
8398 CGF.EmitArraySectionExpr(E: OASE, /*IsLowerBound=*/false)
8399 .getAddress();
8400 PartialStruct.HighestElem = {FieldIndex, HB};
8401 } else {
8402 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8403 }
8404 }
8405 }
8406
8407 // Need to emit combined struct for array sections.
8408 if (IsFinalArraySection || IsNonContiguous)
8409 PartialStruct.IsArraySection = true;
8410
8411 // If we have a final array section, we are done with this expression.
8412 if (IsFinalArraySection)
8413 break;
8414
8415 // The pointer becomes the base for the next element.
8416 if (Next != CE)
8417 BP = IsMemberReference ? LowestElem : LB;
8418 if (!IsPartialMapped)
8419 IsExpressionFirstInfo = false;
8420 IsCaptureFirstInfo = false;
8421 FirstPointerInComplexData = false;
8422 IsPrevMemberReference = IsMemberReference;
8423 } else if (FirstPointerInComplexData) {
8424 QualType Ty = Components.rbegin()
8425 ->getAssociatedDeclaration()
8426 ->getType()
8427 .getNonReferenceType();
8428 BP = CGF.EmitLoadOfPointer(Ptr: BP, PtrTy: Ty->castAs<PointerType>());
8429 FirstPointerInComplexData = false;
8430 }
8431 }
8432 // If ran into the whole component - allocate the space for the whole
8433 // record.
8434 if (!EncounteredME)
8435 PartialStruct.HasCompleteRecord = true;
8436
8437 // Populate ATTACH information for later processing by emitAttachEntry.
8438 if (shouldEmitAttachEntry(PointerExpr: AttachPtrExpr, MapBaseDecl: BaseDecl, CGF, CurDir)) {
8439 AttachInfo.AttachPtrAddr = AttachPtrAddr;
8440 AttachInfo.AttachPteeAddr = FinalLowestElem;
8441 AttachInfo.AttachPtrDecl = BaseDecl;
8442 AttachInfo.AttachMapExpr = MapExpr;
8443 }
8444
8445 if (!IsNonContiguous)
8446 return;
8447
8448 const ASTContext &Context = CGF.getContext();
8449
8450 // For supporting stride in array section, we need to initialize the first
8451 // dimension size as 1, first offset as 0, and first count as 1
8452 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(Ty: CGF.CGM.Int64Ty, V: 0)};
8453 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(Ty: CGF.CGM.Int64Ty, V: 1)};
8454 MapValuesArrayTy CurStrides;
8455 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(Ty: CGF.CGM.Int64Ty, V: 1)};
8456 uint64_t ElementTypeSize;
8457
8458 // Collect Size information for each dimension and get the element size as
8459 // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8460 // should be [10, 10] and the first stride is 4 btyes.
8461 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8462 Components) {
8463 const Expr *AssocExpr = Component.getAssociatedExpression();
8464 const auto *OASE = dyn_cast<ArraySectionExpr>(Val: AssocExpr);
8465
8466 if (!OASE)
8467 continue;
8468
8469 QualType Ty = ArraySectionExpr::getBaseOriginalType(Base: OASE->getBase());
8470 auto *CAT = Context.getAsConstantArrayType(T: Ty);
8471 auto *VAT = Context.getAsVariableArrayType(T: Ty);
8472
8473 // We need all the dimension size except for the last dimension.
8474 assert((VAT || CAT || &Component == &*Components.begin()) &&
8475 "Should be either ConstantArray or VariableArray if not the "
8476 "first Component");
8477
8478 // Get element size if CurStrides is empty.
8479 if (CurStrides.empty()) {
8480 const Type *ElementType = nullptr;
8481 if (CAT)
8482 ElementType = CAT->getElementType().getTypePtr();
8483 else if (VAT)
8484 ElementType = VAT->getElementType().getTypePtr();
8485 else if (&Component == &*Components.begin()) {
8486 // If the base is a raw pointer (e.g. T *data with data[a:b:c]),
8487 // there was no earlier CAT/VAT/array handling to establish
8488 // ElementType. Capture the pointee type now so that subsequent
8489 // components (offset/length/stride) have a concrete element type to
8490 // work with. This makes pointer-backed sections behave consistently
8491 // with CAT/VAT/array bases.
8492 if (const auto *PtrType = Ty->getAs<PointerType>())
8493 ElementType = PtrType->getPointeeType().getTypePtr();
8494 } else {
8495 // Any component after the first should never have a raw pointer type;
8496 // by this point. ElementType must already be known (set above or in
8497 // prior array / CAT / VAT handling).
8498 assert(!Ty->isPointerType() &&
8499 "Non-first components should not be raw pointers");
8500 }
8501
8502 // At this stage, if ElementType was a base pointer and we are in the
8503 // first iteration, it has been computed.
8504 if (ElementType) {
8505 // For the case that having pointer as base, we need to remove one
8506 // level of indirection.
8507 if (&Component != &*Components.begin())
8508 ElementType = ElementType->getPointeeOrArrayElementType();
8509 ElementTypeSize =
8510 Context.getTypeSizeInChars(T: ElementType).getQuantity();
8511 CurStrides.push_back(
8512 Elt: llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: ElementTypeSize));
8513 }
8514 }
8515 // Get dimension value except for the last dimension since we don't need
8516 // it.
8517 if (DimSizes.size() < Components.size() - 1) {
8518 if (CAT)
8519 DimSizes.push_back(
8520 Elt: llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: CAT->getZExtSize()));
8521 else if (VAT)
8522 DimSizes.push_back(Elt: CGF.Builder.CreateIntCast(
8523 V: CGF.EmitScalarExpr(E: VAT->getSizeExpr()), DestTy: CGF.Int64Ty,
8524 /*IsSigned=*/isSigned: false));
8525 }
8526 }
8527
8528 // Skip the dummy dimension since we have already have its information.
8529 auto *DI = DimSizes.begin() + 1;
8530 // Product of dimension.
8531 llvm::Value *DimProd =
8532 llvm::ConstantInt::get(Ty: CGF.CGM.Int64Ty, V: ElementTypeSize);
8533
8534 // Collect info for non-contiguous. Notice that offset, count, and stride
8535 // are only meaningful for array-section, so we insert a null for anything
8536 // other than array-section.
8537 // Also, the size of offset, count, and stride are not the same as
8538 // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8539 // count, and stride are the same as the number of non-contiguous
8540 // declaration in target update to/from clause.
8541 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8542 Components) {
8543 const Expr *AssocExpr = Component.getAssociatedExpression();
8544
8545 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(Val: AssocExpr)) {
8546 llvm::Value *Offset = CGF.Builder.CreateIntCast(
8547 V: CGF.EmitScalarExpr(E: AE->getIdx()), DestTy: CGF.Int64Ty,
8548 /*isSigned=*/false);
8549 CurOffsets.push_back(Elt: Offset);
8550 CurCounts.push_back(Elt: llvm::ConstantInt::get(Ty: CGF.Int64Ty, /*V=*/1));
8551 CurStrides.push_back(Elt: CurStrides.back());
8552 continue;
8553 }
8554
8555 const auto *OASE = dyn_cast<ArraySectionExpr>(Val: AssocExpr);
8556
8557 if (!OASE)
8558 continue;
8559
8560 // Offset
8561 const Expr *OffsetExpr = OASE->getLowerBound();
8562 llvm::Value *Offset = nullptr;
8563 if (!OffsetExpr) {
8564 // If offset is absent, then we just set it to zero.
8565 Offset = llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: 0);
8566 } else {
8567 Offset = CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: OffsetExpr),
8568 DestTy: CGF.Int64Ty,
8569 /*isSigned=*/false);
8570 }
8571 CurOffsets.push_back(Elt: Offset);
8572
8573 // Count
8574 const Expr *CountExpr = OASE->getLength();
8575 llvm::Value *Count = nullptr;
8576 if (!CountExpr) {
8577 // In Clang, once a high dimension is an array section, we construct all
8578 // the lower dimension as array section, however, for case like
8579 // arr[0:2][2], Clang construct the inner dimension as an array section
8580 // but it actually is not in an array section form according to spec.
8581 if (!OASE->getColonLocFirst().isValid() &&
8582 !OASE->getColonLocSecond().isValid()) {
8583 Count = llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: 1);
8584 } else {
8585 // OpenMP 5.0, 2.1.5 Array Sections, Description.
8586 // When the length is absent it defaults to ⌈(size −
8587 // lower-bound)/stride⌉, where size is the size of the array
8588 // dimension.
8589 const Expr *StrideExpr = OASE->getStride();
8590 llvm::Value *Stride =
8591 StrideExpr
8592 ? CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: StrideExpr),
8593 DestTy: CGF.Int64Ty, /*isSigned=*/false)
8594 : nullptr;
8595 if (Stride)
8596 Count = CGF.Builder.CreateUDiv(
8597 LHS: CGF.Builder.CreateNUWSub(LHS: *DI, RHS: Offset), RHS: Stride);
8598 else
8599 Count = CGF.Builder.CreateNUWSub(LHS: *DI, RHS: Offset);
8600 }
8601 } else {
8602 Count = CGF.EmitScalarExpr(E: CountExpr);
8603 }
8604 Count = CGF.Builder.CreateIntCast(V: Count, DestTy: CGF.Int64Ty, /*isSigned=*/false);
8605 CurCounts.push_back(Elt: Count);
8606
8607 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8608 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8609 // Offset Count Stride
8610 // D0 0 1 4 (int) <- dummy dimension
8611 // D1 0 2 8 (2 * (1) * 4)
8612 // D2 1 2 20 (1 * (1 * 5) * 4)
8613 // D3 0 2 200 (2 * (1 * 5 * 4) * 4)
8614 const Expr *StrideExpr = OASE->getStride();
8615 llvm::Value *Stride =
8616 StrideExpr
8617 ? CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: StrideExpr),
8618 DestTy: CGF.Int64Ty, /*isSigned=*/false)
8619 : nullptr;
8620 DimProd = CGF.Builder.CreateNUWMul(LHS: DimProd, RHS: *(DI - 1));
8621 if (Stride)
8622 CurStrides.push_back(Elt: CGF.Builder.CreateNUWMul(LHS: DimProd, RHS: Stride));
8623 else
8624 CurStrides.push_back(Elt: DimProd);
8625 if (DI != DimSizes.end())
8626 ++DI;
8627 }
8628
8629 CombinedInfo.NonContigInfo.Offsets.push_back(Elt: CurOffsets);
8630 CombinedInfo.NonContigInfo.Counts.push_back(Elt: CurCounts);
8631 CombinedInfo.NonContigInfo.Strides.push_back(Elt: CurStrides);
8632 }
8633
8634 /// Return the adjusted map modifiers if the declaration a capture refers to
8635 /// appears in a first-private clause. This is expected to be used only with
8636 /// directives that start with 'target'.
8637 OpenMPOffloadMappingFlags
8638 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8639 assert(Cap.capturesVariable() && "Expected capture by reference only!");
8640
8641 // A first private variable captured by reference will use only the
8642 // 'private ptr' and 'map to' flag. Return the right flags if the captured
8643 // declaration is known as first-private in this handler.
8644 if (FirstPrivateDecls.count(Val: Cap.getCapturedVar())) {
8645 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8646 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8647 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
8648 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
8649 OpenMPOffloadMappingFlags::OMP_MAP_TO;
8650 }
8651 auto I = LambdasMap.find(Val: Cap.getCapturedVar()->getCanonicalDecl());
8652 if (I != LambdasMap.end())
8653 // for map(to: lambda): using user specified map type.
8654 return getMapTypeBits(
8655 MapType: I->getSecond()->getMapType(), MapModifiers: I->getSecond()->getMapTypeModifiers(),
8656 /*MotionModifiers=*/{}, IsImplicit: I->getSecond()->isImplicit(),
8657 /*AddPtrFlag=*/false,
8658 /*AddIsTargetParamFlag=*/false,
8659 /*isNonContiguous=*/IsNonContiguous: false);
8660 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8661 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
8662 }
8663
  /// Flattens the layout of \p RD into \p Layout as an ordered list of
  /// non-empty, non-bitfield FieldDecls, recursing into (non-empty) base
  /// classes in LLVM-struct field order. \p AsBase selects the base-subobject
  /// LLVM type (no tail padding reuse) instead of the complete-object type.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    // One slot per LLVM struct element; each slot ends up holding either a
    // base class record, a field, or null (padding/bitfield storage).
    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;

      QualType BaseTy = I.getType();
      const auto *Base = BaseTy->getAsCXXRecordDecl();
      // Ignore empty bases: they occupy no storage slot in the LLVM struct.
      if (isEmptyRecordForLayout(Context: CGF.getContext(), T: BaseTy) ||
          CGF.getContext()
              .getASTRecordLayout(D: Base)
              .getNonVirtualSize()
              .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(RD: Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      QualType BaseTy = I.getType();
      // Ignore empty bases.
      if (isEmptyRecordForLayout(Context: CGF.getContext(), T: BaseTy))
        continue;

      const auto *Base = BaseTy->getAsCXXRecordDecl();
      unsigned FieldIndex = RL.getVirtualBaseIndex(base: Base);
      // A virtual base may share its slot with an entry recorded above;
      // keep the first occupant.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() &&
          !isEmptyFieldForLayout(Context: CGF.getContext(), FD: Field)) {
        unsigned FieldIndex = RL.getLLVMFieldNo(FD: Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Walk the slots in layout order: recurse into base records (as base
    // subobjects) and append plain fields directly.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = dyn_cast<const CXXRecordDecl *>(Val: Data))
        getPlainLayout(RD: Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Elt: cast<const FieldDecl *>(Val: Data));
    }
  }
8729
8730 /// Returns the address corresponding to \p PointerExpr.
8731 static Address getAttachPtrAddr(const Expr *PointerExpr,
8732 CodeGenFunction &CGF) {
8733 assert(PointerExpr && "Cannot get addr from null attach-ptr expr");
8734 Address AttachPtrAddr = Address::invalid();
8735
8736 if (auto *DRE = dyn_cast<DeclRefExpr>(Val: PointerExpr)) {
8737 // If the pointer is a variable, we can use its address directly.
8738 AttachPtrAddr = CGF.EmitLValue(E: DRE).getAddress();
8739 } else if (auto *OASE = dyn_cast<ArraySectionExpr>(Val: PointerExpr)) {
8740 AttachPtrAddr =
8741 CGF.EmitArraySectionExpr(E: OASE, /*IsLowerBound=*/true).getAddress();
8742 } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(Val: PointerExpr)) {
8743 AttachPtrAddr = CGF.EmitLValue(E: ASE).getAddress();
8744 } else if (auto *ME = dyn_cast<MemberExpr>(Val: PointerExpr)) {
8745 AttachPtrAddr = CGF.EmitMemberExpr(E: ME).getAddress();
8746 } else if (auto *UO = dyn_cast<UnaryOperator>(Val: PointerExpr)) {
8747 assert(UO->getOpcode() == UO_Deref &&
8748 "Unexpected unary-operator on attach-ptr-expr");
8749 AttachPtrAddr = CGF.EmitLValue(E: UO).getAddress();
8750 }
8751 assert(AttachPtrAddr.isValid() &&
8752 "Failed to get address for attach pointer expression");
8753 return AttachPtrAddr;
8754 }
8755
8756 /// Get the address of the attach pointer, and a load from it, to get the
8757 /// pointee base address.
8758 /// \return A pair containing AttachPtrAddr and AttachPteeBaseAddr. The pair
8759 /// contains invalid addresses if \p AttachPtrExpr is null.
8760 static std::pair<Address, Address>
8761 getAttachPtrAddrAndPteeBaseAddr(const Expr *AttachPtrExpr,
8762 CodeGenFunction &CGF) {
8763
8764 if (!AttachPtrExpr)
8765 return {Address::invalid(), Address::invalid()};
8766
8767 Address AttachPtrAddr = getAttachPtrAddr(PointerExpr: AttachPtrExpr, CGF);
8768 assert(AttachPtrAddr.isValid() && "Invalid attach pointer addr");
8769
8770 QualType AttachPtrType =
8771 OMPClauseMappableExprCommon::getComponentExprElementType(Exp: AttachPtrExpr)
8772 .getCanonicalType();
8773
8774 Address AttachPteeBaseAddr = CGF.EmitLoadOfPointer(
8775 Ptr: AttachPtrAddr, PtrTy: AttachPtrType->castAs<PointerType>());
8776 assert(AttachPteeBaseAddr.isValid() && "Invalid attach pointee base addr");
8777
8778 return {AttachPtrAddr, AttachPteeBaseAddr};
8779 }
8780
8781 /// Returns whether an attach entry should be emitted for a map on
8782 /// \p MapBaseDecl on the directive \p CurDir.
8783 static bool
8784 shouldEmitAttachEntry(const Expr *PointerExpr, const ValueDecl *MapBaseDecl,
8785 CodeGenFunction &CGF,
8786 llvm::PointerUnion<const OMPExecutableDirective *,
8787 const OMPDeclareMapperDecl *>
8788 CurDir) {
8789 if (!PointerExpr)
8790 return false;
8791
8792 // Pointer attachment is needed at map-entering time or for declare
8793 // mappers.
8794 return isa<const OMPDeclareMapperDecl *>(Val: CurDir) ||
8795 isOpenMPTargetMapEnteringDirective(
8796 DKind: cast<const OMPExecutableDirective *>(Val&: CurDir)
8797 ->getDirectiveKind());
8798 }
8799
8800 /// Computes the attach-ptr expr for \p Components, and updates various maps
8801 /// with the information.
8802 /// It internally calls OMPClauseMappableExprCommon::findAttachPtrExpr()
8803 /// with the OpenMPDirectiveKind extracted from \p CurDir.
8804 /// It updates AttachPtrComputationOrderMap, AttachPtrComponentDepthMap, and
8805 /// AttachPtrExprMap.
8806 void collectAttachPtrExprInfo(
8807 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
8808 llvm::PointerUnion<const OMPExecutableDirective *,
8809 const OMPDeclareMapperDecl *>
8810 CurDir) {
8811
8812 OpenMPDirectiveKind CurDirectiveID =
8813 isa<const OMPDeclareMapperDecl *>(Val: CurDir)
8814 ? OMPD_declare_mapper
8815 : cast<const OMPExecutableDirective *>(Val&: CurDir)->getDirectiveKind();
8816
8817 const auto &[AttachPtrExpr, Depth] =
8818 OMPClauseMappableExprCommon::findAttachPtrExpr(Components,
8819 CurDirKind: CurDirectiveID);
8820
8821 AttachPtrComputationOrderMap.try_emplace(
8822 Key: AttachPtrExpr, Args: AttachPtrComputationOrderMap.size());
8823 AttachPtrComponentDepthMap.try_emplace(Key: AttachPtrExpr, Args: Depth);
8824 AttachPtrExprMap.try_emplace(Key: Components, Args: AttachPtrExpr);
8825 }
8826
8827 /// Generate all the base pointers, section pointers, sizes, map types, and
8828 /// mappers for the extracted mappable expressions (all included in \a
8829 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8830 /// pair of the relevant declaration and index where it occurs is appended to
8831 /// the device pointers info array.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(V: D))
            return;
          // On first use of D, create one bucket per MapKind (Total entries).
          auto It = Info.try_emplace(Key: D, Args: Total).first;
          It->second[Kind].emplace_back(
              Args&: L, Args&: MapType, Args&: MapModifiers, Args&: MotionModifiers, Args&: ReturnDevicePointer,
              Args&: IsImplicit, Args&: Mapper, Args&: VarRef, Args&: ForDeviceAddr);
        };

    // Collect the component lists from 'map' clauses.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Val: Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(Range: C->getMapTypeModifiers(),
                             Element: OMPC_MAP_MODIFIER_present))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(t: L), Kind, std::get<1>(t: L), C->getMapType(),
                C->getMapTypeModifiers(), {},
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(t: L),
                E);
        ++EI;
      }
    }
    // Collect the component lists from 'to' motion clauses. If an 'iterator'
    // modifier is present, its iterator variable must be emitted first.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Val: Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(Range: C->getMotionModifiers(),
                             Element: OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      if (llvm::is_contained(Range: C->getMotionModifiers(),
                             Element: OMPC_MOTION_MODIFIER_iterator)) {
        if (auto *IteratorExpr = dyn_cast<OMPIteratorExpr>(
                Val: C->getIteratorModifier()->IgnoreParenImpCasts())) {
          const auto *VD = cast<VarDecl>(Val: IteratorExpr->getIteratorDecl(I: 0));
          CGF.EmitVarDecl(D: *VD);
        }
      }

      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(t: L), Kind, std::get<1>(t: L), OMPC_MAP_to, {},
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(t: L), *EI);
        ++EI;
      }
    }
    // Collect the component lists from 'from' motion clauses, handling the
    // 'iterator' modifier the same way as for 'to' clauses above.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Val: Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(Range: C->getMotionModifiers(),
                             Element: OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      if (llvm::is_contained(Range: C->getMotionModifiers(),
                             Element: OMPC_MOTION_MODIFIER_iterator)) {
        if (auto *IteratorExpr = dyn_cast<OMPIteratorExpr>(
                Val: C->getIteratorModifier()->IgnoreParenImpCasts())) {
          const auto *VD = cast<VarDecl>(Val: IteratorExpr->getIteratorDecl(I: 0));
          CGF.EmitVarDecl(D: *VD);
        }
      }

      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(t: L), Kind, std::get<1>(t: L), OMPC_MAP_from, {},
                C->getMotionModifiers(),
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(t: L),
                *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr and use_device_addr clauses information and
    // mark the existing map entries as such. If there is no map information for
    // an entry in the use_device_ptr and use_device_addr list, we create one
    // with map type 'return_param' and zero size section. It is the user's
    // fault if that was not mapped before. If there is no map information, then
    // we defer the emission of that entry until all the maps for the same VD
    // have been handled.
    MapCombinedInfoTy UseDeviceDataCombinedInfo;

    // Appends a zero-size RETURN_PARAM entry for \p VD whose device
    // address/pointer is communicated back to the host through \p Ptr.
    auto &&UseDeviceDataCombinedInfoGen =
        [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
                                     CodeGenFunction &CGF, bool IsDevAddr,
                                     bool HasUdpFbNullify = false) {
          UseDeviceDataCombinedInfo.Exprs.push_back(Elt: VD);
          UseDeviceDataCombinedInfo.BasePointers.emplace_back(Args&: Ptr);
          UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(Args&: VD);
          UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
              Args: IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          // FIXME: For use_device_addr on array-sections, this should
          // be the starting address of the section.
          // e.g. int *p;
          // ... use_device_addr(p[3])
          // &p[0], &p[3], /*size=*/0, RETURN_PARAM
          UseDeviceDataCombinedInfo.Pointers.push_back(Elt: Ptr);
          UseDeviceDataCombinedInfo.Sizes.push_back(
              Elt: llvm::Constant::getNullValue(Ty: CGF.Int64Ty));
          OpenMPOffloadMappingFlags Flags =
              OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
          if (HasUdpFbNullify)
            Flags |= OpenMPOffloadMappingFlags::OMP_MAP_FB_NULLIFY;
          UseDeviceDataCombinedInfo.Types.push_back(Elt: Flags);
          UseDeviceDataCombinedInfo.Mappers.push_back(Elt: nullptr);
        };

    // Emits the address/pointer for a use_device_ptr/use_device_addr operand
    // that had no matching map entry, then records it via the generator above.
    auto &&MapInfoGen =
        [&UseDeviceDataCombinedInfoGen](
            CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
            OMPClauseMappableExprCommon::MappableExprComponentListRef
                Components,
            bool IsDevAddr, bool IEIsAttachPtrForDevAddr = false,
            bool HasUdpFbNullify = false) {
          // We didn't find any match in our map information - generate a zero
          // size array section.
          llvm::Value *Ptr;
          if (IsDevAddr && !IEIsAttachPtrForDevAddr) {
            if (IE->isGLValue())
              Ptr = CGF.EmitLValue(E: IE).getPointer(CGF);
            else
              Ptr = CGF.EmitScalarExpr(E: IE);
          } else {
            Ptr = CGF.EmitLoadOfScalar(lvalue: CGF.EmitLValue(E: IE), Loc: IE->getExprLoc());
          }
          bool TreatDevAddrAsDevPtr = IEIsAttachPtrForDevAddr;
          // For the purpose of address-translation, treat something like the
          // following:
          //   int *p;
          //   ... use_device_addr(p[1])
          // equivalent to
          //   ... use_device_ptr(p)
          UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, /*IsDevAddr=*/IsDevAddr &&
                                                         !TreatDevAddrAsDevPtr,
                                       HasUdpFbNullify);
        };

    // Returns true (and marks the entry for device-pointer/address return) if
    // an existing map entry for \p VD matches \p DesiredAttachPtrExpr.
    auto &&IsMapInfoExist =
        [&Info, this](CodeGenFunction &CGF, const ValueDecl *VD, const Expr *IE,
                      const Expr *DesiredAttachPtrExpr, bool IsDevAddr,
                      bool HasUdpFbNullify = false) -> bool {
      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it. If found,
      // return true.
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(Key: isa<MemberExpr>(Val: IE) ? nullptr : VD);
      if (It != Info.end()) {
        bool Found = false;
        for (auto &Data : It->second) {
          MapInfo *CI = nullptr;
          // We potentially have multiple maps for the same decl. We need to
          // only consider those for which the attach-ptr matches the desired
          // attach-ptr.
          auto *It = llvm::find_if(Range&: Data, P: [&](const MapInfo &MI) {
            if (MI.Components.back().getAssociatedDeclaration() != VD)
              return false;

            const Expr *MapAttachPtr = getAttachPtrExpr(Components: MI.Components);
            bool Match = AttachPtrComparator.areEqual(LHS: MapAttachPtr,
                                                      RHS: DesiredAttachPtrExpr);
            return Match;
          });

          if (It != Data.end())
            CI = &*It;

          if (CI) {
            if (IsDevAddr) {
              CI->ForDeviceAddr = true;
              CI->ReturnDevicePointer = true;
              CI->HasUdpFbNullify = HasUdpFbNullify;
              Found = true;
              break;
            } else {
              auto PrevCI = std::next(x: CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(Val: VD);
              const Expr *AttachPtrExpr = getAttachPtrExpr(Components: CI->Components);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(Val: IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(Val: PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage() ||
                  (isa_and_nonnull<DeclRefExpr>(Val: AttachPtrExpr) &&
                   VD == cast<DeclRefExpr>(Val: AttachPtrExpr)->getDecl())) {
                CI->ForDeviceAddr = IsDevAddr;
                CI->ReturnDevicePointer = true;
                CI->HasUdpFbNullify = HasUdpFbNullify;
                Found = true;
                break;
              }
            }
          }
        }
        return Found;
      }
      return false;
    };

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Val: Cl);
      if (!C)
        continue;
      bool HasUdpFbNullify =
          C->getFallbackModifier() == OMPC_USE_DEVICE_PTR_FALLBACK_fb_nullify;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(t: L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(Val: VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        // For use_device_ptr, we match an existing map clause if its attach-ptr
        // is same as the use_device_ptr operand. e.g.
        // map expr | use_device_ptr expr | current behavior
        // ---------|---------------------|-----------------
        // p[1]     | p                   | match
        // ps->a    | ps                  | match
        // p        | p                   | no match
        const Expr *UDPOperandExpr =
            Components.front().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE,
                           /*DesiredAttachPtrExpr=*/UDPOperandExpr,
                           /*IsDevAddr=*/false, HasUdpFbNullify))
          continue;
        MapInfoGen(CGF, IE, VD, Components, /*IsDevAddr=*/false,
                   /*IEIsAttachPtrForDevAddr=*/false, HasUdpFbNullify);
      }
    }

    // Handle use_device_addr clauses analogously; each declaration is
    // processed at most once.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Val: Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(t: L);
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(t: L).back().getAssociatedDeclaration();
        if (!Processed.insert(V: VD).second)
          continue;
        VD = cast<ValueDecl>(Val: VD->getCanonicalDecl());
        // For use_device_addr, we match an existing map clause if the
        // use_device_addr operand's attach-ptr matches the map operand's
        // attach-ptr.
        // We could also restrict to only match cases when there is a full
        // match between the map/use_device_addr clause exprs, but that may be
        // unnecessary.
        //
        // map expr | use_device_addr expr | current   | possible restrictive/
        //          |                      | behavior  | safer behavior
        // ---------|----------------------|-----------|-----------------------
        // p        | p                    | match     | match
        // p[0]     | p[0]                 | match     | match
        // p[0:1]   | p[0]                 | match     | no match
        // p[0:1]   | p[2:1]               | match     | no match
        // p[1]     | p[0]                 | match     | no match
        // ps->a    | ps->b                | match     | no match
        // p        | p[0]                 | no match  | no match
        // pp       | pp[0][0]             | no match  | no match
        const Expr *UDAAttachPtrExpr = getAttachPtrExpr(Components);
        const Expr *IE = std::get<1>(t: L).back().getAssociatedExpression();
        assert((!UDAAttachPtrExpr || UDAAttachPtrExpr == IE) &&
               "use_device_addr operand has an attach-ptr, but does not match "
               "last component's expr.");
        if (IsMapInfoExist(CGF, VD, IE,
                           /*DesiredAttachPtrExpr=*/UDAAttachPtrExpr,
                           /*IsDevAddr=*/true))
          continue;
        MapInfoGen(CGF, IE, VD, Components,
                   /*IsDevAddr=*/true,
                   /*IEIsAttachPtrForDevAddr=*/UDAAttachPtrExpr != nullptr);
      }
    }

    // Finally, generate the map information for every declaration collected
    // above, one declaration at a time.
    for (const auto &Data : Info) {
      MapCombinedInfoTy CurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(Val: D);
      // Group component lists by their AttachPtrExpr and process them in order
      // of increasing complexity (nullptr first, then simple expressions like
      // p, then more complex ones like p[0], etc.)
      //
      // This is similar to how generateInfoForCaptureFromClauseInfo handles
      // grouping for target constructs.
      SmallVector<std::pair<const Expr *, MapInfo>, 16> AttachPtrMapInfoPairs;

      // First, collect all MapData entries with their attach-ptr exprs.
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          const Expr *AttachPtrExpr = getAttachPtrExpr(Components: L.Components);
          AttachPtrMapInfoPairs.emplace_back(Args&: AttachPtrExpr, Args: L);
        }
      }

      // Next, sort by increasing order of their complexity.
      llvm::stable_sort(Range&: AttachPtrMapInfoPairs,
                        C: [this](const auto &LHS, const auto &RHS) {
                          return AttachPtrComparator(LHS.first, RHS.first);
                        });

      // And finally, process them all in order, grouping those with
      // equivalent attach-ptr exprs together.
      auto *It = AttachPtrMapInfoPairs.begin();
      while (It != AttachPtrMapInfoPairs.end()) {
        const Expr *AttachPtrExpr = It->first;

        SmallVector<MapInfo, 8> GroupLists;
        while (It != AttachPtrMapInfoPairs.end() &&
               (It->first == AttachPtrExpr ||
                AttachPtrComparator.areEqual(LHS: It->first, RHS: AttachPtrExpr))) {
          GroupLists.push_back(Elt: It->second);
          ++It;
        }
        assert(!GroupLists.empty() && "GroupLists should not be empty");

        StructRangeInfoTy PartialStruct;
        AttachInfoTy AttachInfo;
        MapCombinedInfoTy GroupCurInfo;
        // Current group's struct base information:
        MapCombinedInfoTy GroupStructBaseCurInfo;
        for (const MapInfo &L : GroupLists) {
          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = GroupCurInfo.BasePointers.size();
          unsigned StructBasePointersIdx =
              GroupStructBaseCurInfo.BasePointers.size();

          GroupCurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              MapType: L.MapType, MapModifiers: L.MapModifiers, MotionModifiers: L.MotionModifiers, Components: L.Components,
              CombinedInfo&: GroupCurInfo, StructBaseCombinedInfo&: GroupStructBaseCurInfo, PartialStruct, AttachInfo,
              /*IsFirstComponentList=*/false, IsImplicit: L.IsImplicit,
              /*GenerateAllInfoForClauses*/ true, Mapper: L.Mapper, ForDeviceAddr: L.ForDeviceAddr, BaseDecl: VD,
              MapExpr: L.VarRef, /*OverlappedElements*/ {});

          // If this entry relates to a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            // Check whether a value was added to either GroupCurInfo or
            // GroupStructBaseCurInfo and error if no value was added to either
            // of them:
            assert((CurrentBasePointersIdx < GroupCurInfo.BasePointers.size() ||
                    StructBasePointersIdx <
                        GroupStructBaseCurInfo.BasePointers.size()) &&
                   "Unexpected number of mapped base pointers.");

            // Choose a base pointer index which is always valid:
            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            // If GroupStructBaseCurInfo has been updated this iteration then
            // work on the first new entry added to it i.e. make sure that when
            // multiple values are added to any of the lists, the first value
            // added is being modified by the assignments below (not the last
            // value added).
            auto SetDevicePointerInfo = [&](MapCombinedInfoTy &Info,
                                            unsigned Idx) {
              Info.DevicePtrDecls[Idx] = RelevantVD;
              Info.DevicePointers[Idx] = L.ForDeviceAddr
                                             ? DeviceInfoTy::Address
                                             : DeviceInfoTy::Pointer;
              Info.Types[Idx] |=
                  OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
              if (L.HasUdpFbNullify)
                Info.Types[Idx] |=
                    OpenMPOffloadMappingFlags::OMP_MAP_FB_NULLIFY;
            };

            if (StructBasePointersIdx <
                GroupStructBaseCurInfo.BasePointers.size())
              SetDevicePointerInfo(GroupStructBaseCurInfo,
                                   StructBasePointersIdx);
            else
              SetDevicePointerInfo(GroupCurInfo, CurrentBasePointersIdx);
          }
        }

        // Unify entries in one list making sure the struct mapping precedes the
        // individual fields:
        MapCombinedInfoTy GroupUnionCurInfo;
        GroupUnionCurInfo.append(CurInfo&: GroupStructBaseCurInfo);
        GroupUnionCurInfo.append(CurInfo&: GroupCurInfo);

        // If there is an entry in PartialStruct it means we have a struct with
        // individual members mapped. Emit an extra combined entry.
        if (PartialStruct.Base.isValid()) {
          // Prepend a synthetic dimension of length 1 to represent the
          // aggregated struct object. Using 1 (not 0, as 0 produced an
          // incorrect non-contiguous descriptor (DimSize==1), causing the
          // non-contiguous motion clause path to be skipped.) is important:
          // * It preserves the correct rank so targetDataUpdate() computes
          //   DimSize == 2 for cases like strided array sections originating
          //   from user-defined mappers (e.g. test with s.data[0:8:2]).
          GroupUnionCurInfo.NonContigInfo.Dims.insert(
              I: GroupUnionCurInfo.NonContigInfo.Dims.begin(), Elt: 1);
          emitCombinedEntry(
              CombinedInfo&: CurInfo, CurTypes&: GroupUnionCurInfo.Types, PartialStruct, AttachInfo,
              /*IsMapThis=*/!VD, OMPBuilder, VD,
              /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size(),
              /*NotTargetParams=*/true);
        }

        // Append this group's results to the overall CurInfo in the correct
        // order: combined-entry -> original-field-entries -> attach-entry
        CurInfo.append(CurInfo&: GroupUnionCurInfo);
        if (AttachInfo.isValid())
          emitAttachEntry(CGF, CombinedInfo&: CurInfo, AttachInfo);
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr/addr clauses.
    CombinedInfo.append(CurInfo&: UseDeviceDataCombinedInfo);
  }
9295
9296public:
  /// Constructor for executable directives: pre-extracts per-clause
  /// information (firstprivates, defaultmap kinds, device pointers/addresses,
  /// mapped lambdas, and attach-pointer expressions) consulted later while
  /// generating map information.
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlist())
        FirstPrivateDecls.try_emplace(
            Key: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D)->getDecl()), Args: C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(Val: D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(Key: cast<VarDecl>(Val: DRE->getDecl()),
                                        /*Implicit=*/Args: true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     Val: cast<DeclRefExpr>(Val: D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(Key: VD, /*Implicit=*/Args: true);
      }
    }
    // Extract defaultmap clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPDefaultmapClause>())
      if (C->getDefaultmapModifier() == OMPC_DEFAULTMAP_MODIFIER_firstprivate)
        DefaultmapFirstprivateKinds.insert(V: C->getDefaultmapKind());
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(t&: L)].push_back(Elt: std::get<1>(t&: L));
    // Extract device addr clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
      for (auto L : C->component_lists())
        HasDevAddrsMap[std::get<0>(t&: L)].push_back(Elt: std::get<1>(t&: L));
    // Extract map information: remember 'map(to:)' clauses whose mapped
    // variable is a lambda object, for special handling of its captures.
    for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
      if (C->getMapType() != OMPC_MAP_to)
        continue;
      for (auto L : C->component_lists()) {
        const ValueDecl *VD = std::get<0>(t&: L);
        const auto *RD = VD ? VD->getType()
                                  .getCanonicalType()
                                  .getNonReferenceType()
                                  ->getAsCXXRecordDecl()
                            : nullptr;
        if (RD && RD->isLambda())
          LambdasMap.try_emplace(Key: std::get<0>(t&: L), Args&: C);
      }
    }

    // Helper that records attach-pointer expression information for every
    // non-empty component list of clause C.
    auto CollectAttachPtrExprsForClauseComponents = [this](const auto *C) {
      for (auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        if (!Components.empty())
          collectAttachPtrExprInfo(Components, CurDir);
      }
    };

    // Populate the AttachPtrExprMap for all component lists from map-related
    // clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPMapClause>())
      CollectAttachPtrExprsForClauseComponents(C);
    for (const auto *C : Dir.getClausesOfKind<OMPToClause>())
      CollectAttachPtrExprsForClauseComponents(C);
    for (const auto *C : Dir.getClausesOfKind<OMPFromClause>())
      CollectAttachPtrExprsForClauseComponents(C);
    for (const auto *C : Dir.getClausesOfKind<OMPUseDevicePtrClause>())
      CollectAttachPtrExprsForClauseComponents(C);
    for (const auto *C : Dir.getClausesOfKind<OMPUseDeviceAddrClause>())
      CollectAttachPtrExprsForClauseComponents(C);
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      CollectAttachPtrExprsForClauseComponents(C);
    for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
      CollectAttachPtrExprsForClauseComponents(C);
  }
9371
  /// Constructor for the declare mapper directive. Unlike the executable
  /// directive constructor, no per-clause information is pre-extracted here;
  /// only the directive and the codegen function are recorded.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {}
9375
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  /// If a valid \p AttachInfo exists, its pointee addr will be updated to point
  /// to the combined-entry's begin address, if emitted.
  /// \p PartialStruct contains attach base-pointer information.
  /// \p OffsetForMemberOfFlag is added to the combined entry's position in
  /// \p CombinedInfo when computing the MEMBER_OF flag for the member entries.
  /// \p NotTargetParams selects OMP_MAP_NONE instead of a TARGET_PARAM /
  /// PTR_AND_OBJ map type for the combined entry.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         AttachInfoTy &AttachInfo, bool IsMapThis,
                         llvm::OpenMPIRBuilder &OMPBuilder, const ValueDecl *VD,
                         unsigned OffsetForMemberOfFlag,
                         bool NotTargetParams) const {
    // A single non-MEMBER_OF entry that is not an array section does not need
    // a combined entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
         OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(Elt: VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(Elt: PartialStruct.Base.emitRawPointer(CGF));
    CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
    CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.emitRawPointer(CGF);
    const CXXMethodDecl *MD =
        CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(Val: CGF.CurFuncDecl) : nullptr;
    const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
    bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
    // There should not be a mapper for a combined entry.
    if (HasBaseClass) {
      // OpenMP 5.2 148:21:
      // If the target construct is within a class non-static member function,
      // and a variable is an accessible data member of the object for which the
      // non-static data member function is invoked, the variable is treated as
      // if the this[:1] expression had appeared in a map clause with a map-type
      // of tofrom.
      // Emit this[:1]
      CombinedInfo.Pointers.push_back(Elt: PartialStruct.Base.emitRawPointer(CGF));
      QualType Ty = MD->getFunctionObjectParameterType();
      llvm::Value *Size =
          CGF.Builder.CreateIntCast(V: CGF.getTypeSize(Ty), DestTy: CGF.Int64Ty,
                                    /*isSigned=*/true);
      CombinedInfo.Sizes.push_back(Elt: Size);
    } else {
      CombinedInfo.Pointers.push_back(Elt: LB);
      // Size is (addr of {highest+1} element) - (addr of lowest element)
      llvm::Value *HB = HBAddr.emitRawPointer(CGF);
      llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
          Ty: HBAddr.getElementType(), Ptr: HB, /*Idx0=*/1);
      llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(V: LB, DestTy: CGF.VoidPtrTy);
      llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(V: HAddr, DestTy: CGF.VoidPtrTy);
      llvm::Value *Diff = CGF.Builder.CreatePtrDiff(LHS: CHAddr, RHS: CLAddr);
      llvm::Value *Size = CGF.Builder.CreateIntCast(V: Diff, DestTy: CGF.Int64Ty,
                                                    /*isSigned=*/false);
      CombinedInfo.Sizes.push_back(Elt: Size);
    }
    CombinedInfo.Mappers.push_back(Elt: nullptr);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(
        Elt: NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
        : !PartialStruct.PreliminaryMapData.BasePointers.empty()
            ? OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ
            : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(Range&: CurTypes, P: [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
        }))
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement. Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(Range&: CurTypes, P: [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
        })) {
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field, or ATTACH entries since they are expected
    // to be handled by themselves, after all other maps).
    OpenMPOffloadMappingFlags MemberOfFlag = OMPBuilder.getMemberOfFlag(
        Position: OffsetForMemberOfFlag + CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      OMPBuilder.setCorrectMemberOfFlag(Flags&: M, MemberOfFlag);

    // If AttachInfo is valid, i.e. there is a pending attachment to be done,
    // update its pointee address to point to the begin address of the combined
    // entry. Note that this means only one attachment per combined-entry will
    // be done, so for a case like:
    //   S *ps;
    //   ... map(ps->a, ps->b)
    // we still get a single ATTACH entry, like:
    //
    // &ps[0], &ps->a, sizeof(ps->a to ps->b), ALLOC // combined-entry
    // &ps[0], &ps->a, sizeof(ps->a), TO | FROM
    // &ps[0], &ps->b, sizeof(ps->b), TO | FROM
    // &ps, &ps->a, sizeof(void*), ATTACH // Use combined-entry's LB
    if (AttachInfo.isValid())
      AttachInfo.AttachPteeAddr = LBAddr;
  }
9501
9502 /// Generate all the base pointers, section pointers, sizes, map types, and
9503 /// mappers for the extracted mappable expressions (all included in \a
9504 /// CombinedInfo). Also, for each item that relates with a device pointer, a
9505 /// pair of the relevant declaration and index where it occurs is appended to
9506 /// the device pointers info array.
9507 void generateAllInfo(
9508 MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
9509 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9510 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9511 assert(isa<const OMPExecutableDirective *>(CurDir) &&
9512 "Expect a executable directive");
9513 const auto *CurExecDir = cast<const OMPExecutableDirective *>(Val: CurDir);
9514 generateAllInfoForClauses(Clauses: CurExecDir->clauses(), CombinedInfo, OMPBuilder,
9515 SkipVarSet);
9516 }
9517
9518 /// Generate all the base pointers, section pointers, sizes, map types, and
9519 /// mappers for the extracted map clauses of user-defined mapper (all included
9520 /// in \a CombinedInfo).
9521 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
9522 llvm::OpenMPIRBuilder &OMPBuilder) const {
9523 assert(isa<const OMPDeclareMapperDecl *>(CurDir) &&
9524 "Expect a declare mapper directive");
9525 const auto *CurMapperDir = cast<const OMPDeclareMapperDecl *>(Val: CurDir);
9526 generateAllInfoForClauses(Clauses: CurMapperDir->clauses(), CombinedInfo,
9527 OMPBuilder);
9528 }
9529
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// If \p VD is a lambda object (whose host pointer is \p Arg), emit one
  /// PTR_AND_OBJ | LITERAL | MEMBER_OF | IMPLICIT map entry for its 'this'
  /// capture (if any) and one for each variable captured by reference or
  /// captured pointer. Each capture field's address is recorded in
  /// \p LambdaPointers keyed to the lambda's own address, for later
  /// adjustment of the captures.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
    const auto *RD = VDType->getAsCXXRecordDecl();
    // Nothing to do unless VD is a lambda object.
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr(Arg, CGF.ConvertTypeForMem(T: VDType),
                   CGF.getContext().getDeclAlign(D: VD));
    LValue VDLVal = CGF.MakeAddrLValue(Addr: VDAddr, T: VDType);
    llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      // Map the 'this' capture: base is the capture field, pointee is the
      // captured object pointer, size is that of a void pointer.
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(Base: VDLVal, Field: ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(Base: VDLVal, Field: ThisCapture);
      LambdaPointers.try_emplace(Key: ThisLVal.getPointer(CGF),
                                 Args: VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(Elt: VD);
      CombinedInfo.BasePointers.push_back(Elt: ThisLVal.getPointer(CGF));
      CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
      CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(Elt: ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          Elt: CGF.Builder.CreateIntCast(V: CGF.getTypeSize(Ty: CGF.getContext().VoidPtrTy),
                                     DestTy: CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          Elt: OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(Elt: nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = cast<VarDecl>(Val: LC.getCapturedVar());
      // Only by-reference captures and captured pointers are of interest.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(Val: VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(Base: VDLVal, Field: It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: pointee is the captured variable; size is
        // that of its non-reference type.
        LValue VarLValVal = CGF.EmitLValueForField(Base: VDLVal, Field: It->second);
        LambdaPointers.try_emplace(Key: VarLVal.getPointer(CGF),
                                   Args: VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(Elt: VD);
        CombinedInfo.BasePointers.push_back(Elt: VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
        CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(Elt: VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
            V: CGF.getTypeSize(
                Ty: VD->getType().getCanonicalType().getNonReferenceType()),
            DestTy: CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // By-copy captured pointer: pointee is the loaded pointer value; a
        // zero size is used.
        RValue VarRVal = CGF.EmitLoadOfLValue(V: VarLVal, Loc: RD->getLocation());
        LambdaPointers.try_emplace(Key: VarLVal.getPointer(CGF),
                                   Args: VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(Elt: VD);
        CombinedInfo.BasePointers.push_back(Elt: VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
        CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(Elt: VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(Elt: llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: 0));
      }
      CombinedInfo.Types.push_back(
          Elt: OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(Elt: nullptr);
    }
  }
9606
  /// Set correct indices for lambdas captures.
  ///
  /// Walks all map entries and, for those emitted by
  /// generateInfoForLambdaCaptures() (identified by their exact flag
  /// combination), finds the index of the owning lambda's own map entry and
  /// encodes it into the entry's MEMBER_OF bits. \p LambdaPointers supplies
  /// the field-address -> lambda-address mapping recorded during emission.
  void adjustMemberOfForLambdaCaptures(
      llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      // Only entries with exactly this flag combination were produced for
      // lambda captures; skip everything else.
      if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                       OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                       OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
                       OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(Val: BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      // Scan backwards from entry I for the entry whose pointer is the
      // lambda itself: that is the parent this entry is a MEMBER_OF.
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag =
          OMPBuilder.getMemberOfFlag(Position: TgtIdx);
      OMPBuilder.setCorrectMemberOfFlag(Flags&: Types[I], MemberOfFlag);
    }
  }
9639
  /// Populate component lists for non-lambda captured variables from map,
  /// is_device_ptr and has_device_addr clause info.
  ///
  /// Appends one MapData tuple per relevant component list to
  /// \p DeclComponentLists, adds any implicit attach-pointer base maps for
  /// target directives, then sorts so that 'present' entries come before
  /// others and 'alloc' entries come last. Component lists fabricated here
  /// are kept alive by \p StorageForImplicitlyAddedComponentLists, which must
  /// outlive \p DeclComponentLists.
  void populateComponentListsForNonLambdaCaptureFromClauses(
      const ValueDecl *VD, MapDataArrayTy &DeclComponentLists,
      SmallVectorImpl<
          SmallVector<OMPClauseMappableExprCommon::MappableComponent, 8>>
          &StorageForImplicitlyAddedComponentLists) const {
    // Lambdas are handled separately (see generateInfoForLambdaCaptures).
    if (VD && LambdasMap.count(Val: VD))
      return;

    // For member fields list in is_device_ptr, store it in
    // DeclComponentLists for generating components info.
    static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
    auto It = DevPointersMap.find(Val: VD);
    if (It != DevPointersMap.end())
      for (const auto &MCL : It->second)
        DeclComponentLists.emplace_back(Args: MCL, Args: OMPC_MAP_to, Args: Unknown,
                                        /*IsImplicit = */ Args: true, Args: nullptr,
                                        Args: nullptr);
    // has_device_addr entries are recorded as implicit tofrom maps.
    auto I = HasDevAddrsMap.find(Val: VD);
    if (I != HasDevAddrsMap.end())
      for (const auto &MCL : I->second)
        DeclComponentLists.emplace_back(Args: MCL, Args: OMPC_MAP_tofrom, Args: Unknown,
                                        /*IsImplicit = */ Args: true, Args: nullptr,
                                        Args: nullptr);
    assert(isa<const OMPExecutableDirective *>(CurDir) &&
           "Expect a executable directive");
    const auto *CurExecDir = cast<const OMPExecutableDirective *>(Val: CurDir);
    // Gather every component list from map clauses on the current directive
    // that refers to VD.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(args&: VDecl, args&: Components, args&: Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Args&: Components, Args: C->getMapType(),
                                        Args: C->getMapTypeModifiers(),
                                        Args: C->isImplicit(), Args&: Mapper, Args&: E);
        ++EI;
      }
    }

    // For the target construct, if there's a map with a base-pointer that's
    // a member of an implicitly captured struct, of the current class,
    // we need to emit an implicit map on the pointer.
    if (isOpenMPTargetExecutionDirective(DKind: CurExecDir->getDirectiveKind()))
      addImplicitMapForAttachPtrBaseIfMemberOfCapturedVD(
          CapturedVD: VD, DeclComponentLists, ComponentVectorStorage&: StorageForImplicitlyAddedComponentLists);

    // Sort: lists carrying the 'present' modifier first, 'alloc' maps last.
    // NOTE(review): HasPresent reads LHS's modifiers while HasAllocs reads
    // RHS's map type (and vice versa for the *R pair). This mirrors the
    // long-standing upstream comparator; confirm the LHS/RHS pairing of the
    // 'alloc' test is intentional.
    llvm::stable_sort(Range&: DeclComponentLists, C: [](const MapData &LHS,
                                            const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(t: LHS);
      OpenMPMapClauseKind MapType = std::get<1>(t: RHS);
      bool HasPresent =
          llvm::is_contained(Range&: MapModifiers, Element: clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(t: RHS);
      MapType = std::get<1>(t: LHS);
      bool HasPresentR =
          llvm::is_contained(Range&: MapModifiers, Element: clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });
  }
9708
9709 /// On a target construct, if there's an implicit map on a struct, or that of
9710 /// this[:], and an explicit map with a member of that struct/class as the
9711 /// base-pointer, we need to make sure that base-pointer is implicitly mapped,
9712 /// to make sure we don't map the full struct/class. For example:
9713 ///
9714 /// \code
9715 /// struct S {
9716 /// int dummy[10000];
9717 /// int *p;
9718 /// void f1() {
9719 /// #pragma omp target map(p[0:1])
9720 /// (void)this;
9721 /// }
9722 /// }; S s;
9723 ///
9724 /// void f2() {
9725 /// #pragma omp target map(s.p[0:10])
9726 /// (void)s;
9727 /// }
9728 /// \endcode
9729 ///
9730 /// Only `this-p` and `s.p` should be mapped in the two cases above.
9731 //
9732 // OpenMP 6.0: 7.9.6 map clause, pg 285
9733 // If a list item with an implicitly determined data-mapping attribute does
9734 // not have any corresponding storage in the device data environment prior to
9735 // a task encountering the construct associated with the map clause, and one
9736 // or more contiguous parts of the original storage are either list items or
9737 // base pointers to list items that are explicitly mapped on the construct,
9738 // only those parts of the original storage will have corresponding storage in
9739 // the device data environment as a result of the map clauses on the
9740 // construct.
9741 void addImplicitMapForAttachPtrBaseIfMemberOfCapturedVD(
9742 const ValueDecl *CapturedVD, MapDataArrayTy &DeclComponentLists,
9743 SmallVectorImpl<
9744 SmallVector<OMPClauseMappableExprCommon::MappableComponent, 8>>
9745 &ComponentVectorStorage) const {
9746 bool IsThisCapture = CapturedVD == nullptr;
9747
9748 for (const auto &ComponentsAndAttachPtr : AttachPtrExprMap) {
9749 OMPClauseMappableExprCommon::MappableExprComponentListRef
9750 ComponentsWithAttachPtr = ComponentsAndAttachPtr.first;
9751 const Expr *AttachPtrExpr = ComponentsAndAttachPtr.second;
9752 if (!AttachPtrExpr)
9753 continue;
9754
9755 const auto *ME = dyn_cast<MemberExpr>(Val: AttachPtrExpr);
9756 if (!ME)
9757 continue;
9758
9759 const Expr *Base = ME->getBase()->IgnoreParenImpCasts();
9760
9761 // If we are handling a "this" capture, then we are looking for
9762 // attach-ptrs of form `this->p`, either explicitly or implicitly.
9763 if (IsThisCapture && !ME->isImplicitCXXThis() && !isa<CXXThisExpr>(Val: Base))
9764 continue;
9765
9766 if (!IsThisCapture && (!isa<DeclRefExpr>(Val: Base) ||
9767 cast<DeclRefExpr>(Val: Base)->getDecl() != CapturedVD))
9768 continue;
9769
9770 // For non-this captures, we are looking for attach-ptrs of form
9771 // `s.p`.
9772 // For non-this captures, we are looking for attach-ptrs like `s.p`.
9773 if (!IsThisCapture && (ME->isArrow() || !isa<DeclRefExpr>(Val: Base) ||
9774 cast<DeclRefExpr>(Val: Base)->getDecl() != CapturedVD))
9775 continue;
9776
9777 // Check if we have an existing map on either:
9778 // this[:], s, this->p, or s.p, in which case, we don't need to add
9779 // an implicit one for the attach-ptr s.p/this->p.
9780 bool FoundExistingMap = false;
9781 for (const MapData &ExistingL : DeclComponentLists) {
9782 OMPClauseMappableExprCommon::MappableExprComponentListRef
9783 ExistingComponents = std::get<0>(t: ExistingL);
9784
9785 if (ExistingComponents.empty())
9786 continue;
9787
9788 // First check if we have a map like map(this->p) or map(s.p).
9789 const auto &FirstComponent = ExistingComponents.front();
9790 const Expr *FirstExpr = FirstComponent.getAssociatedExpression();
9791
9792 if (!FirstExpr)
9793 continue;
9794
9795 // First check if we have a map like map(this->p) or map(s.p).
9796 if (AttachPtrComparator.areEqual(LHS: FirstExpr, RHS: AttachPtrExpr)) {
9797 FoundExistingMap = true;
9798 break;
9799 }
9800
9801 // Check if we have a map like this[0:1]
9802 if (IsThisCapture) {
9803 if (const auto *OASE = dyn_cast<ArraySectionExpr>(Val: FirstExpr)) {
9804 if (isa<CXXThisExpr>(Val: OASE->getBase()->IgnoreParenImpCasts())) {
9805 FoundExistingMap = true;
9806 break;
9807 }
9808 }
9809 continue;
9810 }
9811
9812 // When the attach-ptr is something like `s.p`, check if
9813 // `s` itself is mapped explicitly.
9814 if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: FirstExpr)) {
9815 if (DRE->getDecl() == CapturedVD) {
9816 FoundExistingMap = true;
9817 break;
9818 }
9819 }
9820 }
9821
9822 if (FoundExistingMap)
9823 continue;
9824
9825 // If no base map is found, we need to create an implicit map for the
9826 // attach-pointer expr.
9827
9828 ComponentVectorStorage.emplace_back();
9829 auto &AttachPtrComponents = ComponentVectorStorage.back();
9830
9831 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
9832 bool SeenAttachPtrComponent = false;
9833 // For creating a map on the attach-ptr `s.p/this->p`, we copy all
9834 // components from the component-list which has `s.p/this->p`
9835 // as the attach-ptr, starting from the component which matches
9836 // `s.p/this->p`. This way, we'll have component-lists of
9837 // `s.p` -> `s`, and `this->p` -> `this`.
9838 for (size_t i = 0; i < ComponentsWithAttachPtr.size(); ++i) {
9839 const auto &Component = ComponentsWithAttachPtr[i];
9840 const Expr *ComponentExpr = Component.getAssociatedExpression();
9841
9842 if (!SeenAttachPtrComponent && ComponentExpr != AttachPtrExpr)
9843 continue;
9844 SeenAttachPtrComponent = true;
9845
9846 AttachPtrComponents.emplace_back(Args: Component.getAssociatedExpression(),
9847 Args: Component.getAssociatedDeclaration(),
9848 Args: Component.isNonContiguous());
9849 }
9850 assert(!AttachPtrComponents.empty() &&
9851 "Could not populate component-lists for mapping attach-ptr");
9852
9853 DeclComponentLists.emplace_back(
9854 Args&: AttachPtrComponents, Args: OMPC_MAP_tofrom, Args: Unknown,
9855 /*IsImplicit=*/Args: true, /*mapper=*/Args: nullptr, Args&: AttachPtrExpr);
9856 }
9857 }
9858
  /// For a capture that has an associated clause, generate the base pointers,
  /// section pointers, sizes, map types, and mappers (all included in
  /// \a CurCaptureVarInfo).
  ///
  /// Lambdas are skipped (handled in generateDefaultMapInfo), device-pointer
  /// captures get a single literal entry, and everything else is processed in
  /// groups keyed by equivalent attach-pointer expression.
  void generateInfoForCaptureFromClauseInfo(
      const MapDataArrayTy &DeclComponentListsFromClauses,
      const CapturedStmt::Capture *Cap, llvm::Value *Arg,
      MapCombinedInfoTy &CurCaptureVarInfo, llvm::OpenMPIRBuilder &OMPBuilder,
      unsigned OffsetForMemberOfFlag) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we generating information for the first component
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // for map(to: lambda): skip here, processing it in
    // generateDefaultMapInfo
    if (LambdasMap.count(Val: VD))
      return;

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (VD && (DevPointersMap.count(Val: VD) || HasDevAddrsMap.count(Val: VD))) {
      CurCaptureVarInfo.Exprs.push_back(Elt: VD);
      CurCaptureVarInfo.BasePointers.emplace_back(Args&: Arg);
      CurCaptureVarInfo.DevicePtrDecls.emplace_back(Args&: VD);
      CurCaptureVarInfo.DevicePointers.emplace_back(Args: DeviceInfoTy::Pointer);
      CurCaptureVarInfo.Pointers.push_back(Elt: Arg);
      // Only the pointer value is passed, so the size is that of a pointer.
      CurCaptureVarInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
          V: CGF.getTypeSize(Ty: CGF.getContext().VoidPtrTy), DestTy: CGF.Int64Ty,
          /*isSigned=*/true));
      CurCaptureVarInfo.Types.push_back(
          Elt: OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
      CurCaptureVarInfo.Mappers.push_back(Elt: nullptr);
      return;
    }

    // Emits all map info for one group of component lists (sharing one
    // attach-ptr), including the combined struct entry and the attach entry
    // when needed.
    auto GenerateInfoForComponentLists =
        [&](ArrayRef<MapData> DeclComponentListsFromClauses,
            bool IsEligibleForTargetParamFlag) {
          MapCombinedInfoTy CurInfoForComponentLists;
          StructRangeInfoTy PartialStruct;
          AttachInfoTy AttachInfo;

          if (DeclComponentListsFromClauses.empty())
            return;

          generateInfoForCaptureFromComponentLists(
              VD, DeclComponentLists: DeclComponentListsFromClauses, CurComponentListInfo&: CurInfoForComponentLists,
              PartialStruct, AttachInfo, IsListEligibleForTargetParamFlag: IsEligibleForTargetParamFlag);

          // If there is an entry in PartialStruct it means we have a
          // struct with individual members mapped. Emit an extra combined
          // entry.
          if (PartialStruct.Base.isValid()) {
            CurCaptureVarInfo.append(CurInfo&: PartialStruct.PreliminaryMapData);
            emitCombinedEntry(
                CombinedInfo&: CurCaptureVarInfo, CurTypes&: CurInfoForComponentLists.Types,
                PartialStruct, AttachInfo, IsMapThis: Cap->capturesThis(), OMPBuilder,
                /*VD=*/nullptr, OffsetForMemberOfFlag,
                /*NotTargetParams*/ !IsEligibleForTargetParamFlag);
          }

          // We do the appends to get the entries in the following order:
          // combined-entry -> individual-field-entries -> attach-entry,
          CurCaptureVarInfo.append(CurInfo&: CurInfoForComponentLists);
          if (AttachInfo.isValid())
            emitAttachEntry(CGF, CombinedInfo&: CurCaptureVarInfo, AttachInfo);
        };

    // Group component lists by their AttachPtrExpr and process them in order
    // of increasing complexity (nullptr first, then simple expressions like p,
    // then more complex ones like p[0], etc.)
    //
    // This ensure that we:
    // * handle maps that can contribute towards setting the kernel argument,
    //   (e.g. map(ps), or map(ps[0])), before any that cannot (e.g. ps->pt->d).
    // * allocate a single contiguous storage for all exprs with the same
    //   captured var and having the same attach-ptr.
    //
    // Example: The map clauses below should be handled grouped together based
    // on their attachable-base-pointers:
    // map-clause                | attachable-base-pointer
    // --------------------------+------------------------
    // map(p, ps)                | nullptr
    // map(p[0])                 | p
    // map(p[0]->b, p[0]->c)     | p[0]
    // map(ps->d, ps->e, ps->pt) | ps
    // map(ps->pt->d, ps->pt->e) | ps->pt

    // First, collect all MapData entries with their attach-ptr exprs.
    SmallVector<std::pair<const Expr *, MapData>, 16> AttachPtrMapDataPairs;

    for (const MapData &L : DeclComponentListsFromClauses) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
          std::get<0>(t: L);
      const Expr *AttachPtrExpr = getAttachPtrExpr(Components);
      AttachPtrMapDataPairs.emplace_back(Args&: AttachPtrExpr, Args: L);
    }

    // Next, sort by increasing order of their complexity.
    llvm::stable_sort(Range&: AttachPtrMapDataPairs,
                      C: [this](const auto &LHS, const auto &RHS) {
                        return AttachPtrComparator(LHS.first, RHS.first);
                      });

    // If default mapping already emitted entries for this capture, none of
    // the groups below may claim the TARGET_PARAM flag.
    bool NoDefaultMappingDoneForVD = CurCaptureVarInfo.BasePointers.empty();
    bool IsFirstGroup = true;

    // And finally, process them all in order, grouping those with
    // equivalent attach-ptr exprs together.
    auto *It = AttachPtrMapDataPairs.begin();
    while (It != AttachPtrMapDataPairs.end()) {
      const Expr *AttachPtrExpr = It->first;

      // Take the run of consecutive pairs whose attach-ptr is equivalent to
      // AttachPtrExpr (sorting above made equivalent ones adjacent).
      MapDataArrayTy GroupLists;
      while (It != AttachPtrMapDataPairs.end() &&
             (It->first == AttachPtrExpr ||
              AttachPtrComparator.areEqual(LHS: It->first, RHS: AttachPtrExpr))) {
        GroupLists.push_back(Elt: It->second);
        ++It;
      }
      assert(!GroupLists.empty() && "GroupLists should not be empty");

      // Determine if this group of component-lists is eligible for TARGET_PARAM
      // flag. Only the first group processed should be eligible, and only if no
      // default mapping was done.
      bool IsEligibleForTargetParamFlag =
          IsFirstGroup && NoDefaultMappingDoneForVD;

      GenerateInfoForComponentLists(GroupLists, IsEligibleForTargetParamFlag);
      IsFirstGroup = false;
    }
  }
9996
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to \a DeclComponentLists for a given capture
  /// \a VD (all included in \a CurComponentListInfo).
  ///
  /// First detects pairs of component lists where one is a sub-object of the
  /// other (overlap), sorts each overlap set by field layout, then emits the
  /// overlapped base lists followed by all remaining lists. At most the first
  /// emitted list carries the TARGET_PARAM flag (when
  /// \p IsListEligibleForTargetParamFlag is set).
  void generateInfoForCaptureFromComponentLists(
      const ValueDecl *VD, ArrayRef<MapData> DeclComponentLists,
      MapCombinedInfoTy &CurComponentListInfo, StructRangeInfoTy &PartialStruct,
      AttachInfoTy &AttachInfo, bool IsListEligibleForTargetParamFlag) const {
    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    // Compare each component list against all later ones, walking both from
    // the innermost component outwards until the expressions diverge.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(args&: Components, args&: MapType, args&: MapModifiers, args&: IsImplicit, args&: Mapper, args&: VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(N: Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(args&: Components1, args&: MapType, args&: MapModifiers, args&: IsImplicit, args&: Mapper,
                 args&: VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(Val: It->getAssociatedExpression()) ||
              (std::prev(x: It)->getAssociatedDeclaration() &&
               std::prev(x: It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(x: It) != CE && std::next(x: It) != SE))
            continue;
          // The shorter list is the base object; the longer one a sub-object.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          OverlappedData[&BaseData].push_back(Elt: SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Peel pointers/arrays down to the underlying record type so the
      // field layout can be used to order overlapped sub-objects.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(RD: CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(in_start: RD->field_begin(), in_end: RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Range&: Pair.getSecond(),
          C: [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            // Skip the common prefix of the two lists.
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Otherwise order by the fields' positions within the record.
            const auto *FD1 = cast<FieldDecl>(Val: CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(Val: SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Range&: Layout, P: [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool AddTargetParamFlag = IsListEligibleForTargetParamFlag;
    MapCombinedInfoTy StructBaseCombinedInfo;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(args&: Components, args&: MapType, args&: MapModifiers, args&: IsImplicit, args&: Mapper, args&: VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, MotionModifiers: {}, Components, CombinedInfo&: CurComponentListInfo,
          StructBaseCombinedInfo, PartialStruct, AttachInfo, IsFirstComponentList: AddTargetParamFlag,
          IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
          /*ForDeviceAddr=*/false, BaseDecl: VD, MapExpr: VarRef, OverlappedElements: OverlappedComponents);
      // Only the very first list emitted may carry TARGET_PARAM.
      AddTargetParamFlag = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(args&: Components, args&: MapType, args&: MapModifiers, args&: IsImplicit, args&: Mapper, args&: VarRef) =
          L;
      auto It = OverlappedData.find(Val: &L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(
            MapType, MapModifiers, MotionModifiers: {}, Components, CombinedInfo&: CurComponentListInfo,
            StructBaseCombinedInfo, PartialStruct, AttachInfo,
            IsFirstComponentList: AddTargetParamFlag, IsImplicit, /*GenerateAllInfoForClauses*/ false,
            Mapper, /*ForDeviceAddr=*/false, BaseDecl: VD, MapExpr: VarRef,
            /*OverlappedElements*/ {});
      AddTargetParamFlag = false;
    }
  }
10169
10170 /// Check if a variable should be treated as firstprivate due to explicit
10171 /// firstprivate clause or defaultmap(firstprivate:...).
10172 bool isEffectivelyFirstprivate(const VarDecl *VD, QualType Type) const {
10173 // Check explicit firstprivate clauses (not implicit from defaultmap)
10174 auto I = FirstPrivateDecls.find(Val: VD);
10175 if (I != FirstPrivateDecls.end() && !I->getSecond())
10176 return true; // Explicit firstprivate only
10177
10178 // Check defaultmap(firstprivate:scalar) for scalar types
10179 if (DefaultmapFirstprivateKinds.count(V: OMPC_DEFAULTMAP_scalar)) {
10180 if (Type->isScalarType())
10181 return true;
10182 }
10183
10184 // Check defaultmap(firstprivate:pointer) for pointer types
10185 if (DefaultmapFirstprivateKinds.count(V: OMPC_DEFAULTMAP_pointer)) {
10186 if (Type->isAnyPointerType())
10187 return true;
10188 }
10189
10190 // Check defaultmap(firstprivate:aggregate) for aggregate types
10191 if (DefaultmapFirstprivateKinds.count(V: OMPC_DEFAULTMAP_aggregate)) {
10192 if (Type->isAggregateType())
10193 return true;
10194 }
10195
10196 // Check defaultmap(firstprivate:all) for all types
10197 return DefaultmapFirstprivateKinds.count(V: OMPC_DEFAULTMAP_all);
10198 }
10199
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Emits exactly one map entry — the capture's kernel argument: a tofrom
  /// map of *this for 'this' captures, a literal for by-copy captures, and a
  /// map of the referenced object for by-reference captures. Firstprivate
  /// pointers are passed by value as literals (no runtime lookup/attach).
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(Elt: nullptr);
      CombinedInfo.BasePointers.push_back(Elt: CV);
      CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
      CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(Elt: CV);
      // 'this' is captured as a pointer; the mapped size is the pointee's.
      const auto *PtrTy = cast<PointerType>(Val: RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          Elt: CGF.Builder.CreateIntCast(V: CGF.getTypeSize(Ty: PtrTy->getPointeeType()),
                                    DestTy: CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(Elt: OpenMPOffloadMappingFlags::OMP_MAP_TO |
                                   OpenMPOffloadMappingFlags::OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(Elt: VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(Elt: CV);
      CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
      CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(Elt: CV);
      bool IsFirstprivate =
          isEffectivelyFirstprivate(VD, Type: RI.getType().getNonReferenceType());

      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(
            Elt: OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
            V: CGF.getTypeSize(Ty: RI.getType()), DestTy: CGF.Int64Ty, /*isSigned=*/true));
      } else if (IsFirstprivate) {
        // Firstprivate pointers should be passed by value (as literals)
        // without performing a present table lookup at runtime.
        CombinedInfo.Types.push_back(
            Elt: OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
        // Use zero size for pointer literals (just passing the pointer value)
        CombinedInfo.Sizes.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.Int64Ty));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(Elt: OpenMPOffloadMappingFlags::OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.Int64Ty));
      }
      // An explicit firstprivate clause downgrades the implicit flag.
      auto I = FirstPrivateDecls.find(Val: VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(Val: RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      const VarDecl *VD = CI.getCapturedVar();
      bool IsFirstprivate = isEffectivelyFirstprivate(VD, Type: ElementType);
      CombinedInfo.Exprs.push_back(Elt: VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(Elt: CV);
      CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
      CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);

      // For firstprivate pointers, pass by value instead of dereferencing
      if (IsFirstprivate && ElementType->isAnyPointerType()) {
        // Treat as a literal value (pass the pointer value itself)
        CombinedInfo.Pointers.push_back(Elt: CV);
        // Use zero size for pointer literals
        CombinedInfo.Sizes.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.Int64Ty));
        CombinedInfo.Types.push_back(
            Elt: OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
      } else {
        CombinedInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
            V: CGF.getTypeSize(Ty: ElementType), DestTy: CGF.Int64Ty, /*isSigned=*/true));
        // The default map type for a scalar/complex type is 'to' because by
        // default the value doesn't have to be retrieved. For an aggregate
        // type, the default is 'tofrom'.
        CombinedInfo.Types.push_back(Elt: getMapModifiersForPrivateClauses(Cap: CI));
        CombinedInfo.Pointers.push_back(Elt: CV);
      }
      auto I = FirstPrivateDecls.find(Val: VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |=
        OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(Elt: nullptr);
  }
10296};
10297} // anonymous namespace
10298
10299// Try to extract the base declaration from a `this->x` expression if possible.
10300static ValueDecl *getDeclFromThisExpr(const Expr *E) {
10301 if (!E)
10302 return nullptr;
10303
10304 if (const auto *OASE = dyn_cast<ArraySectionExpr>(Val: E->IgnoreParenCasts()))
10305 if (const MemberExpr *ME =
10306 dyn_cast<MemberExpr>(Val: OASE->getBase()->IgnoreParenImpCasts()))
10307 return ME->getMemberDecl();
10308 return nullptr;
10309}
10310
10311/// Emit a string constant containing the names of the values mapped to the
10312/// offloading runtime library.
10313static llvm::Constant *
10314emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
10315 MappableExprsHandler::MappingExprInfo &MapExprs) {
10316
10317 uint32_t SrcLocStrSize;
10318 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
10319 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
10320
10321 SourceLocation Loc;
10322 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
10323 if (const ValueDecl *VD = getDeclFromThisExpr(E: MapExprs.getMapExpr()))
10324 Loc = VD->getLocation();
10325 else
10326 Loc = MapExprs.getMapExpr()->getExprLoc();
10327 } else {
10328 Loc = MapExprs.getMapDecl()->getLocation();
10329 }
10330
10331 std::string ExprName;
10332 if (MapExprs.getMapExpr()) {
10333 PrintingPolicy P(CGF.getContext().getLangOpts());
10334 llvm::raw_string_ostream OS(ExprName);
10335 MapExprs.getMapExpr()->printPretty(OS, Helper: nullptr, Policy: P);
10336 } else {
10337 ExprName = MapExprs.getMapDecl()->getNameAsString();
10338 }
10339
10340 std::string FileName;
10341 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
10342 if (auto *DbgInfo = CGF.getDebugInfo())
10343 FileName = DbgInfo->remapDIPath(PLoc.getFilename());
10344 else
10345 FileName = PLoc.getFilename();
10346 return OMPBuilder.getOrCreateSrcLocStr(FunctionName: FileName, FileName: ExprName, Line: PLoc.getLine(),
10347 Column: PLoc.getColumn(), SrcLocStrSize);
10348}
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// \param CombinedInfo Pre-populated map information for all entries.
/// \param Info Receives the emitted runtime argument arrays (Info.RTArgs).
/// \param IsNonContiguous Whether non-contiguous map entries are present.
/// \param ForEndCall Whether the arrays are for the "end" part of a region.
static void emitOffloadingArraysAndArgs(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false, bool ForEndCall = false) {
  CodeGenModule &CGM = CGF.CGM;

  // The IR builder needs two insertion points: one in the function's alloca
  // block for the arrays' storage, and the current position for the code.
  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());

  // Callback: record the runtime-provided device address for entry I when it
  // corresponds to a device-pointer declaration.
  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(Key: DevVD, Args&: NewDecl);
    }
  };

  // Callback: resolve (emitting on demand) the user-defined mapper function
  // for entry I, or return nullptr when the entry has no declared mapper.
  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Function *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          D: cast<OMPDeclareMapperDecl>(Val: CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };
  // Delegate the actual array emission to the OpenMPIRBuilder.
  cantFail(Err: OMPBuilder.emitOffloadingArraysAndArgs(
      AllocaIP, CodeGenIP, Info, RTArgs&: Info.RTArgs, CombinedInfo, CustomMapperCB,
      IsNonContiguous, ForEndCall, DeviceAddrCB));
}
10383
/// Check for inner distribute directive.
///
/// Given a target-style directive \p D, look inside its captured statement
/// for the nested directive that carries the distribute loop (possibly one
/// level deeper, under a nested 'teams'). Returns that nested directive, or
/// nullptr when none is found.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  // Unwrap a compound statement holding a single child, if present.
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(Val: ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    // Dispatch on the *outer* directive kind; DKind is the nested one.
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // For now, treat 'target' with nested 'teams loop' as if it's
      // distributed (target teams distribute).
      if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
        return NestedDir;
      if (DKind == OMPD_teams) {
        // Look one level deeper: 'target' -> 'teams' -> 'distribute'.
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(Val: ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      // These target combinations have no nested distribute region.
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      // Callers only pass target-style directives; anything else is a bug.
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
10495
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first.
///   if ((size > 1 || (base != begin)) && !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each mapper declaration is emitted at most once.
  if (UDMMap.count(Val: D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  // The variable declared by the 'declare mapper' construct; inside the loop
  // it is privatized to refer to the current array element.
  auto *MapperVarDecl =
      cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D->getMapperVarRef())->getDecl());
  CharUnits ElementSize = C.getTypeSizeInChars(T: Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(T: Ty);

  CodeGenFunction MapperCGF(CGM);
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  // Callback run by the OpenMPIRBuilder inside the per-element loop: bind the
  // mapper variable to the current element, then generate map information for
  // the mapper's map clauses into CombinedInfo.
  auto PrivatizeAndGenMapInfoCB =
      [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, llvm::Value *PtrPHI,
          llvm::Value *BeginArg) -> llvm::OpenMPIRBuilder::MapInfosTy & {
    MapperCGF.Builder.restoreIP(IP: CodeGenIP);

    // Privatize the declared variable of mapper to be the current array
    // element.
    Address PtrCurrent(
        PtrPHI, ElemTy,
        Address(BeginArg, MapperCGF.VoidPtrTy, CGM.getPointerAlign())
            .getAlignment()
            .alignmentOfArrayElement(elementSize: ElementSize));
    CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
    Scope.addPrivate(LocalVD: MapperVarDecl, Addr: PtrCurrent);
    (void)Scope.Privatize();

    // Get map clause information.
    MappableExprsHandler MEHandler(*D, MapperCGF);
    MEHandler.generateAllInfoForMapper(CombinedInfo, OMPBuilder);

    // When debug info is requested, also record a printable name for each
    // mapped expression.
    auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
      return emitMappingInformation(CGF&: MapperCGF, OMPBuilder, MapExprs&: MapExpr);
    };
    if (CGM.getCodeGenOpts().getDebugInfo() !=
        llvm::codegenoptions::NoDebugInfo) {
      CombinedInfo.Names.resize(N: CombinedInfo.Exprs.size());
      llvm::transform(Range&: CombinedInfo.Exprs, d_first: CombinedInfo.Names.begin(),
                      F: FillInfoMap);
    }

    return CombinedInfo;
  };

  // Callback resolving a nested user-defined mapper for map entry I, if any.
  auto CustomMapperCB = [&](unsigned I) {
    llvm::Function *MapperFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      // Call the corresponding mapper function.
      MapperFunc = getOrCreateUserDefinedMapperFunc(
          D: cast<OMPDeclareMapperDecl>(Val: CombinedInfo.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
    }
    return MapperFunc;
  };

  // Build the mapper function name from the mangled type and mapper name.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(T: Ty, Out);
  std::string Name = getName(Parts: {"omp_mapper", TyStr, D->getName()});

  llvm::Function *NewFn = cantFail(ValOrErr: OMPBuilder.emitUserDefinedMapper(
      PrivAndGenMapInfoCB: PrivatizeAndGenMapInfoCB, ElemTy, FuncName: Name, CustomMapperCB));
  UDMMap.try_emplace(Key: D, Args&: NewFn);
  // Also track mappers emitted while generating CGF's function, keyed by the
  // current LLVM function.
  if (CGF)
    FunctionUDMMap[CGF->CurFn].push_back(Elt: D);
}
10594
10595llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10596 const OMPDeclareMapperDecl *D) {
10597 auto I = UDMMap.find(Val: D);
10598 if (I != UDMMap.end())
10599 return I->second;
10600 emitUserDefinedMapper(D);
10601 return UDMMap.lookup(Val: D);
10602}
10603
10604llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
10605 CodeGenFunction &CGF, const OMPExecutableDirective &D,
10606 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10607 const OMPLoopDirective &D)>
10608 SizeEmitter) {
10609 OpenMPDirectiveKind Kind = D.getDirectiveKind();
10610 const OMPExecutableDirective *TD = &D;
10611 // Get nested teams distribute kind directive, if any. For now, treat
10612 // 'target_teams_loop' as if it's really a target_teams_distribute.
10613 if ((!isOpenMPDistributeDirective(DKind: Kind) || !isOpenMPTeamsDirective(DKind: Kind)) &&
10614 Kind != OMPD_target_teams_loop)
10615 TD = getNestedDistributeDirective(Ctx&: CGM.getContext(), D);
10616 if (!TD)
10617 return llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: 0);
10618
10619 const auto *LD = cast<OMPLoopDirective>(Val: TD);
10620 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
10621 return NumIterations;
10622 return llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: 0);
10623}
10624
10625static void
10626emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10627 const OMPExecutableDirective &D,
10628 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10629 bool RequiresOuterTask, const CapturedStmt &CS,
10630 bool OffloadingMandatory, CodeGenFunction &CGF) {
10631 if (OffloadingMandatory) {
10632 CGF.Builder.CreateUnreachable();
10633 } else {
10634 if (RequiresOuterTask) {
10635 CapturedVars.clear();
10636 CGF.GenerateOpenMPCapturedVars(S: CS, CapturedVars);
10637 }
10638 OMPRuntime->emitOutlinedFunctionCall(CGF, Loc: D.getBeginLoc(), OutlinedFn,
10639 Args: CapturedVars);
10640 }
10641}
10642
10643static llvm::Value *emitDeviceID(
10644 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10645 CodeGenFunction &CGF) {
10646 // Emit device ID if any.
10647 llvm::Value *DeviceID;
10648 if (Device.getPointer()) {
10649 assert((Device.getInt() == OMPC_DEVICE_unknown ||
10650 Device.getInt() == OMPC_DEVICE_device_num) &&
10651 "Expected device_num modifier.");
10652 llvm::Value *DevVal = CGF.EmitScalarExpr(E: Device.getPointer());
10653 DeviceID =
10654 CGF.Builder.CreateIntCast(V: DevVal, DestTy: CGF.Int64Ty, /*isSigned=*/true);
10655 } else {
10656 DeviceID = CGF.Builder.getInt64(C: OMP_DEVICEID_UNDEF);
10657 }
10658 return DeviceID;
10659}
10660
10661static std::pair<llvm::Value *, OMPDynGroupprivateFallbackType>
10662emitDynCGroupMem(const OMPExecutableDirective &D, CodeGenFunction &CGF) {
10663 llvm::Value *DynGP = CGF.Builder.getInt32(C: 0);
10664 auto DynGPFallback = OMPDynGroupprivateFallbackType::Abort;
10665
10666 if (auto *DynGPClause = D.getSingleClause<OMPDynGroupprivateClause>()) {
10667 CodeGenFunction::RunCleanupsScope DynGPScope(CGF);
10668 llvm::Value *DynGPVal =
10669 CGF.EmitScalarExpr(E: DynGPClause->getSize(), /*IgnoreResultAssign=*/true);
10670 DynGP = CGF.Builder.CreateIntCast(V: DynGPVal, DestTy: CGF.Int32Ty,
10671 /*isSigned=*/false);
10672 auto FallbackModifier = DynGPClause->getDynGroupprivateFallbackModifier();
10673 switch (FallbackModifier) {
10674 case OMPC_DYN_GROUPPRIVATE_FALLBACK_abort:
10675 DynGPFallback = OMPDynGroupprivateFallbackType::Abort;
10676 break;
10677 case OMPC_DYN_GROUPPRIVATE_FALLBACK_null:
10678 DynGPFallback = OMPDynGroupprivateFallbackType::Null;
10679 break;
10680 case OMPC_DYN_GROUPPRIVATE_FALLBACK_default_mem:
10681 case OMPC_DYN_GROUPPRIVATE_FALLBACK_unknown:
10682 // This is the default for dyn_groupprivate.
10683 DynGPFallback = OMPDynGroupprivateFallbackType::DefaultMem;
10684 break;
10685 default:
10686 llvm_unreachable("Unknown fallback modifier for OpenMP dyn_groupprivate");
10687 }
10688 } else if (auto *OMPXDynCGClause =
10689 D.getSingleClause<OMPXDynCGroupMemClause>()) {
10690 CodeGenFunction::RunCleanupsScope DynCGMemScope(CGF);
10691 llvm::Value *DynCGMemVal = CGF.EmitScalarExpr(E: OMPXDynCGClause->getSize(),
10692 /*IgnoreResultAssign=*/true);
10693 DynGP = CGF.Builder.CreateIntCast(V: DynCGMemVal, DestTy: CGF.Int32Ty,
10694 /*isSigned=*/false);
10695 }
10696 return {DynGP, DynGPFallback};
10697}
10698
/// Generate map information for every capture of the target region's
/// captured statement \p CS, appending the results to \p CombinedInfo and
/// recording each handled declaration in \p MappedVarSet.
static void genMapInfoForCaptures(
    MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
    const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
    llvm::OpenMPIRBuilder &OMPBuilder,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
    MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {

  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
  // Walk captures, record fields, and captured values in lock-step.
  auto RI = CS.getCapturedRecordDecl()->field_begin();
  auto *CV = CapturedVars.begin();
  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                            CE = CS.capture_end();
       CI != CE; ++CI, ++RI, ++CV) {
    MappableExprsHandler::MapCombinedInfoTy CurInfo;

    // VLA sizes are passed to the outlined region by copy and do not have map
    // information associated.
    if (CI->capturesVariableArrayType()) {
      CurInfo.Exprs.push_back(Elt: nullptr);
      CurInfo.BasePointers.push_back(Elt: *CV);
      CurInfo.DevicePtrDecls.push_back(Elt: nullptr);
      CurInfo.DevicePointers.push_back(
          Elt: MappableExprsHandler::DeviceInfoTy::None);
      CurInfo.Pointers.push_back(Elt: *CV);
      CurInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
          V: CGF.getTypeSize(Ty: RI->getType()), DestTy: CGF.Int64Ty, /*isSigned=*/true));
      // Copy to the device as an argument. No need to retrieve it.
      CurInfo.Types.push_back(Elt: OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                              OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
                              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CurInfo.Mappers.push_back(Elt: nullptr);
    } else {
      // 'this' captures have no declaration; use nullptr as their key.
      const ValueDecl *CapturedVD =
          CI->capturesThis() ? nullptr
                             : CI->getCapturedVar()->getCanonicalDecl();
      bool HasEntryWithCVAsAttachPtr = false;
      if (CapturedVD)
        HasEntryWithCVAsAttachPtr =
            MEHandler.hasAttachEntryForCapturedVar(VD: CapturedVD);

      // Populate component lists for the captured variable from clauses.
      MappableExprsHandler::MapDataArrayTy DeclComponentLists;
      SmallVector<
          SmallVector<OMPClauseMappableExprCommon::MappableComponent, 8>, 4>
          StorageForImplicitlyAddedComponentLists;
      MEHandler.populateComponentListsForNonLambdaCaptureFromClauses(
          VD: CapturedVD, DeclComponentLists,
          StorageForImplicitlyAddedComponentLists);

      // OpenMP 6.0, 15.8, target construct, restrictions:
      // * A list item in a map clause that is specified on a target construct
      // must have a base variable or base pointer.
      //
      // Map clauses on a target construct must either have a base pointer, or a
      // base-variable. So, if we don't have a base-pointer, that means that it
      // must have a base-variable, i.e. we have a map like `map(s)`, `map(s.x)`
      // etc. In such cases, we do not need to handle default map generation
      // for `s`.
      bool HasEntryWithoutAttachPtr =
          llvm::any_of(Range&: DeclComponentLists, P: [&](const auto &MapData) {
            OMPClauseMappableExprCommon::MappableExprComponentListRef
                Components = std::get<0>(MapData);
            return !MEHandler.getAttachPtrExpr(Components);
          });

      // Generate default map info first if there's no direct map with CV as
      // the base-variable, or attach pointer.
      if (DeclComponentLists.empty() ||
          (!HasEntryWithCVAsAttachPtr && !HasEntryWithoutAttachPtr))
        MEHandler.generateDefaultMapInfo(CI: *CI, RI: **RI, CV: *CV, CombinedInfo&: CurInfo);

      // If we have any information in the map clause, we use it, otherwise we
      // just do a default mapping.
      MEHandler.generateInfoForCaptureFromClauseInfo(
          DeclComponentListsFromClauses: DeclComponentLists, Cap: CI, Arg: *CV, CurCaptureVarInfo&: CurInfo, OMPBuilder,
          /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size());

      // Mark this declaration as handled so later clause processing skips it.
      if (!CI->capturesThis())
        MappedVarSet.insert(V: CI->getCapturedVar());
      else
        MappedVarSet.insert(V: nullptr);

      // Generate correct mapping for variables captured by reference in
      // lambdas.
      if (CI->capturesVariable())
        MEHandler.generateInfoForLambdaCaptures(VD: CI->getCapturedVar(), Arg: *CV,
                                                CombinedInfo&: CurInfo, LambdaPointers);
    }
    // We expect to have at least an element of information for this capture.
    assert(!CurInfo.BasePointers.empty() &&
           "Non-existing map pointer for capture!");
    assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
           CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
           CurInfo.BasePointers.size() == CurInfo.Types.size() &&
           CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
           "Inconsistent map information sizes!");

    // We need to append the results of this capture to what we already have.
    CombinedInfo.append(CurInfo);
  }
  // Adjust MEMBER_OF flags for the lambdas captures.
  MEHandler.adjustMemberOfForLambdaCaptures(
      OMPBuilder, LambdaPointers, BasePointers&: CombinedInfo.BasePointers,
      Pointers&: CombinedInfo.Pointers, Types&: CombinedInfo.Types);
}
10804static void
10805genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
10806 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
10807 llvm::OpenMPIRBuilder &OMPBuilder,
10808 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
10809 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
10810
10811 CodeGenModule &CGM = CGF.CGM;
10812 // Map any list items in a map clause that were not captures because they
10813 // weren't referenced within the construct.
10814 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkipVarSet: SkippedVarSet);
10815
10816 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10817 return emitMappingInformation(CGF, OMPBuilder, MapExprs&: MapExpr);
10818 };
10819 if (CGM.getCodeGenOpts().getDebugInfo() !=
10820 llvm::codegenoptions::NoDebugInfo) {
10821 CombinedInfo.Names.resize(N: CombinedInfo.Exprs.size());
10822 llvm::transform(Range&: CombinedInfo.Exprs, d_first: CombinedInfo.Names.begin(),
10823 F: FillInfoMap);
10824 }
10825}
10826
10827static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
10828 const CapturedStmt &CS,
10829 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10830 llvm::OpenMPIRBuilder &OMPBuilder,
10831 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
10832 // Get mappable expression information.
10833 MappableExprsHandler MEHandler(D, CGF);
10834 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10835
10836 genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
10837 MappedVarSet, CombinedInfo);
10838 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, SkippedVarSet: MappedVarSet);
10839}
10840
10841template <typename ClauseTy>
10842static void
10843emitClauseForBareTargetDirective(CodeGenFunction &CGF,
10844 const OMPExecutableDirective &D,
10845 llvm::SmallVectorImpl<llvm::Value *> &Values) {
10846 const auto *C = D.getSingleClause<ClauseTy>();
10847 assert(!C->varlist_empty() &&
10848 "ompx_bare requires explicit num_teams and thread_limit");
10849 CodeGenFunction::RunCleanupsScope Scope(CGF);
10850 for (auto *E : C->varlist()) {
10851 llvm::Value *V = CGF.EmitScalarExpr(E);
10852 Values.push_back(
10853 Elt: CGF.Builder.CreateIntCast(V, DestTy: CGF.Int32Ty, /*isSigned=*/true));
10854 }
10855}
10856
/// Emit the offloading arrays for target directive \p D and the conditional
/// kernel-launch code (with host fallback) via the OpenMPIRBuilder.
static void emitTargetCallKernelLaunch(
    CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
    const OMPExecutableDirective &D,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
    const CapturedStmt &CS, bool OffloadingMandatory,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
    llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter,
    CodeGenFunction &CGF, CodeGenModule &CGM) {
  llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();

  // Fill up the arrays with all the captured variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  CGOpenMPRuntime::TargetDataInfo Info;
  genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);

  emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
                              /*IsNonContiguous=*/true, /*ForEndCall=*/false);

  // Publish the emitted arrays through the by-reference out-parameters so the
  // caller (and the ThenGen lambda below) can use them.
  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
  InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                        CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.PointersArray =
      Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.SizesArray =
      Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
  InputInfo.MappersArray =
      Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  MapTypesArray = Info.RTArgs.MapTypesArray;
  MapNamesArray = Info.RTArgs.MapNamesArray;

  // Code generator for the actual launch; may run later inside an outer task
  // region, hence the captures by reference of the out-parameters above.
  auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
                    RequiresOuterTask, &CS, OffloadingMandatory, Device,
                    OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;

    if (IsReverseOffloading) {
      // Reverse offloading is not supported, so just execute on the host.
      // FIXME: This fallback solution is incorrect since it ignores the
      // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
      // assert here and ensure SEMA emits an error.
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return;
    }

    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
    unsigned NumTargetItems = InputInfo.NumberOfTargetItems;

    // Materialize raw pointers to the previously-emitted argument arrays.
    llvm::Value *BasePointersArray =
        InputInfo.BasePointersArray.emitRawPointer(CGF);
    llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
    llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
    llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);

    // Fallback invoked by the IR builder when the kernel launch fails.
    auto &&EmitTargetCallFallbackCB =
        [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
         OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
        -> llvm::OpenMPIRBuilder::InsertPointTy {
      CGF.Builder.restoreIP(IP);
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return CGF.Builder.saveIP();
    };

    // ompx_bare kernels take their team/thread counts directly from the
    // num_teams/thread_limit clauses; otherwise compute them from D.
    bool IsBare = D.hasClausesOfKind<OMPXBareClause>();
    SmallVector<llvm::Value *, 3> NumTeams;
    SmallVector<llvm::Value *, 3> NumThreads;
    if (IsBare) {
      emitClauseForBareTargetDirective<OMPNumTeamsClause>(CGF, D, Values&: NumTeams);
      emitClauseForBareTargetDirective<OMPThreadLimitClause>(CGF, D,
                                                             Values&: NumThreads);
    } else {
      NumTeams.push_back(Elt: OMPRuntime->emitNumTeamsForTargetDirective(CGF, D));
      NumThreads.push_back(
          Elt: OMPRuntime->emitNumThreadsForTargetDirective(CGF, D));
    }

    llvm::Value *DeviceID = emitDeviceID(Device, CGF);
    llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, Loc: D.getBeginLoc());
    llvm::Value *NumIterations =
        OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
    auto [DynCGroupMem, DynCGroupMemFallback] = emitDynCGroupMem(D, CGF);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());

    // Package everything for the IR builder's kernel-launch emission.
    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
        BasePointersArray, PointersArray, SizesArray, MapTypesArray,
        nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);

    llvm::OpenMPIRBuilder::TargetKernelArgs Args(
        NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
        DynCGroupMem, HasNoWait, DynCGroupMemFallback);

    llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
        cantFail(ValOrErr: OMPRuntime->getOMPBuilder().emitKernelLaunch(
            Loc: CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
            RTLoc, AllocaIP));
    CGF.Builder.restoreIP(IP: AfterIP);
  };

  // depend/nowait etc. require wrapping the launch in an outer task.
  if (RequiresOuterTask)
    CGF.EmitOMPTargetTaskBasedDirective(S: D, BodyGen: ThenGen, InputInfo);
  else
    OMPRuntime->emitInlinedDirective(CGF, InnerKind: D.getDirectiveKind(), CodeGen: ThenGen);
}
10967
10968static void
10969emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10970 const OMPExecutableDirective &D,
10971 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10972 bool RequiresOuterTask, const CapturedStmt &CS,
10973 bool OffloadingMandatory, CodeGenFunction &CGF) {
10974
10975 // Notify that the host version must be executed.
10976 auto &&ElseGen =
10977 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
10978 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
10979 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10980 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10981 };
10982
10983 if (RequiresOuterTask) {
10984 CodeGenFunction::OMPTargetDataInfo InputInfo;
10985 CGF.EmitOMPTargetTaskBasedDirective(S: D, BodyGen: ElseGen, InputInfo);
10986 } else {
10987 OMPRuntime->emitInlinedDirective(CGF, InnerKind: D.getDirectiveKind(), CodeGen: ElseGen);
10988 }
10989}
10990
/// Emit a target region call: generate the captured variables, then either
/// launch the device kernel (with host fallback) or run the host version,
/// honoring an 'if' clause condition when present.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  // Offloading is mandatory only in host compilation with
  // -fopenmp-offload-mandatory.
  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
                                   CGM.getLangOpts().OpenMPOffloadMandatory;

  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");

  // Clauses like depend/nowait/in_reduction (and thread_limit since 5.1 on
  // applicable directives) require wrapping the launch in an outer task.
  const bool RequiresOuterTask =
      D.hasClausesOfKind<OMPDependClause>() ||
      D.hasClausesOfKind<OMPNowaitClause>() ||
      D.hasClausesOfKind<OMPInReductionClause>() ||
      (CGM.getLangOpts().OpenMP >= 51 &&
       needsTaskBasedThreadLimit(DKind: D.getDirectiveKind()) &&
       D.hasClausesOfKind<OMPThreadLimitClause>());
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(RegionKind: OMPD_target);
  // Generate the captured values inside an inlined region so cleanups are
  // scoped correctly.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(S: CS, CapturedVars);
  };
  emitInlinedDirective(CGF, InnerKind: OMPD_unknown, CodeGen: ArgsCodegen);

  // Filled in by emitTargetCallKernelLaunch; captured by reference below.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;

  // "then" branch: attempt the device kernel launch.
  auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
                          RequiresOuterTask, &CS, OffloadingMandatory, Device,
                          OutlinedFnID, &InputInfo, &MapTypesArray,
                          &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
                                                       PrePostActionTy &) {
    emitTargetCallKernelLaunch(OMPRuntime: this, OutlinedFn, D, CapturedVars,
                               RequiresOuterTask, CS, OffloadingMandatory,
                               Device, OutlinedFnID, InputInfo, MapTypesArray,
                               MapNamesArray, SizeEmitter, CGF, CGM);
  };

  // "else" branch: run the host version.
  auto &&TargetElseGen =
      [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
        emitTargetCallElse(OMPRuntime: this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
                           CS, OffloadingMandatory, CGF);
      };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, Cond: IfCond, ThenGen: TargetThenGen, ElseGen: TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
11059
/// Recursively walk \p S looking for target regions and, for each one found,
/// emit its device function with a kernel name derived from \p ParentName.
/// Also registers vtables for target data-management directives encountered
/// along the way. Only called during device compilation (see comment below).
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Register vtable from device for target data and target directives.
  // Add this block here since scanForTargetRegionsFunctions ignores
  // target data by checking if S is a executable directive (target).
  if (auto *E = dyn_cast<OMPExecutableDirective>(Val: S);
      E && isOpenMPTargetDataManagementDirective(DKind: E->getDirectiveKind())) {
    // Don't need to check if it's device compile
    // since scanForTargetRegionsFunctions currently only called
    // in device compilation.
    registerVTable(D: *E);
  }

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(Val: S) &&
      isOpenMPTargetExecutionDirective(
          DKind: cast<OMPExecutableDirective>(Val: S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(Val: S);

    // The entry is keyed by file/line of the directive plus the parent
    // function name, so host and device agree on the kernel identity.
    llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
        CGM, OMPBuilder, BeginLoc: E.getBeginLoc(), ParentName);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
      return;

    // Dispatch to the directive-specific device-function emitter.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   S: cast<OMPTargetDirective>(Val: E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetParallelDirective>(Val: E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetTeamsDirective>(Val: E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetTeamsDistributeDirective>(Val: E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetTeamsDistributeSimdDirective>(Val: E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetParallelForDirective>(Val: E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetParallelForSimdDirective>(Val: E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetSimdDirective>(Val: E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          S: cast<OMPTargetTeamsDistributeParallelForDirective>(Val: E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              S: cast<OMPTargetTeamsDistributeParallelForSimdDirective>(Val: E));
      break;
    case OMPD_target_teams_loop:
      CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetTeamsGenericLoopDirective>(Val: E));
      break;
    case OMPD_target_parallel_loop:
      CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetParallelGenericLoopDirective>(Val: E));
      break;
    // All remaining directive kinds are not target-execution directives and
    // cannot reach here (RequiresDeviceCodegen filtered them out above).
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // Non-target executable directives: recurse into the raw associated
  // statement (if any); target regions nested in it still need scanning.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(Val: S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(S: E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(Val: S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(S: II, ParentName);
}
11226
11227static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
11228 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
11229 OMPDeclareTargetDeclAttr::getDeviceType(VD);
11230 if (!DevTy)
11231 return false;
11232 // Do not emit device_type(nohost) functions for the host.
11233 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
11234 return true;
11235 // Do not emit device_type(host) functions for the device.
11236 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
11237 return true;
11238 return false;
11239}
11240
/// Returns true when the regular code generation for \p GD must be skipped:
/// either the declaration is excluded by a 'device_type' clause, or (on the
/// device) it is not declare-target and was not already emitted as part of a
/// target region. As a side effect, on the device this scans the function
/// body for target regions and emits their device kernels.
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(Val: GD.getDecl()))
      // Still honor device_type(nohost): such functions are never emitted
      // for the host.
      if (isAssumedToBeNotEmitted(VD: cast<ValueDecl>(Val: FD),
                                  IsDevice: CGM.getLangOpts().OpenMPIsTargetDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(Val: GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(Val: VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(S: FD->getBody(), ParentName: Name);
    // device_type(host) functions are not emitted for the device.
    if (isAssumedToBeNotEmitted(VD: cast<ValueDecl>(Val: FD),
                                IsDevice: CGM.getLangOpts().OpenMPIsTargetDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target and was
  // not already emitted (e.g. as part of a target region).
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(V: VD) == 0;
}
11266
/// Returns true when the regular emission of the global variable \p GD must
/// be skipped (excluded by device_type, or deferred because of its
/// declare-target map type). On the device this also scans any constructors
/// and the destructor of the variable's class type for target regions.
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(VD: cast<ValueDecl>(Val: GD.getDecl()),
                              IsDevice: CGM.getLangOpts().OpenMPIsTargetDevice))
    return true;

  // On the host, global variables go through normal code generation.
  if (!CGM.getLangOpts().OpenMPIsTargetDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(Val: GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GD: GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(S: Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GD: GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(S: Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target. 'link'
  // variables, and 'to'/'enter' variables under unified shared memory, are
  // deferred (only a reference pointer will be emitted later).
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          VD: cast<VarDecl>(Val: GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
        *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(V: cast<VarDecl>(Val: GD.getDecl()));
    return true;
  }
  return false;
}
11305
/// Register the declare-target variable \p VD (already emitted at \p Addr)
/// with the OpenMPIRBuilder so an offloading entry can be produced.
/// Non-declare-target variables seen during device compilation are only
/// recorded in EmittedNonTargetVariables.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  // Nothing to register when no offloading is happening at all.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsTargetDevice)
    return;

  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);

  // If this is an 'extern' declaration we defer to the canonical definition and
  // do not emit an offloading entry.
  if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
      VD->hasExternalStorage())
    return;

  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsTargetDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(GD: VD);
      EmittedNonTargetVariables.try_emplace(Key: VarName, Args&: Addr);
    }
    return;
  }

  // Lazily computed by the builder only when it actually needs the address
  // or linkage of the variable.
  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(GD: VD); };
  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  // The builder may synthesize reference globals (e.g. for 'link' clauses);
  // keep them alive via llvm.compiler.used below.
  std::vector<llvm::GlobalVariable *> GeneratedRefs;
  OMPBuilder.registerTargetGlobalVariable(
      CaptureClause: convertCaptureClause(VD), DeviceClause: convertDeviceClause(VD),
      IsDeclaration: VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      IsExternallyVisible: VD->isExternallyVisible(),
      EntryInfo: getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  BeginLoc: VD->getCanonicalDecl()->getBeginLoc()),
      MangledName: CGM.getMangledName(GD: VD), GeneratedRefs, OpenMPSIMD: CGM.getLangOpts().OpenMPSimd,
      TargetTriple: CGM.getLangOpts().OMPTargetTriples, GlobalInitializer: AddrOfGlobal, VariableLinkage: LinkageForVariable,
      LlvmPtrTy: CGM.getTypes().ConvertTypeForMem(
          T: CGM.getContext().getPointerType(T: VD->getType())),
      Addr);

  for (auto *ref : GeneratedRefs)
    CGM.addCompilerUsedGlobal(GV: ref);
}
11352
11353bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
11354 if (isa<FunctionDecl>(Val: GD.getDecl()) ||
11355 isa<OMPDeclareReductionDecl>(Val: GD.getDecl()))
11356 return emitTargetFunctions(GD);
11357
11358 return emitTargetGlobalVariable(GD);
11359}
11360
11361void CGOpenMPRuntime::emitDeferredTargetDecls() const {
11362 for (const VarDecl *VD : DeferredGlobalVariables) {
11363 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
11364 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
11365 if (!Res)
11366 continue;
11367 if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
11368 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
11369 !HasRequiresUnifiedSharedMemory) {
11370 CGM.EmitGlobal(D: VD);
11371 } else {
11372 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
11373 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
11374 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
11375 HasRequiresUnifiedSharedMemory)) &&
11376 "Expected link clause or to clause with unified memory.");
11377 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
11378 }
11379 }
11380}
11381
/// Hook for target-specific adjustment of lambda captures in a target
/// region. This default implementation performs no adjustment and only
/// asserts the directive kind; device-specific runtimes are expected to
/// provide the real mapping (NOTE(review): presumably overridden in the
/// GPU runtime — confirm in the class hierarchy).
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
11387
/// Record the effects of an 'omp requires' directive: remember whether
/// unified shared memory was requested (also forwarded to the IR builder
/// config) and capture the default atomic memory ordering, if any.
void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
      OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Val: Clause)) {
      // Map the clause's ordering kind onto the LLVM atomic ordering used
      // for atomics without an explicit memory-order clause.
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        // Invalid/unrecognized kind: keep the current default untouched.
        break;
      }
    }
  }
}
11411
/// Returns the default atomic ordering, as set by a 'requires
/// atomic_default_mem_order' clause (see processRequiresDirective).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
11415
11416bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
11417 LangAS &AS) {
11418 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
11419 return false;
11420 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
11421 switch(A->getAllocatorType()) {
11422 case OMPAllocateDeclAttr::OMPNullMemAlloc:
11423 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11424 // Not supported, fallback to the default mem space.
11425 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11426 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11427 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11428 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11429 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11430 case OMPAllocateDeclAttr::OMPConstMemAlloc:
11431 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11432 AS = LangAS::Default;
11433 return true;
11434 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11435 llvm_unreachable("Expected predefined allocator for the variables with the "
11436 "static storage.");
11437 }
11438 return false;
11439}
11440
/// Returns true if an 'omp requires unified_shared_memory' directive was
/// seen (see processRequiresDirective).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
11444
11445CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
11446 CodeGenModule &CGM)
11447 : CGM(CGM) {
11448 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
11449 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
11450 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
11451 }
11452}
11453
/// RAII destructor: restore the ShouldMarkAsGlobal flag saved by the
/// constructor (device compilation only, matching the constructor's guard).
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsTargetDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}
11458
/// During device compilation, decide whether the function \p GD was already
/// (or will be) emitted as a global target. Returns true when nothing more
/// needs to be done for it; returns false when the caller should mark it.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  // Outside device compilation (or while auto-marking is disabled via
  // DisableAutoDeclareTargetRAII) report "already handled".
  if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(Val: GD.getDecl());
  // Do not emit the function if it is marked as declare target as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD: D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(V: D) == 0) {
      // If we already have a definition in the module, it is done.
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              Val: CGM.GetGlobalValue(Ref: CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // First time we see this non-declare-target function: record it and let
  // the caller proceed; subsequent calls return true.
  return !AlreadyEmittedTargetDecls.insert(V: D).second;
}
11478
/// Emit a call to __kmpc_fork_teams that runs \p OutlinedFn for a 'teams'
/// construct, forwarding the captured variables as trailing arguments.
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  // Cleanups emitted while building the call stay local to this scope.
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(C: CapturedVars.size()), // Number of captured vars
      OutlinedFn};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(in_start: std::begin(arr&: Args), in_end: std::end(arr&: Args));
  RealArgs.append(in_start: CapturedVars.begin(), in_end: CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      M&: CGM.getModule(), FnID: OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(callee: RTLFn, args: RealArgs);
}
11503
/// Emit a call to __kmpc_push_num_teams registering the 'num_teams' and
/// 'thread_limit' clause values with the runtime. A null expression lowers
/// to 0, which the runtime interprets as "not specified".
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  // Evaluate the clause expressions as signed i32 values (0 when absent).
  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: NumTeams),
                                      DestTy: CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(C: 0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: ThreadLimit),
                                      DestTy: CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(C: 0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                          M&: CGM.getModule(), FnID: OMPRTL___kmpc_push_num_teams),
                      args: PushNumTeamsArgs);
}
11532
11533void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
11534 const Expr *ThreadLimit,
11535 SourceLocation Loc) {
11536 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11537 llvm::Value *ThreadLimitVal =
11538 ThreadLimit
11539 ? CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: ThreadLimit),
11540 DestTy: CGF.CGM.Int32Ty, /* isSigned = */ true)
11541 : CGF.Builder.getInt32(C: 0);
11542
11543 // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
11544 llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
11545 ThreadLimitVal};
11546 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
11547 M&: CGM.getModule(), FnID: OMPRTL___kmpc_set_thread_limit),
11548 args: ThreadLimitArgs);
11549}
11550
/// Emit a '#pragma omp target data' region through the OpenMPIRBuilder:
/// compute map info, lower the 'if'/'device' clauses, and generate the
/// begin/body/end structure via callbacks.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

  llvm::Value *IfCondVal = nullptr;
  if (IfCond)
    IfCondVal = CGF.EvaluateExprAsBool(E: IfCond);

  // Emit device ID if any.
  llvm::Value *DeviceID = nullptr;
  if (Device) {
    DeviceID = CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: Device),
                                         DestTy: CGF.Int64Ty, /*isSigned=*/true);
  } else {
    DeviceID = CGF.Builder.getInt64(C: OMP_DEVICEID_UNDEF);
  }

  // Fill up the arrays with all the mapped variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  // Callback invoked by the builder to compute the map clauses' base
  // pointers/pointers/sizes/types (and names under debug info).
  auto GenMapInfoCB =
      [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
    CGF.Builder.restoreIP(IP: CodeGenIP);
    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);

    auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
      return emitMappingInformation(CGF, OMPBuilder, MapExprs&: MapExpr);
    };
    if (CGM.getCodeGenOpts().getDebugInfo() !=
        llvm::codegenoptions::NoDebugInfo) {
      CombinedInfo.Names.resize(N: CombinedInfo.Exprs.size());
      llvm::transform(Range&: CombinedInfo.Exprs, d_first: CombinedInfo.Names.begin(),
                      F: FillInfoMap);
    }

    return CombinedInfo;
  };
  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
  // Callback that emits the region body; invoked up to three times (priv,
  // dup-no-priv, no-priv) depending on whether use_device_ptr/addr captures
  // exist.
  auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
    CGF.Builder.restoreIP(IP: CodeGenIP);
    switch (BodyGenType) {
    case BodyGenTy::Priv:
      if (!Info.CaptureDeviceAddrMap.empty())
        CodeGen(CGF);
      break;
    case BodyGenTy::DupNoPriv:
      if (!Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    case BodyGenTy::NoPriv:
      if (Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    }
    return InsertPointTy(CGF.Builder.GetInsertBlock(),
                         CGF.Builder.GetInsertPoint());
  };

  // Records the device-side replacement value for each use_device_ptr/addr
  // declaration so the body can reference it.
  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(Key: DevVD, Args&: NewDecl);
    }
  };

  // Returns the user-defined mapper function for component I, if one exists.
  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Function *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          D: cast<OMPDeclareMapperDecl>(Val: CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };

  // Source location for the ident struct
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc: D.getBeginLoc());

  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());
  llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
  llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
      cantFail(ValOrErr: OMPBuilder.createTargetData(
          Loc: OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCond: IfCondVal, Info, GenMapInfoCB,
          CustomMapperCB,
          /*MapperFunc=*/nullptr, BodyGenCB: BodyCB, DeviceAddrCB, SrcLocInfo: RTLoc));
  // Continue emitting after the generated target-data end code.
  CGF.Builder.restoreIP(IP: AfterIP);
}
11654
/// Emit a standalone data-movement directive ('target enter data',
/// 'target exit data', or 'target update'): build the offloading argument
/// arrays and call the matching __tgt_target_data_* runtime entry, possibly
/// wrapped in a task for 'depend'/'nowait'.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // Filled in by TargetThenGen below before ThenGen runs (possibly inside a
  // generated task).
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: Device),
                                           DestTy: CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(C: OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(C: InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc: D.getBeginLoc());

    SmallVector<llvm::Value *, 13> OffloadingArgs(
        {RTLoc, DeviceID, PointerNum,
         InputInfo.BasePointersArray.emitRawPointer(CGF),
         InputInfo.PointersArray.emitRawPointer(CGF),
         InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
         InputInfo.MappersArray.emitRawPointer(CGF)});

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // All remaining directive kinds were excluded by the assert above.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    if (HasNowait) {
      // The *_nowait_mapper entry points take four extra (unused here)
      // dependence arguments; pass null/zero placeholders.
      OffloadingArgs.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.Int32Ty));
      OffloadingArgs.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.VoidPtrTy));
      OffloadingArgs.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.Int32Ty));
      OffloadingArgs.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.VoidPtrTy));
    }
    CGF.EmitRuntimeCall(
        callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(), FnID: RTLFn),
        args: OffloadingArgs);
  };

  // Builds the offloading arrays and then runs ThenGen either inside a task
  // (for 'depend'/'nowait') or inlined.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
    CGOpenMPRuntime::TargetDataInfo Info;
    MappableExprsHandler MEHandler(D, CGF);
    genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
    emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
                                /*IsNonContiguous=*/true, /*ForEndCall=*/false);

    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();

    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                          CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
                                      CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.RTArgs.MapTypesArray;
    MapNamesArray = Info.RTArgs.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(S: D, BodyGen: ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, InnerKind: D.getDirectiveKind(), CodeGen: ThenGen);
  };

  // An 'if' clause that evaluates false turns the directive into a no-op.
  if (IfCond) {
    emitIfClause(CGF, Cond: IfCond, ThenGen: TargetThenGen,
                 ElseGen: [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11832
namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy {
  Linear,     // 'linear(x)' clause.
  LinearRef,  // 'linear(ref(x))' clause.
  LinearUVal, // 'linear(uval(x))' clause.
  LinearVal,  // 'linear(val(x))' clause.
  Uniform,    // 'uniform(x)' clause.
  Vector,     // No clause: default vector parameter.
};
/// Attribute set of the parameter.
struct ParamAttrTy {
  // Parameter kind; Vector unless a clause says otherwise.
  ParamKindTy Kind = Vector;
  // Linear step value; with HasVarStride set, presumably refers to the
  // argument supplying the step — confirm at the uses.
  llvm::APSInt StrideOrArg;
  // Alignment requested via an 'aligned' clause, if any.
  llvm::APSInt Alignment;
  // True when the linear stride is not a compile-time constant.
  bool HasVarStride = false;
};
} // namespace
11851
11852static unsigned evaluateCDTSize(const FunctionDecl *FD,
11853 ArrayRef<ParamAttrTy> ParamAttrs) {
11854 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11855 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11856 // of that clause. The VLEN value must be power of 2.
11857 // In other case the notion of the function`s "characteristic data type" (CDT)
11858 // is used to compute the vector length.
11859 // CDT is defined in the following order:
11860 // a) For non-void function, the CDT is the return type.
11861 // b) If the function has any non-uniform, non-linear parameters, then the
11862 // CDT is the type of the first such parameter.
11863 // c) If the CDT determined by a) or b) above is struct, union, or class
11864 // type which is pass-by-value (except for the type that maps to the
11865 // built-in complex data type), the characteristic data type is int.
11866 // d) If none of the above three cases is applicable, the CDT is int.
11867 // The VLEN is then determined based on the CDT and the size of vector
11868 // register of that ISA for which current vector version is generated. The
11869 // VLEN is computed using the formula below:
11870 // VLEN = sizeof(vector_register) / sizeof(CDT),
11871 // where vector register size specified in section 3.2.1 Registers and the
11872 // Stack Frame of original AMD64 ABI document.
11873 QualType RetType = FD->getReturnType();
11874 if (RetType.isNull())
11875 return 0;
11876 ASTContext &C = FD->getASTContext();
11877 QualType CDT;
11878 if (!RetType.isNull() && !RetType->isVoidType()) {
11879 CDT = RetType;
11880 } else {
11881 unsigned Offset = 0;
11882 if (const auto *MD = dyn_cast<CXXMethodDecl>(Val: FD)) {
11883 if (ParamAttrs[Offset].Kind == Vector)
11884 CDT = C.getPointerType(T: C.getCanonicalTagType(TD: MD->getParent()));
11885 ++Offset;
11886 }
11887 if (CDT.isNull()) {
11888 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11889 if (ParamAttrs[I + Offset].Kind == Vector) {
11890 CDT = FD->getParamDecl(i: I)->getType();
11891 break;
11892 }
11893 }
11894 }
11895 }
11896 if (CDT.isNull())
11897 CDT = C.IntTy;
11898 CDT = CDT->getCanonicalTypeUnqualified();
11899 if (CDT->isRecordType() || CDT->isUnionType())
11900 CDT = C.IntTy;
11901 return C.getTypeSize(T: CDT);
11902}
11903
11904/// Mangle the parameter part of the vector function name according to
11905/// their OpenMP classification. The mangling function is defined in
11906/// section 4.5 of the AAVFABI(2021Q1).
11907static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11908 SmallString<256> Buffer;
11909 llvm::raw_svector_ostream Out(Buffer);
11910 for (const auto &ParamAttr : ParamAttrs) {
11911 switch (ParamAttr.Kind) {
11912 case Linear:
11913 Out << 'l';
11914 break;
11915 case LinearRef:
11916 Out << 'R';
11917 break;
11918 case LinearUVal:
11919 Out << 'U';
11920 break;
11921 case LinearVal:
11922 Out << 'L';
11923 break;
11924 case Uniform:
11925 Out << 'u';
11926 break;
11927 case Vector:
11928 Out << 'v';
11929 break;
11930 }
11931 if (ParamAttr.HasVarStride)
11932 Out << "s" << ParamAttr.StrideOrArg;
11933 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
11934 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
11935 // Don't print the step value if it is not present or if it is
11936 // equal to 1.
11937 if (ParamAttr.StrideOrArg < 0)
11938 Out << 'n' << -ParamAttr.StrideOrArg;
11939 else if (ParamAttr.StrideOrArg != 1)
11940 Out << ParamAttr.StrideOrArg;
11941 }
11942
11943 if (!!ParamAttr.Alignment)
11944 Out << 'a' << ParamAttr.Alignment;
11945 }
11946
11947 return std::string(Out.str());
11948}
11949
11950static void
11951emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11952 const llvm::APSInt &VLENVal,
11953 ArrayRef<ParamAttrTy> ParamAttrs,
11954 OMPDeclareSimdDeclAttr::BranchStateTy State) {
11955 struct ISADataTy {
11956 char ISA;
11957 unsigned VecRegSize;
11958 };
11959 ISADataTy ISAData[] = {
11960 {
11961 .ISA: 'b', .VecRegSize: 128
11962 }, // SSE
11963 {
11964 .ISA: 'c', .VecRegSize: 256
11965 }, // AVX
11966 {
11967 .ISA: 'd', .VecRegSize: 256
11968 }, // AVX2
11969 {
11970 .ISA: 'e', .VecRegSize: 512
11971 }, // AVX512
11972 };
11973 llvm::SmallVector<char, 2> Masked;
11974 switch (State) {
11975 case OMPDeclareSimdDeclAttr::BS_Undefined:
11976 Masked.push_back(Elt: 'N');
11977 Masked.push_back(Elt: 'M');
11978 break;
11979 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11980 Masked.push_back(Elt: 'N');
11981 break;
11982 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11983 Masked.push_back(Elt: 'M');
11984 break;
11985 }
11986 for (char Mask : Masked) {
11987 for (const ISADataTy &Data : ISAData) {
11988 SmallString<256> Buffer;
11989 llvm::raw_svector_ostream Out(Buffer);
11990 Out << "_ZGV" << Data.ISA << Mask;
11991 if (!VLENVal) {
11992 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11993 assert(NumElts && "Non-zero simdlen/cdtsize expected");
11994 Out << llvm::APSInt::getUnsigned(X: Data.VecRegSize / NumElts);
11995 } else {
11996 Out << VLENVal;
11997 }
11998 Out << mangleVectorParameters(ParamAttrs);
11999 Out << '_' << Fn->getName();
12000 Fn->addFnAttr(Kind: Out.str());
12001 }
12002 }
12003}
12004
12005// This are the Functions that are needed to mangle the name of the
12006// vector functions generated by the compiler, according to the rules
12007// defined in the "Vector Function ABI specifications for AArch64",
12008// available at
12009// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
12010
12011/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
12012static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
12013 QT = QT.getCanonicalType();
12014
12015 if (QT->isVoidType())
12016 return false;
12017
12018 if (Kind == ParamKindTy::Uniform)
12019 return false;
12020
12021 if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
12022 return false;
12023
12024 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
12025 !QT->isReferenceType())
12026 return false;
12027
12028 return true;
12029}
12030
12031/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
12032static bool getAArch64PBV(QualType QT, ASTContext &C) {
12033 QT = QT.getCanonicalType();
12034 unsigned Size = C.getTypeSize(T: QT);
12035
12036 // Only scalars and complex within 16 bytes wide set PVB to true.
12037 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
12038 return false;
12039
12040 if (QT->isFloatingType())
12041 return true;
12042
12043 if (QT->isIntegerType())
12044 return true;
12045
12046 if (QT->isPointerType())
12047 return true;
12048
12049 // TODO: Add support for complex types (section 3.1.2, item 2).
12050
12051 return false;
12052}
12053
12054/// Computes the lane size (LS) of a return type or of an input parameter,
12055/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
12056/// TODO: Add support for references, section 3.2.1, item 1.
12057static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
12058 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
12059 QualType PTy = QT.getCanonicalType()->getPointeeType();
12060 if (getAArch64PBV(QT: PTy, C))
12061 return C.getTypeSize(T: PTy);
12062 }
12063 if (getAArch64PBV(QT, C))
12064 return C.getTypeSize(T: QT);
12065
12066 return C.getTypeSize(T: C.getUIntPtrType());
12067}
12068
12069// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
12070// signature of the scalar function, as defined in 3.2.2 of the
12071// AAVFABI.
12072static std::tuple<unsigned, unsigned, bool>
12073getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
12074 QualType RetType = FD->getReturnType().getCanonicalType();
12075
12076 ASTContext &C = FD->getASTContext();
12077
12078 bool OutputBecomesInput = false;
12079
12080 llvm::SmallVector<unsigned, 8> Sizes;
12081 if (!RetType->isVoidType()) {
12082 Sizes.push_back(Elt: getAArch64LS(QT: RetType, Kind: ParamKindTy::Vector, C));
12083 if (!getAArch64PBV(QT: RetType, C) && getAArch64MTV(QT: RetType, Kind: {}))
12084 OutputBecomesInput = true;
12085 }
12086 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
12087 QualType QT = FD->getParamDecl(i: I)->getType().getCanonicalType();
12088 Sizes.push_back(Elt: getAArch64LS(QT, Kind: ParamAttrs[I].Kind, C));
12089 }
12090
12091 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
12092 // The LS of a function parameter / return value can only be a power
12093 // of 2, starting from 8 bits, up to 128.
12094 assert(llvm::all_of(Sizes,
12095 [](unsigned Size) {
12096 return Size == 8 || Size == 16 || Size == 32 ||
12097 Size == 64 || Size == 128;
12098 }) &&
12099 "Invalid size");
12100
12101 return std::make_tuple(args&: *llvm::min_element(Range&: Sizes), args&: *llvm::max_element(Range&: Sizes),
12102 args&: OutputBecomesInput);
12103}
12104
12105// Function used to add the attribute. The parameter `VLEN` is
12106// templated to allow the use of "x" when targeting scalable functions
12107// for SVE.
12108template <typename T>
12109static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
12110 char ISA, StringRef ParSeq,
12111 StringRef MangledName, bool OutputBecomesInput,
12112 llvm::Function *Fn) {
12113 SmallString<256> Buffer;
12114 llvm::raw_svector_ostream Out(Buffer);
12115 Out << Prefix << ISA << LMask << VLEN;
12116 if (OutputBecomesInput)
12117 Out << "v";
12118 Out << ParSeq << "_" << MangledName;
12119 Fn->addFnAttr(Kind: Out.str());
12120}
12121
12122// Helper function to generate the Advanced SIMD names depending on
12123// the value of the NDS when simdlen is not present.
12124static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
12125 StringRef Prefix, char ISA,
12126 StringRef ParSeq, StringRef MangledName,
12127 bool OutputBecomesInput,
12128 llvm::Function *Fn) {
12129 switch (NDS) {
12130 case 8:
12131 addAArch64VectorName(VLEN: 8, LMask: Mask, Prefix, ISA, ParSeq, MangledName,
12132 OutputBecomesInput, Fn);
12133 addAArch64VectorName(VLEN: 16, LMask: Mask, Prefix, ISA, ParSeq, MangledName,
12134 OutputBecomesInput, Fn);
12135 break;
12136 case 16:
12137 addAArch64VectorName(VLEN: 4, LMask: Mask, Prefix, ISA, ParSeq, MangledName,
12138 OutputBecomesInput, Fn);
12139 addAArch64VectorName(VLEN: 8, LMask: Mask, Prefix, ISA, ParSeq, MangledName,
12140 OutputBecomesInput, Fn);
12141 break;
12142 case 32:
12143 addAArch64VectorName(VLEN: 2, LMask: Mask, Prefix, ISA, ParSeq, MangledName,
12144 OutputBecomesInput, Fn);
12145 addAArch64VectorName(VLEN: 4, LMask: Mask, Prefix, ISA, ParSeq, MangledName,
12146 OutputBecomesInput, Fn);
12147 break;
12148 case 64:
12149 case 128:
12150 addAArch64VectorName(VLEN: 2, LMask: Mask, Prefix, ISA, ParSeq, MangledName,
12151 OutputBecomesInput, Fn);
12152 break;
12153 default:
12154 llvm_unreachable("Scalar type is too wide.");
12155 }
12156}
12157
12158/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
12159static void emitAArch64DeclareSimdFunction(
12160 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
12161 ArrayRef<ParamAttrTy> ParamAttrs,
12162 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
12163 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
12164
12165 // Get basic data for building the vector signature.
12166 const auto Data = getNDSWDS(FD, ParamAttrs);
12167 const unsigned NDS = std::get<0>(t: Data);
12168 const unsigned WDS = std::get<1>(t: Data);
12169 const bool OutputBecomesInput = std::get<2>(t: Data);
12170
12171 // Check the values provided via `simdlen` by the user.
12172 // 1. A `simdlen(1)` doesn't produce vector signatures,
12173 if (UserVLEN == 1) {
12174 CGM.getDiags().Report(Loc: SLoc, DiagID: diag::warn_simdlen_1_no_effect);
12175 return;
12176 }
12177
12178 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
12179 // Advanced SIMD output.
12180 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(Value: UserVLEN)) {
12181 CGM.getDiags().Report(Loc: SLoc, DiagID: diag::warn_simdlen_requires_power_of_2);
12182 return;
12183 }
12184
12185 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
12186 // limits.
12187 if (ISA == 's' && UserVLEN != 0) {
12188 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
12189 CGM.getDiags().Report(Loc: SLoc, DiagID: diag::warn_simdlen_must_fit_lanes) << WDS;
12190 return;
12191 }
12192 }
12193
12194 // Sort out parameter sequence.
12195 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
12196 StringRef Prefix = "_ZGV";
12197 // Generate simdlen from user input (if any).
12198 if (UserVLEN) {
12199 if (ISA == 's') {
12200 // SVE generates only a masked function.
12201 addAArch64VectorName(VLEN: UserVLEN, LMask: "M", Prefix, ISA, ParSeq, MangledName,
12202 OutputBecomesInput, Fn);
12203 } else {
12204 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
12205 // Advanced SIMD generates one or two functions, depending on
12206 // the `[not]inbranch` clause.
12207 switch (State) {
12208 case OMPDeclareSimdDeclAttr::BS_Undefined:
12209 addAArch64VectorName(VLEN: UserVLEN, LMask: "N", Prefix, ISA, ParSeq, MangledName,
12210 OutputBecomesInput, Fn);
12211 addAArch64VectorName(VLEN: UserVLEN, LMask: "M", Prefix, ISA, ParSeq, MangledName,
12212 OutputBecomesInput, Fn);
12213 break;
12214 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
12215 addAArch64VectorName(VLEN: UserVLEN, LMask: "N", Prefix, ISA, ParSeq, MangledName,
12216 OutputBecomesInput, Fn);
12217 break;
12218 case OMPDeclareSimdDeclAttr::BS_Inbranch:
12219 addAArch64VectorName(VLEN: UserVLEN, LMask: "M", Prefix, ISA, ParSeq, MangledName,
12220 OutputBecomesInput, Fn);
12221 break;
12222 }
12223 }
12224 } else {
12225 // If no user simdlen is provided, follow the AAVFABI rules for
12226 // generating the vector length.
12227 if (ISA == 's') {
12228 // SVE, section 3.4.1, item 1.
12229 addAArch64VectorName(VLEN: "x", LMask: "M", Prefix, ISA, ParSeq, MangledName,
12230 OutputBecomesInput, Fn);
12231 } else {
12232 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
12233 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
12234 // two vector names depending on the use of the clause
12235 // `[not]inbranch`.
12236 switch (State) {
12237 case OMPDeclareSimdDeclAttr::BS_Undefined:
12238 addAArch64AdvSIMDNDSNames(NDS, Mask: "N", Prefix, ISA, ParSeq, MangledName,
12239 OutputBecomesInput, Fn);
12240 addAArch64AdvSIMDNDSNames(NDS, Mask: "M", Prefix, ISA, ParSeq, MangledName,
12241 OutputBecomesInput, Fn);
12242 break;
12243 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
12244 addAArch64AdvSIMDNDSNames(NDS, Mask: "N", Prefix, ISA, ParSeq, MangledName,
12245 OutputBecomesInput, Fn);
12246 break;
12247 case OMPDeclareSimdDeclAttr::BS_Inbranch:
12248 addAArch64AdvSIMDNDSNames(NDS, Mask: "M", Prefix, ISA, ParSeq, MangledName,
12249 OutputBecomesInput, Fn);
12250 break;
12251 }
12252 }
12253 }
12254}
12255
// Walks every redeclaration of \p FD and, for each 'declare simd' attribute,
// classifies the parameters (uniform/aligned/linear), then emits the
// target-specific "_ZGV..." vector-variant attributes on \p Fn (x86 or
// AArch64 SVE/NEON).
void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  while (FD) {
    // Map params to their positions in function decl.
    // For a C++ method, slot 0 is the implicit 'this' (keyed by FD itself).
    llvm::DenseMap<const Decl *, unsigned> ParamPositions;
    if (isa<CXXMethodDecl>(Val: FD))
      ParamPositions.try_emplace(Key: FD, Args: 0);
    unsigned ParamPos = ParamPositions.size();
    for (const ParmVarDecl *P : FD->parameters()) {
      ParamPositions.try_emplace(Key: P->getCanonicalDecl(), Args&: ParamPos);
      ++ParamPos;
    }
    // Each 'declare simd' attribute on this declaration produces its own
    // classification and its own set of vector variants.
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(Val: E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(Val: PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      // NI walks alignments() in lockstep with aligneds(); a null entry means
      // no explicit alignment, so the OpenMP default SIMD alignment is used.
      auto *NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(Val: E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(Val: PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          ParmTy = PVD->getType();
        }
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(Ctx: C)
                : llvm::APSInt::getUnsigned(
                      X: C.toCharUnitsFromBits(BitSize: C.getOpenMPDefaultSimdAlign(T: ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      // SI and MI walk steps()/modifiers() in lockstep with linears().
      auto *SI = Attr->steps_begin();
      auto *MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        bool IsReferenceType = false;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(Val: E)) {
          Pos = ParamPositions[FD];
          auto *P = cast<PointerType>(Val: E->getType());
          PtrRescalingFactor = CGM.getContext()
                                   .getTypeSizeInChars(T: P->getPointeeType())
                                   .getQuantity();
        } else {
          const auto *PVD = cast<ParmVarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(Val: PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          if (auto *P = dyn_cast<PointerType>(Val: PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(T: P->getPointeeType())
                                     .getQuantity();
          else if (PVD->getType()->isReferenceType()) {
            IsReferenceType = true;
            PtrRescalingFactor =
                CGM.getContext()
                    .getTypeSizeInChars(T: PVD->getType().getNonReferenceType())
                    .getQuantity();
          }
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        if (*MI == OMPC_LINEAR_ref)
          ParamAttr.Kind = LinearRef;
        else if (*MI == OMPC_LINEAR_uval)
          ParamAttr.Kind = LinearUVal;
        else if (IsReferenceType)
          ParamAttr.Kind = LinearVal;
        else
          ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(X: 1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, Ctx: C, AllowSideEffects: Expr::SE_AllowSideEffects)) {
            // Non-constant step: if it names another parameter, record that
            // parameter's position as a variable stride.
            if (const auto *DRE =
                    cast<DeclRefExpr>(Val: (*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(Val: DRE->getDecl())) {
                ParamAttr.HasVarStride = true;
                auto It = ParamPositions.find(Val: StridePVD->getCanonicalDecl());
                assert(It != ParamPositions.end() &&
                       "Function parameter not found");
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(X: It->second);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (!ParamAttr.HasVarStride &&
            (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      // Evaluate the simdlen clause, if present; VLENVal stays zero otherwise.
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(Ctx: C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      // Dispatch to the target-specific mangler/emitter.
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature(Feature: "sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, UserVLEN: VLEN, ParamAttrs, State,
                                         MangledName, ISA: 's', VecRegSize: 128, Fn, SLoc: ExprLoc);
        else if (CGM.getTarget().hasFeature(Feature: "neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, UserVLEN: VLEN, ParamAttrs, State,
                                         MangledName, ISA: 'n', VecRegSize: 128, Fn, SLoc: ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}
12408
12409namespace {
12410/// Cleanup action for doacross support.
12411class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12412public:
12413 static const int DoacrossFinArgs = 2;
12414
12415private:
12416 llvm::FunctionCallee RTLFn;
12417 llvm::Value *Args[DoacrossFinArgs];
12418
12419public:
12420 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12421 ArrayRef<llvm::Value *> CallArgs)
12422 : RTLFn(RTLFn) {
12423 assert(CallArgs.size() == DoacrossFinArgs);
12424 std::copy(first: CallArgs.begin(), last: CallArgs.end(), result: std::begin(arr&: Args));
12425 }
12426 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12427 if (!CGF.HaveInsertPoint())
12428 return;
12429 CGF.EmitRuntimeCall(callee: RTLFn, args: Args);
12430 }
12431};
12432} // namespace
12433
// Emits the __kmpc_doacross_init call for an ordered(n) loop directive:
// builds an array of kmp_dim descriptors (one per collapsed loop), fills in
// the trip counts and unit strides, and registers a cleanup that calls
// __kmpc_doacross_fini on region exit.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // KmpDimTy is built lazily once and cached on the runtime object.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim { // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord(Name: "kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, DC: RD, FieldTy: Int64Ty);
    addFieldToRecordDecl(C, DC: RD, FieldTy: Int64Ty);
    addFieldToRecordDecl(C, DC: RD, FieldTy: Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getCanonicalTagType(TD: RD);
  } else {
    RD = KmpDimTy->castAsRecordDecl();
  }
  // One kmp_dim entry per dimension of the doacross nest.
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy = C.getConstantArrayType(EltTy: KmpDimTy, ArySize: Size, SizeExpr: nullptr,
                                            ASM: ArraySizeModifier::Normal, IndexTypeQuals: 0);

  // Zero-initialize so 'lo' fields are 0 without explicit stores.
  Address DimsAddr = CGF.CreateMemTemp(T: ArrayTy, Name: "dims");
  CGF.EmitNullInitialization(DestPtr: DimsAddr, Ty: ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        Addr: CGF.Builder.CreateConstArrayGEP(Addr: DimsAddr, Index: I), T: KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        Base: DimsLVal, Field: *std::next(x: RD->field_begin(), n: UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        Src: CGF.EmitScalarExpr(E: NumIterations[I]), SrcTy: NumIterations[I]->getType(),
        DstTy: Int64Ty, Loc: NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(value: NumIterVal, lvalue: UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        Base: DimsLVal, Field: *std::next(x: RD->field_begin(), n: StrideFD));
    CGF.EmitStoreOfScalar(value: llvm::ConstantInt::getSigned(Ty: CGM.Int64Ty, /*V=*/1),
                          lvalue: StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc: D.getBeginLoc()),
      getThreadID(CGF, Loc: D.getBeginLoc()),
      llvm::ConstantInt::getSigned(Ty: CGM.Int32Ty, V: NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          V: CGF.Builder.CreateConstArrayGEP(Addr: DimsAddr, Index: 0).emitRawPointer(CGF),
          DestTy: CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      M&: CGM.getModule(), FnID: OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(callee: RTLFn, args: Args);
  // Schedule __kmpc_doacross_fini(loc, gtid) to run on both normal and EH
  // exits from the region.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, Loc: D.getEndLoc()), getThreadID(CGF, Loc: D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      M&: CGM.getModule(), FnID: OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(Kind: NormalAndEHCleanup, A: FiniRTLFn,
                                             A: llvm::ArrayRef(FiniArgs));
}
12504
// Shared implementation for 'ordered depend(source/sink)' and
// 'ordered doacross(...)': materializes the loop-iteration vector into a
// temporary kmp_int64 array and calls __kmpc_doacross_post (source) or
// __kmpc_doacross_wait (sink). \p C is either OMPDependClause or
// OMPDoacrossClause; OMPDoacrossKind<T> abstracts the source/sink query.
template <typename T>
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
                                const T *C, llvm::Value *ULoc,
                                llvm::Value *ThreadID) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      EltTy: Int64Ty, ArySize: Size, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal, IndexTypeQuals: 0);
  Address CntAddr = CGF.CreateMemTemp(T: ArrayTy, Name: ".cnt.addr");
  // Store each loop's iteration value, widened to kmp_int64.
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        Src: CGF.EmitScalarExpr(E: CounterVal), SrcTy: CounterVal->getType(), DstTy: Int64Ty,
        Loc: CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(Value: CntVal, Addr: CGF.Builder.CreateConstArrayGEP(Addr: CntAddr, Index: I),
                          /*Volatile=*/false, Ty: Int64Ty);
  }
  // (ident_t *loc, kmp_int32 gtid, kmp_int64 *vec)
  llvm::Value *Args[] = {
      ULoc, ThreadID,
      CGF.Builder.CreateConstArrayGEP(Addr: CntAddr, Index: 0).emitRawPointer(CGF)};
  llvm::FunctionCallee RTLFn;
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  OMPDoacrossKind<T> ODK;
  if (ODK.isSource(C)) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(),
                                                  FnID: OMPRTL___kmpc_doacross_post);
  } else {
    assert(ODK.isSink(C) && "Expect sink modifier.");
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(),
                                                  FnID: OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(callee: RTLFn, args: Args);
}
12540
12541void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12542 const OMPDependClause *C) {
12543 return EmitDoacrossOrdered<OMPDependClause>(
12544 CGF, CGM, C, ULoc: emitUpdateLocation(CGF, Loc: C->getBeginLoc()),
12545 ThreadID: getThreadID(CGF, Loc: C->getBeginLoc()));
12546}
12547
12548void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12549 const OMPDoacrossClause *C) {
12550 return EmitDoacrossOrdered<OMPDoacrossClause>(
12551 CGF, CGM, C, ULoc: emitUpdateLocation(CGF, Loc: C->getBeginLoc()),
12552 ThreadID: getThreadID(CGF, Loc: C->getBeginLoc()));
12553}
12554
12555void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12556 llvm::FunctionCallee Callee,
12557 ArrayRef<llvm::Value *> Args) const {
12558 assert(Loc.isValid() && "Outlined function call location must be valid.");
12559 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: Loc);
12560
12561 if (auto *Fn = dyn_cast<llvm::Function>(Val: Callee.getCallee())) {
12562 if (Fn->doesNotThrow()) {
12563 CGF.EmitNounwindRuntimeCall(callee: Fn, args: Args);
12564 return;
12565 }
12566 }
12567 CGF.EmitRuntimeCall(callee: Callee, args: Args);
12568}
12569
// Default host implementation: calling an outlined function is a plain call
// (target-specific runtimes may override to adjust arguments/ABI).
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, Callee: OutlinedFn, Args);
}
12575
12576void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12577 if (const auto *FD = dyn_cast<FunctionDecl>(Val: D))
12578 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD: FD))
12579 HasEmittedDeclareTargetRegion = true;
12580}
12581
// Default host implementation: the native parameter lives in its own local
// slot; TargetParam is unused here (device runtimes override this to map
// translated parameters back to the native ones).
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(VD: NativeParam);
}
12587
12588/// Return allocator value from expression, or return a null allocator (default
12589/// when no allocator specified).
12590static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
12591 const Expr *Allocator) {
12592 llvm::Value *AllocVal;
12593 if (Allocator) {
12594 AllocVal = CGF.EmitScalarExpr(E: Allocator);
12595 // According to the standard, the original allocator type is a enum
12596 // (integer). Convert to pointer type, if required.
12597 AllocVal = CGF.EmitScalarConversion(Src: AllocVal, SrcTy: Allocator->getType(),
12598 DstTy: CGF.getContext().VoidPtrTy,
12599 Loc: Allocator->getExprLoc());
12600 } else {
12601 // If no allocator specified, it defaults to the null allocator.
12602 AllocVal = llvm::Constant::getNullValue(
12603 Ty: CGF.CGM.getTypes().ConvertType(T: CGF.getContext().VoidPtrTy));
12604 }
12605 return AllocVal;
12606}
12607
12608/// Return the alignment from an allocate directive if present.
12609static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
12610 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
12611
12612 if (!AllocateAlignment)
12613 return nullptr;
12614
12615 return llvm::ConstantInt::get(Ty: CGM.SizeTy, V: AllocateAlignment->getQuantity());
12616}
12617
// Returns the address of a local variable, giving precedence to addresses
// recorded for untied tasks, and honoring an 'omp allocate' directive by
// emitting a __kmpc_alloc/__kmpc_aligned_alloc call with a matching
// __kmpc_free cleanup on scope exit.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // If the current function is an untied task body, the variable may already
  // have storage recorded on the untied-locals stack.
  auto It = FunctionToUntiedTaskStackMap.find(Val: CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(Key: VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(D: CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // Variably-modified type: the size is a runtime value.
      Size = CGF.getTypeSize(Ty: CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          LHS: Size, RHS: CGM.getSize(numChars: Align - CharUnits::fromQuantity(Quantity: 1)));
      Size = CGF.Builder.CreateUDiv(LHS: Size, RHS: CGM.getSize(numChars: Align));
      Size = CGF.Builder.CreateNUWMul(LHS: Size, RHS: CGM.getSize(numChars: Align));
    } else {
      // Constant-size type: round the size up to the alignment at compile
      // time.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(T: CVD->getType());
      Size = CGM.getSize(numChars: Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, Loc: CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment = getAlignmentValue(CGM, VD: CVD);
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(Elt: ThreadID);
    if (Alignment)
      Args.push_back(Elt: Alignment);
    Args.push_back(Elt: Size);
    Args.push_back(Elt: AllocVal);
    // With an explicit alignment use the aligned allocation entry point.
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(), FnID), args: Args,
        name: getName(Parts: {CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        M&: CGM.getModule(), FnID: OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(T: CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        V: Addr, DestTy: CGF.ConvertTypeForMem(T: Ty), Name: getName(Parts: {CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Value: Addr, Addr: UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      // Raw-encoded source location (Address members cannot carry it).
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        // Emit __kmpc_free(<gtid>, <void*>, <allocator>).
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, Loc: SourceLocation::getFromRawEncoding(Encoding: LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            V: Addr.emitRawPointer(CGF), DestTy: CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator: AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(callee: RTLFn, args: Args);
      }
    };
    // For untied tasks hand back the recorded "real" address; otherwise wrap
    // the freshly allocated pointer.
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(T: CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        Kind: NormalAndEHCleanup, A: FiniRTLFn, A: CVD->getLocation().getRawEncoding(),
        A: VDAddr, A: Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
12717
12718bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12719 const VarDecl *VD) const {
12720 auto It = FunctionToUntiedTaskStackMap.find(Val: CGF.CurFn);
12721 if (It == FunctionToUntiedTaskStackMap.end())
12722 return false;
12723 return UntiedLocalVarsStack[It->second].count(Key: VD) > 0;
12724}
12725
// RAII that, for the duration of a loop directive with nontemporal clauses,
// pushes the set of nontemporal declarations onto NontemporalDeclsStack.
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  // Nothing to record unless the directive actually has nontemporal clauses.
  if (!NeedToPush)
    return;
  // Push a fresh set and fill it with every decl named in a nontemporal
  // clause of this directive.
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Val: Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        // Only member references through the current object ('this->field')
        // are expected here, per the assert below.
        const auto *ME = cast<MemberExpr>(Val: SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(V: VD);
    }
  }
}
12751
12752CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12753 if (!NeedToPush)
12754 return;
12755 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12756}
12757
12758CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12759 CodeGenFunction &CGF,
12760 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12761 std::pair<Address, Address>> &LocalVars)
12762 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12763 if (!NeedToPush)
12764 return;
12765 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12766 Key: CGF.CurFn, Args: CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12767 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(Elt: LocalVars);
12768}
12769
12770CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12771 if (!NeedToPush)
12772 return;
12773 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12774}
12775
12776bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12777 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12778
12779 return llvm::any_of(
12780 Range&: CGM.getOpenMPRuntime().NontemporalDeclsStack,
12781 P: [VD](const NontemporalDeclsSet &Set) { return Set.contains(V: VD); });
12782}
12783
12784void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12785 const OMPExecutableDirective &S,
12786 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12787 const {
12788 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12789 // Vars in target/task regions must be excluded completely.
12790 if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()) ||
12791 isOpenMPTaskingDirective(Kind: S.getDirectiveKind())) {
12792 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12793 getOpenMPCaptureRegions(CaptureRegions, DKind: S.getDirectiveKind());
12794 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: CaptureRegions.front());
12795 for (const CapturedStmt::Capture &Cap : CS->captures()) {
12796 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12797 NeedToCheckForLPCs.insert(V: Cap.getCapturedVar());
12798 }
12799 }
12800 // Exclude vars in private clauses.
12801 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12802 for (const Expr *Ref : C->varlist()) {
12803 if (!Ref->getType()->isScalarType())
12804 continue;
12805 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
12806 if (!DRE)
12807 continue;
12808 NeedToCheckForLPCs.insert(V: DRE->getDecl());
12809 }
12810 }
12811 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12812 for (const Expr *Ref : C->varlist()) {
12813 if (!Ref->getType()->isScalarType())
12814 continue;
12815 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
12816 if (!DRE)
12817 continue;
12818 NeedToCheckForLPCs.insert(V: DRE->getDecl());
12819 }
12820 }
12821 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12822 for (const Expr *Ref : C->varlist()) {
12823 if (!Ref->getType()->isScalarType())
12824 continue;
12825 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
12826 if (!DRE)
12827 continue;
12828 NeedToCheckForLPCs.insert(V: DRE->getDecl());
12829 }
12830 }
12831 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12832 for (const Expr *Ref : C->varlist()) {
12833 if (!Ref->getType()->isScalarType())
12834 continue;
12835 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
12836 if (!DRE)
12837 continue;
12838 NeedToCheckForLPCs.insert(V: DRE->getDecl());
12839 }
12840 }
12841 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12842 for (const Expr *Ref : C->varlist()) {
12843 if (!Ref->getType()->isScalarType())
12844 continue;
12845 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
12846 if (!DRE)
12847 continue;
12848 NeedToCheckForLPCs.insert(V: DRE->getDecl());
12849 }
12850 }
12851 for (const Decl *VD : NeedToCheckForLPCs) {
12852 for (const LastprivateConditionalData &Data :
12853 llvm::reverse(C&: CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12854 if (Data.DeclToUniqueName.count(Key: VD) > 0) {
12855 if (!Data.Disabled)
12856 NeedToAddForLPCsAsDisabled.insert(V: VD);
12857 break;
12858 }
12859 }
12860 }
12861}
12862
// "Push" constructor: when OpenMP >= 5.0 and directive \p S carries a
// lastprivate(conditional:) clause, record the conditional variables (each
// mapped to a unique "pl_cond" name), the loop IV lvalue, and the emitting
// function on LastprivateConditionalStack.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(Range: S.getClausesOfKind<OMPLastprivateClause>(),
                           P: [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    // Map each conditional lastprivate decl to a unique "pl_cond" name; the
    // name is later used for the backing internal globals.
    for (const Expr *Ref : C->varlist()) {
      Data.DeclToUniqueName.insert(KV: std::make_pair(
          x: cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts())->getDecl(),
          y: SmallString<16>(generateUniqueName(CGM, Prefix: "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
12894
// "Disable" constructor: pushes a Disabled entry listing the decls for which
// the lastprivate-conditional analysis must be switched off inside this
// region (computed by tryToDisableInnerAnalysis).
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    // The entries carry no unique names (try_emplace with no value); the
    // Disabled flag is what matters to readers of the stack.
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.try_emplace(Key: VD);
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}
12913
// Named factory for the "disable" constructor above; makes the intent
// explicit at call sites.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12919
12920CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12921 if (CGM.getLangOpts().OpenMP < 50)
12922 return;
12923 if (Action == ActionToDo::DisableLastprivateConditional) {
12924 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12925 "Expected list of disabled private vars.");
12926 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12927 }
12928 if (Action == ActionToDo::PushAsLastprivateConditional) {
12929 assert(
12930 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12931 "Expected list of lastprivate conditional vars.");
12932 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12933 }
12934}
12935
// Creates (or reuses) the helper record { <var type>; char Fired; } that
// tracks whether a lastprivate conditional variable was assigned in an inner
// region, zero-initializes the Fired flag, and returns the address of the
// value field. The record type and field decls are cached per (function,
// decl) pair in LastprivateConditionalToTypes.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.try_emplace(Key: CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(Val: VD);
  if (VI == I->getSecond().end()) {
    // First use in this function: build the implicit record and allocate a
    // temporary for it. Note the record name literal below reads
    // "lasprivate.conditional" (missing 't'); renaming it would change the
    // names of the emitted IR types, so it is left as-is.
    RecordDecl *RD = C.buildImplicitRecord(Name: "lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, DC: RD, FieldTy: VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, DC: RD, FieldTy: C.CharTy);
    RD->completeDefinition();
    NewType = C.getCanonicalTagType(TD: RD);
    Address Addr = CGF.CreateMemTemp(T: NewType, Align: C.getDeclAlign(D: VD), Name: VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, T: NewType, Source: AlignmentSource::Decl);
    I->getSecond().try_emplace(Key: VD, Args&: NewType, Args&: VDField, Args&: FiredField, Args&: BaseLVal);
  } else {
    // Cached entry: unpack the (type, value field, fired field, base lvalue)
    // tuple recorded on first use.
    NewType = std::get<0>(t&: VI->getSecond());
    VDField = std::get<1>(t&: VI->getSecond());
    FiredField = std::get<2>(t&: VI->getSecond());
    BaseLVal = std::get<3>(t&: VI->getSecond());
  }
  // priv_a.Fired = 0;
  LValue FiredLVal =
      CGF.EmitLValueForField(Base: BaseLVal, Field: FiredField);
  CGF.EmitStoreOfScalar(
      value: llvm::ConstantInt::getNullValue(Ty: CGF.ConvertTypeForMem(T: C.CharTy)),
      lvalue: FiredLVal);
  return CGF.EmitLValueForField(Base: BaseLVal, Field: VDField).getAddress();
}
12968
12969namespace {
12970/// Checks if the lastprivate conditional variable is referenced in LHS.
12971class LastprivateConditionalRefChecker final
12972 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12973 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12974 const Expr *FoundE = nullptr;
12975 const Decl *FoundD = nullptr;
12976 StringRef UniqueDeclName;
12977 LValue IVLVal;
12978 llvm::Function *FoundFn = nullptr;
12979 SourceLocation Loc;
12980
12981public:
12982 bool VisitDeclRefExpr(const DeclRefExpr *E) {
12983 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12984 llvm::reverse(C&: LPM)) {
12985 auto It = D.DeclToUniqueName.find(Key: E->getDecl());
12986 if (It == D.DeclToUniqueName.end())
12987 continue;
12988 if (D.Disabled)
12989 return false;
12990 FoundE = E;
12991 FoundD = E->getDecl()->getCanonicalDecl();
12992 UniqueDeclName = It->second;
12993 IVLVal = D.IVLVal;
12994 FoundFn = D.Fn;
12995 break;
12996 }
12997 return FoundE == E;
12998 }
12999 bool VisitMemberExpr(const MemberExpr *E) {
13000 if (!CodeGenFunction::IsWrappedCXXThis(E: E->getBase()))
13001 return false;
13002 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
13003 llvm::reverse(C&: LPM)) {
13004 auto It = D.DeclToUniqueName.find(Key: E->getMemberDecl());
13005 if (It == D.DeclToUniqueName.end())
13006 continue;
13007 if (D.Disabled)
13008 return false;
13009 FoundE = E;
13010 FoundD = E->getMemberDecl()->getCanonicalDecl();
13011 UniqueDeclName = It->second;
13012 IVLVal = D.IVLVal;
13013 FoundFn = D.Fn;
13014 break;
13015 }
13016 return FoundE == E;
13017 }
13018 bool VisitStmt(const Stmt *S) {
13019 for (const Stmt *Child : S->children()) {
13020 if (!Child)
13021 continue;
13022 if (const auto *E = dyn_cast<Expr>(Val: Child))
13023 if (!E->isGLValue())
13024 continue;
13025 if (Visit(S: Child))
13026 return true;
13027 }
13028 return false;
13029 }
13030 explicit LastprivateConditionalRefChecker(
13031 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
13032 : LPM(LPM) {}
13033 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
13034 getFoundData() const {
13035 return std::make_tuple(args: FoundE, args: FoundD, args: UniqueDeclName, args: IVLVal, args: FoundFn);
13036 }
13037};
13038} // namespace
13039
// Emits the conditional update of the global copy of a lastprivate
// conditional variable: under a critical section named after the variable's
// unique name, the private value is stored into the internal "last" global
// whenever the current iteration count is >= the last recorded one.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(T: IVLVal.getType());
  llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
      Ty: LLIVTy, Name: getName(Parts: {UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(Val: LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal =
      CGF.MakeNaturalAlignRawAddrLValue(V: LastIV, T: IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
      Ty: CGF.ConvertTypeForMem(T: LVal.getType()), Name: UniqueDeclName);
  cast<llvm::GlobalVariable>(Val: Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeRawAddrLValue(V: Last, T: LVal.getType(), Alignment: LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(lvalue: IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(lvalue: LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LHS: LastIVVal, RHS: IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LHS: LastIVVal, RHS: IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock(name: "lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: "lp_cond_exit");
    CGF.Builder.CreateCondBr(Cond: CmpRes, True: ThenBB, False: ExitBB);
    // {
    CGF.EmitBlock(BB: ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(value: IVVal, lvalue: LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(T: LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(lvalue: LVal, Loc);
      CGF.EmitStoreOfScalar(value: PrivVal, lvalue: LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(src: LVal, loc: Loc);
      CGF.EmitStoreOfComplex(V: PrivVal, dest: LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(Block: ExitBB);
    // There is no need to emit line number for unconditional branch.
    // NOTE(review): this temporary is destroyed at the end of the statement,
    // so the empty debug location does not cover the EmitBlock call below —
    // confirm that is the intent.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(BB: ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, CriticalName: UniqueDeclName, CriticalOpGen: CodeGen, Loc);
  }
}
13127
// If LHS references a lastprivate conditional variable, emits the
// bookkeeping for the assignment: the global last-value update when emitted
// in the owning function, or setting the tracking struct's 'Fired' flag when
// emitted from a different (outlined) function.
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(S: LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(args&: FoundE, args&: FoundD, args&: UniqueDeclName, args&: IVLVal, args&: FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(Val: FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(t&: It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(t&: It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(E: FoundE);
    // Reinterpret the private copy as the tracking struct so the Fired field
    // can be addressed.
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr: PrivLVal.getAddress(),
        Ty: CGF.ConvertTypeForMem(T: CGF.getContext().getPointerType(T: StructTy)),
        ElementTy: CGF.ConvertTypeForMem(T: StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(Addr: StructAddr, T: StructTy, Source: AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(Base: BaseLVal, Field: FiredDecl);
    // Volatile, unordered atomic store of 1 — presumably because the flag can
    // be written from concurrently executing threads; confirm against the
    // readers in checkAndEmitSharedLastprivateConditional.
    CGF.EmitAtomicStore(rvalue: RValue::get(V: llvm::ConstantInt::get(
                            Ty: CGF.ConvertTypeForMem(T: FiredDecl->getType()), V: 1)),
                        lvalue: FiredLVal, AO: llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(E: FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   Loc: FoundE->getExprLoc());
}
13171
13172void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
13173 CodeGenFunction &CGF, const OMPExecutableDirective &D,
13174 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
13175 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
13176 return;
13177 auto Range = llvm::reverse(C&: LastprivateConditionalStack);
13178 auto It = llvm::find_if(
13179 Range, P: [](const LastprivateConditionalData &D) { return !D.Disabled; });
13180 if (It == Range.end() || It->Fn != CGF.CurFn)
13181 return;
13182 auto LPCI = LastprivateConditionalToTypes.find(Val: It->Fn);
13183 assert(LPCI != LastprivateConditionalToTypes.end() &&
13184 "Lastprivates must be registered already.");
13185 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
13186 getOpenMPCaptureRegions(CaptureRegions, DKind: D.getDirectiveKind());
13187 const CapturedStmt *CS = D.getCapturedStmt(RegionKind: CaptureRegions.back());
13188 for (const auto &Pair : It->DeclToUniqueName) {
13189 const auto *VD = cast<VarDecl>(Val: Pair.first->getCanonicalDecl());
13190 if (!CS->capturesVariable(Var: VD) || IgnoredDecls.contains(V: VD))
13191 continue;
13192 auto I = LPCI->getSecond().find(Val: Pair.first);
13193 assert(I != LPCI->getSecond().end() &&
13194 "Lastprivate must be rehistered already.");
13195 // bool Cmp = priv_a.Fired != 0;
13196 LValue BaseLVal = std::get<3>(t&: I->getSecond());
13197 LValue FiredLVal =
13198 CGF.EmitLValueForField(Base: BaseLVal, Field: std::get<2>(t&: I->getSecond()));
13199 llvm::Value *Res = CGF.EmitLoadOfScalar(lvalue: FiredLVal, Loc: D.getBeginLoc());
13200 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Arg: Res);
13201 llvm::BasicBlock *ThenBB = CGF.createBasicBlock(name: "lpc.then");
13202 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(name: "lpc.done");
13203 // if (Cmp) {
13204 CGF.Builder.CreateCondBr(Cond: Cmp, True: ThenBB, False: DoneBB);
13205 CGF.EmitBlock(BB: ThenBB);
13206 Address Addr = CGF.GetAddrOfLocalVar(VD);
13207 LValue LVal;
13208 if (VD->getType()->isReferenceType())
13209 LVal = CGF.EmitLoadOfReferenceLValue(RefAddr: Addr, RefTy: VD->getType(),
13210 Source: AlignmentSource::Decl);
13211 else
13212 LVal = CGF.MakeAddrLValue(Addr, T: VD->getType().getNonReferenceType(),
13213 Source: AlignmentSource::Decl);
13214 emitLastprivateConditionalUpdate(CGF, IVLVal: It->IVLVal, UniqueDeclName: Pair.second, LVal,
13215 Loc: D.getBeginLoc());
13216 auto AL = ApplyDebugLocation::CreateArtificial(CGF);
13217 CGF.EmitBlock(BB: DoneBB, /*IsFinal=*/IsFinished: true);
13218 // }
13219 }
13220}
13221
13222void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
13223 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
13224 SourceLocation Loc) {
13225 if (CGF.getLangOpts().OpenMP < 50)
13226 return;
13227 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(Key: VD);
13228 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
13229 "Unknown lastprivate conditional variable.");
13230 StringRef UniqueName = It->second;
13231 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(Name: UniqueName);
13232 // The variable was not updated in the region - exit.
13233 if (!GV)
13234 return;
13235 LValue LPLVal = CGF.MakeRawAddrLValue(
13236 V: GV, T: PrivLVal.getType().getNonReferenceType(), Alignment: PrivLVal.getAlignment());
13237 llvm::Value *Res = CGF.EmitLoadOfScalar(lvalue: LPLVal, Loc);
13238 CGF.EmitStoreOfScalar(value: Res, lvalue: PrivLVal);
13239}
13240
//===----------------------------------------------------------------------===//
// CGOpenMPSIMDRuntime overrides. Every body below traps with
// llvm_unreachable: in SIMD-only mode these runtime entry points are not
// expected to be reached (earlier phases presumably filter the constructs
// out — the unreachable message is the only in-file evidence).
//===----------------------------------------------------------------------===//

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13262
// CGOpenMPSIMDRuntime stubs for parallel/critical/master/masked regions —
// unreachable in SIMD-only mode (no runtime region calls are supported).

void CGOpenMPSIMDRuntime::emitParallelCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
    ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
    llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
    OpenMPSeverityClauseKind Severity, const Expr *Message) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13290
// CGOpenMPSIMDRuntime stubs for taskyield/taskgroup/single/ordered/barrier —
// unreachable in SIMD-only mode.

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13324
// CGOpenMPSIMDRuntime stubs for worksharing-loop scheduling entry points —
// unreachable in SIMD-only mode.

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
                                                SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13369
// CGOpenMPSIMDRuntime stubs for clause emission, threadprivate handling and
// flush — unreachable in SIMD-only mode.

void CGOpenMPSIMDRuntime::emitNumThreadsClause(
    CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
    OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity,
    SourceLocation SeverityLoc, const Expr *Message,
    SourceLocation MessageLoc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13408
// CGOpenMPSIMDRuntime stubs for task and taskloop calls — unreachable in
// SIMD-only mode.

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13424
13425void CGOpenMPSIMDRuntime::emitReduction(
13426 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
13427 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
13428 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
13429 assert(Options.SimpleReduction && "Only simple reduction is expected.");
13430 CGOpenMPRuntime::emitReduction(CGF, Loc, OrgPrivates: Privates, OrgLHSExprs: LHSExprs, OrgRHSExprs: RHSExprs,
13431 OrgReductionOps: ReductionOps, Options);
13432}
13433
// SIMD-only stub: task reduction initialization is unsupported; must not be
// reached in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13439
// SIMD-only stub: task reduction finalization is unsupported; must not be
// reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13445
// SIMD-only stub: task reduction fixups are unsupported; must not be reached
// in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13452
// SIMD-only stub: task reduction item lookup is unsupported; must not be
// reached in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13459
// SIMD-only stub: the 'taskwait' directive is unsupported; must not be
// reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13465
// SIMD-only stub: 'cancellation point' is unsupported; must not be reached in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13471
// SIMD-only stub: the 'cancel' directive is unsupported; must not be reached
// in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13477
// SIMD-only stub: outlining 'target' regions is unsupported; must not be
// reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13484
// SIMD-only stub: invoking 'target' regions is unsupported; must not be
// reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13494
// SIMD-only stub: target-function registration is unsupported; must not be
// reached in SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13498
// SIMD-only stub: target global-variable registration is unsupported; must
// not be reached in SIMD-only mode.
bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13502
// In SIMD-only mode there is no device codegen, so no global is ever handled
// as a target global. Returning false presumably signals "not handled here"
// to the caller — confirm against the base-class contract in CGOpenMPRuntime.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
13506
// SIMD-only stub: 'teams' codegen is unsupported; must not be reached in
// SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13514
// SIMD-only stub: 'num_teams'/'thread_limit' clause lowering is unsupported;
// must not be reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13521
// SIMD-only stub: 'target data' codegen is unsupported; must not be reached
// in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13528
// SIMD-only stub: standalone target-data directives (enter/exit data, update)
// are unsupported; must not be reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13534
// SIMD-only stub: doacross initialization is unsupported; must not be reached
// in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13540
// SIMD-only stub ('depend' clause form): doacross ordering is unsupported;
// must not be reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13545
// SIMD-only stub ('doacross' clause form): doacross ordering is unsupported;
// must not be reached in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDoacrossClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13550
// SIMD-only stub: parameter translation (native <-> target) is unsupported;
// must not be reached in SIMD-only mode.
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13556
// SIMD-only stub: translated-parameter addressing is unsupported; must not be
// reached in SIMD-only mode.
Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
13563