//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "ABIInfoImpl.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look up the
    // variable in the list of captured variables; we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information required to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
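
// Note that OpenMPLocationFlags is a bitmask, so a location's flags word
// combines the values above; e.g., an implicit barrier emitted with a C-style
// ident would carry OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL (0x42).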

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                                 The string is composed of semi-colon
///                                 separated fields which describe the source
///                                 file, the function and a pair of line
///                                 numbers that delimit the construct. */
/// } ident_t;
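///
/// For reference, the psource field produced in this file has the layout
/// ";file;function;line;column;;", e.g. a construct at example.c:10, column 5
/// inside foo() (hypothetical names) would yield ";example.c;foo;10;5;;"; see
/// getIdentStringFromSourceLocation() below.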
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
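
// The modifier bits occupy the high bits and are OR'ed into the base schedule
// value; e.g., 'schedule(nonmonotonic: dynamic)' would be encoded as
// OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic.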

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

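// Run the stored codegen callback in its own cleanup scope. If a pre/post
// action is attached, its Exit() hook is registered as a normal-and-EH cleanup
// so it also runs on exceptional exits from the region.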
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for the reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

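// Emit the initialization of a private reduction copy. If the user-defined
// reduction declares an initializer, invoke it with its omp_priv/omp_orig
// operands remapped to the private and original addresses; otherwise,
// zero-initialize the private copy from a null constant of the reduction type.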
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
    return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

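// Peel pointer/reference levels off BaseTy, loading through each one, until
// the type matches ElTy; the result is an LValue addressing the beginning of
// the underlying data with ElTy as its element type.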
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

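// Roughly the inverse of loadToBegin: wrap the adjusted element pointer Addr
// back into an address of the base type. For every pointer/reference level of
// BaseTy a temporary is created and chained so that loading through the
// returned address eventually yields Addr; without any indirection, Addr is
// simply cast back to the type of the original base address.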
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  RawAddress Tmp = RawAddress::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

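// Strip nested array sections and subscripts from Ref to find the underlying
// base variable; DE is set to the DeclRefExpr naming it. Returns nullptr if
// Ref is neither an array section nor an array subscript expression.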
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

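// Append an unnamed public field of the given type to the DeclContext DC;
// a helper used throughout this file when building implicit record types.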
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize, we split the block at
    // IP, use the new block (=BB) as the destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will be no need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

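// Outline the body of a 'parallel' or 'teams' region into a separate helper
// function. The cancellation status of the innermost enclosed directive is
// computed here and forwarded (see PushAndPopStackRAII above) so that
// cancellation barriers are emitted correctly.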
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

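// Render the presumed source location in the semicolon-separated layout the
// ident_t psource field expects: ";<file>;<function>;<line>;<column>;;".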
1355static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1356 SourceLocation Loc,
1357 SmallString<128> &Buffer) {
1358 llvm::raw_svector_ostream OS(Buffer);
1359 // Build debug location
1360 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1361 OS << ";" << PLoc.getFilename() << ";";
1362 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(Val: CGF.CurFuncDecl))
1363 OS << FD->getQualifiedNameAsString();
1364 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1365 return OS.str();
1366}
1367
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

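// NOTE: A minimal sketch of what emitUpdateLocation produces at the IR level
// (global names and field values are illustrative, not the exact output):
//
//   @.str = private unnamed_addr constant [N x i8] c";file.c;foo;3;7;;\00"
//   @loc = private unnamed_addr constant %struct.ident_t
//              { i32 0, i32 <Flags>, i32 <Reserved2Flags>, i32 0, ptr @.str }
//
// The returned value is a pointer to such an ident_t; the runtime uses it to
// report source locations.
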
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used, we need to use it for all thread id calls
  // as the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with the thread id passed as an
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If the value was loaded in the entry block, cache it and use it
        // everywhere in the function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call kmp_int32
  // __kmpc_global_thread_num(ident_t *loc).
  // Generate the thread id value and cache it for use across the function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}

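// NOTE: A rough sketch of the IR produced for the non-outlined case above
// (value names are illustrative):
//
//   entry:
//     %svcpt = bitcast i32 undef to i32   ; service insertion point marker
//     %gtid = call i32 @__kmpc_global_thread_num(ptr @loc)
//     ...
//
// Later requests for the thread id in the same function reuse %gtid through
// OpenMPLocThreadIDMap instead of emitting another runtime call.
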
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
convertDeviceClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;

  switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::DT_Host:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
    break;
  case OMPDeclareTargetDeclAttr::DT_NoHost:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
    break;
  case OMPDeclareTargetDeclAttr::DT_Any:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
    break;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
    break;
  }
}

llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
convertCaptureClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!MapType)
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  switch ((int)*MapType) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
    break;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
    break;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
    break;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
    break;
  }
}

static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
    CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
    SourceLocation BeginLoc, llvm::StringRef ParentName = "") {

  auto FileInfoCallBack = [&]() {
    SourceManager &SM = CGM.getContext().getSourceManager();
    PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);

    llvm::sys::fs::UniqueID ID;
    if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
      PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
    }

    return std::pair<std::string, uint64_t>(PLoc.getFilename(),
                                            PLoc.getLine());
  };

  return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
}

ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };

  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  std::vector<llvm::GlobalVariable *> GeneratedRefs;

  llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
      CGM.getContext().getPointerType(VD->getType()));
  llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
      convertCaptureClause(VD), convertDeviceClause(VD),
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      VD->isExternallyVisible(),
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  VD->getCanonicalDecl()->getBeginLoc()),
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
      CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
      LinkageForVariable);

  if (!addr)
    return ConstantAddress::invalid();
  return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
}

llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return OMPBuilder.getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
}

Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
      CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
      getOrCreateThreadPrivateCache(VD)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
          Args),
      CGF.Int8Ty, VDAddr.getAlignment());
}

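// NOTE: For reference, the runtime entry point used above has (modulo exact
// typedefs) the following C signature; treat this as a sketch rather than an
// authoritative declaration:
//
//   void *__kmpc_threadprivate_cached(ident_t *loc, kmp_int32 gtid,
//                                     void *data, size_t size, void ***cache);
//
// It returns the address of the current thread's copy of the variable,
// creating and caching that copy on first use.
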
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init the OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register the constructor/destructor for the variable.
  llvm::Value *Args[] = {
      OMPLoc,
      CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
                  VDAddr.getAlignment());
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate a function that emits a destructor call for the
      // threadprivate copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit an init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL: this parameter is reserved by the runtime, which
    // currently requires it to always be NULL and fires an assertion
    // otherwise.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

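// NOTE: A rough sketch, in C-like pseudo-code, of the helpers generated above
// for a threadprivate "T var = init;" (names follow the getName() patterns;
// the bodies are illustrative):
//
//   void *__kmpc_global_ctor_(void *dst) { new (dst) T(init); return dst; }
//   void __kmpc_global_dtor_(void *dst) { static_cast<T *>(dst)->~T(); }
//   void __omp_threadprivate_init_() {
//     __kmpc_global_thread_num(&loc); // ensure the runtime is initialized
//     __kmpc_threadprivate_register(&loc, &var, ctor, /*cctor=*/NULL, dtor);
//   }
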
void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
                                                llvm::GlobalValue *GV) {
  std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
      OMPDeclareTargetDeclAttr::getActiveAttr(FD);

  // We only need to handle active 'indirect' declare target functions.
  if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
    return;

  // Get a mangled name to store the new device global in.
  llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
      CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
  SmallString<128> Name;
  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);

  // We need to generate a new global to hold the address of the indirectly
  // called device function. Doing this allows us to keep the visibility and
  // linkage of the associated function unchanged while allowing the runtime to
  // access its value.
  llvm::GlobalValue *Addr = GV;
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    Addr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.VoidPtrTy,
        /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
        nullptr, llvm::GlobalValue::NotThreadLocal,
        CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
    Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
  }

  OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
      Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
      llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
      llvm::GlobalValue::WeakODRLinkage);
}

Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
      VarLVType, Twine(Name).concat(Suffix).str());
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      OMPBuilder.getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy,
          Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
}

void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit a line number for the unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit a line number for the unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

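// NOTE: A minimal sketch of the control flow emitted above when the condition
// does not constant fold (block names match the createBasicBlock calls):
//
//     br i1 %cond, label %omp_if.then, label %omp_if.else
//   omp_if.then:
//     ; ThenGen(CGF)
//     br label %omp_if.end
//   omp_if.else:
//     ; ElseGen(CGF)
//     br label %omp_if.end
//   omp_if.end:
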
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    RawAddress ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // The thread id for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the
    // ones passed to __kmpc_fork_call, but the ones called in serialized
    // regions could be inlined. This is not perfect, but it is closer to the
    // invariant we want, namely, every data environment starts with a new
    // function.
    // TODO: We should pass the if condition to the runtime function and do
    // the handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

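// NOTE: A rough sketch of the lowering of "#pragma omp parallel if(c)" in
// C-like pseudo-code, combining the two lambdas above (the serialized arm
// passes a zeroed bound thread id):
//
//   if (c) {
//     __kmpc_fork_call(&loc, n, (kmpc_micro)outlined, var1, ..., varn);
//   } else {
//     __kmpc_serialized_parallel(&loc, gtid);
//     kmp_int32 zero = 0;
//     outlined(&gtid, &zero, var1, ..., varn);
//     __kmpc_end_serialized_parallel(&loc, gtid);
//   }
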
// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed as the first argument of the outlined
// function as "kmp_int32 *gtid"). Otherwise, if we're in a regular serial
// code region, get the thread ID by calling kmp_int32
// __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary
// and return the address of that temporary.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();

  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt).
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of the blocks/branches.
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

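// NOTE: CommonActionTy brackets a region with matched runtime calls. A sketch
// of the two shapes it produces in pseudo-code (Done() is what closes the
// conditional form):
//
//   Unconditional:                Conditional (master, single, ...):
//     enter(EnterArgs);             if (enter(EnterArgs)) { // omp_if.then
//     <region body>                   <region body>
//     exit(ExitArgs);                 exit(ExitArgs);
//                                   }                       // omp_if.end
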
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical.
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

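// NOTE: A sketch of the call sequences emitted above, without and with a
// 'hint' clause (the lock variable name follows getCriticalRegionLock):
//
//   __kmpc_critical(&loc, gtid, &.gomp_critical_user_<name>.var);
//   <region body>
//   __kmpc_end_critical(&loc, gtid, &.gomp_critical_user_<name>.var);
//
//   __kmpc_critical_with_hint(&loc, gtid, &.gomp_critical_user_<name>.var, h);
//   <region body>
//   __kmpc_end_critical(&loc, gtid, &.gomp_critical_user_<name>.var);
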
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked.
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
  return Address(
      CGF.Builder.CreateBitCast(
          Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
      ElemTy, CGF.getContext().getDeclAlign(Var));
}

static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  //                         <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  //                         <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build a function that copies private values from the single region to
    // all other threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
        SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.emitRawPointer(CGF),       // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}

void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered.
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In
  // this case choose schedule(static, 1).
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder.
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id) or
  // __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}

void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
                                    Expr *ME, bool IsFatal) {
  llvm::Value *MVL =
      ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
         : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Build call void __kmpc_error(ident_t *loc, int severity, const char
  // *message)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*EmitLoc=*/true),
      llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
      CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_error),
                      Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // Only static is allowed for dist_schedule.
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect
  // is as if the monotonic modifier is specified. Otherwise, unless the
  // monotonic modifier is specified, the effect is as if the nonmonotonic
  // modifier is specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}

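// NOTE: Worked examples of the encoding above; the result ORs the schedule
// kind with an optional modifier bit (enum spellings per OpenMPSchedType):
//
//   schedule(nonmonotonic: dynamic, 4)
//     -> OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic
//   schedule(simd: static, 4)
//     -> OMP_sch_static_balanced_chunked  // kind rewritten, no modifier bit
//   schedule(static) with OpenMP >= 5.0 and no modifier
//     -> OMP_sch_static                   // static schedules stay monotonic
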
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause, use the default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
                      Args);
}

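// NOTE: A sketch of the dynamically scheduled loop the caller emits around
// this init call (4-byte signed IV case; chunk iteration details such as
// stride handling are simplified):
//
//   __kmpc_dispatch_init_4(&loc, gtid, sched, lb, ub, st, chunk);
//   while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lo, &hi, &st)) {
//     for (i = lo; i <= hi; ++i)
//       <loop body>;
//   }
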
void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
}

static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause, use the default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.emitRawPointer(CGF),                    // &isLastIter
      Values.LB.emitRawPointer(CGF),                    // &LB
      Values.UB.emitRawPointer(CGF),                    // &UB
      Values.ST.emitRawPointer(CGF),                    // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc,
                         isOpenMPLoopDirective(DKind)
                             ? OMP_IDENT_WORK_LOOP
                             : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
                                             false);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

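// NOTE: A sketch of the statically scheduled loop the caller emits around the
// init/fini pair (4-byte signed IV case; the bounds are adjusted in place by
// the runtime call):
//
//   __kmpc_for_static_init_4(&loc, gtid, kind, &last, &lb, &ub, &st, 1, chunk);
//   for (i = lb; i <= ub; ++i)
//     <loop body>;
//   __kmpc_for_static_fini(&loc, gtid);
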
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction;
  bool isGPUDistribute =
      CGM.getLangOpts().OpenMPIsTargetDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
  StaticInitFunction = OMPBuilder.createForStaticInitFunction(
      Values.IVSize, Values.IVSigned, isGPUDistribute);

  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

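// Illustrative only: on AMDGCN/NVPTX device compilations the call above
// binds to the GPU distribute entry point instead, roughly
//   __kmpc_distribute_static_init_4(&loc, tid,
//                                   /*schedtype=*/92 /*kmp_distribute_static,
//                                   per kmp.h*/,
//                                   &lastiter, &lb, &ub, &stride, 1, 1);
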
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  assert((DKind == OMPD_distribute || DKind == OMPD_for ||
          DKind == OMPD_sections) &&
         "Expected distribute, for, or sections directive kind");
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind) ||
                                 (DKind == OMPD_target_teams_loop)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  if (isOpenMPDistributeDirective(DKind) &&
      CGM.getLangOpts().OpenMPIsTargetDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
        Args);
  else
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                        Args);
}

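// Illustrative pairing (a sketch): every *_static_init call is matched by a
// fini call at the end of the statically scheduled region, e.g.
//   __kmpc_for_static_init_4(&loc, tid, ...);
//   ... loop body ...
//   __kmpc_for_static_fini(&loc, tid);
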
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
                      Args);
}

llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next_(4|8)[u](
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      IL.emitRawPointer(CGF), // &isLastIter
      LB.emitRawPointer(CGF), // &Lower
      UB.emitRawPointer(CGF), // &Upper
      ST.emitRawPointer(CGF)  // &Stride
  };
  llvm::Value *Call = CGF.EmitRuntimeCall(
      OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

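// Illustrative shape of the dynamically scheduled loop this helper serves
// (a sketch, not the exact emitted IR):
//   while (__kmpc_dispatch_next_4(&loc, tid, &last, &lb, &ub, &st)) {
//     for (i = lb; i <= ub; i += st)
//       ... loop body ...;
//   }
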
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}

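// Illustrative only: for
//   #pragma omp parallel num_threads(n)
// the clause lowers, roughly, to
//   __kmpc_push_num_threads(&loc, tid, n);
// immediately before the __kmpc_fork_call for the parallel region.
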
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}

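// Illustrative only: for '#pragma omp parallel proc_bind(spread)' the call
// emitted above is roughly
//   __kmpc_push_proc_bind(&loc, tid, 4);
// where the numeric value follows llvm::omp::ProcBindKind.
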
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
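// These indices must stay in sync with the field order of the kmp_task_t
// record built in createKmpTaskTRecordDecl() below: e.g. KmpTaskTPartId
// selects the kmp_int32 part_id field, and Data1/Data2 select the two
// kmp_cmplrdata_t unions holding the destructor routine and the priority.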
} // anonymous namespace

void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
    return;

  llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
      [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
             const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
    SourceLocation Loc;
    if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
      for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                E = CGM.getContext().getSourceManager().fileinfo_end();
           I != E; ++I) {
        if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
            I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
          Loc = CGM.getContext().getSourceManager().translateFileLineCol(
              I->getFirst(), EntryInfo.Line, 1);
          break;
        }
      }
    }
    switch (Kind) {
    case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for target region in "
                                    "%0 is incorrect: either the "
                                    "address or the ID is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for declare target "
                                    "variable %0 is incorrect: the "
                                    "address is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Offloading entry for declare target variable is incorrect: the "
          "address is invalid.");
      CGM.getDiags().Report(DiagID);
    } break;
    }
  };

  OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
}

void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build the type for
    // typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, void *);
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

namespace {
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // The default allocator with no allocator expression means the default
  // allocation; anything else requires custom allocation handling.
  return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
           !AA->getAllocator());
}

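// Illustrative only: given
//   int a;
//   #pragma omp allocate(a) allocator(omp_high_bw_mem_alloc)
// 'a' is allocatable here, while '#pragma omp allocate(a)' with the default
// allocator and no allocator expression is treated as a default allocation
// and returns false.
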
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //   /* private vars */
    // };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

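// Illustrative result (a sketch): for 'firstprivate(x) private(buf)' with
// 'double x; int buf[8];' the implicit record is roughly
//   struct .kmp_privates.t {
//     double x;
//     int buf[8];
//   };
// with the field order following the alignment-sorted Privates list built
// in emitTaskInit().
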
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //   void *              shareds;
  //   kmp_routine_entry_t routine;
  //   kmp_int32           part_id;
  //   kmp_cmplrdata_t     data1;
  //   kmp_cmplrdata_t     data2;
  // For taskloops additional fields:
  //   kmp_uint64          lb;
  //   kmp_uint64          ub;
  //   kmp_int64           st;
  //   kmp_int32           liter;
  //   void *              reductions;
  // };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //   kmp_task_t      task_data;
  //   .kmp_privates_t. privates;
  // };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates,
  // task_privates_map, tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .emitRawPointer(CGF)};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

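// Illustrative shape of the generated function (a sketch):
//   kmp_int32 .omp_task_destructor.(kmp_int32 gtid,
//                                   kmp_task_t_with_privates *tt) {
//     tt->privates.priv1.~T1();
//     ...
//     tt->privates.privn.~Tn();
//   }
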
/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,..., <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamKind::Other);
  Args.push_back(&TaskPrivatesArg);
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamKind::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress().withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}

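// Illustrative only: 'firstprivate(s)' where 's' has a non-trivial copy
// constructor makes initialization required, while a scalar
// 'firstprivate(n)' that is copied bitwise does not.
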
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
///   // setup lastprivate flag
///   task_dst->last = lastpriv;
///   // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamKind::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                         ArrayRef<PrivateDataTy> Privates) {
  for (const PrivateDataTy &P : Privates) {
    if (P.second.isLocalPrivate())
      continue;
    QualType Ty = P.second.Original->getType().getNonReferenceType();
    if (Ty.isDestructedType())
      return true;
  }
  return false;
}

namespace {
/// Loop generator for OpenMP iterator expression.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

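// Illustrative control flow for a single 'iterator(i = 0:n)' modifier
// (a sketch of what the scope above generates):
//   counter = 0;
// iter.cont:
//   if (counter < n) goto iter.body; else goto iter.exit;
// iter.body:
//   i = begin + counter * step;  // HelperData.Update
//   ... code emitted inside the scope ...
//   counter = counter + 1;       // CounterUpdate, from the destructor
//   goto iter.cont;
// iter.exit:
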
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress();
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
        /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

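// Illustrative cases (a sketch): for an array-shaping expression
// '([n][m])p' the size is sizeof(*p) * n * m; for an array section
// 'a[lo:len]' the size is computed as
//   (char *)(&a[lo + len - 1] + 1) - (char *)&a[lo]
// i.e. one past the last element of the section minus its lower bound
// address.
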
/// Builds the kmp_task_affinity_info_t record type, if it is not built yet,
/// including its 32-bit unsigned flags field.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags,
      KmpTaskTWithPrivatesTySize, SharedsSize,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
                                                      KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit the device ID if any, otherwise use the default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
          KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
          /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamKind::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
                                                  KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = RTLDependenceKindTy::DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = RTLDependenceKindTy::DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = RTLDependenceKindTy::DepMutexInOutSet;
    break;
  case OMPC_DEPEND_inoutset:
    DepKind = RTLDependenceKindTy::DepInOutSet;
    break;
  case OMPC_DEPEND_outallmemory:
    DepKind = RTLDependenceKindTy::DepOmpAllMem;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_inoutallmemory:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
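    // In C terms the record built here corresponds roughly to the runtime's
    // kmp_depend_info (a sketch; field names are illustrative, but the field
    // order is what the rest of this file relies on via RTLDependInfoFields):
    //   struct kmp_depend_info { intptr_t base_addr; size_t len;
    //                            /*bool-sized*/ unsigned flags; };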
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress().withElementType(
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
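  // A depobj array stores its element count one element before the pointer
  // handed to the user: deps[-1].base_addr = <n>, deps[0..n-1] = <the deps>.
  // Step back one element to read the count.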
  Address DepObjAddr = CGF.Builder.CreateGEP(
      CGF, Base.getAddress(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[-1].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
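  // In pseudo-C, each iteration of the loop below emits:
  //   deps[pos].base_addr = (intptr_t)&<expr>;
  //   deps[pos].len       = sizeof(<expr>);
  //   deps[pos].flags     = <translated dependency kind>;
  //   ++pos;
  // where pos is either a compile-time counter or, for iterator-expanded
  // dependencies, a value loaded from and stored back to memory.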
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;

    // The expression will be a nullptr in the 'omp_all_memory' case.
    if (E) {
      std::tie(Addr, Size) = getPointerAndSize(CGF, E);
      Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
    } else {
      Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
      Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
    }
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      assert(E && "Expected a non-null expression");
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
    CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(),
                         static_cast<unsigned int>(RTLDependInfoFields::Len)));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::Flags)));
    CGF.EmitStoreOfScalar(
        llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
        FlagsLVal);
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
    CodeGenFunction &CGF, QualType &KmpDependInfoTy,
    const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress());
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
                                         QualType &KmpDependInfoTy,
                                         LValue PosLVal,
                                         const OMPTaskDataTy::DependData &Data,
                                         Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
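    // For each depobj named in the clause, the loop below performs, in
    // pseudo-C:
    //   memcpy(&deps[pos], depobj_base, numDeps * sizeof(kmp_depend_info));
    //   pos += numDeps;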
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());

      // Memcpy the dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);

      // Increase pos.
      // pos += numDeps;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}

std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
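  // Note: the count above covers only plain dependencies. Depobj and
  // iterator-expanded dependencies contribute element counts that are known
  // only at run time; they are accumulated separately below.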
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      llvm::Value *ClauseIteratorSpace =
          llvm::ConstantInt::get(CGF.IntPtrTy, 1);
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);
      }
      llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
          ClauseIteratorSpace,
          llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
      NumOfRegularWithIterators =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamKind::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
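  // The dependency array is now populated in three passes: plain dependencies
  // first, then iterator-expanded dependencies, and finally the contents of
  // depobj arrays, which are copied in wholesale.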
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
  return std::make_pair(NumOfElements, DependenciesArray);
}

Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle the depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, KmpDependInfoLlvmTy->getPointerTo());
  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[0].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
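  // deps[0].base_addr now holds the element count; the pointer returned at
  // the end of this function is &deps[1], so the count ends up at index -1
  // relative to it, which is exactly what getDepobjElements reads back.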
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
      CGF.Int8Ty);
  return DependenciesArray;
}

void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
                                            C.VoidPtrTy.castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
      CGF.ConvertTypeForMem(KmpDependInfoTy));
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.emitRawPointer(CGF),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // __kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}

void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
                                           Begin.emitRawPointer(CGF), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
  Begin = Begin.withPointer(ElementPHI, KnownNonNull);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(),
                       static_cast<unsigned int>(RTLDependInfoFields::Flags)));
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
      FlagsLVal);

  // Shift the address forward by one element.
  llvm::Value *ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
          .emitRawPointer(CGF);
  ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if the
  // dependence list is not empty.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = {UpLoc, ThreadID, NewTask};
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if the parent region is untied and build a return for the untied
    // task.
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
    DepWaitTaskArgs[6] =
        llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
  }
  auto &M = CGM.getModule();
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

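  // In sketch form, the 'if' clause lowering below produces:
  //   if (<cond>) {
  //     __kmpc_omp_task(loc, gtid, new_task);             // ThenCodeGen
  //   } else {                                            // ElseCodeGen
  //     __kmpc_omp_taskwait_deps_51(...);                 // only with deps
  //     __kmpc_omp_task_begin_if0(loc, gtid, new_task);
  //     proxy_task_entry(gtid, new_task);
  //     __kmpc_omp_task_complete_if0(loc, gtid, new_task);
  //   }
  // (__kmpc_omp_task_with_deps replaces __kmpc_omp_task when dependences are
  // present.)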
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the
                         // compiler.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element reduction.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
  llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
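  // In C-like pseudo-code (a sketch of the IR built below):
  //   if (lhs == lhsEnd) goto done;
  //   do { RedOp(*lhs, *rhs); ++lhs; ++rhs; } while (lhs != lhsEnd);
  //   done:;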
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, LHSElementCurrent);
  Scope.addPrivate(RHSVar, RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit reduction combiner. If the combiner is a simple expression, emit it as
/// is; otherwise treat it as the combiner of a user-defined reduction (UDR)
/// decl and emit it as a call to the UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}

llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getReductionFuncName(ReducerName);
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // The following code is emitted for a reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //   *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //   ...
  //   *(Type<n>-1*)lhs[<n>-1] =
  //       ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //                               *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  //                                RedList, reduce_func, &<lock>)) {
  // case 1:
  //   ...
  //   <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //   ...
  //   __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //   break;
  // case 2:
  //   ...
  //   Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //   ...
  //   [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  //   break;
  // default:;
  // }
  //
  // If SimpleReduction is true, only the following code is generated:
  // ...
  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  // ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy = C.getConstantArrayType(
      C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
      /*IndexTypeQuals=*/0);
  RawAddress ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
      Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  //  __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //  break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, std::nullopt,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  //  break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                CGF.emitOMPSimpleStore(
                    CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                    VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(VD, LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, std::nullopt,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5233
5234/// Generates unique name for artificial threadprivate variables.
5235/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
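/// For instance (illustrative; the exact separator comes from
/// CGOpenMPRuntime::getName), a local variable 'a' whose declaration starts
/// at raw source location 12345 yields a name like "<Prefix>.a_12345".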
5236static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5237 const Expr *Ref) {
5238 SmallString<256> Buffer;
5239 llvm::raw_svector_ostream Out(Buffer);
5240 const clang::DeclRefExpr *DE;
5241 const VarDecl *D = ::getBaseDecl(Ref, DE);
5242 if (!D)
5243 D = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: Ref)->getDecl());
5244 D = D->getCanonicalDecl();
5245 std::string Name = CGM.getOpenMPRuntime().getName(
5246 Parts: {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(GD: D)});
5247 Out << Prefix << Name << "_"
5248 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5249 return std::string(Out.str());
5250}
5251
5252/// Emits reduction initializer function:
5253/// \code
5254/// void @.red_init(void* %arg, void* %orig) {
5255/// %0 = bitcast void* %arg to <type>*
5256/// store <type> <init>, <type>* %0
5257/// ret void
5258/// }
5259/// \endcode
5260static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5261 SourceLocation Loc,
5262 ReductionCodeGen &RCG, unsigned N) {
5263 ASTContext &C = CGM.getContext();
5264 QualType VoidPtrTy = C.VoidPtrTy;
5265 VoidPtrTy.addRestrict();
5266 FunctionArgList Args;
5267 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5268 ImplicitParamKind::Other);
5269 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5270 ImplicitParamKind::Other);
5271 Args.emplace_back(Args: &Param);
5272 Args.emplace_back(Args: &ParamOrig);
5273 const auto &FnInfo =
5274 CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
5275 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(Info: FnInfo);
5276 std::string Name = CGM.getOpenMPRuntime().getName(Parts: {"red_init", ""});
5277 auto *Fn = llvm::Function::Create(Ty: FnTy, Linkage: llvm::GlobalValue::InternalLinkage,
5278 N: Name, M: &CGM.getModule());
5279 CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: FnInfo);
5280 Fn->setDoesNotRecurse();
5281 CodeGenFunction CGF(CGM);
5282 CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo, Args, Loc, StartLoc: Loc);
5283 QualType PrivateType = RCG.getPrivateType(N);
5284 Address PrivateAddr = CGF.EmitLoadOfPointer(
5285 Ptr: CGF.GetAddrOfLocalVar(VD: &Param).withElementType(
5286 ElemTy: CGF.ConvertTypeForMem(T: PrivateType)->getPointerTo()),
5287 PtrTy: C.getPointerType(T: PrivateType)->castAs<PointerType>());
5288 llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from the
  // global threadprivate variable.
5291 if (RCG.getSizes(N).second) {
5292 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5293 CGF, VarType: CGM.getContext().getSizeType(),
5294 Name: generateUniqueName(CGM, Prefix: "reduction_size", Ref: RCG.getRefExpr(N)));
5295 Size = CGF.EmitLoadOfScalar(Addr: SizeAddr, /*Volatile=*/false,
5296 Ty: CGM.getContext().getSizeType(), Loc);
5297 }
5298 RCG.emitAggregateType(CGF, N, Size);
5299 Address OrigAddr = Address::invalid();
  // If the initializer uses the initializer from a declare reduction
  // construct, emit a pointer to the address of the original reduction item
  // (required by the reduction initializer).
5303 if (RCG.usesReductionInitializer(N)) {
5304 Address SharedAddr = CGF.GetAddrOfLocalVar(VD: &ParamOrig);
5305 OrigAddr = CGF.EmitLoadOfPointer(
5306 Ptr: SharedAddr,
5307 PtrTy: CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5308 }
5309 // Emit the initializer:
5310 // %0 = bitcast void* %arg to <type>*
5311 // store <type> <init>, <type>* %0
5312 RCG.emitInitialization(CGF, N, PrivateAddr, SharedAddr: OrigAddr,
5313 DefaultInit: [](CodeGenFunction &) { return false; });
5314 CGF.FinishFunction();
5315 return Fn;
5316}
5317
5318/// Emits reduction combiner function:
5319/// \code
5320/// void @.red_comb(void* %arg0, void* %arg1) {
5321/// %lhs = bitcast void* %arg0 to <type>*
5322/// %rhs = bitcast void* %arg1 to <type>*
5323/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5324/// store <type> %2, <type>* %lhs
5325/// ret void
5326/// }
5327/// \endcode
5328static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5329 SourceLocation Loc,
5330 ReductionCodeGen &RCG, unsigned N,
5331 const Expr *ReductionOp,
5332 const Expr *LHS, const Expr *RHS,
5333 const Expr *PrivateRef) {
5334 ASTContext &C = CGM.getContext();
5335 const auto *LHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHS)->getDecl());
5336 const auto *RHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHS)->getDecl());
5337 FunctionArgList Args;
5338 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5339 C.VoidPtrTy, ImplicitParamKind::Other);
5340 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5341 ImplicitParamKind::Other);
5342 Args.emplace_back(Args: &ParamInOut);
5343 Args.emplace_back(Args: &ParamIn);
5344 const auto &FnInfo =
5345 CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
5346 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(Info: FnInfo);
5347 std::string Name = CGM.getOpenMPRuntime().getName(Parts: {"red_comb", ""});
5348 auto *Fn = llvm::Function::Create(Ty: FnTy, Linkage: llvm::GlobalValue::InternalLinkage,
5349 N: Name, M: &CGM.getModule());
5350 CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: FnInfo);
5351 Fn->setDoesNotRecurse();
5352 CodeGenFunction CGF(CGM);
5353 CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo, Args, Loc, StartLoc: Loc);
5354 llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from the
  // global threadprivate variable.
5357 if (RCG.getSizes(N).second) {
5358 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5359 CGF, VarType: CGM.getContext().getSizeType(),
5360 Name: generateUniqueName(CGM, Prefix: "reduction_size", Ref: RCG.getRefExpr(N)));
5361 Size = CGF.EmitLoadOfScalar(Addr: SizeAddr, /*Volatile=*/false,
5362 Ty: CGM.getContext().getSizeType(), Loc);
5363 }
5364 RCG.emitAggregateType(CGF, N, Size);
5365 // Remap lhs and rhs variables to the addresses of the function arguments.
5366 // %lhs = bitcast void* %arg0 to <type>*
5367 // %rhs = bitcast void* %arg1 to <type>*
5368 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5369 PrivateScope.addPrivate(
5370 LocalVD: LHSVD,
5371 // Pull out the pointer to the variable.
5372 Addr: CGF.EmitLoadOfPointer(
5373 Ptr: CGF.GetAddrOfLocalVar(VD: &ParamInOut)
5374 .withElementType(
5375 ElemTy: CGF.ConvertTypeForMem(T: LHSVD->getType())->getPointerTo()),
5376 PtrTy: C.getPointerType(T: LHSVD->getType())->castAs<PointerType>()));
5377 PrivateScope.addPrivate(
5378 LocalVD: RHSVD,
5379 // Pull out the pointer to the variable.
5380 Addr: CGF.EmitLoadOfPointer(
5381 Ptr: CGF.GetAddrOfLocalVar(VD: &ParamIn).withElementType(
5382 ElemTy: CGF.ConvertTypeForMem(T: RHSVD->getType())->getPointerTo()),
5383 PtrTy: C.getPointerType(T: RHSVD->getType())->castAs<PointerType>()));
5384 PrivateScope.Privatize();
5385 // Emit the combiner body:
5386 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5387 // store <type> %2, <type>* %lhs
5388 CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5389 CGF, ReductionOp, PrivateRef, LHS: cast<DeclRefExpr>(Val: LHS),
5390 RHS: cast<DeclRefExpr>(Val: RHS));
5391 CGF.FinishFunction();
5392 return Fn;
5393}
5394
5395/// Emits reduction finalizer function:
5396/// \code
5397/// void @.red_fini(void* %arg) {
5398/// %0 = bitcast void* %arg to <type>*
5399/// <destroy>(<type>* %0)
5400/// ret void
5401/// }
5402/// \endcode
5403static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5404 SourceLocation Loc,
5405 ReductionCodeGen &RCG, unsigned N) {
5406 if (!RCG.needCleanups(N))
5407 return nullptr;
5408 ASTContext &C = CGM.getContext();
5409 FunctionArgList Args;
5410 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5411 ImplicitParamKind::Other);
5412 Args.emplace_back(Args: &Param);
5413 const auto &FnInfo =
5414 CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
5415 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(Info: FnInfo);
5416 std::string Name = CGM.getOpenMPRuntime().getName(Parts: {"red_fini", ""});
5417 auto *Fn = llvm::Function::Create(Ty: FnTy, Linkage: llvm::GlobalValue::InternalLinkage,
5418 N: Name, M: &CGM.getModule());
5419 CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: FnInfo);
5420 Fn->setDoesNotRecurse();
5421 CodeGenFunction CGF(CGM);
5422 CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo, Args, Loc, StartLoc: Loc);
5423 Address PrivateAddr = CGF.EmitLoadOfPointer(
5424 Ptr: CGF.GetAddrOfLocalVar(VD: &Param), PtrTy: C.VoidPtrTy.castAs<PointerType>());
5425 llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from the
  // global threadprivate variable.
5428 if (RCG.getSizes(N).second) {
5429 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5430 CGF, VarType: CGM.getContext().getSizeType(),
5431 Name: generateUniqueName(CGM, Prefix: "reduction_size", Ref: RCG.getRefExpr(N)));
5432 Size = CGF.EmitLoadOfScalar(Addr: SizeAddr, /*Volatile=*/false,
5433 Ty: CGM.getContext().getSizeType(), Loc);
5434 }
5435 RCG.emitAggregateType(CGF, N, Size);
5436 // Emit the finalizer body:
5437 // <destroy>(<type>* %0)
5438 RCG.emitCleanups(CGF, N, PrivateAddr);
5439 CGF.FinishFunction(EndLoc: Loc);
5440 return Fn;
5441}
5442
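/// Emits the task reduction initialization sequence. For example (a sketch),
/// for
/// \code
/// #pragma omp taskgroup task_reduction(+ : a)
/// \endcode
/// one kmp_taskred_input_t element describing 'a' is filled in below, and the
/// value returned by __kmpc_taskred_init (or __kmpc_taskred_modifier_init for
/// reductions with a task modifier) serves as the taskgroup's reduction
/// descriptor.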
5443llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5444 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5445 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5446 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5447 return nullptr;
5448
5449 // Build typedef struct:
5450 // kmp_taskred_input {
5451 // void *reduce_shar; // shared reduction item
5452 // void *reduce_orig; // original reduction item used for initialization
5453 // size_t reduce_size; // size of data item
5454 // void *reduce_init; // data initialization routine
5455 // void *reduce_fini; // data finalization routine
5456 // void *reduce_comb; // data combiner routine
5457 // kmp_task_red_flags_t flags; // flags for additional info from compiler
5458 // } kmp_taskred_input_t;
5459 ASTContext &C = CGM.getContext();
5460 RecordDecl *RD = C.buildImplicitRecord(Name: "kmp_taskred_input_t");
5461 RD->startDefinition();
5462 const FieldDecl *SharedFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
5463 const FieldDecl *OrigFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
5464 const FieldDecl *SizeFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.getSizeType());
5465 const FieldDecl *InitFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
5466 const FieldDecl *FiniFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
5467 const FieldDecl *CombFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
5468 const FieldDecl *FlagsFD = addFieldToRecordDecl(
5469 C, DC: RD, FieldTy: C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5470 RD->completeDefinition();
5471 QualType RDType = C.getRecordType(Decl: RD);
5472 unsigned Size = Data.ReductionVars.size();
5473 llvm::APInt ArraySize(/*numBits=*/64, Size);
5474 QualType ArrayRDType =
5475 C.getConstantArrayType(EltTy: RDType, ArySize: ArraySize, SizeExpr: nullptr,
5476 ASM: ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
  // kmp_taskred_input_t .rd_input.[Size];
5478 RawAddress TaskRedInput = CGF.CreateMemTemp(T: ArrayRDType, Name: ".rd_input.");
5479 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5480 Data.ReductionCopies, Data.ReductionOps);
5481 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
5483 llvm::Value *Idxs[] = {llvm::ConstantInt::get(Ty: CGM.SizeTy, /*V=*/0),
5484 llvm::ConstantInt::get(Ty: CGM.SizeTy, V: Cnt)};
5485 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5486 ElemTy: TaskRedInput.getElementType(), Ptr: TaskRedInput.getPointer(), IdxList: Idxs,
5487 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5488 Name: ".rd_input.gep.");
5489 LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(V: GEP, T: RDType);
5490 // ElemLVal.reduce_shar = &Shareds[Cnt];
5491 LValue SharedLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: SharedFD);
5492 RCG.emitSharedOrigLValue(CGF, N: Cnt);
5493 llvm::Value *Shared = RCG.getSharedLValue(N: Cnt).getPointer(CGF);
5494 CGF.EmitStoreOfScalar(value: Shared, lvalue: SharedLVal);
5495 // ElemLVal.reduce_orig = &Origs[Cnt];
5496 LValue OrigLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: OrigFD);
5497 llvm::Value *Orig = RCG.getOrigLValue(N: Cnt).getPointer(CGF);
5498 CGF.EmitStoreOfScalar(value: Orig, lvalue: OrigLVal);
5499 RCG.emitAggregateType(CGF, N: Cnt);
5500 llvm::Value *SizeValInChars;
5501 llvm::Value *SizeVal;
5502 std::tie(args&: SizeValInChars, args&: SizeVal) = RCG.getSizes(N: Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It
    // is required because the runtime does not provide a way to pass the
    // sizes of VLAs/array sections to the initializer/combiner/finalizer
    // functions. Instead, threadprivate global variables are used to store
    // these values and pass them to those functions.
5508 bool DelayedCreation = !!SizeVal;
5509 SizeValInChars = CGF.Builder.CreateIntCast(V: SizeValInChars, DestTy: CGM.SizeTy,
5510 /*isSigned=*/false);
5511 LValue SizeLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: SizeFD);
5512 CGF.EmitStoreOfScalar(value: SizeValInChars, lvalue: SizeLVal);
5513 // ElemLVal.reduce_init = init;
5514 LValue InitLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: InitFD);
5515 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, N: Cnt);
5516 CGF.EmitStoreOfScalar(value: InitAddr, lvalue: InitLVal);
5517 // ElemLVal.reduce_fini = fini;
5518 LValue FiniLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: FiniFD);
5519 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, N: Cnt);
5520 llvm::Value *FiniAddr =
5521 Fini ? Fini : llvm::ConstantPointerNull::get(T: CGM.VoidPtrTy);
5522 CGF.EmitStoreOfScalar(value: FiniAddr, lvalue: FiniLVal);
5523 // ElemLVal.reduce_comb = comb;
5524 LValue CombLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: CombFD);
5525 llvm::Value *CombAddr = emitReduceCombFunction(
5526 CGM, Loc, RCG, N: Cnt, ReductionOp: Data.ReductionOps[Cnt], LHS: LHSExprs[Cnt],
5527 RHS: RHSExprs[Cnt], PrivateRef: Data.ReductionCopies[Cnt]);
5528 CGF.EmitStoreOfScalar(value: CombAddr, lvalue: CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
5530 LValue FlagsLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: FlagsFD);
5531 if (DelayedCreation) {
5532 CGF.EmitStoreOfScalar(
5533 value: llvm::ConstantInt::get(Ty: CGM.Int32Ty, /*V=*/1, /*isSigned=*/IsSigned: true),
5534 lvalue: FlagsLVal);
5535 } else
5536 CGF.EmitNullInitialization(DestPtr: FlagsLVal.getAddress(), Ty: FlagsLVal.getType());
5537 }
5538 if (Data.IsReductionWithTaskMod) {
5539 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5540 // is_ws, int num, void *data);
5541 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5542 llvm::Value *GTid = CGF.Builder.CreateIntCast(V: getThreadID(CGF, Loc),
5543 DestTy: CGM.IntTy, /*isSigned=*/true);
5544 llvm::Value *Args[] = {
5545 IdentTLoc, GTid,
5546 llvm::ConstantInt::get(Ty: CGM.IntTy, V: Data.IsWorksharingReduction ? 1 : 0,
5547 /*isSigned=*/IsSigned: true),
5548 llvm::ConstantInt::get(Ty: CGM.IntTy, V: Size, /*isSigned=*/IsSigned: true),
5549 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5550 V: TaskRedInput.getPointer(), DestTy: CGM.VoidPtrTy)};
5551 return CGF.EmitRuntimeCall(
5552 callee: OMPBuilder.getOrCreateRuntimeFunction(
5553 M&: CGM.getModule(), FnID: OMPRTL___kmpc_taskred_modifier_init),
5554 args: Args);
5555 }
5556 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5557 llvm::Value *Args[] = {
5558 CGF.Builder.CreateIntCast(V: getThreadID(CGF, Loc), DestTy: CGM.IntTy,
5559 /*isSigned=*/true),
5560 llvm::ConstantInt::get(Ty: CGM.IntTy, V: Size, /*isSigned=*/IsSigned: true),
5561 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(V: TaskRedInput.getPointer(),
5562 DestTy: CGM.VoidPtrTy)};
5563 return CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
5564 M&: CGM.getModule(), FnID: OMPRTL___kmpc_taskred_init),
5565 args: Args);
5566}
5567
5568void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5569 SourceLocation Loc,
5570 bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int
  // gtid, int is_ws);
5573 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5574 llvm::Value *GTid = CGF.Builder.CreateIntCast(V: getThreadID(CGF, Loc),
5575 DestTy: CGM.IntTy, /*isSigned=*/true);
5576 llvm::Value *Args[] = {IdentTLoc, GTid,
5577 llvm::ConstantInt::get(Ty: CGM.IntTy,
5578 V: IsWorksharingReduction ? 1 : 0,
5579 /*isSigned=*/IsSigned: true)};
5580 (void)CGF.EmitRuntimeCall(
5581 callee: OMPBuilder.getOrCreateRuntimeFunction(
5582 M&: CGM.getModule(), FnID: OMPRTL___kmpc_task_reduction_modifier_fini),
5583 args: Args);
5584}
5585
5586void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5587 SourceLocation Loc,
5588 ReductionCodeGen &RCG,
5589 unsigned N) {
5590 auto Sizes = RCG.getSizes(N);
  // Emit the threadprivate global variable if the size is non-constant
  // (Sizes.second != nullptr).
5593 if (Sizes.second) {
5594 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(V: Sizes.second, DestTy: CGM.SizeTy,
5595 /*isSigned=*/false);
5596 Address SizeAddr = getAddrOfArtificialThreadPrivate(
5597 CGF, VarType: CGM.getContext().getSizeType(),
5598 Name: generateUniqueName(CGM, Prefix: "reduction_size", Ref: RCG.getRefExpr(N)));
5599 CGF.Builder.CreateStore(Val: SizeVal, Addr: SizeAddr, /*IsVolatile=*/false);
5600 }
5601}
5602
5603Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5604 SourceLocation Loc,
5605 llvm::Value *ReductionsPtr,
5606 LValue SharedLVal) {
5607 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5608 // *d);
5609 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(V: getThreadID(CGF, Loc),
5610 DestTy: CGM.IntTy,
5611 /*isSigned=*/true),
5612 ReductionsPtr,
5613 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5614 V: SharedLVal.getPointer(CGF), DestTy: CGM.VoidPtrTy)};
5615 return Address(
5616 CGF.EmitRuntimeCall(
5617 callee: OMPBuilder.getOrCreateRuntimeFunction(
5618 M&: CGM.getModule(), FnID: OMPRTL___kmpc_task_reduction_get_th_data),
5619 args: Args),
5620 CGF.Int8Ty, SharedLVal.getAlignment());
5621}
5622
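/// Illustrative lowering of the taskwait directive (a sketch; the exact IR
/// depends on compilation options):
/// \code
/// #pragma omp taskwait                // -> __kmpc_omp_taskwait(loc, gtid)
/// #pragma omp taskwait depend(in: x)  // -> __kmpc_omp_taskwait_deps_51(...)
/// \endcode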
5623void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
5624 const OMPTaskDataTy &Data) {
5625 if (!CGF.HaveInsertPoint())
5626 return;
5627
5628 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5629 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
5630 OMPBuilder.createTaskwait(Loc: CGF.Builder);
5631 } else {
5632 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5633 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5634 auto &M = CGM.getModule();
5635 Address DependenciesArray = Address::invalid();
5636 llvm::Value *NumOfElements;
5637 std::tie(args&: NumOfElements, args&: DependenciesArray) =
5638 emitDependClause(CGF, Dependencies: Data.Dependences, Loc);
5639 if (!Data.Dependences.empty()) {
5640 llvm::Value *DepWaitTaskArgs[7];
5641 DepWaitTaskArgs[0] = UpLoc;
5642 DepWaitTaskArgs[1] = ThreadID;
5643 DepWaitTaskArgs[2] = NumOfElements;
5644 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
5645 DepWaitTaskArgs[4] = CGF.Builder.getInt32(C: 0);
5646 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
5647 DepWaitTaskArgs[6] =
5648 llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: Data.HasNowaitClause);
5649
5650 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5651
5652 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
5653 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5654 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
5655 // kmp_int32 has_no_wait); if dependence info is specified.
5656 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
5657 M, FnID: OMPRTL___kmpc_omp_taskwait_deps_51),
5658 args: DepWaitTaskArgs);
5659
5660 } else {
5661
5662 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5663 // global_tid);
5664 llvm::Value *Args[] = {UpLoc, ThreadID};
5665 // Ignore return result until untied tasks are supported.
5666 CGF.EmitRuntimeCall(
5667 callee: OMPBuilder.getOrCreateRuntimeFunction(M, FnID: OMPRTL___kmpc_omp_taskwait),
5668 args: Args);
5669 }
5670 }
5671
5672 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo))
5673 Region->emitUntiedSwitch(CGF);
5674}
5675
5676void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5677 OpenMPDirectiveKind InnerKind,
5678 const RegionCodeGenTy &CodeGen,
5679 bool HasCancel) {
5680 if (!CGF.HaveInsertPoint())
5681 return;
5682 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5683 InnerKind != OMPD_critical &&
5684 InnerKind != OMPD_master &&
5685 InnerKind != OMPD_masked);
5686 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5687}
5688
5689namespace {
5690enum RTCancelKind {
5691 CancelNoreq = 0,
5692 CancelParallel = 1,
5693 CancelLoop = 2,
5694 CancelSections = 3,
5695 CancelTaskgroup = 4
5696};
5697} // anonymous namespace
5698
5699static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5700 RTCancelKind CancelKind = CancelNoreq;
5701 if (CancelRegion == OMPD_parallel)
5702 CancelKind = CancelParallel;
5703 else if (CancelRegion == OMPD_for)
5704 CancelKind = CancelLoop;
5705 else if (CancelRegion == OMPD_sections)
5706 CancelKind = CancelSections;
5707 else {
5708 assert(CancelRegion == OMPD_taskgroup);
5709 CancelKind = CancelTaskgroup;
5710 }
5711 return CancelKind;
5712}
5713
5714void CGOpenMPRuntime::emitCancellationPointCall(
5715 CodeGenFunction &CGF, SourceLocation Loc,
5716 OpenMPDirectiveKind CancelRegion) {
5717 if (!CGF.HaveInsertPoint())
5718 return;
5719 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
5720 // global_tid, kmp_int32 cncl_kind);
5721 if (auto *OMPRegionInfo =
5722 dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo)) {
5723 // For 'cancellation point taskgroup', the task region info may not have a
5724 // cancel. This may instead happen in another adjacent task.
5725 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
5726 llvm::Value *Args[] = {
5727 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
5728 CGF.Builder.getInt32(C: getCancellationKind(CancelRegion))};
5729 // Ignore return result until untied tasks are supported.
5730 llvm::Value *Result = CGF.EmitRuntimeCall(
5731 callee: OMPBuilder.getOrCreateRuntimeFunction(
5732 M&: CGM.getModule(), FnID: OMPRTL___kmpc_cancellationpoint),
5733 args: Args);
5734 // if (__kmpc_cancellationpoint()) {
5735 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5736 // exit from construct;
5737 // }
5738 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: ".cancel.exit");
5739 llvm::BasicBlock *ContBB = CGF.createBasicBlock(name: ".cancel.continue");
5740 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Arg: Result);
5741 CGF.Builder.CreateCondBr(Cond: Cmp, True: ExitBB, False: ContBB);
5742 CGF.EmitBlock(BB: ExitBB);
5743 if (CancelRegion == OMPD_parallel)
5744 emitBarrierCall(CGF, Loc, Kind: OMPD_unknown, /*EmitChecks=*/false);
5745 // exit from construct;
5746 CodeGenFunction::JumpDest CancelDest =
5747 CGF.getOMPCancelDestination(Kind: OMPRegionInfo->getDirectiveKind());
5748 CGF.EmitBranchThroughCleanup(Dest: CancelDest);
5749 CGF.EmitBlock(BB: ContBB, /*IsFinished=*/true);
5750 }
5751 }
5752}
5753
5754void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
5755 const Expr *IfCond,
5756 OpenMPDirectiveKind CancelRegion) {
5757 if (!CGF.HaveInsertPoint())
5758 return;
5759 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
5760 // kmp_int32 cncl_kind);
5761 auto &M = CGM.getModule();
5762 if (auto *OMPRegionInfo =
5763 dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo)) {
5764 auto &&ThenGen = [this, &M, Loc, CancelRegion,
5765 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
5766 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5767 llvm::Value *Args[] = {
5768 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
5769 CGF.Builder.getInt32(C: getCancellationKind(CancelRegion))};
5770 // Ignore return result until untied tasks are supported.
5771 llvm::Value *Result = CGF.EmitRuntimeCall(
5772 callee: OMPBuilder.getOrCreateRuntimeFunction(M, FnID: OMPRTL___kmpc_cancel), args: Args);
5773 // if (__kmpc_cancel()) {
5774 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5775 // exit from construct;
5776 // }
5777 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: ".cancel.exit");
5778 llvm::BasicBlock *ContBB = CGF.createBasicBlock(name: ".cancel.continue");
5779 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Arg: Result);
5780 CGF.Builder.CreateCondBr(Cond: Cmp, True: ExitBB, False: ContBB);
5781 CGF.EmitBlock(BB: ExitBB);
5782 if (CancelRegion == OMPD_parallel)
5783 RT.emitBarrierCall(CGF, Loc, Kind: OMPD_unknown, /*EmitChecks=*/false);
5784 // exit from construct;
5785 CodeGenFunction::JumpDest CancelDest =
5786 CGF.getOMPCancelDestination(Kind: OMPRegionInfo->getDirectiveKind());
5787 CGF.EmitBranchThroughCleanup(Dest: CancelDest);
5788 CGF.EmitBlock(BB: ContBB, /*IsFinished=*/true);
5789 };
5790 if (IfCond) {
5791 emitIfClause(CGF, Cond: IfCond, ThenGen,
5792 ElseGen: [](CodeGenFunction &, PrePostActionTy &) {});
5793 } else {
5794 RegionCodeGenTy ThenRCG(ThenGen);
5795 ThenRCG(CGF);
5796 }
5797 }
5798}
5799
5800namespace {
5801/// Cleanup action for uses_allocators support.
5802class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
5803 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
5804
5805public:
5806 OMPUsesAllocatorsActionTy(
5807 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
5808 : Allocators(Allocators) {}
5809 void Enter(CodeGenFunction &CGF) override {
5810 if (!CGF.HaveInsertPoint())
5811 return;
5812 for (const auto &AllocatorData : Allocators) {
5813 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
5814 CGF, Allocator: AllocatorData.first, AllocatorTraits: AllocatorData.second);
5815 }
5816 }
5817 void Exit(CodeGenFunction &CGF) override {
5818 if (!CGF.HaveInsertPoint())
5819 return;
5820 for (const auto &AllocatorData : Allocators) {
5821 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
5822 Allocator: AllocatorData.first);
5823 }
5824 }
5825};
5826} // namespace
5827
5828void CGOpenMPRuntime::emitTargetOutlinedFunction(
5829 const OMPExecutableDirective &D, StringRef ParentName,
5830 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5831 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5832 assert(!ParentName.empty() && "Invalid target entry parent name!");
5833 HasEmittedTargetRegion = true;
5834 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
5835 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
5836 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
5837 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
5838 if (!D.AllocatorTraits)
5839 continue;
5840 Allocators.emplace_back(Args: D.Allocator, Args: D.AllocatorTraits);
5841 }
5842 }
5843 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
5844 CodeGen.setAction(UsesAllocatorAction);
5845 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
5846 IsOffloadEntry, CodeGen);
5847}
5848
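/// Initializes a user-defined allocator from a uses_allocators clause. For
/// example (a sketch), for
/// \code
/// #pragma omp target uses_allocators(my_alloc(my_traits))
/// \endcode
/// this emits, in pseudo code,
/// \code
/// my_alloc = __kmpc_init_allocator(gtid, /*memspace=*/nullptr, num_traits,
///                                  &my_traits);
/// \endcode
/// which emitUsesAllocatorsFini later pairs with __kmpc_destroy_allocator.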
5849void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
5850 const Expr *Allocator,
5851 const Expr *AllocatorTraits) {
5852 llvm::Value *ThreadId = getThreadID(CGF, Loc: Allocator->getExprLoc());
5853 ThreadId = CGF.Builder.CreateIntCast(V: ThreadId, DestTy: CGF.IntTy, /*isSigned=*/true);
5854 // Use default memspace handle.
5855 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
5856 llvm::Value *NumTraits = llvm::ConstantInt::get(
5857 Ty: CGF.IntTy, V: cast<ConstantArrayType>(
5858 Val: AllocatorTraits->getType()->getAsArrayTypeUnsafe())
5859 ->getSize()
5860 .getLimitedValue());
5861 LValue AllocatorTraitsLVal = CGF.EmitLValue(E: AllocatorTraits);
5862 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5863 Addr: AllocatorTraitsLVal.getAddress(), Ty: CGF.VoidPtrPtrTy, ElementTy: CGF.VoidPtrTy);
5864 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, T: CGF.getContext().VoidPtrTy,
5865 BaseInfo: AllocatorTraitsLVal.getBaseInfo(),
5866 TBAAInfo: AllocatorTraitsLVal.getTBAAInfo());
5867 llvm::Value *Traits = Addr.emitRawPointer(CGF);
5868
5869 llvm::Value *AllocatorVal =
5870 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
5871 M&: CGM.getModule(), FnID: OMPRTL___kmpc_init_allocator),
5872 args: {ThreadId, MemSpaceHandle, NumTraits, Traits});
5873 // Store to allocator.
5874 CGF.EmitAutoVarAlloca(var: *cast<VarDecl>(
5875 Val: cast<DeclRefExpr>(Val: Allocator->IgnoreParenImpCasts())->getDecl()));
5876 LValue AllocatorLVal = CGF.EmitLValue(E: Allocator->IgnoreParenImpCasts());
5877 AllocatorVal =
5878 CGF.EmitScalarConversion(Src: AllocatorVal, SrcTy: CGF.getContext().VoidPtrTy,
5879 DstTy: Allocator->getType(), Loc: Allocator->getExprLoc());
5880 CGF.EmitStoreOfScalar(value: AllocatorVal, lvalue: AllocatorLVal);
5881}
5882
5883void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
5884 const Expr *Allocator) {
5885 llvm::Value *ThreadId = getThreadID(CGF, Loc: Allocator->getExprLoc());
5886 ThreadId = CGF.Builder.CreateIntCast(V: ThreadId, DestTy: CGF.IntTy, /*isSigned=*/true);
5887 LValue AllocatorLVal = CGF.EmitLValue(E: Allocator->IgnoreParenImpCasts());
5888 llvm::Value *AllocatorVal =
5889 CGF.EmitLoadOfScalar(lvalue: AllocatorLVal, Loc: Allocator->getExprLoc());
5890 AllocatorVal = CGF.EmitScalarConversion(Src: AllocatorVal, SrcTy: Allocator->getType(),
5891 DstTy: CGF.getContext().VoidPtrTy,
5892 Loc: Allocator->getExprLoc());
5893 (void)CGF.EmitRuntimeCall(
5894 callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(),
5895 FnID: OMPRTL___kmpc_destroy_allocator),
5896 args: {ThreadId, AllocatorVal});
5897}
5898
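/// For example (illustrative), with
/// \code
/// #pragma omp target ompx_attribute(__attribute__((launch_bounds(128, 2))))
/// \endcode
/// the CUDA launch bounds below clamp MaxThreadsVal to 128 and raise
/// MinTeamsVal to at least 2, on top of the num_teams/num_threads analysis.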
5899void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
5900 const OMPExecutableDirective &D, CodeGenFunction &CGF,
5901 int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal,
5902 int32_t &MaxTeamsVal) {
5903
5904 getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal, MaxTeamsVal);
5905 getNumThreadsExprForTargetDirective(CGF, D, UpperBound&: MaxThreadsVal,
5906 /*UpperBoundOnly=*/true);
5907
5908 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
5909 for (auto *A : C->getAttrs()) {
5910 int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
5911 int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
5912 if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(Val: A))
5913 CGM.handleCUDALaunchBoundsAttr(F: nullptr, A: Attr, MaxThreadsVal: &AttrMaxThreadsVal,
5914 MinBlocksVal: &AttrMinBlocksVal, MaxClusterRankVal: &AttrMaxBlocksVal);
5915 else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(Val: A))
5916 CGM.handleAMDGPUFlatWorkGroupSizeAttr(
5917 F: nullptr, A: Attr, /*ReqdWGS=*/nullptr, MinThreadsVal: &AttrMinThreadsVal,
5918 MaxThreadsVal: &AttrMaxThreadsVal);
5919 else
5920 continue;
5921
5922 MinThreadsVal = std::max(a: MinThreadsVal, b: AttrMinThreadsVal);
5923 if (AttrMaxThreadsVal > 0)
5924 MaxThreadsVal = MaxThreadsVal > 0
5925 ? std::min(a: MaxThreadsVal, b: AttrMaxThreadsVal)
5926 : AttrMaxThreadsVal;
5927 MinTeamsVal = std::max(a: MinTeamsVal, b: AttrMinBlocksVal);
5928 if (AttrMaxBlocksVal > 0)
5929 MaxTeamsVal = MaxTeamsVal > 0 ? std::min(a: MaxTeamsVal, b: AttrMaxBlocksVal)
5930 : AttrMaxBlocksVal;
5931 }
5932 }
5933}
5934
5935void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
5936 const OMPExecutableDirective &D, StringRef ParentName,
5937 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5938 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5939
5940 llvm::TargetRegionEntryInfo EntryInfo =
5941 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, BeginLoc: D.getBeginLoc(), ParentName);
5942
5943 CodeGenFunction CGF(CGM, true);
5944 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
5945 [&CGF, &D, &CodeGen](StringRef EntryFnName) {
5946 const CapturedStmt &CS = *D.getCapturedStmt(RegionKind: OMPD_target);
5947
5948 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
5949 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
5950 return CGF.GenerateOpenMPCapturedStmtFunction(S: CS, Loc: D.getBeginLoc());
5951 };
5952
5953 OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateFunctionCallback&: GenerateOutlinedFunction,
5954 IsOffloadEntry, OutlinedFn, OutlinedFnID);
5955
5956 if (!OutlinedFn)
5957 return;
5958
5959 CGM.getTargetCodeGenInfo().setTargetAttributes(D: nullptr, GV: OutlinedFn, M&: CGM);
5960
5961 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
5962 for (auto *A : C->getAttrs()) {
5963 if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(Val: A))
5964 CGM.handleAMDGPUWavesPerEUAttr(F: OutlinedFn, A: Attr);
5965 }
5966 }
5967}
5968
5969/// Checks if the expression is constant or does not have non-trivial function
5970/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr *E) {
5972 // We can skip constant expressions.
5973 // We can skip expressions with trivial calls or simple expressions.
5974 return (E->isEvaluatable(Ctx, AllowSideEffects: Expr::SE_AllowUndefinedBehavior) ||
5975 !E->hasNonTrivialCall(Ctx)) &&
5976 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
5977}
5978
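/// Returns the single non-trivial child statement of \p Body, looking through
/// compound statements and skipping trivially evaluatable expressions,
/// ignorable directives (flush, barrier, taskyield) and declarations without
/// observable effects; returns nullptr if more than one such child exists.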
5979const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
5980 const Stmt *Body) {
5981 const Stmt *Child = Body->IgnoreContainers();
5982 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Val: Child)) {
5983 Child = nullptr;
5984 for (const Stmt *S : C->body()) {
5985 if (const auto *E = dyn_cast<Expr>(Val: S)) {
5986 if (isTrivial(Ctx, E))
5987 continue;
5988 }
5989 // Some of the statements can be ignored.
5990 if (isa<AsmStmt>(Val: S) || isa<NullStmt>(Val: S) || isa<OMPFlushDirective>(Val: S) ||
5991 isa<OMPBarrierDirective>(Val: S) || isa<OMPTaskyieldDirective>(Val: S))
5992 continue;
5993 // Analyze declarations.
5994 if (const auto *DS = dyn_cast<DeclStmt>(Val: S)) {
5995 if (llvm::all_of(Range: DS->decls(), P: [](const Decl *D) {
5996 if (isa<EmptyDecl>(Val: D) || isa<DeclContext>(Val: D) ||
5997 isa<TypeDecl>(Val: D) || isa<PragmaCommentDecl>(Val: D) ||
5998 isa<PragmaDetectMismatchDecl>(Val: D) || isa<UsingDecl>(Val: D) ||
5999 isa<UsingDirectiveDecl>(Val: D) ||
6000 isa<OMPDeclareReductionDecl>(Val: D) ||
6001 isa<OMPThreadPrivateDecl>(Val: D) || isa<OMPAllocateDecl>(Val: D))
6002 return true;
6003 const auto *VD = dyn_cast<VarDecl>(Val: D);
6004 if (!VD)
6005 return false;
6006 return VD->hasGlobalStorage() || !VD->isUsed();
6007 }))
6008 continue;
6009 }
6010 // Found multiple children - cannot get the one child only.
6011 if (Child)
6012 return nullptr;
6013 Child = S;
6014 }
6015 if (Child)
6016 Child = Child->IgnoreContainers();
6017 }
6018 return Child;
6019}
6020
6021const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6022 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
6023 int32_t &MaxTeamsVal) {
6024
6025 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6026 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6027 "Expected target-based executable directive.");
6028 switch (DirectiveKind) {
6029 case OMPD_target: {
6030 const auto *CS = D.getInnermostCapturedStmt();
6031 const auto *Body =
6032 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6033 const Stmt *ChildStmt =
6034 CGOpenMPRuntime::getSingleCompoundChild(Ctx&: CGF.getContext(), Body);
6035 if (const auto *NestedDir =
6036 dyn_cast_or_null<OMPExecutableDirective>(Val: ChildStmt)) {
6037 if (isOpenMPTeamsDirective(DKind: NestedDir->getDirectiveKind())) {
6038 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6039 const Expr *NumTeams =
6040 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6041 if (NumTeams->isIntegerConstantExpr(Ctx: CGF.getContext()))
6042 if (auto Constant =
6043 NumTeams->getIntegerConstantExpr(Ctx: CGF.getContext()))
6044 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6045 return NumTeams;
6046 }
6047 MinTeamsVal = MaxTeamsVal = 0;
6048 return nullptr;
6049 }
      // Any other nested construct, including parallel/simd, implies a
      // single team.
      MinTeamsVal = MaxTeamsVal = 1;
      return nullptr;
6057 }
    // A value of -1 signals that no teams region needs to be emitted.
6059 MinTeamsVal = MaxTeamsVal = -1;
6060 return nullptr;
6061 }
6062 case OMPD_target_teams_loop:
6063 case OMPD_target_teams:
6064 case OMPD_target_teams_distribute:
6065 case OMPD_target_teams_distribute_simd:
6066 case OMPD_target_teams_distribute_parallel_for:
6067 case OMPD_target_teams_distribute_parallel_for_simd: {
6068 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6069 const Expr *NumTeams =
6070 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6071 if (NumTeams->isIntegerConstantExpr(Ctx: CGF.getContext()))
6072 if (auto Constant = NumTeams->getIntegerConstantExpr(Ctx: CGF.getContext()))
6073 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6074 return NumTeams;
6075 }
6076 MinTeamsVal = MaxTeamsVal = 0;
6077 return nullptr;
6078 }
6079 case OMPD_target_parallel:
6080 case OMPD_target_parallel_for:
6081 case OMPD_target_parallel_for_simd:
6082 case OMPD_target_parallel_loop:
6083 case OMPD_target_simd:
6084 MinTeamsVal = MaxTeamsVal = 1;
6085 return nullptr;
6086 case OMPD_parallel:
6087 case OMPD_for:
6088 case OMPD_parallel_for:
6089 case OMPD_parallel_loop:
6090 case OMPD_parallel_master:
6091 case OMPD_parallel_sections:
6092 case OMPD_for_simd:
6093 case OMPD_parallel_for_simd:
6094 case OMPD_cancel:
6095 case OMPD_cancellation_point:
6096 case OMPD_ordered:
6097 case OMPD_threadprivate:
6098 case OMPD_allocate:
6099 case OMPD_task:
6100 case OMPD_simd:
6101 case OMPD_tile:
6102 case OMPD_unroll:
6103 case OMPD_sections:
6104 case OMPD_section:
6105 case OMPD_single:
6106 case OMPD_master:
6107 case OMPD_critical:
6108 case OMPD_taskyield:
6109 case OMPD_barrier:
6110 case OMPD_taskwait:
6111 case OMPD_taskgroup:
6112 case OMPD_atomic:
6113 case OMPD_flush:
6114 case OMPD_depobj:
6115 case OMPD_scan:
6116 case OMPD_teams:
6117 case OMPD_target_data:
6118 case OMPD_target_exit_data:
6119 case OMPD_target_enter_data:
6120 case OMPD_distribute:
6121 case OMPD_distribute_simd:
6122 case OMPD_distribute_parallel_for:
6123 case OMPD_distribute_parallel_for_simd:
6124 case OMPD_teams_distribute:
6125 case OMPD_teams_distribute_simd:
6126 case OMPD_teams_distribute_parallel_for:
6127 case OMPD_teams_distribute_parallel_for_simd:
6128 case OMPD_target_update:
6129 case OMPD_declare_simd:
6130 case OMPD_declare_variant:
6131 case OMPD_begin_declare_variant:
6132 case OMPD_end_declare_variant:
6133 case OMPD_declare_target:
6134 case OMPD_end_declare_target:
6135 case OMPD_declare_reduction:
6136 case OMPD_declare_mapper:
6137 case OMPD_taskloop:
6138 case OMPD_taskloop_simd:
6139 case OMPD_master_taskloop:
6140 case OMPD_master_taskloop_simd:
6141 case OMPD_parallel_master_taskloop:
6142 case OMPD_parallel_master_taskloop_simd:
6143 case OMPD_requires:
6144 case OMPD_metadirective:
6145 case OMPD_unknown:
6146 break;
6147 default:
6148 break;
6149 }
6150 llvm_unreachable("Unexpected directive kind.");
6151}
6152
6153llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6154 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6155 assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6156 "Clauses associated with the teams directive expected to be emitted "
6157 "only for the host!");
6158 CGBuilderTy &Bld = CGF.Builder;
6159 int32_t MinNT = -1, MaxNT = -1;
6160 const Expr *NumTeams =
6161 getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal&: MinNT, MaxTeamsVal&: MaxNT);
6162 if (NumTeams != nullptr) {
6163 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6164
6165 switch (DirectiveKind) {
6166 case OMPD_target: {
6167 const auto *CS = D.getInnermostCapturedStmt();
6168 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6169 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6170 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(E: NumTeams,
6171 /*IgnoreResultAssign*/ true);
6172 return Bld.CreateIntCast(V: NumTeamsVal, DestTy: CGF.Int32Ty,
6173 /*isSigned=*/true);
6174 }
6175 case OMPD_target_teams:
6176 case OMPD_target_teams_distribute:
6177 case OMPD_target_teams_distribute_simd:
6178 case OMPD_target_teams_distribute_parallel_for:
6179 case OMPD_target_teams_distribute_parallel_for_simd: {
6180 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6181 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(E: NumTeams,
6182 /*IgnoreResultAssign*/ true);
6183 return Bld.CreateIntCast(V: NumTeamsVal, DestTy: CGF.Int32Ty,
6184 /*isSigned=*/true);
6185 }
6186 default:
6187 break;
6188 }
6189 }
6190
  assert(MinNT == MaxNT && "Num teams ranges require handling here.");
6192 return llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: MinNT);
6193}
6194
/// Check for a num threads constant value (stored in \p UpperBound), or
/// expression (stored in \p E). If the value is conditional (via an
/// if-clause), store the condition in \p CondVal. If \p E or \p CondVal is
/// nullptr, the corresponding expression evaluation is not performed.
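/// For example (illustrative), for a target region whose single child is
/// \code
/// #pragma omp parallel if(c) num_threads(4)
/// \endcode
/// UpperBound becomes 4 and *CondVal receives the emitted value of 'c'; the
/// caller later computes <c> ? 4 : 1.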
6199static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6200 const Expr **E, int32_t &UpperBound,
6201 bool UpperBoundOnly, llvm::Value **CondVal) {
6202 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6203 Ctx&: CGF.getContext(), Body: CS->getCapturedStmt());
6204 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child);
6205 if (!Dir)
6206 return;
6207
6208 if (isOpenMPParallelDirective(DKind: Dir->getDirectiveKind())) {
    // Handle the if clause. If present, the number of threads is calculated
    // as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6211 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6212 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6213 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6214 const OMPIfClause *IfClause = nullptr;
6215 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6216 if (C->getNameModifier() == OMPD_unknown ||
6217 C->getNameModifier() == OMPD_parallel) {
6218 IfClause = C;
6219 break;
6220 }
6221 }
6222 if (IfClause) {
6223 const Expr *CondExpr = IfClause->getCondition();
6224 bool Result;
6225 if (CondExpr->EvaluateAsBooleanCondition(Result, Ctx: CGF.getContext())) {
6226 if (!Result) {
6227 UpperBound = 1;
6228 return;
6229 }
6230 } else {
6231 CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
6232 if (const auto *PreInit =
6233 cast_or_null<DeclStmt>(Val: IfClause->getPreInitStmt())) {
6234 for (const auto *I : PreInit->decls()) {
6235 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6236 CGF.EmitVarDecl(D: cast<VarDecl>(Val: *I));
6237 } else {
6238 CodeGenFunction::AutoVarEmission Emission =
6239 CGF.EmitAutoVarAlloca(var: cast<VarDecl>(Val: *I));
6240 CGF.EmitAutoVarCleanups(emission: Emission);
6241 }
6242 }
6243 *CondVal = CGF.EvaluateExprAsBool(E: CondExpr);
6244 }
6245 }
6246 }
6247 }
    // Check the value of the num_threads clause only if the if clause was not
    // specified or did not evaluate to false.
6250 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6251 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6252 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6253 const auto *NumThreadsClause =
6254 Dir->getSingleClause<OMPNumThreadsClause>();
6255 const Expr *NTExpr = NumThreadsClause->getNumThreads();
      if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
          UpperBound =
              UpperBound > 0
                  ? std::min(UpperBound,
                             static_cast<int32_t>(Constant->getZExtValue()))
                  : static_cast<int32_t>(Constant->getZExtValue());
      // If we haven't found an upper bound, remember we saw a thread limiting
      // clause.
6265 if (UpperBound == -1)
6266 UpperBound = 0;
6267 if (!E)
6268 return;
6269 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6270 if (const auto *PreInit =
6271 cast_or_null<DeclStmt>(Val: NumThreadsClause->getPreInitStmt())) {
6272 for (const auto *I : PreInit->decls()) {
6273 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6274 CGF.EmitVarDecl(D: cast<VarDecl>(Val: *I));
6275 } else {
6276 CodeGenFunction::AutoVarEmission Emission =
6277 CGF.EmitAutoVarAlloca(var: cast<VarDecl>(Val: *I));
6278 CGF.EmitAutoVarCleanups(emission: Emission);
6279 }
6280 }
6281 }
6282 *E = NTExpr;
6283 }
6284 return;
6285 }
6286 if (isOpenMPSimdDirective(DKind: Dir->getDirectiveKind()))
6287 UpperBound = 1;
6288}
6289
6290const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6291 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6292 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6293 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6294 "Clauses associated with the teams directive expected to be emitted "
6295 "only for the host!");
6296 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6297 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6298 "Expected target-based executable directive.");
6299
6300 const Expr *NT = nullptr;
6301 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6302
6303 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
    if (E->isIntegerConstantExpr(CGF.getContext())) {
      if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
        UpperBound = UpperBound > 0
                         ? std::min(UpperBound,
                                    int32_t(Constant->getZExtValue()))
                         : int32_t(Constant->getZExtValue());
    }
    // If we haven't found an upper bound, remember we saw a thread limiting
    // clause.
6312 if (UpperBound == -1)
6313 UpperBound = 0;
6314 if (EPtr)
6315 *EPtr = E;
6316 };
6317
6318 auto ReturnSequential = [&]() {
6319 UpperBound = 1;
6320 return NT;
6321 };
6322
6323 switch (DirectiveKind) {
6324 case OMPD_target: {
6325 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6326 getNumThreads(CGF, CS, E: NTPtr, UpperBound, UpperBoundOnly, CondVal);
6327 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6328 Ctx&: CGF.getContext(), Body: CS->getCapturedStmt());
    // TODO: The standard is not clear on how to resolve two thread_limit
    // clauses; pick the teams one if present, otherwise the target one.
6331 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6332 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child)) {
6333 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6334 ThreadLimitClause = TLC;
6335 if (ThreadLimitExpr) {
6336 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6337 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6338 CodeGenFunction::LexicalScope Scope(
6339 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6340 if (const auto *PreInit =
6341 cast_or_null<DeclStmt>(Val: ThreadLimitClause->getPreInitStmt())) {
6342 for (const auto *I : PreInit->decls()) {
6343 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6344 CGF.EmitVarDecl(D: cast<VarDecl>(Val: *I));
6345 } else {
6346 CodeGenFunction::AutoVarEmission Emission =
6347 CGF.EmitAutoVarAlloca(var: cast<VarDecl>(Val: *I));
6348 CGF.EmitAutoVarCleanups(emission: Emission);
6349 }
6350 }
6351 }
6352 }
6353 }
6354 }
6355 if (ThreadLimitClause)
6356 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6357 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child)) {
6358 if (isOpenMPTeamsDirective(DKind: Dir->getDirectiveKind()) &&
6359 !isOpenMPDistributeDirective(DKind: Dir->getDirectiveKind())) {
6360 CS = Dir->getInnermostCapturedStmt();
6361 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6362 Ctx&: CGF.getContext(), Body: CS->getCapturedStmt());
6363 Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child);
6364 }
6365 if (Dir && isOpenMPParallelDirective(DKind: Dir->getDirectiveKind())) {
6366 CS = Dir->getInnermostCapturedStmt();
6367 getNumThreads(CGF, CS, E: NTPtr, UpperBound, UpperBoundOnly, CondVal);
6368 } else if (Dir && isOpenMPSimdDirective(DKind: Dir->getDirectiveKind()))
6369 return ReturnSequential();
6370 }
6371 return NT;
6372 }
6373 case OMPD_target_teams: {
6374 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6375 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6376 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6377 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6378 }
6379 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6380 getNumThreads(CGF, CS, E: NTPtr, UpperBound, UpperBoundOnly, CondVal);
6381 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6382 Ctx&: CGF.getContext(), Body: CS->getCapturedStmt());
6383 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child)) {
6384 if (Dir->getDirectiveKind() == OMPD_distribute) {
6385 CS = Dir->getInnermostCapturedStmt();
6386 getNumThreads(CGF, CS, E: NTPtr, UpperBound, UpperBoundOnly, CondVal);
6387 }
6388 }
6389 return NT;
6390 }
6391 case OMPD_target_teams_distribute:
6392 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6393 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6394 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6395 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6396 }
6397 getNumThreads(CGF, CS: D.getInnermostCapturedStmt(), E: NTPtr, UpperBound,
6398 UpperBoundOnly, CondVal);
6399 return NT;
6400 case OMPD_target_teams_loop:
6401 case OMPD_target_parallel_loop:
6402 case OMPD_target_parallel:
6403 case OMPD_target_parallel_for:
6404 case OMPD_target_parallel_for_simd:
6405 case OMPD_target_teams_distribute_parallel_for:
6406 case OMPD_target_teams_distribute_parallel_for_simd: {
6407 if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6408 const OMPIfClause *IfClause = nullptr;
6409 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6410 if (C->getNameModifier() == OMPD_unknown ||
6411 C->getNameModifier() == OMPD_parallel) {
6412 IfClause = C;
6413 break;
6414 }
6415 }
6416 if (IfClause) {
6417 const Expr *Cond = IfClause->getCondition();
6418 bool Result;
6419 if (Cond->EvaluateAsBooleanCondition(Result, Ctx: CGF.getContext())) {
6420 if (!Result)
6421 return ReturnSequential();
6422 } else {
6423 CodeGenFunction::RunCleanupsScope Scope(CGF);
6424 *CondVal = CGF.EvaluateExprAsBool(E: Cond);
6425 }
6426 }
6427 }
6428 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6429 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6430 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6431 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6432 }
6433 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6434 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6435 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6436 CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6437 return NumThreadsClause->getNumThreads();
6438 }
6439 return NT;
6440 }
6441 case OMPD_target_teams_distribute_simd:
6442 case OMPD_target_simd:
6443 return ReturnSequential();
6444 default:
6445 break;
6446 }
6447 llvm_unreachable("Unsupported directive kind.");
6448}
6449
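/// Computes the effective thread count for a target kernel, conceptually
/// \code
/// threads = min(thread_limit, <cond> ? (<numthreads> ? <numthreads> : 0) : 1)
/// \endcode
/// where 0 means "let the runtime choose" (a sketch of the logic below).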
6450llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6451 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6452 llvm::Value *NumThreadsVal = nullptr;
6453 llvm::Value *CondVal = nullptr;
6454 llvm::Value *ThreadLimitVal = nullptr;
6455 const Expr *ThreadLimitExpr = nullptr;
6456 int32_t UpperBound = -1;
6457
6458 const Expr *NT = getNumThreadsExprForTargetDirective(
6459 CGF, D, UpperBound, /* UpperBoundOnly */ false, CondVal: &CondVal,
6460 ThreadLimitExpr: &ThreadLimitExpr);
6461
6462 // Thread limit expressions are used below, emit them.
6463 if (ThreadLimitExpr) {
6464 ThreadLimitVal =
6465 CGF.EmitScalarExpr(E: ThreadLimitExpr, /*IgnoreResultAssign=*/true);
6466 ThreadLimitVal = CGF.Builder.CreateIntCast(V: ThreadLimitVal, DestTy: CGF.Int32Ty,
6467 /*isSigned=*/false);
6468 }
6469
  // Generate the num threads expression.
6471 if (UpperBound == 1) {
6472 NumThreadsVal = CGF.Builder.getInt32(C: UpperBound);
6473 } else if (NT) {
6474 NumThreadsVal = CGF.EmitScalarExpr(E: NT, /*IgnoreResultAssign=*/true);
6475 NumThreadsVal = CGF.Builder.CreateIntCast(V: NumThreadsVal, DestTy: CGF.Int32Ty,
6476 /*isSigned=*/false);
6477 } else if (ThreadLimitVal) {
    // If we do not have a num threads value but a thread limit, replace the
    // former with the latter. We have already handled the thread limit
    // expression.
6480 NumThreadsVal = ThreadLimitVal;
6481 ThreadLimitVal = nullptr;
6482 } else {
6483 // Default to "0" which means runtime choice.
6484 assert(!ThreadLimitVal && "Default not applicable with thread limit value");
6485 NumThreadsVal = CGF.Builder.getInt32(C: 0);
6486 }
6487
  // Handle the if clause. If present, the number of threads is calculated as
  // <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6490 if (CondVal) {
6491 CodeGenFunction::RunCleanupsScope Scope(CGF);
6492 NumThreadsVal = CGF.Builder.CreateSelect(C: CondVal, True: NumThreadsVal,
6493 False: CGF.Builder.getInt32(C: 1));
6494 }
6495
  // If both the thread limit and the num threads expressions were present,
  // take the minimum.
6498 if (ThreadLimitVal) {
6499 NumThreadsVal = CGF.Builder.CreateSelect(
6500 C: CGF.Builder.CreateICmpULT(LHS: ThreadLimitVal, RHS: NumThreadsVal),
6501 True: ThreadLimitVal, False: NumThreadsVal);
6502 }
6503
6504 return NumThreadsVal;
6505}
6506
6507namespace {
6508LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6509
6510// Utility to handle information from clauses associated with a given
6511// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6512// It provides a convenient interface to obtain the information and generate
6513// code for that information.
6514class MappableExprsHandler {
6515public:
6516 /// Get the offset of the OMP_MAP_MEMBER_OF field.
6517 static unsigned getFlagMemberOffset() {
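    // Count the trailing zero bits of the OMP_MAP_MEMBER_OF mask; the
    // member-of index is encoded in the flag bits starting at this offset.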
6518 unsigned Offset = 0;
6519 for (uint64_t Remain =
6520 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
6521 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
6522 !(Remain & 1); Remain = Remain >> 1)
6523 Offset++;
6524 return Offset;
6525 }

  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };

  using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
  using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
  using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
  using MapNonContiguousArrayTy =
      llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;

  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types, user-defined
  /// mappers, and non-contiguous information.
  struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
    MapExprsArrayTy Exprs;
    MapValueDeclsArrayTy Mappers;
    MapValueDeclsArrayTy DevicePtrDecls;

    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
                            CurInfo.DevicePtrDecls.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
    }
  };

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };
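  // For example, for 'struct {int a, b, c;} s;' with 'map(s.a) map(s.c)',
  // LowestElem records {0, &s.a} and HighestElem {2, &s.c}, so the combined
  // entry covers the contiguous range [&s.a, &s.c + 1).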

private:
  /// Information about a single mappable expression: its components, map type
  /// and modifiers, and how (if at all) a device pointer has to be returned.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between device addr declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      HasDevAddrsMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;

  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
      QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and the lower
      // bound is not specified either, we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizeof(base) - lb * sizeof(elem), clamped to zero below.
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
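  // For example, for 'double *p' and the section 'p[2:n]' this returns the
  // runtime value n * sizeof(double); for 'a[lb:]' on 'int a[10]' it returns
  // max(sizeof(a) - lb * sizeof(int), 0).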

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
                   : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library,
      // i.e. if we don't pass any bits, alloc/release is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these two
      // type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    if (IsNonContiguous)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
    return Bits;
  }
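  // For example, 'map(always, close, tofrom: x)' yields
  // OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | OMP_MAP_CLOSE, plus
  // PTR_AND_OBJ and/or TARGET_PARAM when requested by the caller.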

  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<ArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSExtSize() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have a size greater than 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }
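  // E.g. for 'int a[10]', 'a[0:1]' is not final (its length folds to 1),
  // while 'a[0:n]' is final because n cannot be proved to be one at compile
  // time.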

  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapCombinedInfoTy &CombinedInfo,
      MapCombinedInfoTy &StructBaseCombinedInfo,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit, bool GenerateAllInfoForClauses,
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
      const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = std::nullopt,
      bool AreBothBasePtrAndPteeMapped = false) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    // int **a = &i;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    //   int &ref;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
    // in unified shared memory mode or for local pointers
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map((*a)[0:3])
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
    // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
    //
    // map(**a)
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
    // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(to: s.ref)
    // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
    // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)
    //
    // map(p, p[:100])
    // ===> map(p[:100])
    // &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    bool FirstPointerInComplexData = false;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (AreBothBasePtrAndPteeMapped && std::next(I) == CE)
      return;
    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        // No need to generate individual map information for the pointer; it
        // can be associated with the combined storage if shared memory mode is
        // active or the base declaration is not a global variable.
        const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
        if (!AreBothBasePtrAndPteeMapped &&
            (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
             !VD || VD->hasLocalStorage()))
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        else
          FirstPointerInComplexData = true;
        ++I;
      }
    }
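    // E.g. for 'map(p[0:10])' with a local 'float *p', BP is replaced here by
    // the loaded pointer value so subsequent components address the pointee;
    // for a global pointer without unified shared memory the load is deferred
    // (FirstPointerInComplexData) so the pointer can be combined with its
    // pointee into a single entry.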

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    // (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not a member of struct s, so it should
    // not be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
    uint64_t DimSize = 1;

    bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
    bool IsPrevMemberReference = false;

    // We need to check if we will be encountering any MEs. If we do not
    // encounter any ME expression it means we will be mapping the whole struct.
    // In that case we need to skip adding an entry for the struct to the
    // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
    // list only when generating all info for clauses.
    bool IsMappingWholeStruct = true;
    if (!GenerateAllInfoForClauses) {
      IsMappingWholeStruct = false;
    } else {
      for (auto TempI = I; TempI != CE; ++TempI) {
        const MemberExpr *PossibleME =
            dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
        if (PossibleME) {
          IsMappingWholeStruct = false;
          break;
        }
      }
    }
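    // E.g. 'map(s)' has no MemberExpr in its component list, so the struct
    // entry lands in StructBaseCombinedInfo; 'map(s.x)' does, so its entries
    // go to CombinedInfo as usual.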

    for (; I != CE; ++I) {
      // If the current component is a member of a struct (parent struct),
      // mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME) {
          ShouldBeMemberOf = true;
          // Do not emit as complex pointer if this is actually not an
          // array-like expression.
          if (FirstPointerInComplexData) {
            QualType Ty = std::prev(I)
                              ->getAssociatedDeclaration()
                              ->getType()
                              .getNonReferenceType();
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
            FirstPointerInComplexData = false;
          }
        }
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer, or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section is one whose length can't be proved to be one.
      // If the map item is non-contiguous then we don't treat any array
      // section as a final array section.
      bool IsFinalArraySection =
          !IsNonContiguous &&
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // If we have a declaration for the mapping use that, otherwise use
      // the base declaration of the map clause.
      const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
                                     ? I->getAssociatedDeclaration()
                                     : BaseDecl;
      MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
                                               : MapExpr;

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && ArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
                               MapDecl &&
                               MapDecl->getType()->isLValueReferenceType();
      bool IsNonDerefPointer = IsPointer &&
                               !(UO && UO->getOpcode() != UO_Deref) && !BO &&
                               !IsNonContiguous;

      if (OASE)
        ++DimSize;

      if (Next == CE || IsMemberReference || IsNonDerefPointer ||
          IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB = Address::invalid();
        Address LowestElem = Address::invalid();
        auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
                                       const MemberExpr *E) {
          const Expr *BaseExpr = E->getBase();
          // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
          // scalar.
          LValue BaseLV;
          if (E->isArrow()) {
            LValueBaseInfo BaseInfo;
            TBAAAccessInfo TBAAInfo;
            Address Addr =
                CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
            QualType PtrTy = BaseExpr->getType()->getPointeeType();
            BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
          } else {
            BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
          }
          return BaseLV;
        };
        if (OAShE) {
          LowestElem = LB =
              Address(CGF.EmitScalarExpr(OAShE->getBase()),
                      CGF.ConvertTypeForMem(
                          OAShE->getBase()->getType()->getPointeeType()),
                      CGF.getContext().getTypeAlignInChars(
                          OAShE->getBase()->getType()));
        } else if (IsMemberReference) {
          const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
          LValue BaseLVal = EmitMemberExprBase(CGF, ME);
          LowestElem = CGF.EmitLValueForFieldInitialization(
                              BaseLVal, cast<FieldDecl>(MapDecl))
                           .getAddress();
          LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
                   .getAddress();
        } else {
          LowestElem = LB =
              CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                  .getAddress();
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointerOrAddr =
            EncounteredME &&
            (((IsPointer || ForDeviceAddr) &&
              I->getAssociatedExpression() == EncounteredME) ||
             (IsPrevMemberReference && !IsPointer) ||
             (IsMemberReference && Next != CE &&
              !Next->getAssociatedExpression()->getType()->isPointerType()));
        if (!OverlappedElements.empty() && Next == CE) {
          // Handle the base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on
          // the device.
          PartialStruct.LowestElem = {0, LowestElem};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          PartialStruct.LB = LB;
          assert(
              PartialStruct.PreliminaryMapData.BasePointers.empty() &&
              "Overlapped elements must be used only once for the variable.");
          std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false, IsNonContiguous);
          llvm::Value *Size = nullptr;
          // Do a bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
                const auto *FD = dyn_cast<FieldDecl>(VD);
                if (FD && FD->getType()->isLValueReferenceType()) {
                  const auto *ME =
                      cast<MemberExpr>(MC.getAssociatedExpression());
                  LValue BaseLVal = EmitMemberExprBase(CGF, ME);
                  ComponentLB =
                      CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
                          .getAddress();
                } else {
                  ComponentLB =
                      CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                          .getAddress();
                }
                llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
                llvm::Value *LBPtr = LB.emitRawPointer(CGF);
                Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr,
                                                 LBPtr);
                break;
              }
            }
            assert(Size && "Failed to determine structure size");
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
            CombinedInfo.DevicePtrDecls.push_back(nullptr);
            CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
            CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.Types.push_back(Flags);
            CombinedInfo.Mappers.push_back(nullptr);
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                      : 1);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
          CombinedInfo.DevicePtrDecls.push_back(nullptr);
          CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
          CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
          llvm::Value *LBPtr = LB.emitRawPointer(CGF);
          Size = CGF.Builder.CreatePtrDiff(
              CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
              LBPtr);
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);
          break;
        }
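        // E.g. with 'map(s) map(from: s.p[0:10])' the section s.p[0:10]
        // overlaps the enclosing map of s: the loop above emits plain bitcopy
        // entries for the pieces of s around s.p, while the previously
        // collected entries are stashed in PreliminaryMapData.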
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        // Skip adding an entry in the CurInfo of this combined entry if the
        // whole struct is currently being mapped. The struct needs to be added
        // in the first position before any data internal to the struct is
        // being mapped.
        if (!IsMemberPointerOrAddr ||
            (Next == CE && MapType != OMPC_MAP_unknown)) {
          if (!IsMappingWholeStruct) {
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
            CombinedInfo.DevicePtrDecls.push_back(nullptr);
            CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
            CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                      : 1);
          } else {
            StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            StructBaseCombinedInfo.BasePointers.push_back(
                BP.emitRawPointer(CGF));
            StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
            StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
            StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
            StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
                IsNonContiguous ? DimSize : 1);
          }

          // If Mapper is valid, the last component inherits the mapper.
          bool HasMapper = Mapper && Next == CE;
          if (!IsMappingWholeStruct)
            CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
          else
            StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
                                                               : nullptr);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags =
              getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                             !IsExpressionFirstInfo || RequiresReference ||
                                 FirstPointerInComplexData || IsMemberReference,
                             AreBothBasePtrAndPteeMapped ||
                                 (IsCaptureFirstInfo && !RequiresReference),
                             IsNonContiguous);

          if (!IsExpressionFirstInfo || IsMemberReference) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer || (IsMemberReference && Next != CE))
              Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                         OpenMPOffloadMappingFlags::OMP_MAP_FROM |
                         OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
                         OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
                         OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          if (!IsMappingWholeStruct)
            CombinedInfo.Types.push_back(Flags);
          else
            StructBaseCombinedInfo.Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct.
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress();
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LowestElem};
            }
            PartialStruct.Base = BP;
            PartialStruct.LB = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress();
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LowestElem};
            }
          }
        }

        // Need to emit a combined struct entry for array sections.
        if (IsFinalArraySection || IsNonContiguous)
          PartialStruct.IsArraySection = true;

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = IsMemberReference ? LowestElem : LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        FirstPointerInComplexData = false;
        IsPrevMemberReference = IsMemberReference;
      } else if (FirstPointerInComplexData) {
        QualType Ty = Components.rbegin()
                          ->getAssociatedDeclaration()
                          ->getType()
                          .getNonReferenceType();
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        FirstPointerInComplexData = false;
      }
    }
    // If we ran through the whole component list without encountering a
    // member expression, allocate space for the whole record.
    if (!EncounteredME)
      PartialStruct.HasCompleteRecord = true;

    if (!IsNonContiguous)
      return;

    const ASTContext &Context = CGF.getContext();

    // To support strides in array sections, we initialize the first
    // dimension's size as 1, its offset as 0, and its count as 1.
    MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
    MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    MapValuesArrayTy CurStrides;
    MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    uint64_t ElementTypeSize;

    // Collect the size information for each dimension and get the element size
    // as the first stride. For example, for `int arr[10][10]`, DimSizes should
    // be [10, 10] and the first stride is 4 bytes.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();
      const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
      auto *CAT = Context.getAsConstantArrayType(Ty);
      auto *VAT = Context.getAsVariableArrayType(Ty);

      // We need all the dimension sizes except for the last dimension.
      assert((VAT || CAT || &Component == &*Components.begin()) &&
             "Should be either ConstantArray or VariableArray if not the "
             "first Component");

      // Get the element size if CurStrides is empty.
      if (CurStrides.empty()) {
        const Type *ElementType = nullptr;
        if (CAT)
          ElementType = CAT->getElementType().getTypePtr();
        else if (VAT)
          ElementType = VAT->getElementType().getTypePtr();
        else
          assert(&Component == &*Components.begin() &&
                 "Only expect pointer (non CAT or VAT) when this is the "
                 "first Component");
        // If ElementType is null, then it means the base is a pointer
        // (neither CAT nor VAT) and we'll attempt to get ElementType again
        // on the next iteration.
        if (ElementType) {
          // For the case of having a pointer as the base, we need to remove
          // one level of indirection.
          if (&Component != &*Components.begin())
            ElementType = ElementType->getPointeeOrArrayElementType();
          ElementTypeSize =
              Context.getTypeSizeInChars(ElementType).getQuantity();
          CurStrides.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
        }
      }
      // Get the dimension value, except for the last dimension since we don't
      // need it.
      if (DimSizes.size() < Components.size() - 1) {
        if (CAT)
          DimSizes.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
        else if (VAT)
          DimSizes.push_back(CGF.Builder.CreateIntCast(
              CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
              /*IsSigned=*/false));
      }
    }

    // Skip the dummy dimension since we already have its information.
    auto *DI = DimSizes.begin() + 1;
    // Running product of dimension sizes.
    llvm::Value *DimProd =
        llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);

    // Collect info for non-contiguous maps. Notice that offset, count, and
    // stride are only meaningful for an array section, so we insert a null
    // for anything other than an array section. Also, the sizes of offset,
    // count, and stride are not the same as those of pointers, base_pointers,
    // sizes, or dims; instead they equal the number of non-contiguous
    // declarations in the target update to/from clause.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();

      if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
        llvm::Value *Offset = CGF.Builder.CreateIntCast(
            CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
            /*isSigned=*/false);
        CurOffsets.push_back(Offset);
        CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
        CurStrides.push_back(CurStrides.back());
        continue;
      }

      const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      // Offset
      const Expr *OffsetExpr = OASE->getLowerBound();
      llvm::Value *Offset = nullptr;
      if (!OffsetExpr) {
        // If the offset is absent, then we just set it to zero.
        Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
      } else {
        Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
                                           CGF.Int64Ty,
                                           /*isSigned=*/false);
      }
      CurOffsets.push_back(Offset);

      // Count
      const Expr *CountExpr = OASE->getLength();
      llvm::Value *Count = nullptr;
      if (!CountExpr) {
        // In Clang, once a higher dimension is an array section, we construct
        // all the lower dimensions as array sections. However, for a case like
        // arr[0:2][2], Clang constructs the inner dimension as an array
        // section even though it is not actually in array-section form
        // according to the spec.
        if (!OASE->getColonLocFirst().isValid() &&
            !OASE->getColonLocSecond().isValid()) {
          Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
        } else {
          // OpenMP 5.0, 2.1.5 Array Sections, Description.
          // When the length is absent it defaults to ⌈(size −
          // lower-bound)/stride⌉, where size is the size of the array
          // dimension.
          const Expr *StrideExpr = OASE->getStride();
          llvm::Value *Stride =
              StrideExpr
                  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                              CGF.Int64Ty, /*isSigned=*/false)
                  : nullptr;
          if (Stride)
            Count = CGF.Builder.CreateUDiv(
                CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
          else
            Count = CGF.Builder.CreateNUWSub(*DI, Offset);
        }
      } else {
        Count = CGF.EmitScalarExpr(CountExpr);
      }
      Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
      CurCounts.push_back(Count);

      // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
      // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
      //        Offset  Count  Stride
      //    D0    0       1       4    (int)  <- dummy dimension
      //    D1    0       2       8    (2 * (1) * 4)
      //    D2    1       2      20    (1 * (1 * 5) * 4)
      //    D3    0       2     200    (2 * (1 * 5 * 5) * 4)
      const Expr *StrideExpr = OASE->getStride();
      llvm::Value *Stride =
          StrideExpr
              ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                          CGF.Int64Ty, /*isSigned=*/false)
              : nullptr;
      DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
      if (Stride)
        CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
      else
        CurStrides.push_back(DimProd);
      if (DI != DimSizes.end())
        ++DI;
    }

    CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
    CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
    CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
  }
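  // For example, for 'int arr[8]' and 'target update to(arr[0:2:2])' this
  // records, per dimension (including the dummy one), offsets {0, 0},
  // counts {1, 2}, and byte strides {4, 8}, assuming 4-byte ints.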

  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
  OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first-private variable captured by reference will use only the
    // 'private ptr' and 'map to' flags. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return OpenMPOffloadMappingFlags::OMP_MAP_TO |
               OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
      return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
             OpenMPOffloadMappingFlags::OMP_MAP_TO;
    }
    auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
    if (I != LambdasMap.end())
      // For map(to: lambda): use the user-specified map type.
      return getMapTypeBits(
          I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
          /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),
          /*AddPtrFlag=*/false,
          /*AddIsTargetParamFlag=*/false,
          /*isNonContiguous=*/false);
    return OpenMPOffloadMappingFlags::OMP_MAP_TO |
           OpenMPOffloadMappingFlags::OMP_MAP_FROM;
  }
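  // E.g. a firstprivate 'int *p' captured on 'target' yields TO | PTR_AND_OBJ,
  // a firstprivate 'int x' yields PRIVATE | TO, and anything else defaults to
  // TO | FROM.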

  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill in the bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;

      QualType BaseTy = I.getType();
      const auto *Base = BaseTy->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
          CGF.getContext()
              .getASTRecordLayout(Base)
              .getNonVirtualSize()
              .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in the virtual bases.
    for (const auto &I : RD->vbases()) {
      QualType BaseTy = I.getType();
      // Ignore empty bases.
      if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
        continue;

      const auto *Base = BaseTy->getAsCXXRecordDecl();
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() &&
          !isEmptyFieldForLayout(CGF.getContext(), Field)) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }
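  // E.g. for 'struct D : B { int x; };' this appends B's fields first (via
  // the recursive AsBase call) and then x, following the LLVM struct layout
  // rather than declaration order.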

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.find(D);
          if (It == Info.end())
            It = Info
                     .insert(std::make_pair(
                         D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                     .first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };
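    // E.g. 'map(present, to: x) map(alloc: y)' files x under Present and y
    // under Allocs, keeping all component lists for one declaration together
    // so the map flags can be combined correctly.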

    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMapTypeModifiers(),
                             OMPC_MAP_MODIFIER_present))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), std::nullopt,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
                std::nullopt, C->getMotionModifiers(),
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                *EI);
        ++EI;
      }
    }
7881
7882 // Look at the use_device_ptr and use_device_addr clauses information and
7883 // mark the existing map entries as such. If there is no map information for
7884 // an entry in the use_device_ptr and use_device_addr list, we create one
7885 // with map type 'alloc' and zero size section. It is the user fault if that
7886 // was not mapped before. If there is no map information and the pointer is
7887 // a struct member, then we defer the emission of that entry until the whole
7888 // struct has been processed.
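    // For example, given 'use_device_ptr(P)' with no accompanying map of 'P'
    // on the same construct, an entry equivalent to 'map(alloc: P[0:0])' is
    // created, and the runtime is expected to resolve it against a mapping of
    // 'P' that the user established earlier.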
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDeviceDataCombinedInfo;

    auto &&UseDeviceDataCombinedInfoGen =
        [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
                                     CodeGenFunction &CGF, bool IsDevAddr) {
          UseDeviceDataCombinedInfo.Exprs.push_back(VD);
          UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
          UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
          UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
              IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
          UseDeviceDataCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDeviceDataCombinedInfo.Types.push_back(
              OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
          UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
        };

    auto &&MapInfoGen =
        [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
         &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
                   OMPClauseMappableExprCommon::MappableExprComponentListRef
                       Components,
                   bool IsImplicit, bool IsDevAddr) {
          // We didn't find any match in our map information - generate a zero
          // size array section - if the pointer is a struct member we defer
          // this action until the whole struct has been processed.
          if (isa<MemberExpr>(IE)) {
            // Insert the pointer into Info to be processed by
            // generateInfoForComponentList. Because it is a member pointer
            // without a pointee, no entry will be generated for it, therefore
            // we need to generate one after the whole struct has been
            // processed. Nonetheless, generateInfoForComponentList must be
            // called to take the pointer into account for the calculation of
            // the range of the partial struct.
            InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,
                    std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,
                    nullptr, nullptr, IsDevAddr);
            DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
          } else {
            llvm::Value *Ptr;
            if (IsDevAddr) {
              if (IE->isGLValue())
                Ptr = CGF.EmitLValue(IE).getPointer(CGF);
              else
                Ptr = CGF.EmitScalarExpr(IE);
            } else {
              Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
            }
            UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
          }
        };

    auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
                                    const Expr *IE, bool IsDevAddr) -> bool {
      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it. If found,
      // return true.
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
      if (It != Info.end()) {
        bool Found = false;
        for (auto &Data : It->second) {
          auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be
          // returned and move on to the next declaration. Exclude cases where
          // the base pointer is mapped as array subscript, array section or
          // array shaping. The base address is passed as a pointer to base in
          // this case and cannot be used as a base for a use_device_ptr list
          // item.
          if (CI != Data.end()) {
            if (IsDevAddr) {
              CI->ForDeviceAddr = IsDevAddr;
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            } else {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ForDeviceAddr = IsDevAddr;
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
        }
        return Found;
      }
      return false;
    };

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/false);
      }
    }

    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/true);
      }
    }

    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Current struct information:
      MapCombinedInfoTy CurInfo;
      // Current struct base information:
      MapCombinedInfoTy StructBaseCurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      bool HasMapBasePtr = false;
      bool HasMapArraySec = false;
      if (VD && VD->getType()->isAnyPointerType()) {
        for (const auto &M : Data.second) {
          HasMapBasePtr = any_of(M, [](const MapInfo &L) {
            return isa_and_present<DeclRefExpr>(L.VarRef);
          });
          HasMapArraySec = any_of(M, [](const MapInfo &L) {
            return isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(
                L.VarRef);
          });
          if (HasMapBasePtr && HasMapArraySec)
            break;
        }
      }
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          unsigned StructBasePointersIdx =
              StructBaseCurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, StructBaseCurInfo, PartialStruct,
              /*IsFirstComponentList=*/false, L.IsImplicit,
              /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
              L.VarRef, /*OverlappedElements*/ std::nullopt,
              HasMapBasePtr && HasMapArraySec);

          // If this entry relates to a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            // Check whether a value was added to either CurInfo or
            // StructBaseCurInfo and error if no value was added to either of
            // them:
            assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
                    StructBasePointersIdx <
                        StructBaseCurInfo.BasePointers.size()) &&
                   "Unexpected number of mapped base pointers.");

            // Choose a base pointer index which is always valid:
            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related to device pointer??");

            // If StructBaseCurInfo has been updated this iteration then work
            // on the first new entry added to it, i.e., make sure that when
            // multiple values are added to any of the lists, the first value
            // added is being modified by the assignments below (not the last
            // value added).
            if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
              StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
                  RelevantVD;
              StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
                  L.ForDeviceAddr ? DeviceInfoTy::Address
                                  : DeviceInfoTy::Pointer;
              StructBaseCurInfo.Types[StructBasePointersIdx] |=
                  OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
            } else {
              CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
              CurInfo.DevicePointers[CurrentBasePointersIdx] =
                  L.ForDeviceAddr ? DeviceInfoTy::Address
                                  : DeviceInfoTy::Pointer;
              CurInfo.Types[CurrentBasePointersIdx] |=
                  OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
            }
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr);
          CurInfo.DevicePtrDecls.emplace_back(L.VD);
          CurInfo.DevicePointers.emplace_back(
              L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }

      // Unify entries in one list making sure the struct mapping precedes the
      // individual fields:
      MapCombinedInfoTy UnionCurInfo;
      UnionCurInfo.append(StructBaseCurInfo);
      UnionCurInfo.append(CurInfo);

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        UnionCurInfo.NonContigInfo.Dims.push_back(0);
        // Emit a combined entry:
        emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
                          /*IsMapThis*/ !VD, OMPBuilder, VD);
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(UnionCurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDeviceDataCombinedInfo);
  }

public:
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract device addr clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
      for (auto L : C->component_lists())
        HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract map information.
    for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
      if (C->getMapType() != OMPC_MAP_to)
        continue;
      for (auto L : C->component_lists()) {
        const ValueDecl *VD = std::get<0>(L);
        const auto *RD = VD ? VD->getType()
                                  .getCanonicalType()
                                  .getNonReferenceType()
                                  ->getAsCXXRecordDecl()
                            : nullptr;
        if (RD && RD->isLambda())
          LambdasMap.try_emplace(std::get<0>(L), C);
      }
    }
  }

  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}

  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
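  ///
  /// For instance (an illustrative sketch; 's', 'a' and 'c' are arbitrary
  /// names), mapping two members of the same object:
  /// \code
  /// #pragma omp target map(s.a, s.c)
  /// \endcode
  /// produces one combined entry for 's' that covers the storage from 's.a'
  /// through 's.c', and the member entries are marked as MEMBER_OF that
  /// combined entry.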
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct, bool IsMapThis,
                         llvm::OpenMPIRBuilder &OMPBuilder,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
         OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
    CombinedInfo.DevicePtrDecls.push_back(nullptr);
    CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.emitRawPointer(CGF);
    const CXXMethodDecl *MD =
        CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
    const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
    bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
    // There should not be a mapper for a combined entry.
    if (HasBaseClass) {
      // OpenMP 5.2 148:21:
      // If the target construct is within a class non-static member function,
      // and a variable is an accessible data member of the object for which
      // the non-static data member function is invoked, the variable is
      // treated as if the this[:1] expression had appeared in a map clause
      // with a map-type of tofrom.
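      // For example (an illustrative sketch; names are arbitrary):
      //
      //   struct S : Base {
      //     int X;
      //     void foo() {
      //     #pragma omp target map(X)
      //       { ++X; }
      //     }
      //   };
      //
      // Since 'S' has a base class, the whole object is mapped here as if
      // 'map(tofrom: this[:1])' had been specified.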
      // Emit this[:1]
      CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
      QualType Ty = MD->getFunctionObjectParameterType();
      llvm::Value *Size =
          CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
                                    /*isSigned=*/true);
      CombinedInfo.Sizes.push_back(Size);
    } else {
      CombinedInfo.Pointers.push_back(LB);
      // Size is (addr of {highest+1} element) - (addr of lowest element)
      llvm::Value *HB = HBAddr.emitRawPointer(CGF);
      llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
          HBAddr.getElementType(), HB, /*Idx0=*/1);
      llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
      llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
      llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
      llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                    /*isSigned=*/false);
      CombinedInfo.Sizes.push_back(Size);
    }
    CombinedInfo.Mappers.push_back(nullptr);
    // The map type is always TARGET_PARAM when we generate info for captures.
    CombinedInfo.Types.push_back(
        NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
                        : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<
              std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
        }))
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement. Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<
              std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
        })) {
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates to a device pointer, a
  /// pair of the relevant declaration and the index where it occurs is
  /// appended to the device pointers info array.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
                              SkipVarSet);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of a user-defined mapper (all
  /// included in \a CombinedInfo).
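  ///
  /// For example (an illustrative sketch; names are arbitrary), for:
  /// \code
  /// #pragma omp declare mapper(id : struct S s) map(s.a, s.ptr[0:s.n])
  /// \endcode
  /// the map clauses attached to the mapper declaration are processed the
  /// same way as the clauses of an executable directive.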
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
                                llvm::OpenMPIRBuilder &OMPBuilder) const {
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
                              OMPBuilder);
  }

  /// Emit capture info for lambdas for variables captured by reference.
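  ///
  /// For example (an illustrative sketch; names are arbitrary):
  /// \code
  /// int X = 0;
  /// auto L = [&X]() { ++X; };
  /// #pragma omp target map(to: L)
  /// L();
  /// \endcode
  /// The capture field of 'L' holding the address of 'X' is emitted as a
  /// PTR_AND_OBJ member entry so that the device copy of the lambda ends up
  /// pointing at the device copy of 'X'.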
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
    const auto *RD = VDType->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
                   CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
    llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }

  /// Set correct indices for lambda captures.
  void adjustMemberOfForLambdaCaptures(
      llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                       OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                       OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
                       OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag =
          OMPBuilder.getMemberOfFlag(TgtIdx);
      OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // For map(to: lambda) we skip the capture here; it is processed in
    // generateDefaultMapInfo.
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in an is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we
    // just pass its value.
    if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg);
      CombinedInfo.DevicePtrDecls.emplace_back(VD);
      CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    // For member fields listed in is_device_ptr, store them in
    // DeclComponentLists for generating components info.
    static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
    auto It = DevPointersMap.find(VD);
    if (It != DevPointersMap.end())
      for (const auto &MCL : It->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    auto I = HasDevAddrsMap.find(VD);
    if (I != HasDevAddrsMap.end())
      for (const auto &MCL : I->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    bool HasMapBasePtr = false;
    bool HasMapArraySec = false;
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The expression is not valid if the mapping is implicit.
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(E))
          HasMapBasePtr = true;
        if (VD && E && VD->getType()->isAnyPointerType() &&
            (isa<ArraySectionExpr>(E) || isa<ArraySubscriptExpr>(E)))
          HasMapArraySec = true;
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // We found an overlap if, for at least one of the component lists, we
        // reached the head of the list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // A list with fewer elements is less than a list with more
            // elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    MapCombinedInfoTy StructBaseCombinedInfo;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
          StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
          IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(
            MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
            StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
            IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
            /*ForDeviceAddr=*/false, VD, VarRef,
            /*OverlappedElements*/ std::nullopt,
            HasMapBasePtr && HasMapArraySec);
      IsFirstComponentList = false;
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
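  ///
  /// For example (an illustrative sketch; names are arbitrary), in:
  /// \code
  /// int X = 0;
  /// int *P = &X;
  /// #pragma omp target
  /// { X += *P; }
  /// \endcode
  /// 'X' is captured by copy and passed as a literal, while 'P' receives an
  /// implicit zero-size map so the runtime can reuse an existing mapping of
  /// the pointee, if any.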
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                                   OpenMPOffloadMappingFlags::OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime which captures passed by value are
        // not pointers.
        CombinedInfo.Types.push_back(
            OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than the first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.emitRawPointer(CGF));
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target
    // parameter.
    CombinedInfo.Types.back() |=
        OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

// Try to extract the base declaration from a `this->x` expression if possible.
static ValueDecl *getDeclFromThisExpr(const Expr *E) {
  if (!E)
    return nullptr;

  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
    if (const MemberExpr *ME =
            dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
      return ME->getMemberDecl();
  return nullptr;
}

/// Emit a string constant containing the names of the values mapped to the
/// offloading runtime library.
llvm::Constant *
emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                       MappableExprsHandler::MappingExprInfo &MapExprs) {

  uint32_t SrcLocStrSize;
  if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
    return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);

  SourceLocation Loc;
  if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
    if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
      Loc = VD->getLocation();
    else
      Loc = MapExprs.getMapExpr()->getExprLoc();
  } else {
    Loc = MapExprs.getMapDecl()->getLocation();
  }

  std::string ExprName;
  if (MapExprs.getMapExpr()) {
    PrintingPolicy P(CGF.getContext().getLangOpts());
    llvm::raw_string_ostream OS(ExprName);
    MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
    OS.flush();
  } else {
    ExprName = MapExprs.getMapDecl()->getNameAsString();
  }

  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
                                         PLoc.getLine(), PLoc.getColumn(),
                                         SrcLocStrSize);
}

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());

  auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
    return emitMappingInformation(CGF, OMPBuilder, MapExpr);
  };
  if (CGM.getCodeGenOpts().getDebugInfo() !=
      llvm::codegenoptions::NoDebugInfo) {
    CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
    llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                    FillInfoMap);
  }

  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
    }
  };

  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Value *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };
  OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
                                  /*IsNonContiguous=*/true, DeviceAddrCB,
                                  CustomMapperCB);
}

/// Check for an inner distribute directive.
8926static const OMPExecutableDirective *
8927getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8928 const auto *CS = D.getInnermostCapturedStmt();
8929 const auto *Body =
8930 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8931 const Stmt *ChildStmt =
8932 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8933
8934 if (const auto *NestedDir =
8935 dyn_cast_or_null<OMPExecutableDirective>(Val: ChildStmt)) {
8936 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8937 switch (D.getDirectiveKind()) {
8938 case OMPD_target:
8939 // For now, treat 'target' with nested 'teams loop' as if it's
8940 // distributed (target teams distribute).
8941 if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
8942 return NestedDir;
8943 if (DKind == OMPD_teams) {
8944 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8945 /*IgnoreCaptured=*/true);
8946 if (!Body)
8947 return nullptr;
8948 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8949 if (const auto *NND =
8950 dyn_cast_or_null<OMPExecutableDirective>(Val: ChildStmt)) {
8951 DKind = NND->getDirectiveKind();
8952 if (isOpenMPDistributeDirective(DKind))
8953 return NND;
8954 }
8955 }
8956 return nullptr;
8957 case OMPD_target_teams:
8958 if (isOpenMPDistributeDirective(DKind))
8959 return NestedDir;
8960 return nullptr;
8961 case OMPD_target_parallel:
8962 case OMPD_target_simd:
8963 case OMPD_target_parallel_for:
8964 case OMPD_target_parallel_for_simd:
8965 return nullptr;
8966 case OMPD_target_teams_distribute:
8967 case OMPD_target_teams_distribute_simd:
8968 case OMPD_target_teams_distribute_parallel_for:
8969 case OMPD_target_teams_distribute_parallel_for_simd:
8970 case OMPD_parallel:
8971 case OMPD_for:
8972 case OMPD_parallel_for:
8973 case OMPD_parallel_master:
8974 case OMPD_parallel_sections:
8975 case OMPD_for_simd:
8976 case OMPD_parallel_for_simd:
8977 case OMPD_cancel:
8978 case OMPD_cancellation_point:
8979 case OMPD_ordered:
8980 case OMPD_threadprivate:
8981 case OMPD_allocate:
8982 case OMPD_task:
8983 case OMPD_simd:
8984 case OMPD_tile:
8985 case OMPD_unroll:
8986 case OMPD_sections:
8987 case OMPD_section:
8988 case OMPD_single:
8989 case OMPD_master:
8990 case OMPD_critical:
8991 case OMPD_taskyield:
8992 case OMPD_barrier:
8993 case OMPD_taskwait:
8994 case OMPD_taskgroup:
8995 case OMPD_atomic:
8996 case OMPD_flush:
8997 case OMPD_depobj:
8998 case OMPD_scan:
8999 case OMPD_teams:
9000 case OMPD_target_data:
9001 case OMPD_target_exit_data:
9002 case OMPD_target_enter_data:
9003 case OMPD_distribute:
9004 case OMPD_distribute_simd:
9005 case OMPD_distribute_parallel_for:
9006 case OMPD_distribute_parallel_for_simd:
9007 case OMPD_teams_distribute:
9008 case OMPD_teams_distribute_simd:
9009 case OMPD_teams_distribute_parallel_for:
9010 case OMPD_teams_distribute_parallel_for_simd:
9011 case OMPD_target_update:
9012 case OMPD_declare_simd:
9013 case OMPD_declare_variant:
9014 case OMPD_begin_declare_variant:
9015 case OMPD_end_declare_variant:
9016 case OMPD_declare_target:
9017 case OMPD_end_declare_target:
9018 case OMPD_declare_reduction:
9019 case OMPD_declare_mapper:
9020 case OMPD_taskloop:
9021 case OMPD_taskloop_simd:
9022 case OMPD_master_taskloop:
9023 case OMPD_master_taskloop_simd:
9024 case OMPD_parallel_master_taskloop:
9025 case OMPD_parallel_master_taskloop_simd:
9026 case OMPD_requires:
9027 case OMPD_metadirective:
9028 case OMPD_unknown:
9029 default:
9030 llvm_unreachable("Unexpected directive.");
9031 }
9032 }
9033
9034 return nullptr;
9035}
9036
9037/// Emit the user-defined mapper function. The code generation follows the
9038/// pattern in the example below.
9039/// \code
9040/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9041/// void *base, void *begin,
9042/// int64_t size, int64_t type,
9043/// void *name = nullptr) {
9044/// // Allocate space for an array section first or add a base/begin for
9045/// // pointer dereference.
9046/// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9047/// !maptype.IsDelete)
9048/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9049/// size*sizeof(Ty), clearToFromMember(type));
9050/// // Map members.
9051/// for (unsigned i = 0; i < size; i++) {
9052/// // For each component specified by this mapper:
9053/// for (auto c : begin[i]->all_components) {
9054/// if (c.hasMapper())
9055/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9056/// c.arg_type, c.arg_name);
9057/// else
9058/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9059/// c.arg_begin, c.arg_size, c.arg_type,
9060/// c.arg_name);
9061/// }
9062/// }
9063/// // Delete the array section.
9064/// if (size > 1 && maptype.IsDelete)
9065/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9066/// size*sizeof(Ty), clearToFromMember(type));
9067/// }
9068/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and ending addresses of the array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initialization and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initialization if this is an array section and \p MapType
  // indicates that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate over SizeArg elements and map each of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent(PtrPHI, ElemTy,
                     MapperCGF.GetAddrOfLocalVar(&BeginArg)
                         .getAlignment()
                         .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, PtrCurrent);
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info, OMPBuilder);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
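  // A sketch of the effect: if the handle already holds N components, adding
  // ShiftedPreviousSize below offsets the MEMBER_OF field of each new
  // component's map type by N, so member indices stay consistent with the
  // components pushed so far.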

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() ==
         llvm::codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
        static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
            Info.Types[I]));
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc | to    | alloc | to     | release | delete
    // from   | alloc | alloc | from  | from   | release | delete
    // tofrom | alloc | to    | from  | tofrom | release | delete
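    // For example, if this mapper is invoked with MapType == to while a
    // member was declared map(tofrom: ...), the member decays to to:
    //   LeftToFrom = MapType & (OMP_MAP_TO | OMP_MAP_FROM)  // == OMP_MAP_TO
    //   MemberMapType &= ~OMP_MAP_FROM                      // tofrom -> to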
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expected a valid mapper function to be available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the
      // runtime data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
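/// A rough sketch of the generated guard and call for the \a IsInit case
/// (pseudo-code; the names are illustrative only):
/// \code
/// if ((size > 1 || (base != begin && (type & PTR_AND_OBJ))) &&
///     !(type & DELETE))
///   __tgt_push_mapper_component(handle, base, begin, size * sizeof(Ty),
///                               (type & ~(TO | FROM)) | IMPLICIT, name);
/// \endcode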
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e.,
  // \p Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}

llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get the nested teams distribute kind directive, if any. For now, treat
  // 'target_teams_loop' as if it were a 'target_teams_distribute'.
  if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
      Kind != OMPD_target_teams_loop)
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return llvm::ConstantInt::get(CGF.Int64Ty, 0);

  const auto *LD = cast<OMPLoopDirective>(TD);
  if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
    return NumIterations;
  return llvm::ConstantInt::get(CGF.Int64Ty, 0);
}

static void
emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
                       const OMPExecutableDirective &D,
                       llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
                       bool RequiresOuterTask, const CapturedStmt &CS,
                       bool OffloadingMandatory, CodeGenFunction &CGF) {
  if (OffloadingMandatory) {
    CGF.Builder.CreateUnreachable();
  } else {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
                                         CapturedVars);
  }
}

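/// Emit the device ID for a kernel launch: the value of the 'device' clause
/// expression if present (cast to i64), or OMP_DEVICEID_UNDEF, letting the
/// runtime use the default device.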
static llvm::Value *emitDeviceID(
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    CodeGenFunction &CGF) {
  // Emit device ID if any.
  llvm::Value *DeviceID;
  if (Device.getPointer()) {
    assert((Device.getInt() == OMPC_DEVICE_unknown ||
            Device.getInt() == OMPC_DEVICE_device_num) &&
           "Expected device_num modifier.");
    llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
    DeviceID =
        CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
  } else {
    DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
  }
  return DeviceID;
}

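/// Emit the dynamic cgroup memory size for a kernel launch: the value of the
/// 'ompx_dyn_cgroup_mem' clause expression if present (zero-extended to i32),
/// or 0 when the clause is absent.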
llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
                               CodeGenFunction &CGF) {
  llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);

  if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
    CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
    llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
        DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
    DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
                                             /*isSigned=*/false);
  }
  return DynCGroupMem;
}

static void emitTargetCallKernelLaunch(
    CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
    const OMPExecutableDirective &D,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
    const CapturedStmt &CS, bool OffloadingMandatory,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
    llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter,
    CodeGenFunction &CGF, CodeGenModule &CGM) {
  llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();

  // Fill up the arrays with all the captured variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

  // Get mappable expression information.
  MappableExprsHandler MEHandler(D, CGF);
  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

  auto RI = CS.getCapturedRecordDecl()->field_begin();
  auto *CV = CapturedVars.begin();
  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                            CE = CS.capture_end();
       CI != CE; ++CI, ++RI, ++CV) {
    MappableExprsHandler::MapCombinedInfoTy CurInfo;
    MappableExprsHandler::StructRangeInfoTy PartialStruct;

    // VLA sizes are passed to the outlined region by copy and do not have map
    // information associated.
    if (CI->capturesVariableArrayType()) {
      CurInfo.Exprs.push_back(nullptr);
      CurInfo.BasePointers.push_back(*CV);
      CurInfo.DevicePtrDecls.push_back(nullptr);
      CurInfo.DevicePointers.push_back(
          MappableExprsHandler::DeviceInfoTy::None);
      CurInfo.Pointers.push_back(*CV);
      CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
      // Copy to the device as an argument. No need to retrieve it.
      CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                              OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
                              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CurInfo.Mappers.push_back(nullptr);
    } else {
      // If we have any information in the map clause, we use it, otherwise we
      // just do a default mapping.
      MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
      if (!CI->capturesThis())
        MappedVarSet.insert(CI->getCapturedVar());
      else
        MappedVarSet.insert(nullptr);
      if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
        MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
      // Generate correct mapping for variables captured by reference in
      // lambdas.
      if (CI->capturesVariable())
        MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                CurInfo, LambdaPointers);
    }
    // We expect to have at least an element of information for this capture.
    assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
           "Non-existing map pointer for capture!");
    assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
           CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
           CurInfo.BasePointers.size() == CurInfo.Types.size() &&
           CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
           "Inconsistent map information sizes!");

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid()) {
      CombinedInfo.append(PartialStruct.PreliminaryMapData);
      MEHandler.emitCombinedEntry(
          CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
          OMPBuilder, nullptr,
          !PartialStruct.PreliminaryMapData.BasePointers.empty());
    }

    // We need to append the results of this capture to what we already have.
    CombinedInfo.append(CurInfo);
  }
  // Adjust MEMBER_OF flags for the lambda captures.
  MEHandler.adjustMemberOfForLambdaCaptures(
      OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
      CombinedInfo.Pointers, CombinedInfo.Types);
  // Map any list items in a map clause that were not captured because they
  // were not referenced within the construct.
  MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);

  CGOpenMPRuntime::TargetDataInfo Info;
  // Fill up the arrays and create the arguments.
  emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
  bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                   llvm::codegenoptions::NoDebugInfo;
  OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                          EmitDebug,
                                          /*ForEndCall=*/false);

  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
  InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                        CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.PointersArray =
      Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.SizesArray =
      Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
  InputInfo.MappersArray =
      Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  MapTypesArray = Info.RTArgs.MapTypesArray;
  MapNamesArray = Info.RTArgs.MapNamesArray;

  auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
                    RequiresOuterTask, &CS, OffloadingMandatory, Device,
                    OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;

    if (IsReverseOffloading) {
      // Reverse offloading is not supported, so just execute on the host.
      // FIXME: This fallback solution is incorrect since it ignores the
      // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
      // assert here and ensure SEMA emits an error.
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return;
    }

    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
    unsigned NumTargetItems = InputInfo.NumberOfTargetItems;

    llvm::Value *BasePointersArray =
        InputInfo.BasePointersArray.emitRawPointer(CGF);
    llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
    llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
    llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);

    auto &&EmitTargetCallFallbackCB =
        [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
         OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
        -> llvm::OpenMPIRBuilder::InsertPointTy {
      CGF.Builder.restoreIP(IP);
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return CGF.Builder.saveIP();
    };

    llvm::Value *DeviceID = emitDeviceID(Device, CGF);
    llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads =
        OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
    llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *NumIterations =
        OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
    llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());

    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
        BasePointersArray, PointersArray, SizesArray, MapTypesArray,
        /*MapTypesArrayEnd=*/nullptr, MappersArray, MapNamesArray);

    llvm::OpenMPIRBuilder::TargetKernelArgs Args(
        NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
        DynCGGroupMem, HasNoWait);

    CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
        CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
        DeviceID, RTLoc, AllocaIP));
  };

  if (RequiresOuterTask)
    CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
  else
    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
}

static void
emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
                   const OMPExecutableDirective &D,
                   llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
                   bool RequiresOuterTask, const CapturedStmt &CS,
                   bool OffloadingMandatory, CodeGenFunction &CGF) {

  // Notify that the host version must be executed.
  auto &&ElseGen =
      [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
        emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                               RequiresOuterTask, CS, OffloadingMandatory, CGF);
      };

  if (RequiresOuterTask) {
    CodeGenFunction::OMPTargetDataInfo InputInfo;
    CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
  } else {
    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
  }
}

void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
                                   CGM.getLangOpts().OpenMPOffloadMandatory;

  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");

  const bool RequiresOuterTask =
      D.hasClausesOfKind<OMPDependClause>() ||
      D.hasClausesOfKind<OMPNowaitClause>() ||
      D.hasClausesOfKind<OMPInReductionClause>() ||
      (CGM.getLangOpts().OpenMP >= 51 &&
       needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
       D.hasClausesOfKind<OMPThreadLimitClause>());
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;

  auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
                          RequiresOuterTask, &CS, OffloadingMandatory, Device,
                          OutlinedFnID, &InputInfo, &MapTypesArray,
                          &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
                                                       PrePostActionTy &) {
    emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
                               RequiresOuterTask, CS, OffloadingMandatory,
                               Device, OutlinedFnID, InputInfo, MapTypesArray,
                               MapNamesArray, SizeEmitter, CGF, CGM);
  };

  auto &&TargetElseGen =
      [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
        emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
                           CS, OffloadingMandatory, CGF);
      };

  // If we have a target function ID it means that we need to support
  // offloading; otherwise, just execute on the host. We must execute on the
  // host regardless of the condition in the if clause if, e.g., the user does
  // not specify target triples.
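  // In short (a sketch of the dispatch below):
  //   OutlinedFnID && IfCond  -> emit if (cond) { kernel launch } else { host }
  //   OutlinedFnID && !IfCond -> always attempt the kernel launch
  //   !OutlinedFnID           -> host fallback only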
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);

    llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
        CGM, OMPBuilder, E.getBeginLoc(), ParentName);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
      return;

    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_target_teams_loop:
      CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
      break;
    case OMPD_target_parallel_loop:
      CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}

bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsTargetDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsTargetDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsTargetDevice))
    return true;

  if (!CGM.getLangOpts().OpenMPIsTargetDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
        *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}

void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsTargetDevice)
    return;

  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);

  // If this is an 'extern' declaration we defer to the canonical definition
  // and do not emit an offloading entry.
  if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
      VD->hasExternalStorage())
    return;

  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsTargetDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }

  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  std::vector<llvm::GlobalVariable *> GeneratedRefs;
  OMPBuilder.registerTargetGlobalVariable(
      convertCaptureClause(VD), convertDeviceClause(VD),
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      VD->isExternallyVisible(),
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  VD->getCanonicalDecl()->getBeginLoc()),
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
      CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
      CGM.getTypes().ConvertTypeForMem(
          CGM.getContext().getPointerType(VD->getType())),
      Addr);

  for (auto *ref : GeneratedRefs)
    CGM.addCompilerUsedGlobal(ref);
}

bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
         *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         "Expected target-based directive.");
}

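/// An illustrative directive that exercises both clause kinds handled below
/// (illustrative only):
/// \code
/// #pragma omp requires unified_shared_memory atomic_default_mem_order(acq_rel)
/// \endcode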
void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
      OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}

llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}

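/// E.g., a global declared with
/// \code
/// int Var;
/// #pragma omp allocate(Var) allocator(omp_const_mem_alloc)
/// \endcode
/// takes the predefined-allocator path below (a sketch; all predefined
/// allocators currently map to LangAS::Default here).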
bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fall back to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected a predefined allocator for variables with "
                     "static storage.");
  }
  return false;
}

bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsTargetDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target, as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}

void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams,
  // thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
                                            const Expr *ThreadLimit,
                                            SourceLocation Loc) {
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
  llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
                                    ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
                      ThreadLimitArgs);
}

void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

  llvm::Value *IfCondVal = nullptr;
  if (IfCond)
    IfCondVal = CGF.EvaluateExprAsBool(IfCond);

  // Emit device ID if any.
  llvm::Value *DeviceID = nullptr;
  if (Device) {
    DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                         CGF.Int64Ty, /*isSigned=*/true);
  } else {
    DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
  }

  // Fill up the arrays with all the mapped variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  auto GenMapInfoCB =
      [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
    CGF.Builder.restoreIP(CodeGenIP);
    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);

    auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
      return emitMappingInformation(CGF, OMPBuilder, MapExpr);
    };
    if (CGM.getCodeGenOpts().getDebugInfo() !=
        llvm::codegenoptions::NoDebugInfo) {
      CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                      FillInfoMap);
    }

    return CombinedInfo;
  };
  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
  auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
    CGF.Builder.restoreIP(CodeGenIP);
    switch (BodyGenType) {
    case BodyGenTy::Priv:
      if (!Info.CaptureDeviceAddrMap.empty())
        CodeGen(CGF);
      break;
    case BodyGenTy::DupNoPriv:
      if (!Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    case BodyGenTy::NoPriv:
      if (Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    }
    return InsertPointTy(CGF.Builder.GetInsertBlock(),
                         CGF.Builder.GetInsertPoint());
  };

  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
    }
  };

  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Value *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };

  // Source location for the ident struct.
  llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());
  llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
  CGF.Builder.restoreIP(OMPBuilder.createTargetData(
      OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
      /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc));
}
10338
10339void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10340 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10341 const Expr *Device) {
10342 if (!CGF.HaveInsertPoint())
10343 return;
10344
10345 assert((isa<OMPTargetEnterDataDirective>(D) ||
10346 isa<OMPTargetExitDataDirective>(D) ||
10347 isa<OMPTargetUpdateDirective>(D)) &&
10348 "Expecting either target enter, exit data, or update directives.");
10349
10350 CodeGenFunction::OMPTargetDataInfo InputInfo;
10351 llvm::Value *MapTypesArray = nullptr;
10352 llvm::Value *MapNamesArray = nullptr;
10353 // Generate the code for the opening of the data environment.
10354 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
10355 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10356 // Emit device ID if any.
10357 llvm::Value *DeviceID = nullptr;
10358 if (Device) {
10359 DeviceID = CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: Device),
10360 DestTy: CGF.Int64Ty, /*isSigned=*/true);
10361 } else {
10362 DeviceID = CGF.Builder.getInt64(C: OMP_DEVICEID_UNDEF);
10363 }
10364
10365 // Emit the number of elements in the offloading arrays.
10366 llvm::Constant *PointerNum =
10367 CGF.Builder.getInt32(C: InputInfo.NumberOfTargetItems);
10368
10369 // Source location for the ident struct
10370 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc: D.getBeginLoc());
10371
10372 SmallVector<llvm::Value *, 13> OffloadingArgs(
10373 {RTLoc, DeviceID, PointerNum,
10374 InputInfo.BasePointersArray.emitRawPointer(CGF),
10375 InputInfo.PointersArray.emitRawPointer(CGF),
10376 InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
10377 InputInfo.MappersArray.emitRawPointer(CGF)});
10378
    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
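    // The _nowait variants take four extra trailing arguments describing task
    // dependences (a count and a list, once for aliasing and once for
    // non-aliasing dependences); no dependences apply here, so pass nulls.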
    if (HasNowait) {
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);

    CGOpenMPRuntime::TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                     llvm::codegenoptions::NoDebugInfo;
    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                            EmitDebug,
                                            /*ForEndCall=*/false);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                          CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
                                      CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.RTArgs.MapTypesArray;
    MapNamesArray = Info.RTArgs.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy {
  Linear,
  LinearRef,
  LinearUVal,
  LinearVal,
  Uniform,
  Vector,
};
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment;
  bool HasVarStride = false;
};
} // namespace

static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length
  // (VLEN). If the OpenMP clause "simdlen" is used, the VLEN is the value of
  // the argument of that clause. The VLEN value must be a power of 2.
  // Otherwise the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For a non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //      CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is a struct, union, or class
  //      type which is passed by value (except for the type that maps to the
  //      built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of the vector
  // register of the ISA for which the current vector version is generated.
  // The VLEN is computed using the formula below:
  //   VLEN = sizeof(vector_register) / sizeof(CDT),
  // where the vector register size is specified in section 3.2.1 "Registers
  // and the Stack Frame" of the original AMD64 ABI document.
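  // For example, for a 'declare simd' on `double add(double X, double Y)`
  // vectorized for a 256-bit ISA, the CDT is double (64 bits), so
  // VLEN = 256 / 64 = 4.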
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType.isNull() && !RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}

/// Mangle the parameter part of the vector function name according to the
/// parameters' OpenMP classification. The mangling function is defined in
/// section 4.5 of the AAVFABI (2021Q1).
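/// For example, a uniform parameter mangles as 'u', a linear parameter with a
/// constant stride of 2 as 'l2', and a vector parameter carrying a 16-byte
/// alignment as 'va16'.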
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  for (const auto &ParamAttr : ParamAttrs) {
    switch (ParamAttr.Kind) {
    case Linear:
      Out << 'l';
      break;
    case LinearRef:
      Out << 'R';
      break;
    case LinearUVal:
      Out << 'U';
      break;
    case LinearVal:
      Out << 'L';
      break;
    case Uniform:
      Out << 'u';
      break;
    case Vector:
      Out << 'v';
      break;
    }
    if (ParamAttr.HasVarStride)
      Out << "s" << ParamAttr.StrideOrArg;
    else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
             ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
      // Don't print the step value if it is not present or if it is
      // equal to 1.
      if (ParamAttr.StrideOrArg < 0)
        Out << 'n' << -ParamAttr.StrideOrArg;
      else if (ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
    }

    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }

  return std::string(Out.str());
}

static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {'b', 128}, // SSE
      {'c', 256}, // AVX
      {'d', 256}, // AVX2
      {'e', 512}, // AVX512
  };
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
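  // Emit one "_ZGV<isa><mask><vlen><parameters>_<name>" attribute per
  // (mask, ISA) pair; e.g. the unmasked AVX2 variant of `foo` with one vector
  // parameter and VLEN 4 is advertised as "_ZGVdN4v_foo".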
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      Out << mangleVectorParameters(ParamAttrs);
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}

// These are the functions needed to mangle the names of the vector functions
// generated by the compiler, according to the rules defined in the "Vector
// Function ABI specifications for AArch64", available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
  QT = QT.getCanonicalType();

  if (QT->isVoidType())
    return false;

  if (Kind == ParamKindTy::Uniform)
    return false;

  if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
    return false;

  if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
      !QT->isReferenceType())
    return false;

  return true;
}

/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static bool getAArch64PBV(QualType QT, ASTContext &C) {
  QT = QT.getCanonicalType();
  unsigned Size = C.getTypeSize(QT);

  // Only scalars and complex types at most 16 bytes wide set PBV to true.
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
    return false;

  if (QT->isFloatingType())
    return true;

  if (QT->isIntegerType())
    return true;

  if (QT->isPointerType())
    return true;

  // TODO: Add support for complex types (section 3.1.2, item 2).

  return false;
}

/// Computes the lane size (LS) of a return type or of an input parameter,
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
/// TODO: Add support for references, section 3.2.1, item 1.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
  if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
    QualType PTy = QT.getCanonicalType()->getPointeeType();
    if (getAArch64PBV(PTy, C))
      return C.getTypeSize(PTy);
  }
  if (getAArch64PBV(QT, C))
    return C.getTypeSize(QT);

  return C.getTypeSize(C.getUIntPtrType());
}

// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
// signature of the scalar function, as defined in 3.2.2 of the
// AAVFABI.
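// For example, for `double foo(float X, double Y)` the lane sizes are
// {64, 32, 64} bits, so NDS = 32 and WDS = 64.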
static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
  QualType RetType = FD->getReturnType().getCanonicalType();

  ASTContext &C = FD->getASTContext();

  bool OutputBecomesInput = false;

  llvm::SmallVector<unsigned, 8> Sizes;
  if (!RetType->isVoidType()) {
    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
      OutputBecomesInput = true;
  }
  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
  }

  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
  // The LS of a function parameter / return value can only be a power
  // of 2, starting from 8 bits, up to 128.
  assert(llvm::all_of(Sizes,
                      [](unsigned Size) {
                        return Size == 8 || Size == 16 || Size == 32 ||
                               Size == 64 || Size == 128;
                      }) &&
         "Invalid size");

  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
                         OutputBecomesInput);
}

// Function used to add the attribute. The parameter `VLEN` is
// templated to allow the use of "x" when targeting scalable functions
// for SVE.
template <typename T>
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
                                 char ISA, StringRef ParSeq,
                                 StringRef MangledName, bool OutputBecomesInput,
                                 llvm::Function *Fn) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Prefix << ISA << LMask << VLEN;
  if (OutputBecomesInput)
    Out << "v";
  Out << ParSeq << "_" << MangledName;
  Fn->addFnAttr(Out.str());
}

// Helper function to generate the Advanced SIMD names depending on
// the value of the NDS when simdlen is not present.
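// Advanced SIMD vectors are 64 or 128 bits wide, so an NDS of 8, 16, or 32
// bits yields the two VLENs 64/NDS and 128/NDS, while 64- and 128-bit types
// get only VLEN 2.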
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}

/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1: SVE fixed-length vectors must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
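  // The attributes added below follow the AAVFABI mangling
  // "_ZGV<isa><mask><vlen><parameters>_<name>"; e.g. the masked, scalable SVE
  // variant of `foo` with one vector parameter is advertised as
  // "_ZGVsMxv_foo".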
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}

void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  while (FD) {
    // Map params to their positions in function decl.
    llvm::DenseMap<const Decl *, unsigned> ParamPositions;
    if (isa<CXXMethodDecl>(FD))
      ParamPositions.try_emplace(FD, 0);
    unsigned ParamPos = ParamPositions.size();
    for (const ParmVarDecl *P : FD->parameters()) {
      ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
      ++ParamPos;
    }
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      auto *NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          ParmTy = PVD->getType();
        }
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      auto *SI = Attr->steps_begin();
      auto *MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        bool IsReferenceType = false;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          auto *P = cast<PointerType>(E->getType());
          PtrRescalingFactor = CGM.getContext()
                                   .getTypeSizeInChars(P->getPointeeType())
                                   .getQuantity();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
          else if (PVD->getType()->isReferenceType()) {
            IsReferenceType = true;
            PtrRescalingFactor =
                CGM.getContext()
                    .getTypeSizeInChars(PVD->getType().getNonReferenceType())
                    .getQuantity();
          }
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        if (*MI == OMPC_LINEAR_ref)
          ParamAttr.Kind = LinearRef;
        else if (*MI == OMPC_LINEAR_uval)
          ParamAttr.Kind = LinearUVal;
        else if (IsReferenceType)
          ParamAttr.Kind = LinearVal;
        else
          ParamAttr.Kind = Linear;
        // Assume a stride of 1 for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            if (const auto *DRE =
                    dyn_cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.HasVarStride = true;
                auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
                assert(It != ParamPositions.end() &&
                       "Function parameter not found");
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (!ParamAttr.HasVarStride &&
            (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
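      // Emit the ISA-specific variants: x86 picks the vector width per ISA
      // internally, while AArch64 prefers SVE when available and falls back
      // to Advanced SIMD ('n') otherwise.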
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        else if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}

namespace {
/// Cleanup action for doacross support.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim { // loop bounds info casted to kmp_int64
    //   kmp_int64 lo; // lower
    //   kmp_int64 up; // upper
    //   kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
                                            ArraySizeModifier::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::ArrayRef(FiniArgs));
}

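// Emit a call to __kmpc_doacross_post for a 'source' dependence and to
// __kmpc_doacross_wait for a 'sink' dependence; e.g. `#pragma omp ordered
// depend(sink : i - 1)` lowers to a __kmpc_doacross_wait on the iteration
// vector {i - 1}.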
template <typename T>
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
                                const T *C, llvm::Value *ULoc,
                                llvm::Value *ThreadID) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      ULoc, ThreadID,
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
  llvm::FunctionCallee RTLFn;
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  OMPDoacrossKind<T> ODK;
  if (ODK.isSource(C)) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(ODK.isSink(C) && "Expect sink modifier.");
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  return EmitDoacrossOrdered<OMPDependClause>(
      CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()));
}

void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDoacrossClause *C) {
  return EmitDoacrossOrdered<OMPDoacrossClause>(
      CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()));
}

void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

/// Return the allocator value from an expression, or the null allocator
/// (the default when no allocator is specified).
static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
                                    const Expr *Allocator) {
  llvm::Value *AllocVal;
  if (Allocator) {
    AllocVal = CGF.EmitScalarExpr(Allocator);
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                        CGF.getContext().VoidPtrTy,
                                        Allocator->getExprLoc());
  } else {
    // If no allocator is specified, it defaults to the null allocator.
    AllocVal = llvm::Constant::getNullValue(
        CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
  }
  return AllocVal;
}

/// Return the alignment from an allocate directive if present.
static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
  std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);

  if (!AllocateAlignment)
    return nullptr;

  return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
}

Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
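    // The allocation is emitted as a call to __kmpc_alloc(gtid, size,
    // allocator) or, when an `align` clause is present, to
    // __kmpc_aligned_alloc(gtid, alignment, size, allocator), paired with a
    // __kmpc_free cleanup pushed below for scope exit.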
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}

bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}

CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}

bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
}

void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
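    // Build an implicit struct { <var type> value; char Fired; } pairing the
    // privatized value with a flag that records whether it was assigned.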
    RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr =
        CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
}

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace

void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
      LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal =
      CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit a line number for an unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region; no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}

void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
        CGF.ConvertTypeForMem(StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}

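// For each lastprivate conditional captured by the enclosing region, checks
// whether an inner region set its 'Fired' flag (via the atomic store above)
// and, if so, re-runs the conditional update so the tracked copy reflects
// the latest firing iteration. Per variable this roughly emits
//
//   if (priv_a.Fired != 0) { /* conditional update of last_a/last_iv */ }
//
// which mirrors the Cmp/ThenBB/DoneBB sequence in the loop below.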
void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}

11926
11927void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11928 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11929 SourceLocation Loc) {
11930 if (CGF.getLangOpts().OpenMP < 50)
11931 return;
11932 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(Key: VD);
11933 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11934 "Unknown lastprivate conditional variable.");
11935 StringRef UniqueName = It->second;
11936 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(Name: UniqueName);
11937 // The variable was not updated in the region - exit.
11938 if (!GV)
11939 return;
11940 LValue LPLVal = CGF.MakeRawAddrLValue(
11941 V: GV, T: PrivLVal.getType().getNonReferenceType(), Alignment: PrivLVal.getAlignment());
11942 llvm::Value *Res = CGF.EmitLoadOfScalar(lvalue: LPLVal, Loc);
11943 CGF.EmitStoreOfScalar(value: Res, lvalue: PrivLVal);
11944}
11945
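// The CGOpenMPSIMDRuntime overrides below back SIMD-only mode
// (-fopenmp-simd), in which only 'simd' semantics are honored and no calls
// into the OpenMP runtime library may be emitted. Directives that would
// require runtime support are dropped or diagnosed before codegen, so these
// entry points should be dead; they trap to catch any path that reaches
// them anyway.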
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
                                                SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

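// Unlike the surrounding stubs, reductions get a real implementation: a
// simple (non-task, non-teams) reduction on a 'simd' construct needs no
// runtime library support, so it is delegated to the base class, which
// folds the private copies back into the original variables inline.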
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

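// Returning false rather than trapping: SIMD-only mode never claims a
// global for device codegen, so the caller falls back to ordinary host
// emission.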
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDoacrossClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
