//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "ABIInfoImpl.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGDebugInfo.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
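        // Advance the untied task to its next part: store the next part id,
        // run UntiedCodeGen to reschedule the task, and add a new switch case
        // so re-entry resumes right after this point.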
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look up the
    // variable in a list of captured variables; we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in the innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enum elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

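/// Run the wrapped codegen callback inside its own cleanups scope. When a
/// pre/post action is attached, its Exit hook is pushed as a cleanup so that
/// it also runs on early exits from the region.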
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
    return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

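/// Load through the chain of pointers/references in \p BaseTy until the
/// element type \p ElTy is reached, returning an lvalue of that element type.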
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

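/// Materialize the reverse of loadToBegin for a private copy: create one
/// temporary per level of indirection in \p BaseTy, chain them together, store
/// \p Addr into the innermost one, and return the outermost temporary (or the
/// original base address, repointed at \p Addr, if there is no indirection).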
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  RawAddress Tmp = RawAddress::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

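/// Return the base VarDecl of an array section or array subscript expression,
/// looking through nested sections and subscripts; \p DE receives the
/// underlying DeclRefExpr. Returns nullptr for other expression kinds.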
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

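/// For array sections and subscripts, the shared lvalue points at the section
/// element rather than at the base variable. Apply the distance between the
/// base and the section start to the private pointer as well, so the returned
/// address can stand in for the base declaration during reduction codegen.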
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

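/// Append an implicit public field of type \p FieldTy to the given record;
/// used when building the implicit record types passed to the OpenMP runtime.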
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF)
    FunctionUDRMap[CGF->CurFn].push_back(D);
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
      return llvm::Error::success();
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

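/// Common helper for the 'parallel' and 'teams' outlined-function emitters:
/// outlines the captured statement into a helper function that receives the
/// global thread id, deriving HasCancel from the concrete directive kind so
/// that cancellation exits are wired up correctly.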
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

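/// The "service insert point" created below is a dead bitcast of undef
/// ("svcpt") that only marks where location/thread-id setup code may be
/// emitted; it is removed again by clearLocThreadIdInsertPt.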
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  assert(!Elem.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt",
                                                 CGF.Builder.GetInsertBlock());
  } else {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt->getIterator());
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (Elem.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.ServiceInsertPt;
    Elem.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

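/// Build the ";file;function;line;column;;" string stored in the psource
/// field of an ident_t, e.g. ";example.c;main;10;3;;" (the file name here is
/// purely illustrative).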
1349static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1350 SourceLocation Loc,
1351 SmallString<128> &Buffer) {
1352 llvm::raw_svector_ostream OS(Buffer);
1353 // Build debug location
1354 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1355 OS << ";";
1356 if (auto *DbgInfo = CGF.getDebugInfo())
1357 OS << DbgInfo->remapDIPath(PLoc.getFilename());
1358 else
1359 OS << PLoc.getFilename();
1360 OS << ";";
1361 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(Val: CGF.CurFuncDecl))
1362 OS << FD->getQualifiedNameAsString();
1363 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1364 return OS.str();
1365}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    std::string FileName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    if (auto *DbgInfo = CGF.getDebugInfo())
      FileName = DbgInfo->remapDIPath(PLoc.getFilename());
    else
      FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls
  // as the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with the thread id passed as an
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If the value was loaded in the entry block, cache it and use it
        // everywhere in the function.
        if (CGF.Builder.GetInsertBlock() == TopBlock)
          OpenMPLocThreadIDMap[CGF.CurFn].ThreadID = ThreadID;
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call kmp_int32
  // __kmpc_global_thread_num(ident_t *loc).
  // Generate the thread id value and cache it for use across the function.
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (!Elem.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.ServiceInsertPt);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.ThreadID = Call;
  return Call;
}
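
// Illustrative shape of the uncached path above: the service insert point
// near the function entry receives a single
//   %tid = call i32 @__kmpc_global_thread_num(ptr @<ident>)
// and the result is cached in OpenMPLocThreadIDMap so later constructs in the
// same function reuse it instead of emitting another runtime call (the value
// names here are illustrative).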

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (auto I = FunctionUDRMap.find(CGF.CurFn); I != FunctionUDRMap.end()) {
    for (const auto *D : I->second)
      UDRMap.erase(D);
    FunctionUDRMap.erase(I);
  }
  if (auto I = FunctionUDMMap.find(CGF.CurFn); I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
convertDeviceClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;

  switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::DT_Host:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
  case OMPDeclareTargetDeclAttr::DT_NoHost:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
  case OMPDeclareTargetDeclAttr::DT_Any:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
  }
}

static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
convertCaptureClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!MapType)
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  switch ((int)*MapType) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  }
}
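
// For example, a global declared with
//   #pragma omp declare target to(gbl) device_type(nohost)
// comes back from the two helpers above as OMPTargetGlobalVarEntryTo paired
// with OMPTargetDeviceClauseNoHost ('gbl' is an illustrative name).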

static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
    CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
    SourceLocation BeginLoc, llvm::StringRef ParentName = "") {

  auto FileInfoCallBack = [&]() {
    SourceManager &SM = CGM.getContext().getSourceManager();
    PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);

    llvm::sys::fs::UniqueID ID;
    if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
      PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
    }

    return std::pair<std::string, uint64_t>(PLoc.getFilename(),
                                            PLoc.getLine());
  };

  return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
}

ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };

  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  std::vector<llvm::GlobalVariable *> GeneratedRefs;

  llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
      CGM.getContext().getPointerType(VD->getType()));
  llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
      convertCaptureClause(VD), convertDeviceClause(VD),
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      VD->isExternallyVisible(),
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  VD->getCanonicalDecl()->getBeginLoc()),
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
      CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
      LinkageForVariable);

  if (!addr)
    return ConstantAddress::invalid();
  return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
}

llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return OMPBuilder.getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
}

Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
      CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
      getOrCreateThreadPrivateCache(VD)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
          Args),
      CGF.Int8Ty, VDAddr.getAlignment());
}
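
// The runtime entry point used above has, for reference, the signature
//   void *__kmpc_threadprivate_cached(ident_t *loc, kmp_int32 gtid,
//                                     void *data, size_t size, void ***cache);
// and returns the address of the calling thread's private copy of the
// variable, allocating it on first use.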

void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init the OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc,
      CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
                  VDAddr.getAlignment());
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate a function that emits the destructor call for the
      // threadprivate copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit the init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    // Copying constructor for the threadprivate variable.
    // Must be NULL: the parameter is reserved by the runtime, which currently
    // asserts that it is always NULL.
    CopyCtor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
    if (Ctor == nullptr) {
      Ctor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
    }
    if (Dtor == nullptr) {
      Dtor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
    }
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
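
// Net effect for a C++ threadprivate variable with a non-trivial initializer
// and destructor (a sketch; the generated symbol names vary):
//   void __omp_threadprivate_init_() {
//     __kmpc_global_thread_num(&loc);
//     __kmpc_threadprivate_register(&loc, &var, __kmpc_global_ctor_,
//                                   /*cctor=*/NULL, __kmpc_global_dtor_);
//   }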

void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
                                                llvm::GlobalValue *GV) {
  std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
      OMPDeclareTargetDeclAttr::getActiveAttr(FD);

  // We only need to handle active 'indirect' declare target functions.
  if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
    return;

  // Get a mangled name to store the new device global in.
  llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
      CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
  SmallString<128> Name;
  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);

  // We need to generate a new global to hold the address of the indirectly
  // called device function. Doing this allows us to keep the visibility and
  // linkage of the associated function unchanged while allowing the runtime to
  // access its value.
  llvm::GlobalValue *Addr = GV;
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    Addr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.VoidPtrTy,
        /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
        nullptr, llvm::GlobalValue::NotThreadLocal,
        CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
    Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
  }

  OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
      Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
      llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
      llvm::GlobalValue::WeakODRLinkage);
}

Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
      VarLVType, Twine(Name).concat(Suffix).str());
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      OMPBuilder.getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy,
          Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          CGF.Builder.getPtrTy(0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
}

void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}
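
// For example, '#pragma omp parallel if(n > 1)' funnels through this helper:
// ThenGen emits the forked __kmpc_fork_call path and ElseGen the serialized
// fallback (see emitParallelCall below); if 'n > 1' constant-folds, only the
// live arm is emitted.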

void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        OutlinedFn};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    RawAddress ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the
    // ones passed to __kmpc_fork_call, but the ones called in serialized
    // regions could be inlined. This is not perfect, but it is closer to the
    // invariant we want, namely, that every data environment starts with a
    // new function.
    // TODO: We should pass the if condition to the runtime function and do
    // the handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed as the first argument of the outlined
// function, as "kmp_int32 *gtid"). Otherwise, if we're in a regular serial
// code region, get the thread ID by calling kmp_int32
// __kmpc_global_thread_num(ident_t *loc), stash it in a temporary, and
// return the address of that temporary.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();

  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}
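
// For example, '#pragma omp critical(lck) hint(omp_sync_hint_contended)'
// brackets the region with
//   __kmpc_critical_with_hint(&loc, gtid, &lock_var, hint)
//   ... region ...
//   __kmpc_end_critical(&loc, gtid, &lock_var)
// where lock_var is the internal "gomp_critical_user_lck" variable created
// by getCriticalRegionLock ('lck' is an illustrative name).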

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
  return Address(Ptr, ElemTy, CGF.getContext().getDeclAlign(Var));
}

static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  CGF.Builder.getPtrTy(0)),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  CGF.Builder.getPtrTy(0)),
              ArgsElemType, CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build a function that copies private values from the single region to
    // all other threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
        SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.emitRawPointer(CGF),       // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
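
// For example, with '#pragma omp single copyprivate(a, b)' the thread that
// executes the single region stores {&a, &b} into the cpr_list, sets did_it
// to 1, and __kmpc_copyprivate then broadcasts the values to every other
// thread through the generated copy function ('a' and 'b' are illustrative).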

void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc),
                           getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In
  // that case, choose schedule(static, 1).
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
        cantFail(OMPBuilder.createBarrier(CGF.Builder, Kind, ForceSimpleCall,
                                          EmitChecks));
    CGF.Builder.restoreIP(AfterIP);
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id) or
  // __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}

void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
                                    Expr *ME, bool IsFatal) {
  llvm::Value *MVL =
      ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
         : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Build call void __kmpc_error(ident_t *loc, int severity, const char
  // *message)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*EmitLoc=*/true),
      llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
      CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_error),
                      Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}
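
// For example, 'schedule(dynamic, 4)' maps to OMP_sch_dynamic_chunked, while
// 'schedule(static)' on a loop that also carries an 'ordered' clause maps to
// OMP_ord_static.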

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // Only 'static' is allowed for dist_schedule.
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect
  // is as if the monotonic modifier is specified. Otherwise, unless the
  // monotonic modifier is specified, the effect is as if the nonmonotonic
  // modifier is specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}
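
// For example, under OpenMP >= 5.0 a plain 'schedule(dynamic)' loop picks up
// the nonmonotonic modifier by default, so the runtime sees
// OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic, while static and
// ordered schedules are left unmodified (effectively monotonic).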

void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
                      Args);
}

void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
}

static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.emitRawPointer(CGF),                    // &isLastIter
      Values.LB.emitRawPointer(CGF),                    // &LB
      Values.UB.emitRawPointer(CGF),                    // &UB
      Values.ST.emitRawPointer(CGF),                    // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc,
                         isOpenMPLoopDirective(DKind)
                             ? OMP_IDENT_WORK_LOOP
                             : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
                                             false);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}
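
// Illustrative IR for '#pragma omp for schedule(static)' with a 32-bit
// signed IV (a sketch; 34 is the runtime's encoding of OMP_sch_static):
//   call void @__kmpc_for_static_init_4(ptr @loc, i32 %gtid, i32 34,
//       ptr %is_last, ptr %lb, ptr %ub, ptr %stride, i32 1, i32 1)
// The runtime adjusts %lb/%ub in place to this thread's chunk of the
// iteration space.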

void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction;
  bool isGPUDistribute =
      CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU();
  StaticInitFunction = OMPBuilder.createForStaticInitFunction(
      Values.IVSize, Values.IVSigned, isGPUDistribute);

  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  assert((DKind == OMPD_distribute || DKind == OMPD_for ||
          DKind == OMPD_sections) &&
         "Expected distribute, for, or sections directive kind");
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind) ||
                                 (DKind == OMPD_target_teams_loop)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  if (isOpenMPDistributeDirective(DKind) &&
      CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU())
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
        Args);
  else
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                        Args);
}
2666
2667void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2668 SourceLocation Loc,
2669 unsigned IVSize,
2670 bool IVSigned) {
2671 if (!CGF.HaveInsertPoint())
2672 return;
2673 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2674 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2675 CGF.EmitRuntimeCall(callee: OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2676 args: Args);
2677}
2678
2679llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2680 SourceLocation Loc, unsigned IVSize,
2681 bool IVSigned, Address IL,
2682 Address LB, Address UB,
2683 Address ST) {
2684 // Call __kmpc_dispatch_next(
2685 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2686 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2687 // kmp_int[32|64] *p_stride);
2688 llvm::Value *Args[] = {
2689 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2690 IL.emitRawPointer(CGF), // &isLastIter
2691 LB.emitRawPointer(CGF), // &Lower
2692 UB.emitRawPointer(CGF), // &Upper
2693 ST.emitRawPointer(CGF) // &Stride
2694 };
2695 llvm::Value *Call = CGF.EmitRuntimeCall(
2696 callee: OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), args: Args);
2697 return CGF.EmitScalarConversion(
2698 Src: Call, SrcTy: CGF.getContext().getIntTypeForBitwidth(DestWidth: 32, /*Signed=*/1),
2699 DstTy: CGF.getContext().BoolTy, Loc);
2700}
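// The boolean returned here drives the chunk-fetch loop that the caller emits
// for dynamically scheduled worksharing loops; informally (a sketch, not the
// literal emitted IR, for the 32-bit signed case):
// \code
// while (__kmpc_dispatch_next_4(&loc, tid, &last, &lb, &ub, &st)) {
//   for (i = lb; i <= ub; i += st)
//     <loop body>;
// }
// \endcode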

void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}
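// For example, a directive such as
// \code
// #pragma omp parallel num_threads(n)
// \endcode
// is lowered so that __kmpc_push_num_threads(&loc, tid, n) runs immediately
// before the fork call for the parallel region, priming the runtime's
// thread-count request for that one region only.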

void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind),
                             /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}
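// Analogously to num_threads, e.g. '#pragma omp parallel proc_bind(close)'
// becomes a __kmpc_push_proc_bind call just before the fork, passing the
// integer encoding of llvm::omp::ProcBindKind so the runtime can apply the
// requested affinity policy to the upcoming team.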

void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
    return;

  llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
      [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
             const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
    SourceLocation Loc;
    if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
      for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                E = CGM.getContext().getSourceManager().fileinfo_end();
           I != E; ++I) {
        if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
            I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
          Loc = CGM.getContext().getSourceManager().translateFileLineCol(
              I->getFirst(), EntryInfo.Line, 1);
          break;
        }
      }
    }
    switch (Kind) {
    case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for target region in "
                                    "%0 is incorrect: either the "
                                    "address or the ID is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for declare target "
                                    "variable %0 is incorrect: the "
                                    "address is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Offloading entry for declare target variable is incorrect: the "
          "address is invalid.");
      CGM.getDiags().Report(DiagID);
    } break;
    }
  };

  OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
}

void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

namespace {
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
           !AA->getAllocator());
}
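// In source terms, a declaration counts as "allocatable" here when it carries
// an explicit allocator, e.g. (illustrative only):
// \code
// int buf[64];
// #pragma omp allocate(buf) allocator(omp_high_bw_mem_alloc)
// \endcode
// whereas a bare '#pragma omp allocate(buf)' using the default allocator with
// no allocator expression keeps the ordinary allocation path.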

static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    //       };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
                                                 E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t data1;
  //         kmp_cmplrdata_t data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .emitRawPointer(CGF)};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

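/// Emit a function that invokes the destructors of the task's private
/// variables, so the runtime can run cleanups when the task dies. A minimal
/// sketch of the generated helper, assuming a single destructible private 'p'
/// of class type T (the kmp_int32 result exists only to satisfy the
/// kmp_routine_entry_t signature):
/// \code
/// kmp_int32 .omp_task_destructor.(kmp_int32 gtid,
///                                 kmp_task_t_with_privates *tt) {
///   tt->privates.p.~T();
/// }
/// \endcode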
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy,
                             llvm::GlobalValue::InternalLinkage, Name,
                             &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamKind::Other);
  Args.push_back(&TaskPrivatesArg);
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamKind::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress().withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}

/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
///   // setup lastprivate flag
///   task_dst->last = lastpriv;
///   // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamKind::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                         ArrayRef<PrivateDataTy> Privates) {
  for (const PrivateDataTy &P : Privates) {
    if (P.second.isLocalPrivate())
      continue;
    QualType Ty = P.second.Original->getType().getNonReferenceType();
    if (Ty.isDestructedType())
      return true;
  }
  return false;
}

namespace {
/// Loop generator for OpenMP iterator expression.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
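// For a single iterator such as 'iterator(it = 0:n)', the scope above emits
// control flow roughly equivalent to (a sketch, not the literal IR):
// \code
// counter = 0;
// iter.cont:
//   if (counter < n) goto iter.body; else goto iter.exit;
// iter.body:
//   it = begin + counter * step;
//   ... code emitted between construction and destruction of the scope ...
//   counter = counter + 1;
//   goto iter.cont;
// iter.exit:
// \endcode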

static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress();
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
        /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

/// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags
/// type.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}
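// The record built above corresponds to a runtime-side layout of roughly:
// \code
// struct kmp_task_affinity_info_t {
//   intptr_t base_addr; // start address of the affine data
//   size_t len;         // its size in bytes
//   uint32_t flags;     // reserved flag bits (FlagsTy above)
// };
// \endcode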

CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy = CGF.Builder.getPtrTy(0);
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
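  // The flags combine bitwise; e.g. a tied task with a priority clause and
  // destructible privates ends up with
  // TiedFlag | DestructorsFlag | PriorityFlag == 0x29 before the 'final' bit
  // is OR'ed in below.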
3704 unsigned Flags = Data.Tied ? TiedFlag : 0;
3705 bool NeedsCleanup = false;
3706 if (!Privates.empty()) {
3707 NeedsCleanup =
3708 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3709 if (NeedsCleanup)
3710 Flags = Flags | DestructorsFlag;
3711 }
3712 if (Data.Priority.getInt())
3713 Flags = Flags | PriorityFlag;
3714 if (D.hasClausesOfKind<OMPDetachClause>())
3715 Flags = Flags | DetachableFlag;
3716 llvm::Value *TaskFlags =
3717 Data.Final.getPointer()
3718 ? CGF.Builder.CreateSelect(C: Data.Final.getPointer(),
3719 True: CGF.Builder.getInt32(C: FinalFlag),
3720 False: CGF.Builder.getInt32(/*C=*/0))
3721 : CGF.Builder.getInt32(C: Data.Final.getInt() ? FinalFlag : 0);
3722 TaskFlags = CGF.Builder.CreateOr(LHS: TaskFlags, RHS: CGF.Builder.getInt32(C: Flags));
3723 llvm::Value *SharedsSize = CGM.getSize(numChars: C.getTypeSizeInChars(T: SharedsTy));
3724 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3725 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3726 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3727 V: TaskEntry, DestTy: KmpRoutineEntryPtrTy)};
3728 llvm::Value *NewTask;
3729 if (D.hasClausesOfKind<OMPNowaitClause>()) {
3730 // Check if we have any device clause associated with the directive.
3731 const Expr *Device = nullptr;
3732 if (auto *C = D.getSingleClause<OMPDeviceClause>())
3733 Device = C->getDevice();
3734 // Emit device ID if any otherwise use default value.
3735 llvm::Value *DeviceID;
3736 if (Device)
3737 DeviceID = CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: Device),
3738 DestTy: CGF.Int64Ty, /*isSigned=*/true);
3739 else
3740 DeviceID = CGF.Builder.getInt64(C: OMP_DEVICEID_UNDEF);
3741 AllocArgs.push_back(Elt: DeviceID);
3742 NewTask = CGF.EmitRuntimeCall(
3743 callee: OMPBuilder.getOrCreateRuntimeFunction(
3744 M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_target_task_alloc),
3745 args: AllocArgs);
3746 } else {
3747 NewTask =
3748 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
3749 M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_task_alloc),
3750 args: AllocArgs);
3751 }
3752 // Emit detach clause initialization.
3753 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3754 // task_descriptor);
3755 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3756 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3757 LValue EvtLVal = CGF.EmitLValue(E: Evt);
3758
3759 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3760 // int gtid, kmp_task_t *task);
3761 llvm::Value *Loc = emitUpdateLocation(CGF, Loc: DC->getBeginLoc());
3762 llvm::Value *Tid = getThreadID(CGF, Loc: DC->getBeginLoc());
3763 Tid = CGF.Builder.CreateIntCast(V: Tid, DestTy: CGF.IntTy, /*isSigned=*/false);
3764 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3765 callee: OMPBuilder.getOrCreateRuntimeFunction(
3766 M&: CGM.getModule(), FnID: OMPRTL___kmpc_task_allow_completion_event),
3767 args: {Loc, Tid, NewTask});
3768 EvtVal = CGF.EmitScalarConversion(Src: EvtVal, SrcTy: C.VoidPtrTy, DstTy: Evt->getType(),
3769 Loc: Evt->getExprLoc());
3770 CGF.EmitStoreOfScalar(value: EvtVal, lvalue: EvtLVal);
3771 }
3772 // Process affinity clauses.
3773 if (D.hasClausesOfKind<OMPAffinityClause>()) {
3774 // Process list of affinity data.
3775 ASTContext &C = CGM.getContext();
3776 Address AffinitiesArray = Address::invalid();
3777 // Calculate number of elements to form the array of affinity data.
3778 llvm::Value *NumOfElements = nullptr;
3779 unsigned NumAffinities = 0;
3780 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3781 if (const Expr *Modifier = C->getModifier()) {
3782 const auto *IE = cast<OMPIteratorExpr>(Val: Modifier->IgnoreParenImpCasts());
3783 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3784 llvm::Value *Sz = CGF.EmitScalarExpr(E: IE->getHelper(I).Upper);
3785 Sz = CGF.Builder.CreateIntCast(V: Sz, DestTy: CGF.SizeTy, /*isSigned=*/false);
3786 NumOfElements =
3787 NumOfElements ? CGF.Builder.CreateNUWMul(LHS: NumOfElements, RHS: Sz) : Sz;
3788 }
3789 } else {
3790 NumAffinities += C->varlist_size();
3791 }
3792 }
3793 getKmpAffinityType(C&: CGM.getContext(), KmpTaskAffinityInfoTy);
3794 // Fields ids in kmp_task_affinity_info record.
3795 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3796
3797 QualType KmpTaskAffinityInfoArrayTy;
3798 if (NumOfElements) {
3799 NumOfElements = CGF.Builder.CreateNUWAdd(
3800 LHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: NumAffinities), RHS: NumOfElements);
3801 auto *OVE = new (C) OpaqueValueExpr(
3802 Loc,
3803 C.getIntTypeForBitwidth(DestWidth: C.getTypeSize(T: C.getSizeType()), /*Signed=*/0),
3804 VK_PRValue);
3805 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3806 RValue::get(V: NumOfElements));
3807 KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
3808 EltTy: KmpTaskAffinityInfoTy, NumElts: OVE, ASM: ArraySizeModifier::Normal,
3809 /*IndexTypeQuals=*/0);
3810 // Properly emit variable-sized array.
3811 auto *PD = ImplicitParamDecl::Create(C, T: KmpTaskAffinityInfoArrayTy,
3812 ParamKind: ImplicitParamKind::Other);
3813 CGF.EmitVarDecl(D: *PD);
3814 AffinitiesArray = CGF.GetAddrOfLocalVar(VD: PD);
3815 NumOfElements = CGF.Builder.CreateIntCast(V: NumOfElements, DestTy: CGF.Int32Ty,
3816 /*isSigned=*/false);
3817 } else {
3818 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
3819 EltTy: KmpTaskAffinityInfoTy,
3820 ArySize: llvm::APInt(C.getTypeSize(T: C.getSizeType()), NumAffinities), SizeExpr: nullptr,
3821 ASM: ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
3822 AffinitiesArray =
3823 CGF.CreateMemTemp(T: KmpTaskAffinityInfoArrayTy, Name: ".affs.arr.addr");
3824 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(Addr: AffinitiesArray, Index: 0);
3825 NumOfElements = llvm::ConstantInt::get(Ty: CGM.Int32Ty, V: NumAffinities,
3826 /*isSigned=*/IsSigned: false);
3827 }
3828
3829 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
3830 // Fill array by elements without iterators.
3831 unsigned Pos = 0;
3832 bool HasIterator = false;
3833 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3834 if (C->getModifier()) {
3835 HasIterator = true;
3836 continue;
3837 }
3838 for (const Expr *E : C->varlist()) {
3839 llvm::Value *Addr;
3840 llvm::Value *Size;
3841 std::tie(args&: Addr, args&: Size) = getPointerAndSize(CGF, E);
3842 LValue Base =
3843 CGF.MakeAddrLValue(Addr: CGF.Builder.CreateConstGEP(Addr: AffinitiesArray, Index: Pos),
3844 T: KmpTaskAffinityInfoTy);
3845 // affs[i].base_addr = &<Affinities[i].second>;
3846 LValue BaseAddrLVal = CGF.EmitLValueForField(
3847 Base, Field: *std::next(x: KmpAffinityInfoRD->field_begin(), n: BaseAddr));
3848 CGF.EmitStoreOfScalar(value: CGF.Builder.CreatePtrToInt(V: Addr, DestTy: CGF.IntPtrTy),
3849 lvalue: BaseAddrLVal);
3850 // affs[i].len = sizeof(<Affinities[i].second>);
3851 LValue LenLVal = CGF.EmitLValueForField(
3852 Base, Field: *std::next(x: KmpAffinityInfoRD->field_begin(), n: Len));
3853 CGF.EmitStoreOfScalar(value: Size, lvalue: LenLVal);
3854 ++Pos;
3855 }
3856 }
3857 LValue PosLVal;
3858 if (HasIterator) {
3859 PosLVal = CGF.MakeAddrLValue(
3860 Addr: CGF.CreateMemTemp(T: C.getSizeType(), Name: "affs.counter.addr"),
3861 T: C.getSizeType());
3862 CGF.EmitStoreOfScalar(value: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: Pos), lvalue: PosLVal);
3863 }
3864 // Process elements with iterators.
3865 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3866 const Expr *Modifier = C->getModifier();
3867 if (!Modifier)
3868 continue;
3869 OMPIteratorGeneratorScope IteratorScope(
3870 CGF, cast_or_null<OMPIteratorExpr>(Val: Modifier->IgnoreParenImpCasts()));
3871 for (const Expr *E : C->varlist()) {
3872 llvm::Value *Addr;
3873 llvm::Value *Size;
3874 std::tie(args&: Addr, args&: Size) = getPointerAndSize(CGF, E);
3875 llvm::Value *Idx = CGF.EmitLoadOfScalar(lvalue: PosLVal, Loc: E->getExprLoc());
3876 LValue Base =
3877 CGF.MakeAddrLValue(Addr: CGF.Builder.CreateGEP(CGF, Addr: AffinitiesArray, Index: Idx),
3878 T: KmpTaskAffinityInfoTy);
3879 // affs[i].base_addr = &<Affinities[i].second>;
3880 LValue BaseAddrLVal = CGF.EmitLValueForField(
3881 Base, Field: *std::next(x: KmpAffinityInfoRD->field_begin(), n: BaseAddr));
3882 CGF.EmitStoreOfScalar(value: CGF.Builder.CreatePtrToInt(V: Addr, DestTy: CGF.IntPtrTy),
3883 lvalue: BaseAddrLVal);
3884 // affs[i].len = sizeof(<Affinities[i].second>);
3885 LValue LenLVal = CGF.EmitLValueForField(
3886 Base, Field: *std::next(x: KmpAffinityInfoRD->field_begin(), n: Len));
3887 CGF.EmitStoreOfScalar(value: Size, lvalue: LenLVal);
3888 Idx = CGF.Builder.CreateNUWAdd(
3889 LHS: Idx, RHS: llvm::ConstantInt::get(Ty: Idx->getType(), V: 1));
3890 CGF.EmitStoreOfScalar(value: Idx, lvalue: PosLVal);
3891 }
3892 }
3893 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
3894 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
3895 // naffins, kmp_task_affinity_info_t *affin_list);
3896 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
3897 llvm::Value *GTid = getThreadID(CGF, Loc);
3898 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3899 V: AffinitiesArray.emitRawPointer(CGF), DestTy: CGM.VoidPtrTy);
3900 // FIXME: Emit the function and ignore its result for now unless the
3901 // runtime function is properly implemented.
3902 (void)CGF.EmitRuntimeCall(
3903 callee: OMPBuilder.getOrCreateRuntimeFunction(
3904 M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_reg_task_with_affinity),
3905 args: {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
3906 }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
                                                  KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
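  // For reference, the runtime-side union is roughly (per kmp.h; the field
  // order matches the Priority/Destructors indices above):
  //   typedef union kmp_cmplrdata {
  //     kmp_int32 priority;              // priority specified by the user
  //     kmp_routine_entry_t destructors; // pointer to destructors function
  //   } kmp_cmplrdata_t;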
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = RTLDependenceKindTy::DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = RTLDependenceKindTy::DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = RTLDependenceKindTy::DepMutexInOutSet;
    break;
  case OMPC_DEPEND_inoutset:
    DepKind = RTLDependenceKindTy::DepInOutSet;
    break;
  case OMPC_DEPEND_outallmemory:
    DepKind = RTLDependenceKindTy::DepOmpAllMem;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_inoutallmemory:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}
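// Note that 'depend(out: x)' and 'depend(inout: x)' deliberately collapse to
// the single runtime kind DepInOut; the runtime does not distinguish them.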

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}
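// The implicit record built above mirrors the runtime's type, roughly
// (see kmp.h; the exact flags bitfield layout is an assumption of this
// sketch):
//   typedef struct kmp_depend_info {
//     kmp_intptr_t base_addr;
//     size_t len;
//     struct { bool in : 1; bool out : 1; bool mtx : 1; /* ... */ } flags;
//   } kmp_depend_info_t;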

std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress().withElementType(
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
  Address DepObjAddr = CGF.Builder.CreateGEP(
      CGF, Base.getAddress(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[-1].base_addr; the element count is stashed in the
  // base_addr field of the entry just before the dependency list.
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;

    // The expression will be a nullptr in the 'omp_all_memory' case.
    if (E) {
      std::tie(Addr, Size) = getPointerAndSize(CGF, E);
      Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
    } else {
      Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
      Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
    }
    LValue Base;
    if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      assert(E && "Expected a non-null expression");
      LValue &PosLVal = *cast<LValue *>(Pos);
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
    CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(),
                         static_cast<unsigned int>(RTLDependInfoFields::Len)));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::Flags)));
    CGF.EmitStoreOfScalar(
        llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
        FlagsLVal);
    if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
      ++(*P);
    } else {
      LValue &PosLVal = *cast<LValue *>(Pos);
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
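// A plain 'depend(in: x)' reaches emitDependData() through the unsigned* arm
// of Pos (the slot index is a compile-time constant), while
// 'depend(iterator(i=0:n), in: a[i])' uses the LValue* arm, since the slot
// index is only known at run time.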

SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
    CodeGenFunction &CGF, QualType &KmpDependInfoTy,
    const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress());
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
                                         QualType &KmpDependInfoTy,
                                         LValue PosLVal,
                                         const OMPTaskDataTy::DependData &Data,
                                         Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());

      // Memcpy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);

      // Increase pos; the counter is in elements, not bytes:
      // pos += numDeps;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}

std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      llvm::Value *ClauseIteratorSpace =
          llvm::ConstantInt::get(CGF.IntPtrTy, 1);
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);
      }
      llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
          ClauseIteratorSpace,
          llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
      NumOfRegularWithIterators =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
                               /*IndexTypeQuals=*/0);
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamKind::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  unsigned Pos = 0;
  for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
    if (Dep.DepKind == OMPC_DEPEND_depobj || Dep.IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dep, DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
    if (Dep.DepKind == OMPC_DEPEND_depobj || !Dep.IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dep, DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
      if (Dep.DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dep, DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
  return std::make_pair(NumOfElements, DependenciesArray);
}
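// E.g. for 'depend(in: a) depend(iterator(i=0:4), out: b[i]) depend(depobj: d)'
// the array is filled in the order of the three loops above:
// [a][b[0]..b[3]][contents of d].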

Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj, reserve one extra element to store the number of elements.
  // This is required to handle the depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.Builder.getPtrTy(0));
  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[0].base_addr = <num_deps>;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
      CGF.Int8Ty);
  return DependenciesArray;
}
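// The returned address points at deps[1]: deps[0].base_addr holds the element
// count, which getDepobjElements() later reads back via a GEP of -1.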

void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
                                            C.VoidPtrTy.castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
      CGF.ConvertTypeForMem(KmpDependInfoTy));
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.emitRawPointer(CGF),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // __kmpc_free(gtid, &deps[-1], nullptr); the allocation starts one element
  // before the stored depobj pointer, at the hidden size element.
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}

void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
                                           Begin.emitRawPointer(CGF), NumDeps);
  // The basic structure here is a do-while loop: a depobj always carries at
  // least one dependency, so the body runs before the exit test.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
  Begin = Begin.withPointer(ElementPHI, KnownNonNull);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(),
                       static_cast<unsigned int>(RTLDependInfoFields::Flags)));
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
      FlagsLVal);

  // Shift the address forward by one element.
  llvm::Value *ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
          .emitRawPointer(CGF);
  ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by the
  // __kmpc_omp_task_alloc() libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if the
  // dependence list is not empty.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = {UpLoc, ThreadID, NewTask};
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if the parent region is untied and build the return for an untied
    // task.
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
    DepWaitTaskArgs[6] =
        llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
  }
  auto &M = CGM.getModule();
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
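// In effect, a task with a false 'if' clause is undeferred: the else branch
// above first waits for the task's dependences, then runs the task entry
// inline between __kmpc_omp_task_begin_if0 and __kmpc_omp_task_complete_if0.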

void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by the
  // __kmpc_omp_task_alloc() libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::SmallVector<llvm::Value *, 12> TaskArgs{
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the
                         // compiler.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0)};
  if (Data.HasModifier)
    TaskArgs.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 1));

  TaskArgs.push_back(Result.TaskDupFn
                         ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                               Result.TaskDupFn, CGF.VoidPtrTy)
                         : llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), Data.HasModifier
                                               ? OMPRTL___kmpc_taskloop_5
                                               : OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
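// The 'sched' argument encodes the schedule clause per the enum above:
// 0 = no clause, 1 = grainsize, 2 = num_tasks; the following kmp_uint64
// argument carries the clause's value (0 when absent).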

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
  llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, LHSElementCurrent);
  Scope.addPrivate(RHSVar, RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit reduction combiner. If the combiner is a simple expression, emit it as
/// is; otherwise treat it as the combiner of a UDR decl and emit it as a call
/// to the UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}

llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getReductionFuncName(ReducerName);
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  CGF.Builder.getPtrTy(0)),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  CGF.Builder.getPtrTy(0)),
              ArgsElemType, CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
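// Note: for a variably modified private, the reduction list carries an extra
// slot with the element count (stored by emitReduction() below); the loop
// above consumes it by binding the VLA's size OpaqueValueExpr before
// re-emitting the VLA type.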

void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

static std::string generateUniqueName(CodeGenModule &CGM,
                                      llvm::StringRef Prefix, const Expr *Ref);

void CGOpenMPRuntime::emitPrivateReduction(
    CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates,
    const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps) {

  // Create a shared global variable (__shared_reduction_var) to accumulate the
  // final result.
  //
  // Call __kmpc_barrier to synchronize threads before initialization.
  //
  // The master thread (thread_id == 0) initializes __shared_reduction_var
  // with the identity value or initializer.
  //
  // Call __kmpc_barrier to synchronize before combining.
  // For each i:
  // - Thread enters critical section.
  // - Reads its private value from LHSExprs[i].
  // - Updates __shared_reduction_var[i] = RedOp_i(__shared_reduction_var[i],
  //   Privates[i]).
  // - Exits critical section.
  //
  // Call __kmpc_barrier after combining.
  //
  // Each thread copies __shared_reduction_var[i] back to RHSExprs[i].
  //
  // Final __kmpc_barrier to synchronize after broadcasting.
  QualType PrivateType = Privates->getType();
  llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);

  const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOps);
  std::string ReductionVarNameStr;
  if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates->IgnoreParenCasts()))
    ReductionVarNameStr =
        generateUniqueName(CGM, DRE->getDecl()->getNameAsString(), Privates);
  else
    ReductionVarNameStr = "unnamed_priv_var";

  // Create an internal shared variable.
  std::string SharedName = CGM.getOpenMPRuntime().getName(
      {"internal_private_", ReductionVarNameStr});
  llvm::GlobalVariable *SharedVar = OMPBuilder.getOrCreateInternalVariable(
      LLVMType, ".omp.reduction." + SharedName);

  SharedVar->setAlignment(
      llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));

  Address SharedResult =
      CGF.MakeNaturalAlignRawAddrLValue(SharedVar, PrivateType).getAddress();

  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};

  llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
  llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");

  llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
      ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
  CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);

  CGF.EmitBlock(InitBB);

  auto EmitSharedInit = [&]() {
    if (UDR) { // Check if it's a User-Defined Reduction.
      if (const Expr *UDRInitExpr = UDR->getInitializer()) {
        std::pair<llvm::Function *, llvm::Function *> FnPair =
            getUserDefinedReduction(UDR);
        llvm::Function *InitializerFn = FnPair.second;
        if (InitializerFn) {
          if (const auto *CE =
                  dyn_cast<CallExpr>(UDRInitExpr->IgnoreParenImpCasts())) {
            const auto *OutDRE = cast<DeclRefExpr>(
                cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
                    ->getSubExpr());
            const VarDecl *OutVD = cast<VarDecl>(OutDRE->getDecl());

            CodeGenFunction::OMPPrivateScope LocalScope(CGF);
            LocalScope.addPrivate(OutVD, SharedResult);

            (void)LocalScope.Privatize();
            if (const auto *OVE = dyn_cast<OpaqueValueExpr>(
                    CE->getCallee()->IgnoreParenImpCasts())) {
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  CGF, OVE, RValue::get(InitializerFn));
              CGF.EmitIgnoredExpr(CE);
            } else {
              CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
                                   PrivateType.getQualifiers(),
                                   /*IsInitializer=*/true);
            }
          } else {
            CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
                                 PrivateType.getQualifiers(),
                                 /*IsInitializer=*/true);
          }
        } else {
          CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
                               PrivateType.getQualifiers(),
                               /*IsInitializer=*/true);
        }
      } else {
        // EmitNullInitialization handles default construction for C++ classes
        // and zeroing for scalars, which is a reasonable default.
        CGF.EmitNullInitialization(SharedResult, PrivateType);
      }
      return; // UDR initialization handled.
    }
    if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates)) {
      if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
        if (const Expr *InitExpr = VD->getInit()) {
          CGF.EmitAnyExprToMem(InitExpr, SharedResult,
                               PrivateType.getQualifiers(),
                               /*IsInitializer=*/true);
          return;
        }
      }
    }
    CGF.EmitNullInitialization(SharedResult, PrivateType);
  };
  EmitSharedInit();
  CGF.Builder.CreateBr(InitEndBB);
  CGF.EmitBlock(InitEndBB);

  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      BarrierArgs);

  const Expr *ReductionOp = ReductionOps;
  const OMPDeclareReductionDecl *CurrentUDR = getReductionInit(ReductionOp);
  LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
  LValue LHSLV = CGF.EmitLValue(Privates);

  auto EmitCriticalReduction = [&](auto ReductionGen) {
    std::string CriticalName = getName({"reduction_critical"});
    emitCriticalRegion(CGF, CriticalName, ReductionGen, Loc);
  };

  if (CurrentUDR) {
    // Handle user-defined reduction.
    auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      std::pair<llvm::Function *, llvm::Function *> FnPair =
          getUserDefinedReduction(CurrentUDR);
      if (FnPair.first) {
        if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) {
          const auto *OutDRE = cast<DeclRefExpr>(
              cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
                  ->getSubExpr());
          const auto *InDRE = cast<DeclRefExpr>(
              cast<UnaryOperator>(CE->getArg(1)->IgnoreParenImpCasts())
                  ->getSubExpr());
          CodeGenFunction::OMPPrivateScope LocalScope(CGF);
          LocalScope.addPrivate(cast<VarDecl>(OutDRE->getDecl()),
                                SharedLV.getAddress());
          LocalScope.addPrivate(cast<VarDecl>(InDRE->getDecl()),
                                LHSLV.getAddress());
          (void)LocalScope.Privatize();
          emitReductionCombiner(CGF, ReductionOp);
        }
      }
    };
    EmitCriticalReduction(ReductionGen);
  } else {
    // Handle built-in reduction operations.
#ifndef NDEBUG
    const Expr *ReductionClauseExpr = ReductionOp->IgnoreParenCasts();
    if (const auto *Cleanup = dyn_cast<ExprWithCleanups>(ReductionClauseExpr))
      ReductionClauseExpr = Cleanup->getSubExpr()->IgnoreParenCasts();

    const Expr *AssignRHS = nullptr;
    if (const auto *BinOp = dyn_cast<BinaryOperator>(ReductionClauseExpr)) {
      if (BinOp->getOpcode() == BO_Assign)
        AssignRHS = BinOp->getRHS();
    } else if (const auto *OpCall =
                   dyn_cast<CXXOperatorCallExpr>(ReductionClauseExpr)) {
      if (OpCall->getOperator() == OO_Equal)
        AssignRHS = OpCall->getArg(1);
    }

    assert(AssignRHS &&
           "Private Variable Reduction : Invalid ReductionOp expression");
#endif

    auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      const auto *OmpOutDRE =
          dyn_cast<DeclRefExpr>(LHSExprs->IgnoreParenImpCasts());
      const auto *OmpInDRE =
          dyn_cast<DeclRefExpr>(RHSExprs->IgnoreParenImpCasts());
      assert(
          OmpOutDRE && OmpInDRE &&
          "Private Variable Reduction : LHSExpr/RHSExpr must be DeclRefExprs");
      const VarDecl *OmpOutVD = cast<VarDecl>(OmpOutDRE->getDecl());
      const VarDecl *OmpInVD = cast<VarDecl>(OmpInDRE->getDecl());
      CodeGenFunction::OMPPrivateScope LocalScope(CGF);
      LocalScope.addPrivate(OmpOutVD, SharedLV.getAddress());
      LocalScope.addPrivate(OmpInVD, LHSLV.getAddress());
      (void)LocalScope.Privatize();
      // Emit the actual reduction operation.
      CGF.EmitIgnoredExpr(ReductionOp);
    };
    EmitCriticalReduction(ReductionGen);
  }

  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      BarrierArgs);

  // Broadcast the final result.
  bool IsAggregate = PrivateType->isAggregateType();
  LValue SharedLV1 = CGF.MakeAddrLValue(SharedResult, PrivateType);
  llvm::Value *FinalResultVal = nullptr;
  Address FinalResultAddr = Address::invalid();

  if (IsAggregate)
    FinalResultAddr = SharedResult;
  else
    FinalResultVal = CGF.EmitLoadOfScalar(SharedLV1, Loc);

  LValue TargetLHSLV = CGF.EmitLValue(RHSExprs);
  if (IsAggregate) {
    CGF.EmitAggregateCopy(TargetLHSLV,
                          CGF.MakeAddrLValue(FinalResultAddr, PrivateType),
                          PrivateType, AggValueSlot::DoesNotOverlap,
                          /*isVolatile=*/false);
  } else {
    CGF.EmitStoreOfScalar(FinalResultVal, TargetLHSLV);
  }
  // Final synchronization barrier.
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      BarrierArgs);

  // Combiner with original list item.
  auto OriginalListCombiner = [&](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitSingleReductionCombiner(CGF, ReductionOps, Privates,
                                cast<DeclRefExpr>(LHSExprs),
                                cast<DeclRefExpr>(RHSExprs));
  };
  EmitCriticalReduction(OriginalListCombiner);
}

void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> OrgPrivates,
                                    ArrayRef<const Expr *> OrgLHSExprs,
                                    ArrayRef<const Expr *> OrgRHSExprs,
                                    ArrayRef<const Expr *> OrgReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // The following code is emitted for a reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //   *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //   ...
  //   *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //                                                     *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  //                                RedList, reduce_func, &<lock>)) {
  // case 1:
  //   ...
  //   <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //   ...
  //   __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //   break;
  // case 2:
  //   ...
  //   Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //   ...
  //   [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  //   break;
  // default:;
  // }
  //
  // If SimpleReduction is true, only the following code is generated:
  // ...
  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  // ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = OrgPrivates.begin();
    const auto *ILHS = OrgLHSExprs.begin();
    const auto *IRHS = OrgRHSExprs.begin();
    for (const Expr *E : OrgReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // Filter out shared reduction variables based on the IsPrivateVarReduction
  // flag. Only keep entries where the corresponding variable is not private.
  SmallVector<const Expr *> FilteredPrivates, FilteredLHSExprs,
      FilteredRHSExprs, FilteredReductionOps;
  for (unsigned I : llvm::seq<unsigned>(
           std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
    if (!Options.IsPrivateVarReduction[I]) {
      FilteredPrivates.emplace_back(OrgPrivates[I]);
      FilteredLHSExprs.emplace_back(OrgLHSExprs[I]);
      FilteredRHSExprs.emplace_back(OrgRHSExprs[I]);
      FilteredReductionOps.emplace_back(OrgReductionOps[I]);
    }
  }
  // Wrap filtered vectors in ArrayRef for downstream shared reduction
  // processing.
  ArrayRef<const Expr *> Privates = FilteredPrivates;
  ArrayRef<const Expr *> LHSExprs = FilteredLHSExprs;
  ArrayRef<const Expr *> RHSExprs = FilteredRHSExprs;
  ArrayRef<const Expr *> ReductionOps = FilteredReductionOps;

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*numBits=*/32, Size);
  QualType ReductionArrayTy = C.getConstantArrayType(
      C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
      /*IndexTypeQuals=*/0);
  RawAddress ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
      Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //   ...
  //   <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //   ...
  //   __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //   break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, {},
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //   ...
  //   Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //   ...
  //   break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                CGF.emitOMPSimpleStore(
                    CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                    VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(VD, LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, {},
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
  assert(OrgLHSExprs.size() == OrgPrivates.size() &&
         "PrivateVarReduction: Privates size mismatch");
  assert(OrgLHSExprs.size() == OrgReductionOps.size() &&
         "PrivateVarReduction: ReductionOps size mismatch");
  for (unsigned I : llvm::seq<unsigned>(
           std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
    if (Options.IsPrivateVarReduction[I])
      emitPrivateReduction(CGF, Loc, OrgPrivates[I], OrgLHSExprs[I],
                           OrgRHSExprs[I], OrgReductionOps[I]);
  }
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}
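// For example, assuming a local variable 'x' and prefix "reduction_size", the
// generated name has the shape "reduction_size.x_<raw-loc>"; the exact
// separators and the raw source-location value depend on getName() and the
// translation unit, so this is illustrative only.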

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamKind::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamKind::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  QualType PrivateType = RCG.getPrivateType(N);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param).withElementType(CGF.Builder.getPtrTy(0)),
      C.getPointerType(PrivateType)->castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If the initializer uses the initializer from the 'declare reduction'
  // construct, emit a pointer to the address of the original reduction item
  // (required by the reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(
      LHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.GetAddrOfLocalVar(&ParamInOut)
              .withElementType(CGF.Builder.getPtrTy(0)),
          C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
  PrivateScope.addPrivate(
      RHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
              CGF.Builder.getPtrTy(0)),
          C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamKind::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
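  //
  // A sketch of the construct this serves (hypothetical source, for
  // illustration): for
  //   #pragma omp taskgroup task_reduction(+ : x)
  // one such record is filled in per reduction item below, and the whole
  // .rd_input. array is handed to __kmpc_taskred_init or
  // __kmpc_taskred_modifier_init.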
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType =
      C.getConstantArrayType(RDType, ArraySize, nullptr,
                             ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(Shared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(Orig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It
    // is required because the runtime does not provide a way to pass the
    // sizes of VLAs/array sections to the initializer/combiner/finalizer
    // functions. Instead, threadprivate global variables are used to store
    // these values and make them available to those functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr =
        Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid,
    // int is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int
  // gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit a threadprivate global variable if the size is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}

Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg,
  // void *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      CGF.Int8Ty, SharedLVal.getAlignment());
}

void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    if (!Data.Dependences.empty()) {
      llvm::Value *DepWaitTaskArgs[7];
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
      DepWaitTaskArgs[6] =
          llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
      // kmp_int32 has_no_wait) if dependence info is specified.
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
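// For illustration (hypothetical source): a bare '#pragma omp taskwait' takes
// the __kmpc_omp_taskwait (or OpenMPIRBuilder) path above, while
// '#pragma omp taskwait depend(in : x)' goes through
// __kmpc_omp_taskwait_deps_51 with ndeps == 1.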

void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
                                 InnerKind != OMPD_critical &&
                                     InnerKind != OMPD_master &&
                                     InnerKind != OMPD_masked);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}
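// For example, '#pragma omp cancel for' yields CancelLoop (2) and
// '#pragma omp cancel taskgroup' yields CancelTaskgroup (4); the numeric
// values match the cancellation kinds expected by the KMP runtime entry
// points used below.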

void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

namespace {
/// Cleanup action for uses_allocators support.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace

void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target entry parent name!");
  HasEmittedTargetRegion = true;
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
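// For illustration (hypothetical source): given
//   #pragma omp target uses_allocators(my_alloc(my_traits))
// the action registered above runs emitUsesAllocatorsInit for 'my_alloc' on
// entry to the target region and emitUsesAllocatorsFini on exit.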

void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits = Addr.emitRawPointer(CGF);

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  CGF.EmitAutoVarAlloca(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}

void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}

void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
    const OMPExecutableDirective &D, CodeGenFunction &CGF,
    llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
  assert(Attrs.MaxTeams.size() == 1 && Attrs.MaxThreads.size() == 1 &&
         "invalid default attrs structure");
  int32_t &MaxTeamsVal = Attrs.MaxTeams.front();
  int32_t &MaxThreadsVal = Attrs.MaxThreads.front();

  getNumTeamsExprForTargetDirective(CGF, D, Attrs.MinTeams, MaxTeamsVal);
  getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
                                      /*UpperBoundOnly=*/true);

  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
      int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
      if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
        CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
                                       &AttrMinBlocksVal, &AttrMaxBlocksVal);
      else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
        CGM.handleAMDGPUFlatWorkGroupSizeAttr(
            nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
            &AttrMaxThreadsVal);
      else
        continue;

      Attrs.MinThreads = std::max(Attrs.MinThreads, AttrMinThreadsVal);
      if (AttrMaxThreadsVal > 0)
        MaxThreadsVal = MaxThreadsVal > 0
                            ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
                            : AttrMaxThreadsVal;
      Attrs.MinTeams = std::max(Attrs.MinTeams, AttrMinBlocksVal);
      if (AttrMaxBlocksVal > 0)
        MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
                                      : AttrMaxBlocksVal;
    }
  }
}
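// For example (hypothetical values): with MaxThreadsVal initially -1, a
// CUDALaunchBoundsAttr of 128 sets MaxThreadsVal to 128; a second attribute of
// 256 would then leave std::min(128, 256) == 128 in place.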

void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {

  llvm::TargetRegionEntryInfo EntryInfo =
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);

  CodeGenFunction CGF(CGM, true);
  llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
      [&CGF, &D, &CodeGen](StringRef EntryFnName) {
        const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

        CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
      };

  cantFail(OMPBuilder.emitTargetRegionFunction(
      EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
      OutlinedFnID));

  if (!OutlinedFn)
    return;

  CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);

  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
        CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
    }
  }
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr *E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
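// For example (illustrative): for a captured body '{ ; asm(""); int Unused; S; }'
// the null statement, the asm statement and the unused local are all skipped
// and S is returned, whereas a body with two non-ignorable statements yields
// nullptr.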

const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()
                                     ->getNumTeams()
                                     .front();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
          return NumTeams;
        }
        MinTeamsVal = MaxTeamsVal = 0;
        return nullptr;
      }
      MinTeamsVal = MaxTeamsVal = 1;
      return nullptr;
    }
    // A value of -1 is used to signal that whether a teams region must be
    // emitted still needs to be checked later.
    MinTeamsVal = MaxTeamsVal = -1;
    return nullptr;
  }
  case OMPD_target_teams_loop:
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams().front();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
      return NumTeams;
    }
    MinTeamsVal = MaxTeamsVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_parallel_loop:
  case OMPD_target_simd:
    MinTeamsVal = MaxTeamsVal = 1;
    return nullptr;
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_loop:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
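// For example (hypothetical directive): '#pragma omp target teams num_teams(8)'
// returns the num_teams expression and sets MinTeamsVal == MaxTeamsVal == 8,
// while a bare '#pragma omp target parallel' sets both to 1 and returns
// nullptr.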

llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t MinNT = -1, MaxNT = -1;
  const Expr *NumTeams =
      getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    default:
      break;
    }
  }

  assert(MinNT == MaxNT && "Num teams ranges require handling here.");
  return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
}

/// Check for a num threads constant value (stored in \p UpperBound), or
/// expression (stored in \p E). If the value is conditional (via an if-clause),
/// store the condition in \p CondVal. If \p E and \p CondVal, respectively, are
/// nullptr, no expression evaluation is performed.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                          const Expr **E, int32_t &UpperBound,
                          bool UpperBoundOnly, llvm::Value **CondVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
  if (!Dir)
    return;

  if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
    // Handle the if clause. If an if clause is present, the number of threads
    // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
    if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *CondExpr = IfClause->getCondition();
        bool Result;
        if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result) {
            UpperBound = 1;
            return;
          }
        } else {
          CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
            *CondVal = CGF.EvaluateExprAsBool(CondExpr);
          }
        }
      }
    }
    // Check the value of the num_threads clause only if the if clause was not
    // specified or did not evaluate to false.
    if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const auto *NumThreadsClause =
          Dir->getSingleClause<OMPNumThreadsClause>();
      const Expr *NTExpr = NumThreadsClause->getNumThreads();
      if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
          UpperBound =
              UpperBound
                  ? Constant->getZExtValue()
                  : std::min(UpperBound,
                             static_cast<int32_t>(Constant->getZExtValue()));
      // If we haven't found an upper bound, remember we saw a thread limiting
      // clause.
      if (UpperBound == -1)
        UpperBound = 0;
      if (!E)
        return;
      CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
      if (const auto *PreInit =
              cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
        for (const auto *I : PreInit->decls()) {
          if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
            CGF.EmitVarDecl(cast<VarDecl>(*I));
          } else {
            CodeGenFunction::AutoVarEmission Emission =
                CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
            CGF.EmitAutoVarCleanups(Emission);
          }
        }
      }
      *E = NTExpr;
    }
    return;
  }
  if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
    UpperBound = 1;
}
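// For example (hypothetical directive): for a nested
//   #pragma omp parallel if(b) num_threads(4)
// with a non-constant 'b', *CondVal receives the emitted value of 'b' and
// UpperBound becomes 4, matching the
// '<cond> ? (<numthreads> ? <numthreads> : 0) : 1' scheme described above.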

const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
    bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
  assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  const Expr *NT = nullptr;
  const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;

  auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
    if (E->isIntegerConstantExpr(CGF.getContext())) {
      if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
        UpperBound = UpperBound ? Constant->getZExtValue()
                                : std::min(UpperBound,
                                           int32_t(Constant->getZExtValue()));
    }
6556 // If we haven't found a upper bound, remember we saw a thread limiting
6557 // clause.
6558 if (UpperBound == -1)
6559 UpperBound = 0;
6560 if (EPtr)
6561 *EPtr = E;
6562 };
6563
6564 auto ReturnSequential = [&]() {
6565 UpperBound = 1;
6566 return NT;
6567 };
6568
6569 switch (DirectiveKind) {
6570 case OMPD_target: {
6571 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6572 getNumThreads(CGF, CS, E: NTPtr, UpperBound, UpperBoundOnly, CondVal);
6573 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6574 Ctx&: CGF.getContext(), Body: CS->getCapturedStmt());
6575 // TODO: The standard is not clear how to resolve two thread limit clauses,
6576 // let's pick the teams one if it's present, otherwise the target one.
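    // For example (illustrative of the resolution chosen here): given
    //   #pragma omp target thread_limit(64)
    //   #pragma omp teams thread_limit(32)
    // the teams clause is the one picked up, i.e. a limit of 32.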
    const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
        ThreadLimitClause = TLC;
        if (ThreadLimitExpr) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          CodeGenFunction::LexicalScope Scope(
              CGF,
              ThreadLimitClause->getThreadLimit().front()->getSourceRange());
          if (const auto *PreInit = cast_or_null<DeclStmt>(
                  ThreadLimitClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
          }
        }
      }
    }
    if (ThreadLimitClause)
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return ReturnSequential();
    }
    return NT;
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      }
    }
    return NT;
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    }
    getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
                  UpperBoundOnly, CondVal);
    return NT;
  case OMPD_target_teams_loop:
  case OMPD_target_parallel_loop:
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return ReturnSequential();
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          *CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
      return NumThreadsClause->getNumThreads();
    }
    return NT;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return ReturnSequential();
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  llvm::Value *NumThreadsVal = nullptr;
  llvm::Value *CondVal = nullptr;
  llvm::Value *ThreadLimitVal = nullptr;
  const Expr *ThreadLimitExpr = nullptr;
  int32_t UpperBound = -1;

  const Expr *NT = getNumThreadsExprForTargetDirective(
      CGF, D, UpperBound, /*UpperBoundOnly=*/false, &CondVal,
      &ThreadLimitExpr);

  // Thread limit expressions are used below, emit them.
  if (ThreadLimitExpr) {
    ThreadLimitVal =
        CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
    ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
                                               /*isSigned=*/false);
  }

  // Generate the num threads expression.
  if (UpperBound == 1) {
    NumThreadsVal = CGF.Builder.getInt32(UpperBound);
  } else if (NT) {
    NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
    NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else if (ThreadLimitVal) {
    // If we do not have a num threads value but a thread limit, replace the
    // former with the latter. The thread limit expression has already been
    // handled above.
    NumThreadsVal = ThreadLimitVal;
    ThreadLimitVal = nullptr;
  } else {
    // Default to "0", which means runtime choice.
    assert(!ThreadLimitVal && "Default not applicable with thread limit value");
    NumThreadsVal = CGF.Builder.getInt32(0);
  }

  // Handle the if clause. If the if clause is present, the number of threads
  // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
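  // This boils down to a select; roughly (illustrative IR only):
  //   %nthreads = select i1 %cond, i32 %num.threads, i32 1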
  if (CondVal) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
                                             CGF.Builder.getInt32(1));
  }

  // If both the thread limit and the num threads expression were present,
  // take the minimum.
  if (ThreadLimitVal) {
    NumThreadsVal = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
        ThreadLimitVal, NumThreadsVal);
  }

  return NumThreadsVal;
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain =
             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
         !(Remain & 1); Remain = Remain >> 1)
      Offset++;
    return Offset;
  }
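  // The loop above counts the trailing zero bits of the MEMBER_OF mask. For
  // example, assuming OMP_MAP_MEMBER_OF occupies the top 16 bits of the
  // 64-bit flag word (mask 0xFFFF000000000000), the returned offset is 48.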

  /// Class that holds debugging information for a data mapping to be passed
  /// to the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };

  using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
  using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
  using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
  using MapNonContiguousArrayTy =
      llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;

  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types,
  /// user-defined mappers, and non-contiguous information.
  struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
    MapExprsArrayTy Exprs;
    MapValueDeclsArrayTy Mappers;
    MapValueDeclsArrayTy DevicePtrDecls;

    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
                            CurInfo.DevicePtrDecls.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
    }
  };

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };
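  // Illustrative example: for "map(s.x, s.z)" over "struct { int x, y, z; }
  // s", LowestElem would record field 0 (&s.x) and HighestElem field 2
  // (&s.z), so the combined entry spans the whole [&s.x, &s.z] range.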

private:
  /// Kind that defines how a device pointer has to be returned.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between device addr declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      HasDevAddrsMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;

  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of
    // relying on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
      QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and the lower
      // bound is not specified either, that means we are using the whole
      // length of the base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizeof(base) - lb * sizeof(elem), clamped at zero.
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
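  // Worked example (illustrative): for "int a[10]" and the section "a[2:]",
  // the emitted size is sizeof(a) - 2 * sizeof(int) = 32 bytes, clamped to
  // zero if the lower bound exceeds the array size.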

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
                   : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library,
      // i.e. if we don't pass any bits, alloc/release is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these
      // two type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    if (IsNonContiguous)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
    return Bits;
  }
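  // Illustrative example: "map(always, close, tofrom: x)" yields
  // OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | OMP_MAP_CLOSE.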

  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<ArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is a pointer.
    if (!Length) {
      QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSExtSize() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have a size greater than 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }
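  // For instance (illustrative): given "int a[8]", the sections "a[2:3]" and
  // "a[0:n]" (n not a constant) are final array sections, while "a[1:1]" is
  // not, and "a[5]" is not an array section at all.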

  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapCombinedInfoTy &CombinedInfo,
      MapCombinedInfoTy &StructBaseCombinedInfo,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit, bool GenerateAllInfoForClauses,
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
      const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = {},
      bool AreBothBasePtrAndPteeMapped = false) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    // int **a = &i;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    //   int &ref;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
    // in unified shared memory mode or for local pointers
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map((*a)[0:3])
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
    // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
    //
    // map(**a)
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
    // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the
    //      compiler optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(to: s.ref)
    // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
    // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the
    //      compiler optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)
    //
    // map(p, p[:100])
    // ===> map(p[:100])
    // &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM

    // Track if the map information being generated is the first for a
    // capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list
    // of components.
    bool IsExpressionFirstInfo = true;
    bool FirstPointerInComplexData = false;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (AreBothBasePtrAndPteeMapped && std::next(I) == CE)
      return;
    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        // No need to generate individual map information for the pointer; it
        // can be associated with the combined storage if shared memory mode
        // is active or the base declaration is not a global variable.
        const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
        if (!AreBothBasePtrAndPteeMapped &&
            (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
             !VD || VD->hasLocalStorage()))
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        else
          FirstPointerInComplexData = true;
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF
    // some combined entry (for partial structs). Only the first PTR_AND_OBJ
    // entry in a component list should be marked as MEMBER_OF; all
    // subsequent entries do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1)  (2)  (3)  (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct.
    // ps(3) is the pointee of ps(2) which is not a member of struct s, so it
    // should not be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a
    // component in the component list which is a member expression. Useful
    // when we have a pointer or a final array section, in which case it is
    // the previous component in the list which tells us whether we have a
    // member expression. E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells
    // us whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
    uint64_t DimSize = 1;

    bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
    bool IsPrevMemberReference = false;

    bool IsPartialMapped =
        !PartialStruct.PreliminaryMapData.BasePointers.empty();

    // We need to check if we will be encountering any MEs. If we do not
    // encounter any ME expression it means we will be mapping the whole
    // struct. In that case we need to skip adding an entry for the struct to
    // the CombinedInfo list and instead add an entry to the
    // StructBaseCombinedInfo list only when generating all info for clauses.
    bool IsMappingWholeStruct = true;
    if (!GenerateAllInfoForClauses) {
      IsMappingWholeStruct = false;
    } else {
      for (auto TempI = I; TempI != CE; ++TempI) {
        const MemberExpr *PossibleME =
            dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
        if (PossibleME) {
          IsMappingWholeStruct = false;
          break;
        }
      }
    }

    for (; I != CE; ++I) {
      // If the current component is a member of a struct (parent struct),
      // mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME) {
          ShouldBeMemberOf = true;
          // Do not emit as complex pointer if this is actually not an
          // array-like expression.
          if (FirstPointerInComplexData) {
            QualType Ty = std::prev(I)
                              ->getAssociatedDeclaration()
                              ->getType()
                              .getNonReferenceType();
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
            FirstPointerInComplexData = false;
          }
        }
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer, or if it is an array
      // section whose length can't be proved to be one. If this is a
      // pointer, it becomes the base address for the following components.

      // A final array section is one whose length can't be proved to be one.
      // If the map item is non-contiguous then we don't treat any array
      // section as a final array section.
      bool IsFinalArraySection =
          !IsNonContiguous &&
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // If we have a declaration for the mapping use that, otherwise use
      // the base declaration of the map clause.
      const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
                                     ? I->getAssociatedDeclaration()
                                     : BaseDecl;
      MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
                                               : MapExpr;

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && ArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
                               MapDecl &&
                               MapDecl->getType()->isLValueReferenceType();
      bool IsNonDerefPointer = IsPointer &&
                               !(UO && UO->getOpcode() != UO_Deref) && !BO &&
                               !IsNonContiguous;

      if (OASE)
        ++DimSize;

      if (Next == CE || IsMemberReference || IsNonDerefPointer ||
          IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB = Address::invalid();
        Address LowestElem = Address::invalid();
        auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
                                       const MemberExpr *E) {
          const Expr *BaseExpr = E->getBase();
          // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
          // scalar.
          LValue BaseLV;
          if (E->isArrow()) {
            LValueBaseInfo BaseInfo;
            TBAAAccessInfo TBAAInfo;
            Address Addr =
                CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
            QualType PtrTy = BaseExpr->getType()->getPointeeType();
            BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
          } else {
            BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
          }
          return BaseLV;
        };
        if (OAShE) {
          LowestElem = LB =
              Address(CGF.EmitScalarExpr(OAShE->getBase()),
                      CGF.ConvertTypeForMem(
                          OAShE->getBase()->getType()->getPointeeType()),
                      CGF.getContext().getTypeAlignInChars(
                          OAShE->getBase()->getType()));
        } else if (IsMemberReference) {
          const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
          LValue BaseLVal = EmitMemberExprBase(CGF, ME);
          LowestElem = CGF.EmitLValueForFieldInitialization(
                              BaseLVal, cast<FieldDecl>(MapDecl))
                           .getAddress();
          LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
                   .getAddress();
        } else {
          LowestElem = LB =
              CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                  .getAddress();
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the
        // object it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointerOrAddr =
            EncounteredME &&
            (((IsPointer || ForDeviceAddr) &&
              I->getAssociatedExpression() == EncounteredME) ||
             (IsPrevMemberReference && !IsPointer) ||
             (IsMemberReference && Next != CE &&
              !Next->getAssociatedExpression()->getType()->isPointerType()));
        if (!OverlappedElements.empty() && Next == CE) {
          // Handle the base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on
          // the device.
          PartialStruct.LowestElem = {0, LowestElem};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          PartialStruct.LB = LB;
          assert(
              PartialStruct.PreliminaryMapData.BasePointers.empty() &&
              "Overlapped elements must be used only once for the variable.");
          std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, MotionModifiers,
                             IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false, IsNonContiguous);
          llvm::Value *Size = nullptr;
          // Do a bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
                const auto *FD = dyn_cast<FieldDecl>(VD);
                if (FD && FD->getType()->isLValueReferenceType()) {
                  const auto *ME =
                      cast<MemberExpr>(MC.getAssociatedExpression());
                  LValue BaseLVal = EmitMemberExprBase(CGF, ME);
                  ComponentLB =
                      CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
                          .getAddress();
                } else {
                  ComponentLB =
                      CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                          .getAddress();
                }
                llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
                llvm::Value *LBPtr = LB.emitRawPointer(CGF);
                Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr,
                                                 LBPtr);
                break;
              }
            }
            assert(Size && "Failed to determine structure size");
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
            CombinedInfo.DevicePtrDecls.push_back(nullptr);
            CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
            CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.Types.push_back(Flags);
            CombinedInfo.Mappers.push_back(nullptr);
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                      : 1);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
          CombinedInfo.DevicePtrDecls.push_back(nullptr);
          CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
          CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
          llvm::Value *LBPtr = LB.emitRawPointer(CGF);
          Size = CGF.Builder.CreatePtrDiff(
              CGF.Int8Ty,
              CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF), LBPtr);
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        // Skip adding an entry in the CurInfo of this combined entry if the
        // whole struct is currently being mapped. The struct needs to be
        // added in the first position before any data internal to the struct
        // is being mapped.
        // Also skip adding an entry if the
        // PartialStruct.PreliminaryMapData.BasePointers have already been
        // mapped.
        if ((!IsMemberPointerOrAddr && !IsPartialMapped) ||
            (Next == CE && MapType != OMPC_MAP_unknown)) {
          if (!IsMappingWholeStruct) {
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
            CombinedInfo.DevicePtrDecls.push_back(nullptr);
            CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
            CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                      : 1);
          } else {
            StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            StructBaseCombinedInfo.BasePointers.push_back(
                BP.emitRawPointer(CGF));
            StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
            StructBaseCombinedInfo.DevicePointers.push_back(
                DeviceInfoTy::None);
            StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
            StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
                IsNonContiguous ? DimSize : 1);
          }

          // If Mapper is valid, the last component inherits the mapper.
          bool HasMapper = Mapper && Next == CE;
          if (!IsMappingWholeStruct)
            CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
          else
            StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
                                                               : nullptr);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags =
              getMapTypeBits(MapType, MapModifiers, MotionModifiers,
                             IsImplicit,
                             !IsExpressionFirstInfo || RequiresReference ||
                                 FirstPointerInComplexData ||
                                 IsMemberReference,
                             AreBothBasePtrAndPteeMapped ||
                                 (IsCaptureFirstInfo && !RequiresReference),
                             IsNonContiguous);

          if (!IsExpressionFirstInfo || IsMemberReference) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as
            // well, then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer || (IsMemberReference && Next != CE))
              Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                         OpenMPOffloadMappingFlags::OMP_MAP_FROM |
                         OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
                         OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
                         OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the
              // flag should be later updated with the correct value of
              // MEMBER_OF.
              Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          if (!IsMappingWholeStruct)
            CombinedInfo.Types.push_back(Flags);
          else
            StructBaseCombinedInfo.Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of
        // the mapped member. If the parent is "*this", then the value
        // declaration is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this
          // struct.
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
            if (IsFinalArraySection && OASE) {
              Address HB =
                  CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress();
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LowestElem};
            }
            PartialStruct.Base = BP;
            PartialStruct.LB = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            if (IsFinalArraySection && OASE) {
              Address HB =
                  CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress();
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LowestElem};
            }
          }
        }

        // Need to emit combined struct for array sections.
        if (IsFinalArraySection || IsNonContiguous)
          PartialStruct.IsArraySection = true;

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = IsMemberReference ? LowestElem : LB;
        if (!IsPartialMapped)
          IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        FirstPointerInComplexData = false;
        IsPrevMemberReference = IsMemberReference;
      } else if (FirstPointerInComplexData) {
        QualType Ty = Components.rbegin()
                          ->getAssociatedDeclaration()
                          ->getType()
                          .getNonReferenceType();
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        FirstPointerInComplexData = false;
      }
    }
    // If we ran over the whole component list, allocate space for the whole
    // record.
    if (!EncounteredME)
      PartialStruct.HasCompleteRecord = true;

    if (!IsNonContiguous)
      return;

    const ASTContext &Context = CGF.getContext();

    // For supporting strides in array sections, we need to initialize the
    // first dimension size as 1, the first offset as 0, and the first count
    // as 1.
    MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
    MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    MapValuesArrayTy CurStrides;
    MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    uint64_t ElementTypeSize;

    // Collect size information for each dimension and get the element size
    // as the first stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();
      const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
      auto *CAT = Context.getAsConstantArrayType(Ty);
      auto *VAT = Context.getAsVariableArrayType(Ty);

      // We need all the dimension sizes except for the last dimension.
      assert((VAT || CAT || &Component == &*Components.begin()) &&
             "Should be either ConstantArray or VariableArray if not the "
             "first Component");

      // Get the element size if CurStrides is empty.
      if (CurStrides.empty()) {
        const Type *ElementType = nullptr;
        if (CAT)
          ElementType = CAT->getElementType().getTypePtr();
        else if (VAT)
          ElementType = VAT->getElementType().getTypePtr();
        else
          assert(&Component == &*Components.begin() &&
                 "Only expect pointer (non CAT or VAT) when this is the "
                 "first Component");
        // If ElementType is null, then it means the base is a pointer
        // (neither CAT nor VAT) and we'll attempt to get ElementType again
        // on the next iteration.
        if (ElementType) {
          // For the case of having a pointer as the base, we need to remove
          // one level of indirection.
          if (&Component != &*Components.begin())
            ElementType = ElementType->getPointeeOrArrayElementType();
          ElementTypeSize =
              Context.getTypeSizeInChars(ElementType).getQuantity();
          CurStrides.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
        }
      }
      // Get the dimension value except for the last dimension since we don't
      // need it.
      if (DimSizes.size() < Components.size() - 1) {
        if (CAT)
          DimSizes.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
        else if (VAT)
          DimSizes.push_back(CGF.Builder.CreateIntCast(
              CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
              /*IsSigned=*/false));
      }
    }

    // Skip the dummy dimension since we already have its information.
    auto *DI = DimSizes.begin() + 1;
    // Product of dimensions.
    llvm::Value *DimProd =
        llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);

    // Collect info for non-contiguous maps. Notice that offset, count, and
    // stride are only meaningful for array sections, so we insert a null for
    // anything other than an array section.
    // Also, the sizes of the offset, count, and stride arrays are not the
    // same as those of pointers, base_pointers, sizes, or dims. Instead,
    // they match the number of non-contiguous declarations in the target
    // update to/from clause.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();

      if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
        llvm::Value *Offset = CGF.Builder.CreateIntCast(
            CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
            /*isSigned=*/false);
        CurOffsets.push_back(Offset);
        CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
        CurStrides.push_back(CurStrides.back());
        continue;
      }

      const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      // Offset
      const Expr *OffsetExpr = OASE->getLowerBound();
      llvm::Value *Offset = nullptr;
      if (!OffsetExpr) {
        // If the offset is absent, then we just set it to zero.
        Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
      } else {
        Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
                                           CGF.Int64Ty,
                                           /*isSigned=*/false);
      }
      CurOffsets.push_back(Offset);

      // Count
      const Expr *CountExpr = OASE->getLength();
      llvm::Value *Count = nullptr;
      if (!CountExpr) {
        // In Clang, once a higher dimension is an array section, we construct
        // all the lower dimensions as array sections. However, for a case
        // like arr[0:2][2], Clang constructs the inner dimension as an array
        // section even though it is not in array-section form according to
        // the spec.
        if (!OASE->getColonLocFirst().isValid() &&
            !OASE->getColonLocSecond().isValid()) {
          Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
        } else {
          // OpenMP 5.0, 2.1.5 Array Sections, Description.
          // When the length is absent it defaults to ⌈(size −
          // lower-bound)/stride⌉, where size is the size of the array
          // dimension.
7901 const Expr *StrideExpr = OASE->getStride();
7902 llvm::Value *Stride =
7903 StrideExpr
7904 ? CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: StrideExpr),
7905 DestTy: CGF.Int64Ty, /*isSigned=*/false)
7906 : nullptr;
7907 if (Stride)
7908 Count = CGF.Builder.CreateUDiv(
7909 LHS: CGF.Builder.CreateNUWSub(LHS: *DI, RHS: Offset), RHS: Stride);
7910 else
7911 Count = CGF.Builder.CreateNUWSub(LHS: *DI, RHS: Offset);
7912 }
7913 } else {
7914 Count = CGF.EmitScalarExpr(E: CountExpr);
7915 }
7916 Count = CGF.Builder.CreateIntCast(V: Count, DestTy: CGF.Int64Ty, /*isSigned=*/false);
7917 CurCounts.push_back(Elt: Count);
7918
7919 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
7920 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
7921 // Offset Count Stride
7922 // D0 0 1 4 (int) <- dummy dimension
7923 // D1 0 2 8 (2 * (1) * 4)
7924 // D2 1 2 20 (1 * (1 * 5) * 4)
7925 // D3 0 2 200 (2 * (1 * 5 * 4) * 4)
7926 const Expr *StrideExpr = OASE->getStride();
7927 llvm::Value *Stride =
7928 StrideExpr
7929 ? CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: StrideExpr),
7930 DestTy: CGF.Int64Ty, /*isSigned=*/false)
7931 : nullptr;
7932 DimProd = CGF.Builder.CreateNUWMul(LHS: DimProd, RHS: *(DI - 1));
7933 if (Stride)
7934 CurStrides.push_back(Elt: CGF.Builder.CreateNUWMul(LHS: DimProd, RHS: Stride));
7935 else
7936 CurStrides.push_back(Elt: DimProd);
7937 if (DI != DimSizes.end())
7938 ++DI;
7939 }
7940
7941 CombinedInfo.NonContigInfo.Offsets.push_back(Elt: CurOffsets);
7942 CombinedInfo.NonContigInfo.Counts.push_back(Elt: CurCounts);
7943 CombinedInfo.NonContigInfo.Strides.push_back(Elt: CurStrides);
7944 }

  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
  OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first-private variable captured by reference will use only the
    // 'private ptr' and 'map to' flags. Return the right flags if the
    // captured declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return OpenMPOffloadMappingFlags::OMP_MAP_TO |
               OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
      return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
             OpenMPOffloadMappingFlags::OMP_MAP_TO;
    }
    auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
    if (I != LambdasMap.end())
      // For map(to: lambda): use the user-specified map type.
      return getMapTypeBits(
          I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
          /*MotionModifiers=*/{}, I->getSecond()->isImplicit(),
          /*AddPtrFlag=*/false,
          /*AddIsTargetParamFlag=*/false,
          /*isNonContiguous=*/false);
    return OpenMPOffloadMappingFlags::OMP_MAP_TO |
           OpenMPOffloadMappingFlags::OMP_MAP_FROM;
  }
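  // E.g. (illustrative, not from this file): a first-private aggregate
  // captured by reference, as in
  //   struct S { int a[100]; } s;
  //   #pragma omp target firstprivate(s)
  // gets PRIVATE | TO; a first-private pointer captured by reference gets
  // TO | PTR_AND_OBJ; anything else defaults to TO | FROM.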

  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;

      QualType BaseTy = I.getType();
      const auto *Base = BaseTy->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
          CGF.getContext()
              .getASTRecordLayout(Base)
              .getNonVirtualSize()
              .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      QualType BaseTy = I.getType();
      // Ignore empty bases.
      if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
        continue;

      const auto *Base = BaseTy->getAsCXXRecordDecl();
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() &&
          !isEmptyFieldForLayout(CGF.getContext(), Field)) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = dyn_cast<const CXXRecordDecl *>(Data))
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(cast<const FieldDecl *>(Data));
    }
  }
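  // Illustrative example (not from this file): for
  //   struct B { int b; };
  //   struct D : B { int x; int y; };
  // getPlainLayout(D, Layout, /*AsBase=*/false) appends [B::b, D::x, D::y]:
  // non-virtual bases are flattened recursively in LLVM field order, followed
  // by the record's own non-empty, non-bitfield fields.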

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.try_emplace(D, Total).first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };

    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMapTypeModifiers(),
                             OMPC_MAP_MODIFIER_present))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), {},
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, {},
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, {},
                C->getMotionModifiers(),
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                *EI);
        ++EI;
      }
    }
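    // Illustrative bucketing example (not from this file): for
    //   #pragma omp target update to(present: a) to(b)
    // 'a' is recorded under the Present bucket and 'b' under Other, so the
    // present-modified lists for a declaration are processed before the rest.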

    // Look at the use_device_ptr and use_device_addr clauses information and
    // mark the existing map entries as such. If there is no map information
    // for an entry in the use_device_ptr and use_device_addr list, we create
    // one with map type 'alloc' and zero size section. It is the user's fault
    // if that was not mapped before. If there is no map information and the
    // pointer is a struct member, then we defer the emission of that entry
    // until the whole struct has been processed.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDeviceDataCombinedInfo;

    auto &&UseDeviceDataCombinedInfoGen =
        [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
                                     CodeGenFunction &CGF, bool IsDevAddr) {
          UseDeviceDataCombinedInfo.Exprs.push_back(VD);
          UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
          UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
          UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
              IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
          UseDeviceDataCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDeviceDataCombinedInfo.Types.push_back(
              OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
          UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
        };

    auto &&MapInfoGen =
        [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
         &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
                   OMPClauseMappableExprCommon::MappableExprComponentListRef
                       Components,
                   bool IsImplicit, bool IsDevAddr) {
          // We didn't find any match in our map information - generate a zero
          // size array section - if the pointer is a struct member we defer
          // this action until the whole struct has been processed.
          if (isa<MemberExpr>(IE)) {
            // Insert the pointer into Info to be processed by
            // generateInfoForComponentList. Because it is a member pointer
            // without a pointee, no entry will be generated for it, therefore
            // we need to generate one after the whole struct has been
            // processed. Nonetheless, generateInfoForComponentList must be
            // called to take the pointer into account for the calculation of
            // the range of the partial struct.
            InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, {}, {},
                    /*ReturnDevicePointer=*/false, IsImplicit, nullptr,
                    nullptr, IsDevAddr);
            DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
          } else {
            llvm::Value *Ptr;
            if (IsDevAddr) {
              if (IE->isGLValue())
                Ptr = CGF.EmitLValue(IE).getPointer(CGF);
              else
                Ptr = CGF.EmitScalarExpr(IE);
            } else {
              Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
            }
            UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
          }
        };
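
    // E.g. (illustrative): for
    //   struct T { int *p; } t;
    //   #pragma omp target data map(to: t) use_device_ptr(t.p)
    // 't.p' is a member expression, so its RETURN_PARAM entry is deferred to
    // DeferredInfo until the enclosing struct 't' has been processed.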

    auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
                                    const Expr *IE, bool IsDevAddr) -> bool {
      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it. If found,
      // return true.
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
      if (It != Info.end()) {
        bool Found = false;
        for (auto &Data : It->second) {
          auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be
          // returned and move on to the next declaration. Exclude cases where
          // the base pointer is mapped as array subscript, array section or
          // array shaping. The base address is passed as a pointer to base in
          // this case and cannot be used as a base for a use_device_ptr list
          // item.
          if (CI != Data.end()) {
            if (IsDevAddr) {
              CI->ForDeviceAddr = IsDevAddr;
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            } else {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ForDeviceAddr = IsDevAddr;
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
        }
        return Found;
      }
      return false;
    };
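
    // E.g. (illustrative): with
    //   int *pp;
    //   #pragma omp target data map(tofrom: pp) use_device_ptr(pp)
    // IsMapInfoExist finds the map entry whose last component refers to 'pp'
    // and flags it ReturnDevicePointer, so no extra entry is emitted for the
    // use_device_ptr list item.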

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/false);
      }
    }

    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/true);
      }
    }

    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Current struct information:
      MapCombinedInfoTy CurInfo;
      // Current struct base information:
      MapCombinedInfoTy StructBaseCurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      bool HasMapBasePtr = false;
      bool HasMapArraySec = false;
      if (VD && VD->getType()->isAnyPointerType()) {
        for (const auto &M : Data.second) {
          HasMapBasePtr = any_of(M, [](const MapInfo &L) {
            return isa_and_present<DeclRefExpr>(L.VarRef);
          });
          HasMapArraySec = any_of(M, [](const MapInfo &L) {
            return isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(
                L.VarRef);
          });
          if (HasMapBasePtr && HasMapArraySec)
            break;
        }
      }
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          unsigned StructBasePointersIdx =
              StructBaseCurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, StructBaseCurInfo, PartialStruct,
              /*IsFirstComponentList=*/false, L.IsImplicit,
              /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr,
              VD, L.VarRef, /*OverlappedElements*/ {},
              HasMapBasePtr && HasMapArraySec);

          // If this entry relates to a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            // Check whether a value was added to either CurInfo or
            // StructBaseCurInfo and error if no value was added to either of
            // them:
            assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
                    StructBasePointersIdx <
                        StructBaseCurInfo.BasePointers.size()) &&
                   "Unexpected number of mapped base pointers.");

            // Choose a base pointer index which is always valid:
            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            // If StructBaseCurInfo has been updated this iteration then work
            // on the first new entry added to it, i.e. make sure that when
            // multiple values are added to any of the lists, the first value
            // added is the one being modified by the assignments below (not
            // the last value added).
            if (StructBasePointersIdx <
                StructBaseCurInfo.BasePointers.size()) {
              StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
                  RelevantVD;
              StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
                  L.ForDeviceAddr ? DeviceInfoTy::Address
                                  : DeviceInfoTy::Pointer;
              StructBaseCurInfo.Types[StructBasePointersIdx] |=
                  OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
            } else {
              CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
              CurInfo.DevicePointers[CurrentBasePointersIdx] =
                  L.ForDeviceAddr ? DeviceInfoTy::Address
                                  : DeviceInfoTy::Pointer;
              CurInfo.Types[CurrentBasePointersIdx] |=
                  OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
            }
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // are used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr);
          CurInfo.DevicePtrDecls.emplace_back(L.VD);
          CurInfo.DevicePointers.emplace_back(
              L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }

      // Unify entries in one list, making sure the struct mapping precedes
      // the individual fields:
      MapCombinedInfoTy UnionCurInfo;
      UnionCurInfo.append(StructBaseCurInfo);
      UnionCurInfo.append(CurInfo);

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        UnionCurInfo.NonContigInfo.Dims.push_back(0);
        // Emit a combined entry:
        emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
                          /*IsMapThis*/ !VD, OMPBuilder, VD);
      }

      // We need to append the results of this capture to what we already
      // have.
      CombinedInfo.append(UnionCurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDeviceDataCombinedInfo);
  }

public:
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlist())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract device addr clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
      for (auto L : C->component_lists())
        HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract map information.
    for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
      if (C->getMapType() != OMPC_MAP_to)
        continue;
      for (auto L : C->component_lists()) {
        const ValueDecl *VD = std::get<0>(L);
        const auto *RD = VD ? VD->getType()
                                  .getCanonicalType()
                                  .getNonReferenceType()
                                  ->getAsCXXRecordDecl()
                            : nullptr;
        if (RD && RD->isLambda())
          LambdasMap.try_emplace(std::get<0>(L), C);
      }
    }
  }

  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}

  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct, bool IsMapThis,
                         llvm::OpenMPIRBuilder &OMPBuilder,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
         OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct.
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
    CombinedInfo.DevicePtrDecls.push_back(nullptr);
    CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
    // Pointer is the address of the lowest element.
    llvm::Value *LB = LBAddr.emitRawPointer(CGF);
    const CXXMethodDecl *MD =
        CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
    const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
    bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
    // There should not be a mapper for a combined entry.
    if (HasBaseClass) {
      // OpenMP 5.2 148:21:
      // If the target construct is within a class non-static member function,
      // and a variable is an accessible data member of the object for which
      // the non-static data member function is invoked, the variable is
      // treated as if the this[:1] expression had appeared in a map clause
      // with a map-type of tofrom.
      // Emit this[:1]
      CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
      QualType Ty = MD->getFunctionObjectParameterType();
      llvm::Value *Size =
          CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
                                    /*isSigned=*/true);
      CombinedInfo.Sizes.push_back(Size);
    } else {
      CombinedInfo.Pointers.push_back(LB);
      // Size is (addr of {highest+1} element) - (addr of lowest element).
      llvm::Value *HB = HBAddr.emitRawPointer(CGF);
      llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
          HBAddr.getElementType(), HB, /*Idx0=*/1);
      llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
      llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
      llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
      llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                    /*isSigned=*/false);
      CombinedInfo.Sizes.push_back(Size);
    }
    CombinedInfo.Mappers.push_back(nullptr);
    // The map type is always TARGET_PARAM when generating info for captures.
    CombinedInfo.Types.push_back(
        NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
        : !PartialStruct.PreliminaryMapData.BasePointers.empty()
            ? OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ
            : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
        }))
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    // Remove the TARGET_PARAM flag from the first element.
    (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement. Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
        })) {
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
  }
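  // Illustrative example (not from this file): for
  //   struct T { int a; double b; } t;
  //   #pragma omp target map(to: t.a) map(from: t.b)
  // a single combined entry spanning [&t.a, &t.b + 1) is emitted for 't', and
  // the member entries are rewritten as MEMBER_OF that combined entry.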

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    assert(isa<const OMPExecutableDirective *>(CurDir) &&
           "Expect an executable directive");
    const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
    generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
                              SkipVarSet);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of a user-defined mapper (all
  /// included in \a CombinedInfo).
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
                                llvm::OpenMPIRBuilder &OMPBuilder) const {
    assert(isa<const OMPDeclareMapperDecl *>(CurDir) &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = cast<const OMPDeclareMapperDecl *>(CurDir);
    generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
                              OMPBuilder);
  }

  /// Emit capture info for lambdas for variables captured by reference.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
    const auto *RD = VDType->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
                   CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
    llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
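  // E.g. (illustrative): for
  //   int x; auto l = [&x]() { return x; };
  //   #pragma omp target map(to: l)
  // each by-reference capture ('x' here) gets a PTR_AND_OBJ | LITERAL |
  // MEMBER_OF | IMPLICIT entry so the device copy of the lambda is patched
  // to point at the device instance of the captured variable.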

  /// Set the correct indices for lambda captures.
  void adjustMemberOfForLambdaCaptures(
      llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set the correct member_of idx for all implicit lambda captures.
      if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                       OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                       OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
                       OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag =
          OMPBuilder.getMemberOfFlag(TgtIdx);
      OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // For map(to: lambda): skip it here; it is processed in
    // generateDefaultMapInfo.
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we
    // just pass its value.
    if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg);
      CombinedInfo.DevicePtrDecls.emplace_back(VD);
      CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    // For member fields listed in is_device_ptr, store them in
    // DeclComponentLists for generating component info.
    static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
    auto It = DevPointersMap.find(VD);
    if (It != DevPointersMap.end())
      for (const auto &MCL : It->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    auto I = HasDevAddrsMap.find(VD);
    if (I != HasDevAddrsMap.end())
      for (const auto &MCL : I->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    assert(isa<const OMPExecutableDirective *>(CurDir) &&
           "Expect an executable directive");
    const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
    bool HasMapBasePtr = false;
    bool HasMapArraySec = false;
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The expression is not correct if the mapping is implicit.
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(E))
          HasMapBasePtr = true;
        if (VD && E && VD->getType()->isAnyPointerType() &&
            (isa<ArraySectionExpr>(E) || isa<ArraySubscriptExpr>(E)))
          HasMapArraySec = true;
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // We found an overlap if, for at least one of the lists, we reached
        // the head of its component list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section,
          // dereference, etc.), it is not an overlap.
          // Likewise if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          OverlappedData[&BaseData].push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // A list with fewer elements is ordered before a list with more
            // elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    MapCombinedInfoTy StructBaseCombinedInfo;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, /*MotionModifiers=*/{}, Components,
          CombinedInfo, StructBaseCombinedInfo, PartialStruct,
          IsFirstComponentList, IsImplicit,
          /*GenerateAllInfoForClauses*/ false, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through the other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(
            MapType, MapModifiers, /*MotionModifiers=*/{}, Components,
            CombinedInfo, StructBaseCombinedInfo, PartialStruct,
            IsFirstComponentList, IsImplicit,
            /*GenerateAllInfoForClauses*/ false, Mapper,
            /*ForDeviceAddr=*/false, VD, VarRef,
            /*OverlappedElements*/ {}, HasMapBasePtr && HasMapArraySec);
      IsFirstComponentList = false;
    }
  }
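  // Illustrative overlap example (not from this file): for
  //   struct S { int a; int b; } s;
  //   #pragma omp target map(tofrom: s) map(to: s.a)
  // the list for 's' is the base of the overlapped list for 's.a', so 's' is
  // emitted with 's.a' carved out as an overlapped element and mapped under
  // its own map type.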

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                                   OpenMPOffloadMappingFlags::OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime those captures passed by value
        // that are not pointers.
        CombinedInfo.Types.push_back(
            OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than the first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.emitRawPointer(CGF));
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target
    // parameter.
    CombinedInfo.Types.back() |=
        OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;

    // Add a flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
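  // E.g. (illustrative): in
  //   int x; int *p;
  //   #pragma omp target
  //   { x++; p[0]++; }
  // 'x' (captured by copy) is passed as a LITERAL entry with its size, while
  // 'p' gets an implicit zero-sized pointer entry; both are marked
  // TARGET_PARAM | IMPLICIT.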
};
} // anonymous namespace

// Try to extract the base declaration from a `this->x` expression if possible.
static ValueDecl *getDeclFromThisExpr(const Expr *E) {
  if (!E)
    return nullptr;

  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
    if (const MemberExpr *ME =
            dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
      return ME->getMemberDecl();
  return nullptr;
}

/// Emit a string constant containing the names of the values mapped to the
/// offloading runtime library.
static llvm::Constant *
emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                       MappableExprsHandler::MappingExprInfo &MapExprs) {

  uint32_t SrcLocStrSize;
  if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
    return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);

  SourceLocation Loc;
  if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
    if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
      Loc = VD->getLocation();
    else
      Loc = MapExprs.getMapExpr()->getExprLoc();
  } else {
    Loc = MapExprs.getMapDecl()->getLocation();
  }

  std::string ExprName;
  if (MapExprs.getMapExpr()) {
    PrintingPolicy P(CGF.getContext().getLangOpts());
    llvm::raw_string_ostream OS(ExprName);
    MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
  } else {
    ExprName = MapExprs.getMapDecl()->getNameAsString();
  }

  std::string FileName;
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  if (auto *DbgInfo = CGF.getDebugInfo())
    FileName = DbgInfo->remapDIPath(PLoc.getFilename());
  else
    FileName = PLoc.getFilename();
  return OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName, PLoc.getLine(),
                                         PLoc.getColumn(), SrcLocStrSize);
}

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void emitOffloadingArraysAndArgs(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false, bool ForEndCall = false) {
  CodeGenModule &CGM = CGF.CGM;

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());

  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
    }
  };

  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Function *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };
  cantFail(OMPBuilder.emitOffloadingArraysAndArgs(
      AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, CustomMapperCB,
      IsNonContiguous, ForEndCall, DeviceAddrCB));
}

/// Check for an inner distribute directive.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // For now, treat 'target' with nested 'teams loop' as if it's
      // distributed (target teams distribute).
      if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
9221 case OMPD_task:
9222 case OMPD_simd:
9223 case OMPD_tile:
9224 case OMPD_unroll:
9225 case OMPD_sections:
9226 case OMPD_section:
9227 case OMPD_single:
9228 case OMPD_master:
9229 case OMPD_critical:
9230 case OMPD_taskyield:
9231 case OMPD_barrier:
9232 case OMPD_taskwait:
9233 case OMPD_taskgroup:
9234 case OMPD_atomic:
9235 case OMPD_flush:
9236 case OMPD_depobj:
9237 case OMPD_scan:
9238 case OMPD_teams:
9239 case OMPD_target_data:
9240 case OMPD_target_exit_data:
9241 case OMPD_target_enter_data:
9242 case OMPD_distribute:
9243 case OMPD_distribute_simd:
9244 case OMPD_distribute_parallel_for:
9245 case OMPD_distribute_parallel_for_simd:
9246 case OMPD_teams_distribute:
9247 case OMPD_teams_distribute_simd:
9248 case OMPD_teams_distribute_parallel_for:
9249 case OMPD_teams_distribute_parallel_for_simd:
9250 case OMPD_target_update:
9251 case OMPD_declare_simd:
9252 case OMPD_declare_variant:
9253 case OMPD_begin_declare_variant:
9254 case OMPD_end_declare_variant:
9255 case OMPD_declare_target:
9256 case OMPD_end_declare_target:
9257 case OMPD_declare_reduction:
9258 case OMPD_declare_mapper:
9259 case OMPD_taskloop:
9260 case OMPD_taskloop_simd:
9261 case OMPD_master_taskloop:
9262 case OMPD_master_taskloop_simd:
9263 case OMPD_parallel_master_taskloop:
9264 case OMPD_parallel_master_taskloop_simd:
9265 case OMPD_requires:
9266 case OMPD_metadirective:
9267 case OMPD_unknown:
9268 default:
9269 llvm_unreachable("Unexpected directive.");
9270 }
9271 }
9272
9273 return nullptr;
9274}
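
// For example (illustrative):
// \code
//   #pragma omp target
//   #pragma omp teams
//   #pragma omp distribute parallel for
//   for (int i = 0; i < n; ++i) ...
// \endcode
// Starting from the 'target' directive, the function above looks through the
// nested 'teams' region and returns the 'distribute parallel for' directive;
// if no distribute-like directive is found it returns nullptr.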
9275
9276/// Emit the user-defined mapper function. The code generation follows the
9277/// pattern in the example below.
9278/// \code
9279/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9280/// void *base, void *begin,
9281/// int64_t size, int64_t type,
9282/// void *name = nullptr) {
9283/// // Allocate space for an array section first or add a base/begin for
9284/// // pointer dereference.
9285/// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9286/// !maptype.IsDelete)
9287/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9288/// size*sizeof(Ty), clearToFromMember(type));
9289/// // Map members.
9290/// for (unsigned i = 0; i < size; i++) {
9291/// // For each component specified by this mapper:
9292/// for (auto c : begin[i]->all_components) {
9293/// if (c.hasMapper())
9294/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9295/// c.arg_type, c.arg_name);
9296/// else
9297/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9298/// c.arg_begin, c.arg_size, c.arg_type,
9299/// c.arg_name);
9300/// }
9301/// }
9302/// // Delete the array section.
9303/// if (size > 1 && maptype.IsDelete)
9304/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9305/// size*sizeof(Ty), clearToFromMember(type));
9306/// }
9307/// \endcode
9308void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9309 CodeGenFunction *CGF) {
9310 if (UDMMap.count(Val: D) > 0)
9311 return;
9312 ASTContext &C = CGM.getContext();
9313 QualType Ty = D->getType();
9314 auto *MapperVarDecl =
9315 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D->getMapperVarRef())->getDecl());
9316 CharUnits ElementSize = C.getTypeSizeInChars(T: Ty);
9317 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(T: Ty);
9318
9319 CodeGenFunction MapperCGF(CGM);
9320 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9321 auto PrivatizeAndGenMapInfoCB =
9322 [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, llvm::Value *PtrPHI,
9323 llvm::Value *BeginArg) -> llvm::OpenMPIRBuilder::MapInfosTy & {
9324 MapperCGF.Builder.restoreIP(IP: CodeGenIP);
9325
9326 // Privatize the declared variable of mapper to be the current array
9327 // element.
9328 Address PtrCurrent(
9329 PtrPHI, ElemTy,
9330 Address(BeginArg, MapperCGF.VoidPtrTy, CGM.getPointerAlign())
9331 .getAlignment()
9332 .alignmentOfArrayElement(elementSize: ElementSize));
9333 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9334 Scope.addPrivate(LocalVD: MapperVarDecl, Addr: PtrCurrent);
9335 (void)Scope.Privatize();
9336
9337 // Get map clause information.
9338 MappableExprsHandler MEHandler(*D, MapperCGF);
9339 MEHandler.generateAllInfoForMapper(CombinedInfo, OMPBuilder);
9340
9341 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9342 return emitMappingInformation(CGF&: MapperCGF, OMPBuilder, MapExprs&: MapExpr);
9343 };
9344 if (CGM.getCodeGenOpts().getDebugInfo() !=
9345 llvm::codegenoptions::NoDebugInfo) {
9346 CombinedInfo.Names.resize(N: CombinedInfo.Exprs.size());
9347 llvm::transform(Range&: CombinedInfo.Exprs, d_first: CombinedInfo.Names.begin(),
9348 F: FillInfoMap);
9349 }
9350
9351 return CombinedInfo;
9352 };
9353
9354 auto CustomMapperCB = [&](unsigned I) {
9355 llvm::Function *MapperFunc = nullptr;
9356 if (CombinedInfo.Mappers[I]) {
9357 // Call the corresponding mapper function.
9358 MapperFunc = getOrCreateUserDefinedMapperFunc(
9359 D: cast<OMPDeclareMapperDecl>(Val: CombinedInfo.Mappers[I]));
9360      assert(MapperFunc && "Expected a valid mapper function to be available.");
9361 }
9362 return MapperFunc;
9363 };
9364
9365 SmallString<64> TyStr;
9366 llvm::raw_svector_ostream Out(TyStr);
9367 CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(T: Ty, Out);
9368 std::string Name = getName(Parts: {"omp_mapper", TyStr, D->getName()});
9369
9370 llvm::Function *NewFn = cantFail(ValOrErr: OMPBuilder.emitUserDefinedMapper(
9371 PrivAndGenMapInfoCB: PrivatizeAndGenMapInfoCB, ElemTy, FuncName: Name, CustomMapperCB));
9372 UDMMap.try_emplace(Key: D, Args&: NewFn);
9373 if (CGF)
9374 FunctionUDMMap[CGF->CurFn].push_back(Elt: D);
9375}
9376
9377llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9378 const OMPDeclareMapperDecl *D) {
9379 auto I = UDMMap.find(Val: D);
9380 if (I != UDMMap.end())
9381 return I->second;
9382 emitUserDefinedMapper(D);
9383 return UDMMap.lookup(Val: D);
9384}
9385
9386llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
9387 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9388 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9389 const OMPLoopDirective &D)>
9390 SizeEmitter) {
9391 OpenMPDirectiveKind Kind = D.getDirectiveKind();
9392 const OMPExecutableDirective *TD = &D;
9393  // Get the nested teams-distribute directive, if any. For now, treat
9394  // 'target teams loop' as if it were really 'target teams distribute'.
9395 if ((!isOpenMPDistributeDirective(DKind: Kind) || !isOpenMPTeamsDirective(DKind: Kind)) &&
9396 Kind != OMPD_target_teams_loop)
9397 TD = getNestedDistributeDirective(Ctx&: CGM.getContext(), D);
9398 if (!TD)
9399 return llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: 0);
9400
9401 const auto *LD = cast<OMPLoopDirective>(Val: TD);
9402 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
9403 return NumIterations;
9404 return llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: 0);
9405}
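
// Illustrative sketch: for
// \code
//   #pragma omp target teams distribute
//   for (int i = 0; i < n; ++i) ...
// \endcode
// SizeEmitter computes the loop trip count ('n' here), which becomes the
// NumIterations argument of the kernel launch so the runtime can size the
// launch; a constant 0 ("unknown") is returned when no suitable nested loop
// directive exists.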
9406
9407static void
9408emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9409 const OMPExecutableDirective &D,
9410 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9411 bool RequiresOuterTask, const CapturedStmt &CS,
9412 bool OffloadingMandatory, CodeGenFunction &CGF) {
9413 if (OffloadingMandatory) {
9414 CGF.Builder.CreateUnreachable();
9415 } else {
9416 if (RequiresOuterTask) {
9417 CapturedVars.clear();
9418 CGF.GenerateOpenMPCapturedVars(S: CS, CapturedVars);
9419 }
9420 OMPRuntime->emitOutlinedFunctionCall(CGF, Loc: D.getBeginLoc(), OutlinedFn,
9421 Args: CapturedVars);
9422 }
9423}
9424
9425static llvm::Value *emitDeviceID(
9426 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9427 CodeGenFunction &CGF) {
9428 // Emit device ID if any.
9429 llvm::Value *DeviceID;
9430 if (Device.getPointer()) {
9431 assert((Device.getInt() == OMPC_DEVICE_unknown ||
9432 Device.getInt() == OMPC_DEVICE_device_num) &&
9433 "Expected device_num modifier.");
9434 llvm::Value *DevVal = CGF.EmitScalarExpr(E: Device.getPointer());
9435 DeviceID =
9436 CGF.Builder.CreateIntCast(V: DevVal, DestTy: CGF.Int64Ty, /*isSigned=*/true);
9437 } else {
9438 DeviceID = CGF.Builder.getInt64(C: OMP_DEVICEID_UNDEF);
9439 }
9440 return DeviceID;
9441}
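
// For example (illustrative; 'dev' is a hypothetical variable):
// \code
//   #pragma omp target device(dev)   // DeviceID = (int64_t)dev
// \endcode
// With no device clause the call falls back to OMP_DEVICEID_UNDEF, meaning
// "use the default device".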
9442
9443static llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
9444 CodeGenFunction &CGF) {
9445 llvm::Value *DynCGroupMem = CGF.Builder.getInt32(C: 0);
9446
9447 if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
9448 CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
9449 llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
9450 E: DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
9451 DynCGroupMem = CGF.Builder.CreateIntCast(V: DynCGroupMemVal, DestTy: CGF.Int32Ty,
9452 /*isSigned=*/false);
9453 }
9454 return DynCGroupMem;
9455}
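
// Illustrative use of the extension clause handled above ('n' is a
// hypothetical variable):
// \code
//   #pragma omp target ompx_dyn_cgroup_mem(n * sizeof(float))
// \endcode
// The size expression is evaluated, cast to an unsigned i32 byte count, and
// passed to the kernel launch as dynamic per-team memory; without the clause
// a constant 0 is used.
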
9456static void genMapInfoForCaptures(
9457 MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
9458 const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9459 llvm::OpenMPIRBuilder &OMPBuilder,
9460 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
9461 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
9462
9463 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9464 auto RI = CS.getCapturedRecordDecl()->field_begin();
9465 auto *CV = CapturedVars.begin();
9466 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9467 CE = CS.capture_end();
9468 CI != CE; ++CI, ++RI, ++CV) {
9469 MappableExprsHandler::MapCombinedInfoTy CurInfo;
9470 MappableExprsHandler::StructRangeInfoTy PartialStruct;
9471
9472 // VLA sizes are passed to the outlined region by copy and do not have map
9473 // information associated.
9474 if (CI->capturesVariableArrayType()) {
9475 CurInfo.Exprs.push_back(Elt: nullptr);
9476 CurInfo.BasePointers.push_back(Elt: *CV);
9477 CurInfo.DevicePtrDecls.push_back(Elt: nullptr);
9478 CurInfo.DevicePointers.push_back(
9479 Elt: MappableExprsHandler::DeviceInfoTy::None);
9480 CurInfo.Pointers.push_back(Elt: *CV);
9481 CurInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
9482 V: CGF.getTypeSize(Ty: RI->getType()), DestTy: CGF.Int64Ty, /*isSigned=*/true));
9483 // Copy to the device as an argument. No need to retrieve it.
9484 CurInfo.Types.push_back(Elt: OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9485 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
9486 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9487 CurInfo.Mappers.push_back(Elt: nullptr);
9488 } else {
9489      // If we have any information in the map clause, we use it; otherwise we
9490      // just do a default mapping.
9491 MEHandler.generateInfoForCapture(Cap: CI, Arg: *CV, CombinedInfo&: CurInfo, PartialStruct);
9492 if (!CI->capturesThis())
9493 MappedVarSet.insert(V: CI->getCapturedVar());
9494 else
9495 MappedVarSet.insert(V: nullptr);
9496 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
9497 MEHandler.generateDefaultMapInfo(CI: *CI, RI: **RI, CV: *CV, CombinedInfo&: CurInfo);
9498 // Generate correct mapping for variables captured by reference in
9499 // lambdas.
9500 if (CI->capturesVariable())
9501 MEHandler.generateInfoForLambdaCaptures(VD: CI->getCapturedVar(), Arg: *CV,
9502 CombinedInfo&: CurInfo, LambdaPointers);
9503 }
9504 // We expect to have at least an element of information for this capture.
9505 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
9506 "Non-existing map pointer for capture!");
9507 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
9508 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
9509 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
9510 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
9511 "Inconsistent map information sizes!");
9512
9513 // If there is an entry in PartialStruct it means we have a struct with
9514 // individual members mapped. Emit an extra combined entry.
9515 if (PartialStruct.Base.isValid()) {
9516 CombinedInfo.append(CurInfo&: PartialStruct.PreliminaryMapData);
9517 MEHandler.emitCombinedEntry(CombinedInfo, CurTypes&: CurInfo.Types, PartialStruct,
9518 IsMapThis: CI->capturesThis(), OMPBuilder, VD: nullptr,
9519 /*NotTargetParams*/ false);
9520 }
9521
9522 // We need to append the results of this capture to what we already have.
9523 CombinedInfo.append(CurInfo);
9524 }
9525  // Adjust MEMBER_OF flags for the lambda captures.
9526 MEHandler.adjustMemberOfForLambdaCaptures(
9527 OMPBuilder, LambdaPointers, BasePointers&: CombinedInfo.BasePointers,
9528 Pointers&: CombinedInfo.Pointers, Types&: CombinedInfo.Types);
9529}
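
// Illustrative capture walk (hypothetical code):
// \code
//   int n = ...;
//   double a[n];
//   #pragma omp target
//   { a[0] = n; }
// \endcode
// The implicitly captured VLA bound is passed by value with OMP_MAP_LITERAL |
// OMP_MAP_TARGET_PARAM | OMP_MAP_IMPLICIT, while 'a' itself goes through
// generateInfoForCapture()/generateDefaultMapInfo() like any other capture.
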
9530static void
9531genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
9532 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9533 llvm::OpenMPIRBuilder &OMPBuilder,
9534 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
9535 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
9536
9537 CodeGenModule &CGM = CGF.CGM;
9538  // Map any list items in a map clause that were not captured, because they
9539  // weren't referenced within the construct.
9540 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkipVarSet: SkippedVarSet);
9541
9542 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9543 return emitMappingInformation(CGF, OMPBuilder, MapExprs&: MapExpr);
9544 };
9545 if (CGM.getCodeGenOpts().getDebugInfo() !=
9546 llvm::codegenoptions::NoDebugInfo) {
9547 CombinedInfo.Names.resize(N: CombinedInfo.Exprs.size());
9548 llvm::transform(Range&: CombinedInfo.Exprs, d_first: CombinedInfo.Names.begin(),
9549 F: FillInfoMap);
9550 }
9551}
9552
9553static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
9554 const CapturedStmt &CS,
9555 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9556 llvm::OpenMPIRBuilder &OMPBuilder,
9557 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
9558 // Get mappable expression information.
9559 MappableExprsHandler MEHandler(D, CGF);
9560 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
9561
9562 genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
9563 MappedVarSet, CombinedInfo);
9564 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, SkippedVarSet: MappedVarSet);
9565}
9566
9567template <typename ClauseTy>
9568static void
9569emitClauseForBareTargetDirective(CodeGenFunction &CGF,
9570 const OMPExecutableDirective &D,
9571 llvm::SmallVectorImpl<llvm::Value *> &Values) {
9572 const auto *C = D.getSingleClause<ClauseTy>();
9573 assert(!C->varlist_empty() &&
9574 "ompx_bare requires explicit num_teams and thread_limit");
9575 CodeGenFunction::RunCleanupsScope Scope(CGF);
9576 for (auto *E : C->varlist()) {
9577 llvm::Value *V = CGF.EmitScalarExpr(E);
9578 Values.push_back(
9579 Elt: CGF.Builder.CreateIntCast(V, DestTy: CGF.Int32Ty, /*isSigned=*/true));
9580 }
9581}
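
// For example (illustrative), a bare target region must spell out its launch
// bounds explicitly:
// \code
//   #pragma omp target teams ompx_bare num_teams(8) thread_limit(128)
// \endcode
// Each clause argument is emitted and cast to i32, populating the NumTeams
// and NumThreads vectors consumed by emitTargetCallKernelLaunch() below.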
9582
9583static void emitTargetCallKernelLaunch(
9584 CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9585 const OMPExecutableDirective &D,
9586 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
9587 const CapturedStmt &CS, bool OffloadingMandatory,
9588 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9589 llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
9590 llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
9591 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9592 const OMPLoopDirective &D)>
9593 SizeEmitter,
9594 CodeGenFunction &CGF, CodeGenModule &CGM) {
9595 llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
9596
9597 // Fill up the arrays with all the captured variables.
9598 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9599 CGOpenMPRuntime::TargetDataInfo Info;
9600 genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
9601
9602 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
9603 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
9604
9605 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9606 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
9607 CGF.VoidPtrTy, CGM.getPointerAlign());
9608 InputInfo.PointersArray =
9609 Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9610 InputInfo.SizesArray =
9611 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
9612 InputInfo.MappersArray =
9613 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9614 MapTypesArray = Info.RTArgs.MapTypesArray;
9615 MapNamesArray = Info.RTArgs.MapNamesArray;
9616
9617 auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
9618 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9619 OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
9620 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9621 bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
9622
9623 if (IsReverseOffloading) {
9624 // Reverse offloading is not supported, so just execute on the host.
9625 // FIXME: This fallback solution is incorrect since it ignores the
9626 // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
9627 // assert here and ensure SEMA emits an error.
9628 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9629 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9630 return;
9631 }
9632
9633 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
9634 unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
9635
9636 llvm::Value *BasePointersArray =
9637 InputInfo.BasePointersArray.emitRawPointer(CGF);
9638 llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
9639 llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
9640 llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
9641
9642 auto &&EmitTargetCallFallbackCB =
9643 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9644 OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
9645 -> llvm::OpenMPIRBuilder::InsertPointTy {
9646 CGF.Builder.restoreIP(IP);
9647 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9648 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9649 return CGF.Builder.saveIP();
9650 };
9651
9652 bool IsBare = D.hasClausesOfKind<OMPXBareClause>();
9653 SmallVector<llvm::Value *, 3> NumTeams;
9654 SmallVector<llvm::Value *, 3> NumThreads;
9655 if (IsBare) {
9656 emitClauseForBareTargetDirective<OMPNumTeamsClause>(CGF, D, Values&: NumTeams);
9657 emitClauseForBareTargetDirective<OMPThreadLimitClause>(CGF, D,
9658 Values&: NumThreads);
9659 } else {
9660 NumTeams.push_back(Elt: OMPRuntime->emitNumTeamsForTargetDirective(CGF, D));
9661 NumThreads.push_back(
9662 Elt: OMPRuntime->emitNumThreadsForTargetDirective(CGF, D));
9663 }
9664
9665 llvm::Value *DeviceID = emitDeviceID(Device, CGF);
9666 llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, Loc: D.getBeginLoc());
9667 llvm::Value *NumIterations =
9668 OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
9669 llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
9670 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
9671 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
9672
9673 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
9674 BasePointersArray, PointersArray, SizesArray, MapTypesArray,
9675 nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
9676
9677 llvm::OpenMPIRBuilder::TargetKernelArgs Args(
9678 NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
9679 DynCGGroupMem, HasNoWait);
9680
9681 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
9682 cantFail(ValOrErr: OMPRuntime->getOMPBuilder().emitKernelLaunch(
9683 Loc: CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
9684 RTLoc, AllocaIP));
9685 CGF.Builder.restoreIP(IP: AfterIP);
9686 };
9687
9688 if (RequiresOuterTask)
9689 CGF.EmitOMPTargetTaskBasedDirective(S: D, BodyGen: ThenGen, InputInfo);
9690 else
9691 OMPRuntime->emitInlinedDirective(CGF, InnerKind: D.getDirectiveKind(), CodeGen: ThenGen);
9692}
9693
9694static void
9695emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9696 const OMPExecutableDirective &D,
9697 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9698 bool RequiresOuterTask, const CapturedStmt &CS,
9699 bool OffloadingMandatory, CodeGenFunction &CGF) {
9700
9701 // Notify that the host version must be executed.
9702 auto &&ElseGen =
9703 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9704 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9705 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9706 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9707 };
9708
9709 if (RequiresOuterTask) {
9710 CodeGenFunction::OMPTargetDataInfo InputInfo;
9711 CGF.EmitOMPTargetTaskBasedDirective(S: D, BodyGen: ElseGen, InputInfo);
9712 } else {
9713 OMPRuntime->emitInlinedDirective(CGF, InnerKind: D.getDirectiveKind(), CodeGen: ElseGen);
9714 }
9715}
9716
9717void CGOpenMPRuntime::emitTargetCall(
9718 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9719 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9720 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9721 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9722 const OMPLoopDirective &D)>
9723 SizeEmitter) {
9724 if (!CGF.HaveInsertPoint())
9725 return;
9726
9727 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
9728 CGM.getLangOpts().OpenMPOffloadMandatory;
9729
9730 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
9731
9732 const bool RequiresOuterTask =
9733 D.hasClausesOfKind<OMPDependClause>() ||
9734 D.hasClausesOfKind<OMPNowaitClause>() ||
9735 D.hasClausesOfKind<OMPInReductionClause>() ||
9736 (CGM.getLangOpts().OpenMP >= 51 &&
9737 needsTaskBasedThreadLimit(DKind: D.getDirectiveKind()) &&
9738 D.hasClausesOfKind<OMPThreadLimitClause>());
9739 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9740 const CapturedStmt &CS = *D.getCapturedStmt(RegionKind: OMPD_target);
9741 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9742 PrePostActionTy &) {
9743 CGF.GenerateOpenMPCapturedVars(S: CS, CapturedVars);
9744 };
9745 emitInlinedDirective(CGF, InnerKind: OMPD_unknown, CodeGen: ArgsCodegen);
9746
9747 CodeGenFunction::OMPTargetDataInfo InputInfo;
9748 llvm::Value *MapTypesArray = nullptr;
9749 llvm::Value *MapNamesArray = nullptr;
9750
9751 auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
9752 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9753 OutlinedFnID, &InputInfo, &MapTypesArray,
9754 &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
9755 PrePostActionTy &) {
9756 emitTargetCallKernelLaunch(OMPRuntime: this, OutlinedFn, D, CapturedVars,
9757 RequiresOuterTask, CS, OffloadingMandatory,
9758 Device, OutlinedFnID, InputInfo, MapTypesArray,
9759 MapNamesArray, SizeEmitter, CGF, CGM);
9760 };
9761
9762 auto &&TargetElseGen =
9763 [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9764 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9765 emitTargetCallElse(OMPRuntime: this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
9766 CS, OffloadingMandatory, CGF);
9767 };
9768
9769  // If we have a target function ID it means that we need to support
9770  // offloading; otherwise, we just execute on the host. We must execute on the
9771  // host regardless of the conditional in the if clause if, e.g., the user does
9772  // not specify any target triples.
9773 if (OutlinedFnID) {
9774 if (IfCond) {
9775 emitIfClause(CGF, Cond: IfCond, ThenGen: TargetThenGen, ElseGen: TargetElseGen);
9776 } else {
9777 RegionCodeGenTy ThenRCG(TargetThenGen);
9778 ThenRCG(CGF);
9779 }
9780 } else {
9781 RegionCodeGenTy ElseRCG(TargetElseGen);
9782 ElseRCG(CGF);
9783 }
9784}
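
// Overall shape (illustrative; 'cond', 'd', and 'x' are hypothetical): for
// \code
//   #pragma omp target if(cond) device(d) map(tofrom: x)
// \endcode
// this emits "if (cond) <kernel launch with host fallback> else <host
// outlined call>"; when no outlined function ID is registered (e.g. no
// offload targets were requested), only the host path is emitted.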
9785
9786void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9787 StringRef ParentName) {
9788 if (!S)
9789 return;
9790
9791 // Codegen OMP target directives that offload compute to the device.
9792 bool RequiresDeviceCodegen =
9793 isa<OMPExecutableDirective>(Val: S) &&
9794 isOpenMPTargetExecutionDirective(
9795 DKind: cast<OMPExecutableDirective>(Val: S)->getDirectiveKind());
9796
9797 if (RequiresDeviceCodegen) {
9798 const auto &E = *cast<OMPExecutableDirective>(Val: S);
9799
9800 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
9801 CGM, OMPBuilder, BeginLoc: E.getBeginLoc(), ParentName);
9802
9803    // Is this a target region that should not be emitted as an entry point? If
9804    // so, just signal that we are done with this target region.
9805 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
9806 return;
9807
9808 switch (E.getDirectiveKind()) {
9809 case OMPD_target:
9810 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9811 S: cast<OMPTargetDirective>(Val: E));
9812 break;
9813 case OMPD_target_parallel:
9814 CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9815 CGM, ParentName, S: cast<OMPTargetParallelDirective>(Val: E));
9816 break;
9817 case OMPD_target_teams:
9818 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9819 CGM, ParentName, S: cast<OMPTargetTeamsDirective>(Val: E));
9820 break;
9821 case OMPD_target_teams_distribute:
9822 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9823 CGM, ParentName, S: cast<OMPTargetTeamsDistributeDirective>(Val: E));
9824 break;
9825 case OMPD_target_teams_distribute_simd:
9826 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9827 CGM, ParentName, S: cast<OMPTargetTeamsDistributeSimdDirective>(Val: E));
9828 break;
9829 case OMPD_target_parallel_for:
9830 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9831 CGM, ParentName, S: cast<OMPTargetParallelForDirective>(Val: E));
9832 break;
9833 case OMPD_target_parallel_for_simd:
9834 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9835 CGM, ParentName, S: cast<OMPTargetParallelForSimdDirective>(Val: E));
9836 break;
9837 case OMPD_target_simd:
9838 CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9839 CGM, ParentName, S: cast<OMPTargetSimdDirective>(Val: E));
9840 break;
9841 case OMPD_target_teams_distribute_parallel_for:
9842 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9843 CGM, ParentName,
9844 S: cast<OMPTargetTeamsDistributeParallelForDirective>(Val: E));
9845 break;
9846 case OMPD_target_teams_distribute_parallel_for_simd:
9847 CodeGenFunction::
9848 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9849 CGM, ParentName,
9850 S: cast<OMPTargetTeamsDistributeParallelForSimdDirective>(Val: E));
9851 break;
9852 case OMPD_target_teams_loop:
9853 CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
9854 CGM, ParentName, S: cast<OMPTargetTeamsGenericLoopDirective>(Val: E));
9855 break;
9856 case OMPD_target_parallel_loop:
9857 CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
9858 CGM, ParentName, S: cast<OMPTargetParallelGenericLoopDirective>(Val: E));
9859 break;
9860 case OMPD_parallel:
9861 case OMPD_for:
9862 case OMPD_parallel_for:
9863 case OMPD_parallel_master:
9864 case OMPD_parallel_sections:
9865 case OMPD_for_simd:
9866 case OMPD_parallel_for_simd:
9867 case OMPD_cancel:
9868 case OMPD_cancellation_point:
9869 case OMPD_ordered:
9870 case OMPD_threadprivate:
9871 case OMPD_allocate:
9872 case OMPD_task:
9873 case OMPD_simd:
9874 case OMPD_tile:
9875 case OMPD_unroll:
9876 case OMPD_sections:
9877 case OMPD_section:
9878 case OMPD_single:
9879 case OMPD_master:
9880 case OMPD_critical:
9881 case OMPD_taskyield:
9882 case OMPD_barrier:
9883 case OMPD_taskwait:
9884 case OMPD_taskgroup:
9885 case OMPD_atomic:
9886 case OMPD_flush:
9887 case OMPD_depobj:
9888 case OMPD_scan:
9889 case OMPD_teams:
9890 case OMPD_target_data:
9891 case OMPD_target_exit_data:
9892 case OMPD_target_enter_data:
9893 case OMPD_distribute:
9894 case OMPD_distribute_simd:
9895 case OMPD_distribute_parallel_for:
9896 case OMPD_distribute_parallel_for_simd:
9897 case OMPD_teams_distribute:
9898 case OMPD_teams_distribute_simd:
9899 case OMPD_teams_distribute_parallel_for:
9900 case OMPD_teams_distribute_parallel_for_simd:
9901 case OMPD_target_update:
9902 case OMPD_declare_simd:
9903 case OMPD_declare_variant:
9904 case OMPD_begin_declare_variant:
9905 case OMPD_end_declare_variant:
9906 case OMPD_declare_target:
9907 case OMPD_end_declare_target:
9908 case OMPD_declare_reduction:
9909 case OMPD_declare_mapper:
9910 case OMPD_taskloop:
9911 case OMPD_taskloop_simd:
9912 case OMPD_master_taskloop:
9913 case OMPD_master_taskloop_simd:
9914 case OMPD_parallel_master_taskloop:
9915 case OMPD_parallel_master_taskloop_simd:
9916 case OMPD_requires:
9917 case OMPD_metadirective:
9918 case OMPD_unknown:
9919 default:
9920 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9921 }
9922 return;
9923 }
9924
9925 if (const auto *E = dyn_cast<OMPExecutableDirective>(Val: S)) {
9926 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9927 return;
9928
9929 scanForTargetRegionsFunctions(S: E->getRawStmt(), ParentName);
9930 return;
9931 }
9932
9933 // If this is a lambda function, look into its body.
9934 if (const auto *L = dyn_cast<LambdaExpr>(Val: S))
9935 S = L->getBody();
9936
9937 // Keep looking for target regions recursively.
9938 for (const Stmt *II : S->children())
9939 scanForTargetRegionsFunctions(S: II, ParentName);
9940}
9941
9942static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
9943 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9944 OMPDeclareTargetDeclAttr::getDeviceType(VD);
9945 if (!DevTy)
9946 return false;
9947 // Do not emit device_type(nohost) functions for the host.
9948 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9949 return true;
9950 // Do not emit device_type(host) functions for the device.
9951 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9952 return true;
9953 return false;
9954}
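
// For example (illustrative):
// \code
//   #pragma omp declare target device_type(nohost)
//   void only_on_device();
//   #pragma omp end declare target
// \endcode
// 'only_on_device' is not emitted when compiling for the host, and a
// device_type(host) symbol is likewise skipped when compiling for the device.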
9955
9956bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9957 // If emitting code for the host, we do not process FD here. Instead we do
9958 // the normal code generation.
9959 if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
9960 if (const auto *FD = dyn_cast<FunctionDecl>(Val: GD.getDecl()))
9961 if (isAssumedToBeNotEmitted(VD: cast<ValueDecl>(Val: FD),
9962 IsDevice: CGM.getLangOpts().OpenMPIsTargetDevice))
9963 return true;
9964 return false;
9965 }
9966
9967 const ValueDecl *VD = cast<ValueDecl>(Val: GD.getDecl());
9968 // Try to detect target regions in the function.
9969 if (const auto *FD = dyn_cast<FunctionDecl>(Val: VD)) {
9970 StringRef Name = CGM.getMangledName(GD);
9971 scanForTargetRegionsFunctions(S: FD->getBody(), ParentName: Name);
9972 if (isAssumedToBeNotEmitted(VD: cast<ValueDecl>(Val: FD),
9973 IsDevice: CGM.getLangOpts().OpenMPIsTargetDevice))
9974 return true;
9975 }
9976
9977  // Do not emit the function if it is not marked as declare target.
9978 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9979 AlreadyEmittedTargetDecls.count(V: VD) == 0;
9980}
9981
9982bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9983 if (isAssumedToBeNotEmitted(VD: cast<ValueDecl>(Val: GD.getDecl()),
9984 IsDevice: CGM.getLangOpts().OpenMPIsTargetDevice))
9985 return true;
9986
9987 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
9988 return false;
9989
9990 // Check if there are Ctors/Dtors in this declaration and look for target
9991 // regions in it. We use the complete variant to produce the kernel name
9992 // mangling.
9993 QualType RDTy = cast<VarDecl>(Val: GD.getDecl())->getType();
9994 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9995 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9996 StringRef ParentName =
9997 CGM.getMangledName(GD: GlobalDecl(Ctor, Ctor_Complete));
9998 scanForTargetRegionsFunctions(S: Ctor->getBody(), ParentName);
9999 }
10000 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10001 StringRef ParentName =
10002 CGM.getMangledName(GD: GlobalDecl(Dtor, Dtor_Complete));
10003 scanForTargetRegionsFunctions(S: Dtor->getBody(), ParentName);
10004 }
10005 }
10006
10007  // Do not emit the variable if it is not marked as declare target.
10008 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10009 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10010 VD: cast<VarDecl>(Val: GD.getDecl()));
10011 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10012 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10013 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10014 HasRequiresUnifiedSharedMemory)) {
10015 DeferredGlobalVariables.insert(V: cast<VarDecl>(Val: GD.getDecl()));
10016 return true;
10017 }
10018 return false;
10019}
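
// For example (illustrative):
// \code
//   #pragma omp declare target link(gbl)
//   int gbl;
//   #pragma omp end declare target
// \endcode
// 'gbl' is added to DeferredGlobalVariables here and materialized later
// through getAddrOfDeclareTargetVar() (see emitDeferredTargetDecls() below).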
10020
10021void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10022 llvm::Constant *Addr) {
10023 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10024 !CGM.getLangOpts().OpenMPIsTargetDevice)
10025 return;
10026
10027 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10028 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10029
10030  // If this is an 'extern' declaration, we defer to the canonical definition
10031  // and do not emit an offloading entry.
10032 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
10033 VD->hasExternalStorage())
10034 return;
10035
10036 if (!Res) {
10037 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10038 // Register non-target variables being emitted in device code (debug info
10039 // may cause this).
10040 StringRef VarName = CGM.getMangledName(GD: VD);
10041 EmittedNonTargetVariables.try_emplace(Key: VarName, Args&: Addr);
10042 }
10043 return;
10044 }
10045
10046 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(GD: VD); };
10047 auto LinkageForVariable = [&VD, this]() {
10048 return CGM.getLLVMLinkageVarDefinition(VD);
10049 };
10050
10051 std::vector<llvm::GlobalVariable *> GeneratedRefs;
10052 OMPBuilder.registerTargetGlobalVariable(
10053 CaptureClause: convertCaptureClause(VD), DeviceClause: convertDeviceClause(VD),
10054 IsDeclaration: VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
10055 IsExternallyVisible: VD->isExternallyVisible(),
10056 EntryInfo: getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
10057 BeginLoc: VD->getCanonicalDecl()->getBeginLoc()),
10058 MangledName: CGM.getMangledName(GD: VD), GeneratedRefs, OpenMPSIMD: CGM.getLangOpts().OpenMPSimd,
10059 TargetTriple: CGM.getLangOpts().OMPTargetTriples, GlobalInitializer: AddrOfGlobal, VariableLinkage: LinkageForVariable,
10060 LlvmPtrTy: CGM.getTypes().ConvertTypeForMem(
10061 T: CGM.getContext().getPointerType(T: VD->getType())),
10062 Addr);
10063
10064 for (auto *ref : GeneratedRefs)
10065 CGM.addCompilerUsedGlobal(GV: ref);
10066}
10067
10068bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10069 if (isa<FunctionDecl>(Val: GD.getDecl()) ||
10070 isa<OMPDeclareReductionDecl>(Val: GD.getDecl()))
10071 return emitTargetFunctions(GD);
10072
10073 return emitTargetGlobalVariable(GD);
10074}
10075
10076void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10077 for (const VarDecl *VD : DeferredGlobalVariables) {
10078 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10079 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10080 if (!Res)
10081 continue;
10082 if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10083 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10084 !HasRequiresUnifiedSharedMemory) {
10085 CGM.EmitGlobal(D: VD);
10086 } else {
10087 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10088 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10089 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10090 HasRequiresUnifiedSharedMemory)) &&
10091 "Expected link clause or to clause with unified memory.");
10092 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10093 }
10094 }
10095}
10096
10097void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10098 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10099 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10100         "Expected target-based directive.");
10101}
10102
10103void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10104 for (const OMPClause *Clause : D->clauselists()) {
10105 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10106 HasRequiresUnifiedSharedMemory = true;
10107 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
10108 } else if (const auto *AC =
10109 dyn_cast<OMPAtomicDefaultMemOrderClause>(Val: Clause)) {
10110 switch (AC->getAtomicDefaultMemOrderKind()) {
10111 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10112 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10113 break;
10114 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10115 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10116 break;
10117 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10118 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10119 break;
10120 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10121 break;
10122 }
10123 }
10124 }
10125}
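
// For example (illustrative):
// \code
//   #pragma omp requires atomic_default_mem_order(seq_cst)
// \endcode
// sets the ordering returned by getDefaultMemoryOrdering() below to
// llvm::AtomicOrdering::SequentiallyConsistent for subsequently emitted
// atomic constructs.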
10126
10127llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10128 return RequiresAtomicOrdering;
10129}
10130
10131bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10132 LangAS &AS) {
10133 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10134 return false;
10135 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10136  switch (A->getAllocatorType()) {
10137 case OMPAllocateDeclAttr::OMPNullMemAlloc:
10138 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10139  // Not supported, fall back to the default memory space.
10140 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10141 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10142 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10143 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10144 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10145 case OMPAllocateDeclAttr::OMPConstMemAlloc:
10146 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10147 AS = LangAS::Default;
10148 return true;
10149 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10150    llvm_unreachable("Expected predefined allocator for the variables with "
10151                     "static storage.");
10152 }
10153 return false;
10154}
10155
10156bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10157 return HasRequiresUnifiedSharedMemory;
10158}
10159
10160CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10161 CodeGenModule &CGM)
10162 : CGM(CGM) {
10163 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10164 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10165 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10166 }
10167}
10168
10169CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10170 if (CGM.getLangOpts().OpenMPIsTargetDevice)
10171 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10172}
10173
10174bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10175 if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
10176 return true;
10177
10178 const auto *D = cast<FunctionDecl>(Val: GD.getDecl());
10179  // Do not emit the function if it is marked as declare target, as it was
10180  // already emitted.
10181 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD: D)) {
10182 if (D->hasBody() && AlreadyEmittedTargetDecls.count(V: D) == 0) {
10183 if (auto *F = dyn_cast_or_null<llvm::Function>(
10184 Val: CGM.GetGlobalValue(Ref: CGM.getMangledName(GD))))
10185 return !F->isDeclaration();
10186 return false;
10187 }
10188 return true;
10189 }
10190
10191 return !AlreadyEmittedTargetDecls.insert(V: D).second;
10192}
10193
10194void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10195 const OMPExecutableDirective &D,
10196 SourceLocation Loc,
10197 llvm::Function *OutlinedFn,
10198 ArrayRef<llvm::Value *> CapturedVars) {
10199 if (!CGF.HaveInsertPoint())
10200 return;
10201
10202 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10203 CodeGenFunction::RunCleanupsScope Scope(CGF);
10204
10205 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10206 llvm::Value *Args[] = {
10207 RTLoc,
10208 CGF.Builder.getInt32(C: CapturedVars.size()), // Number of captured vars
10209 OutlinedFn};
10210 llvm::SmallVector<llvm::Value *, 16> RealArgs;
10211 RealArgs.append(in_start: std::begin(arr&: Args), in_end: std::end(arr&: Args));
10212 RealArgs.append(in_start: CapturedVars.begin(), in_end: CapturedVars.end());
10213
10214 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10215 M&: CGM.getModule(), FnID: OMPRTL___kmpc_fork_teams);
10216 CGF.EmitRuntimeCall(callee: RTLFn, args: RealArgs);
10217}
10218
10219void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10220 const Expr *NumTeams,
10221 const Expr *ThreadLimit,
10222 SourceLocation Loc) {
10223 if (!CGF.HaveInsertPoint())
10224 return;
10225
10226 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10227
10228 llvm::Value *NumTeamsVal =
10229 NumTeams
10230 ? CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: NumTeams),
10231 DestTy: CGF.CGM.Int32Ty, /* isSigned = */ true)
10232 : CGF.Builder.getInt32(C: 0);
10233
10234 llvm::Value *ThreadLimitVal =
10235 ThreadLimit
10236 ? CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: ThreadLimit),
10237 DestTy: CGF.CGM.Int32Ty, /* isSigned = */ true)
10238 : CGF.Builder.getInt32(C: 0);
10239
10240  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
10241 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10242 ThreadLimitVal};
10243 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
10244 M&: CGM.getModule(), FnID: OMPRTL___kmpc_push_num_teams),
10245 args: PushNumTeamsArgs);
10246}
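
// For example (illustrative):
// \code
//   #pragma omp teams num_teams(4) thread_limit(64)
// \endcode
// lowers to "__kmpc_push_num_teams(&loc, gtid, 4, 64)" ahead of the
// __kmpc_fork_teams call; an absent clause contributes a 0, meaning "no
// preference".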
10247
10248void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
10249 const Expr *ThreadLimit,
10250 SourceLocation Loc) {
10251 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10252 llvm::Value *ThreadLimitVal =
10253 ThreadLimit
10254 ? CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: ThreadLimit),
10255 DestTy: CGF.CGM.Int32Ty, /* isSigned = */ true)
10256 : CGF.Builder.getInt32(C: 0);
10257
10258 // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
10259 llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
10260 ThreadLimitVal};
10261 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
10262 M&: CGM.getModule(), FnID: OMPRTL___kmpc_set_thread_limit),
10263 args: ThreadLimitArgs);
10264}
10265
10266void CGOpenMPRuntime::emitTargetDataCalls(
10267 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10268 const Expr *Device, const RegionCodeGenTy &CodeGen,
10269 CGOpenMPRuntime::TargetDataInfo &Info) {
10270 if (!CGF.HaveInsertPoint())
10271 return;
10272
10273 // Action used to replace the default codegen action and turn privatization
10274 // off.
10275 PrePostActionTy NoPrivAction;
10276
10277 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10278
10279 llvm::Value *IfCondVal = nullptr;
10280 if (IfCond)
10281 IfCondVal = CGF.EvaluateExprAsBool(E: IfCond);
10282
10283 // Emit device ID if any.
10284 llvm::Value *DeviceID = nullptr;
10285 if (Device) {
10286 DeviceID = CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: Device),
10287 DestTy: CGF.Int64Ty, /*isSigned=*/true);
10288 } else {
10289 DeviceID = CGF.Builder.getInt64(C: OMP_DEVICEID_UNDEF);
10290 }
10291
10292 // Fill up the arrays with all the mapped variables.
10293 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10294 auto GenMapInfoCB =
10295 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
10296 CGF.Builder.restoreIP(IP: CodeGenIP);
10297 // Get map clause information.
10298 MappableExprsHandler MEHandler(D, CGF);
10299 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
10300
10301 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10302 return emitMappingInformation(CGF, OMPBuilder, MapExprs&: MapExpr);
10303 };
10304 if (CGM.getCodeGenOpts().getDebugInfo() !=
10305 llvm::codegenoptions::NoDebugInfo) {
10306 CombinedInfo.Names.resize(N: CombinedInfo.Exprs.size());
10307 llvm::transform(Range&: CombinedInfo.Exprs, d_first: CombinedInfo.Names.begin(),
10308 F: FillInfoMap);
10309 }
10310
10311 return CombinedInfo;
10312 };
10313 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
10314 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
10315 CGF.Builder.restoreIP(IP: CodeGenIP);
10316 switch (BodyGenType) {
10317 case BodyGenTy::Priv:
10318 if (!Info.CaptureDeviceAddrMap.empty())
10319 CodeGen(CGF);
10320 break;
10321 case BodyGenTy::DupNoPriv:
10322 if (!Info.CaptureDeviceAddrMap.empty()) {
10323 CodeGen.setAction(NoPrivAction);
10324 CodeGen(CGF);
10325 }
10326 break;
10327 case BodyGenTy::NoPriv:
10328 if (Info.CaptureDeviceAddrMap.empty()) {
10329 CodeGen.setAction(NoPrivAction);
10330 CodeGen(CGF);
10331 }
10332 break;
10333 }
10334 return InsertPointTy(CGF.Builder.GetInsertBlock(),
10335 CGF.Builder.GetInsertPoint());
10336 };
10337
10338 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
10339 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
10340 Info.CaptureDeviceAddrMap.try_emplace(Key: DevVD, Args&: NewDecl);
10341 }
10342 };
10343
10344 auto CustomMapperCB = [&](unsigned int I) {
10345 llvm::Function *MFunc = nullptr;
10346 if (CombinedInfo.Mappers[I]) {
10347 Info.HasMapper = true;
10348 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
10349 D: cast<OMPDeclareMapperDecl>(Val: CombinedInfo.Mappers[I]));
10350 }
10351 return MFunc;
10352 };
10353
10354 // Source location for the ident struct
10355 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc: D.getBeginLoc());
10356
10357 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
10358 CGF.AllocaInsertPt->getIterator());
10359 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
10360 CGF.Builder.GetInsertPoint());
10361 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
10362 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
10363 cantFail(ValOrErr: OMPBuilder.createTargetData(
10364 Loc: OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCond: IfCondVal, Info, GenMapInfoCB,
10365 CustomMapperCB,
10366 /*MapperFunc=*/nullptr, BodyGenCB: BodyCB, DeviceAddrCB, SrcLocInfo: RTLoc));
10367 CGF.Builder.restoreIP(IP: AfterIP);
10368}
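
// Illustrative shape ('a', 'n', 'p', and 'body' are hypothetical names):
// \code
//   #pragma omp target data map(tofrom: a[0:n]) use_device_ptr(p)
//   { body(); }
// \endcode
// createTargetData() wraps the region in the begin/end mapper calls, and
// BodyCB may run for up to three phases (Priv, DupNoPriv, NoPriv) so the body
// can be emitted with or without the device-address privatization that
// use_device_ptr/use_device_addr require.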
10369
10370void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10371 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10372 const Expr *Device) {
10373 if (!CGF.HaveInsertPoint())
10374 return;
10375
10376 assert((isa<OMPTargetEnterDataDirective>(D) ||
10377 isa<OMPTargetExitDataDirective>(D) ||
10378 isa<OMPTargetUpdateDirective>(D)) &&
10379 "Expecting either target enter, exit data, or update directives.");
10380
10381 CodeGenFunction::OMPTargetDataInfo InputInfo;
10382 llvm::Value *MapTypesArray = nullptr;
10383 llvm::Value *MapNamesArray = nullptr;
10384 // Generate the code for the opening of the data environment.
10385 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
10386 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10387 // Emit device ID if any.
10388 llvm::Value *DeviceID = nullptr;
10389 if (Device) {
10390 DeviceID = CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: Device),
10391 DestTy: CGF.Int64Ty, /*isSigned=*/true);
10392 } else {
10393 DeviceID = CGF.Builder.getInt64(C: OMP_DEVICEID_UNDEF);
10394 }
10395
10396 // Emit the number of elements in the offloading arrays.
10397 llvm::Constant *PointerNum =
10398 CGF.Builder.getInt32(C: InputInfo.NumberOfTargetItems);
10399
10400 // Source location for the ident struct
10401 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc: D.getBeginLoc());
10402
10403 SmallVector<llvm::Value *, 13> OffloadingArgs(
10404 {RTLoc, DeviceID, PointerNum,
10405 InputInfo.BasePointersArray.emitRawPointer(CGF),
10406 InputInfo.PointersArray.emitRawPointer(CGF),
10407 InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
10408 InputInfo.MappersArray.emitRawPointer(CGF)});
10409
10410 // Select the right runtime function call for each standalone
10411 // directive.
10412 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10413 RuntimeFunction RTLFn;
10414 switch (D.getDirectiveKind()) {
10415 case OMPD_target_enter_data:
10416 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
10417 : OMPRTL___tgt_target_data_begin_mapper;
10418 break;
10419 case OMPD_target_exit_data:
10420 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
10421 : OMPRTL___tgt_target_data_end_mapper;
10422 break;
10423 case OMPD_target_update:
10424 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
10425 : OMPRTL___tgt_target_data_update_mapper;
10426 break;
10427 case OMPD_parallel:
10428 case OMPD_for:
10429 case OMPD_parallel_for:
10430 case OMPD_parallel_master:
10431 case OMPD_parallel_sections:
10432 case OMPD_for_simd:
10433 case OMPD_parallel_for_simd:
10434 case OMPD_cancel:
10435 case OMPD_cancellation_point:
10436 case OMPD_ordered:
10437 case OMPD_threadprivate:
10438 case OMPD_allocate:
10439 case OMPD_task:
10440 case OMPD_simd:
10441 case OMPD_tile:
10442 case OMPD_unroll:
10443 case OMPD_sections:
10444 case OMPD_section:
10445 case OMPD_single:
10446 case OMPD_master:
10447 case OMPD_critical:
10448 case OMPD_taskyield:
10449 case OMPD_barrier:
10450 case OMPD_taskwait:
10451 case OMPD_taskgroup:
10452 case OMPD_atomic:
10453 case OMPD_flush:
10454 case OMPD_depobj:
10455 case OMPD_scan:
10456 case OMPD_teams:
10457 case OMPD_target_data:
10458 case OMPD_distribute:
10459 case OMPD_distribute_simd:
10460 case OMPD_distribute_parallel_for:
10461 case OMPD_distribute_parallel_for_simd:
10462 case OMPD_teams_distribute:
10463 case OMPD_teams_distribute_simd:
10464 case OMPD_teams_distribute_parallel_for:
10465 case OMPD_teams_distribute_parallel_for_simd:
10466 case OMPD_declare_simd:
10467 case OMPD_declare_variant:
10468 case OMPD_begin_declare_variant:
10469 case OMPD_end_declare_variant:
10470 case OMPD_declare_target:
10471 case OMPD_end_declare_target:
10472 case OMPD_declare_reduction:
10473 case OMPD_declare_mapper:
10474 case OMPD_taskloop:
10475 case OMPD_taskloop_simd:
10476 case OMPD_master_taskloop:
10477 case OMPD_master_taskloop_simd:
10478 case OMPD_parallel_master_taskloop:
10479 case OMPD_parallel_master_taskloop_simd:
10480 case OMPD_target:
10481 case OMPD_target_simd:
10482 case OMPD_target_teams_distribute:
10483 case OMPD_target_teams_distribute_simd:
10484 case OMPD_target_teams_distribute_parallel_for:
10485 case OMPD_target_teams_distribute_parallel_for_simd:
10486 case OMPD_target_teams:
10487 case OMPD_target_parallel:
10488 case OMPD_target_parallel_for:
10489 case OMPD_target_parallel_for_simd:
10490 case OMPD_requires:
10491 case OMPD_metadirective:
10492 case OMPD_unknown:
10493 default:
10494 llvm_unreachable("Unexpected standalone target data directive.");
10495 break;
10496 }
10497 if (HasNowait) {
10498 OffloadingArgs.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.Int32Ty));
10499 OffloadingArgs.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.VoidPtrTy));
10500 OffloadingArgs.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.Int32Ty));
10501 OffloadingArgs.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.VoidPtrTy));
10502 }
10503 CGF.EmitRuntimeCall(
10504 callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(), FnID: RTLFn),
10505 args: OffloadingArgs);
10506 };
10507
10508 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10509 &MapNamesArray](CodeGenFunction &CGF,
10510 PrePostActionTy &) {
10511 // Fill up the arrays with all the mapped variables.
10512 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10513 CGOpenMPRuntime::TargetDataInfo Info;
10514 MappableExprsHandler MEHandler(D, CGF);
10515 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
10516 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
10517 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
10518
10519 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10520 D.hasClausesOfKind<OMPNowaitClause>();
10521
10522 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10523 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10524 CGF.VoidPtrTy, CGM.getPointerAlign());
10525 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
10526 CGM.getPointerAlign());
10527 InputInfo.SizesArray =
10528 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10529 InputInfo.MappersArray =
10530 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10531 MapTypesArray = Info.RTArgs.MapTypesArray;
10532 MapNamesArray = Info.RTArgs.MapNamesArray;
10533 if (RequiresOuterTask)
10534 CGF.EmitOMPTargetTaskBasedDirective(S: D, BodyGen: ThenGen, InputInfo);
10535 else
10536 emitInlinedDirective(CGF, InnerKind: D.getDirectiveKind(), CodeGen: ThenGen);
10537 };
10538
10539 if (IfCond) {
10540 emitIfClause(CGF, Cond: IfCond, ThenGen: TargetThenGen,
10541 ElseGen: [](CodeGenFunction &CGF, PrePostActionTy &) {});
10542 } else {
10543 RegionCodeGenTy ThenRCG(TargetThenGen);
10544 ThenRCG(CGF);
10545 }
10546}
10547
10548namespace {
/// Kind of parameter in a function with 'declare simd' directive.
10550enum ParamKindTy {
10551 Linear,
10552 LinearRef,
10553 LinearUVal,
10554 LinearVal,
10555 Uniform,
10556 Vector,
10557};
10558/// Attribute set of the parameter.
10559struct ParamAttrTy {
10560 ParamKindTy Kind = Vector;
10561 llvm::APSInt StrideOrArg;
10562 llvm::APSInt Alignment;
10563 bool HasVarStride = false;
10564};
10565} // namespace
10566
static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length
  // (VLEN). If the OpenMP clause "simdlen" is used, the VLEN is the value of
  // the argument of that clause. The VLEN value must be a power of 2.
  // Otherwise the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For a non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //      CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is a struct, union, or class
  //      type which is passed by value (except for the type that maps to the
  //      built-in complex data type), the CDT is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of the vector
  // register of the ISA for which the current vector version is generated.
  // The VLEN is computed using the formula below:
  //   VLEN = sizeof(vector_register) / sizeof(CDT),
  // where the vector register size is specified in section 3.2.1 "Registers
  // and the Stack Frame" of the original AMD64 ABI document.
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}
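
// Worked example (illustrative, not taken from the ABI text): for an AVX2
// variant (256-bit vector registers) of a function whose CDT is double
// (64 bits), the mangler derives
//   VLEN = 256 / 64 = 4,
// i.e. the variant processes four doubles per invocation.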
10618
/// Mangle the parameter part of the vector function name according to the
/// parameters' OpenMP classification. The mangling rules are defined in
/// section 4.5 of the AAVFABI (2021Q1).
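///
/// For example (illustrative): a parameter list consisting of a linear
/// parameter with step 1, a uniform parameter, and a vector parameter aligned
/// to 8 bytes mangles as "luva8".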
10622static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10623 SmallString<256> Buffer;
10624 llvm::raw_svector_ostream Out(Buffer);
10625 for (const auto &ParamAttr : ParamAttrs) {
10626 switch (ParamAttr.Kind) {
10627 case Linear:
10628 Out << 'l';
10629 break;
10630 case LinearRef:
10631 Out << 'R';
10632 break;
10633 case LinearUVal:
10634 Out << 'U';
10635 break;
10636 case LinearVal:
10637 Out << 'L';
10638 break;
10639 case Uniform:
10640 Out << 'u';
10641 break;
10642 case Vector:
10643 Out << 'v';
10644 break;
10645 }
10646 if (ParamAttr.HasVarStride)
10647 Out << "s" << ParamAttr.StrideOrArg;
10648 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
10649 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
10650 // Don't print the step value if it is not present or if it is
10651 // equal to 1.
10652 if (ParamAttr.StrideOrArg < 0)
10653 Out << 'n' << -ParamAttr.StrideOrArg;
10654 else if (ParamAttr.StrideOrArg != 1)
10655 Out << ParamAttr.StrideOrArg;
10656 }
10657
10658 if (!!ParamAttr.Alignment)
10659 Out << 'a' << ParamAttr.Alignment;
10660 }
10661
10662 return std::string(Out.str());
10663}
10664
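// Emit the x86 'declare simd' mangled variant names as function attributes.
// Illustrative example (not taken from the ABI text): with simdlen(4) and
// notinbranch on "double foo(double x)", the loops below add one attribute
// per ISA: "_ZGVbN4v_foo" (SSE), "_ZGVcN4v_foo" (AVX), "_ZGVdN4v_foo" (AVX2)
// and "_ZGVeN4v_foo" (AVX512).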
static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {'b', 128}, // SSE
      {'c', 256}, // AVX
      {'d', 256}, // AVX2
      {'e', 512}, // AVX512
  };
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      Out << mangleVectorParameters(ParamAttrs);
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}
10719
// These are the functions needed to mangle the names of the vector functions
// generated by the compiler, according to the rules defined in the "Vector
// Function ABI specification for AArch64" (AAVFABI), available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10725
10726/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
10727static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10728 QT = QT.getCanonicalType();
10729
10730 if (QT->isVoidType())
10731 return false;
10732
10733 if (Kind == ParamKindTy::Uniform)
10734 return false;
10735
10736 if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
10737 return false;
10738
10739 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
10740 !QT->isReferenceType())
10741 return false;
10742
10743 return true;
10744}
10745
10746/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10747static bool getAArch64PBV(QualType QT, ASTContext &C) {
10748 QT = QT.getCanonicalType();
  unsigned Size = C.getTypeSize(QT);

  // Only scalars and complex types at most 128 bits (16 bytes) wide set PBV
  // to true.
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10753 return false;
10754
10755 if (QT->isFloatingType())
10756 return true;
10757
10758 if (QT->isIntegerType())
10759 return true;
10760
10761 if (QT->isPointerType())
10762 return true;
10763
10764 // TODO: Add support for complex types (section 3.1.2, item 2).
10765
10766 return false;
10767}
10768
10769/// Computes the lane size (LS) of a return type or of an input parameter,
10770/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10771/// TODO: Add support for references, section 3.2.1, item 1.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
  if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
    QualType PTy = QT.getCanonicalType()->getPointeeType();
    if (getAArch64PBV(PTy, C))
      return C.getTypeSize(PTy);
  }
  if (getAArch64PBV(QT, C))
    return C.getTypeSize(QT);

  return C.getTypeSize(C.getUIntPtrType());
}
10783
10784// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10785// signature of the scalar function, as defined in 3.2.2 of the
10786// AAVFABI.
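// For example (illustrative): for "double foo(float x, int *p)" with both
// parameters classified as vector on a 64-bit target, the lane sizes are
// 64 (return value), 32 (float) and 64 (pointer), so NDS == 32 and
// WDS == 64.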
static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
  QualType RetType = FD->getReturnType().getCanonicalType();

  ASTContext &C = FD->getASTContext();

  bool OutputBecomesInput = false;

  llvm::SmallVector<unsigned, 8> Sizes;
  if (!RetType->isVoidType()) {
    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
      OutputBecomesInput = true;
  }
  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
  }

  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
  // The LS of a function parameter / return value can only be a power
  // of 2, starting from 8 bits, up to 128.
  assert(llvm::all_of(Sizes,
                      [](unsigned Size) {
                        return Size == 8 || Size == 16 || Size == 32 ||
                               Size == 64 || Size == 128;
                      }) &&
         "Invalid size");

  return std::make_tuple(*llvm::min_element(Sizes), *llvm::max_element(Sizes),
                         OutputBecomesInput);
}
10819
10820// Function used to add the attribute. The parameter `VLEN` is
10821// templated to allow the use of "x" when targeting scalable functions
10822// for SVE.
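// For example (illustrative): Prefix "_ZGV", ISA 'n', LMask "N", VLEN 2 and
// ParSeq "v" on a function named "foo" produce the attribute "_ZGVnN2v_foo";
// the scalable SVE form uses VLEN "x", e.g. "_ZGVsMxv_foo".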
10823template <typename T>
10824static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10825 char ISA, StringRef ParSeq,
10826 StringRef MangledName, bool OutputBecomesInput,
10827 llvm::Function *Fn) {
10828 SmallString<256> Buffer;
10829 llvm::raw_svector_ostream Out(Buffer);
10830 Out << Prefix << ISA << LMask << VLEN;
10831 if (OutputBecomesInput)
10832 Out << "v";
10833 Out << ParSeq << "_" << MangledName;
  Fn->addFnAttr(Out.str());
10835}
10836
10837// Helper function to generate the Advanced SIMD names depending on
10838// the value of the NDS when simdlen is not present.
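// For example (illustrative): with NDS == 32 it emits a 64-bit variant
// (VLEN = 64 / 32 = 2) and a 128-bit variant (VLEN = 128 / 32 = 4), matching
// the two Advanced SIMD register widths.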
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}
10872
10873/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10874static void emitAArch64DeclareSimdFunction(
10875 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10876 ArrayRef<ParamAttrTy> ParamAttrs,
10877 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10878 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10879
10880 // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);
10885
  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }
10895
  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The value specified in simdlen must be a power of 2 when targeting "
        "Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1: an SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning,
          "The clause simdlen must fit the %0-bit lanes in the architectural "
          "constraints for SVE (min is 128-bit, max is 2048-bit, by steps of "
          "128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }
10919
  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
10981
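// For example (illustrative):
//   #pragma omp declare simd simdlen(8) notinbranch uniform(n)
//   float add(float x, int n);
// yields, on x86, attributes such as "_ZGVbN8vu_add" (plus AVX and AVX512
// variants) and, on AArch64 Advanced SIMD, the attribute "_ZGVnN8vu_add".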
10982void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10983 llvm::Function *Fn) {
10984 ASTContext &C = CGM.getContext();
10985 FD = FD->getMostRecentDecl();
10986 while (FD) {
10987 // Map params to their positions in function decl.
10988 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10989 if (isa<CXXMethodDecl>(Val: FD))
10990 ParamPositions.try_emplace(Key: FD, Args: 0);
10991 unsigned ParamPos = ParamPositions.size();
10992 for (const ParmVarDecl *P : FD->parameters()) {
10993 ParamPositions.try_emplace(Key: P->getCanonicalDecl(), Args&: ParamPos);
10994 ++ParamPos;
10995 }
10996 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10997 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10998 // Mark uniform parameters.
10999 for (const Expr *E : Attr->uniforms()) {
11000 E = E->IgnoreParenImpCasts();
11001 unsigned Pos;
11002 if (isa<CXXThisExpr>(Val: E)) {
11003 Pos = ParamPositions[FD];
11004 } else {
11005 const auto *PVD = cast<ParmVarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl())
11006 ->getCanonicalDecl();
11007 auto It = ParamPositions.find(Val: PVD);
11008 assert(It != ParamPositions.end() && "Function parameter not found");
11009 Pos = It->second;
11010 }
11011 ParamAttrs[Pos].Kind = Uniform;
11012 }
11013 // Get alignment info.
11014 auto *NI = Attr->alignments_begin();
11015 for (const Expr *E : Attr->aligneds()) {
11016 E = E->IgnoreParenImpCasts();
11017 unsigned Pos;
11018 QualType ParmTy;
11019 if (isa<CXXThisExpr>(Val: E)) {
11020 Pos = ParamPositions[FD];
11021 ParmTy = E->getType();
11022 } else {
11023 const auto *PVD = cast<ParmVarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl())
11024 ->getCanonicalDecl();
11025 auto It = ParamPositions.find(Val: PVD);
11026 assert(It != ParamPositions.end() && "Function parameter not found");
11027 Pos = It->second;
11028 ParmTy = PVD->getType();
11029 }
11030 ParamAttrs[Pos].Alignment =
11031 (*NI)
11032 ? (*NI)->EvaluateKnownConstInt(Ctx: C)
11033 : llvm::APSInt::getUnsigned(
11034 X: C.toCharUnitsFromBits(BitSize: C.getOpenMPDefaultSimdAlign(T: ParmTy))
11035 .getQuantity());
11036 ++NI;
11037 }
11038 // Mark linear parameters.
11039 auto *SI = Attr->steps_begin();
11040 auto *MI = Attr->modifiers_begin();
11041 for (const Expr *E : Attr->linears()) {
11042 E = E->IgnoreParenImpCasts();
11043 unsigned Pos;
11044 bool IsReferenceType = false;
11045 // Rescaling factor needed to compute the linear parameter
11046 // value in the mangled name.
11047 unsigned PtrRescalingFactor = 1;
11048 if (isa<CXXThisExpr>(Val: E)) {
11049 Pos = ParamPositions[FD];
11050 auto *P = cast<PointerType>(Val: E->getType());
11051 PtrRescalingFactor = CGM.getContext()
11052 .getTypeSizeInChars(T: P->getPointeeType())
11053 .getQuantity();
11054 } else {
11055 const auto *PVD = cast<ParmVarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl())
11056 ->getCanonicalDecl();
11057 auto It = ParamPositions.find(Val: PVD);
11058 assert(It != ParamPositions.end() && "Function parameter not found");
11059 Pos = It->second;
11060 if (auto *P = dyn_cast<PointerType>(Val: PVD->getType()))
11061 PtrRescalingFactor = CGM.getContext()
11062 .getTypeSizeInChars(T: P->getPointeeType())
11063 .getQuantity();
11064 else if (PVD->getType()->isReferenceType()) {
11065 IsReferenceType = true;
11066 PtrRescalingFactor =
11067 CGM.getContext()
11068 .getTypeSizeInChars(T: PVD->getType().getNonReferenceType())
11069 .getQuantity();
11070 }
11071 }
11072 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11073 if (*MI == OMPC_LINEAR_ref)
11074 ParamAttr.Kind = LinearRef;
11075 else if (*MI == OMPC_LINEAR_uval)
11076 ParamAttr.Kind = LinearUVal;
11077 else if (IsReferenceType)
11078 ParamAttr.Kind = LinearVal;
11079 else
11080 ParamAttr.Kind = Linear;
11081 // Assuming a stride of 1, for `linear` without modifiers.
11082 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(X: 1);
11083 if (*SI) {
11084 Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            if (const auto *DRE =
                    dyn_cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11090 ParamAttr.HasVarStride = true;
11091 auto It = ParamPositions.find(Val: StridePVD->getCanonicalDecl());
11092 assert(It != ParamPositions.end() &&
11093 "Function parameter not found");
11094 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(X: It->second);
11095 }
11096 }
11097 } else {
11098 ParamAttr.StrideOrArg = Result.Val.getInt();
11099 }
11100 }
11101 // If we are using a linear clause on a pointer, we need to
11102 // rescale the value of linear_step with the byte size of the
11103 // pointee type.
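        // For example (illustrative): linear(p:2) on a parameter
        // "double *p" mangles with a step of 2 * sizeof(double) = 16 bytes.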
11104 if (!ParamAttr.HasVarStride &&
11105 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
11106 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11107 ++SI;
11108 ++MI;
11109 }
11110 llvm::APSInt VLENVal;
11111 SourceLocation ExprLoc;
11112 const Expr *VLENExpr = Attr->getSimdlen();
11113 if (VLENExpr) {
11114 VLENVal = VLENExpr->EvaluateKnownConstInt(Ctx: C);
11115 ExprLoc = VLENExpr->getExprLoc();
11116 }
11117 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11118 if (CGM.getTriple().isX86()) {
11119 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11120 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11121 unsigned VLEN = VLENVal.getExtValue();
11122 StringRef MangledName = Fn->getName();
11123 if (CGM.getTarget().hasFeature(Feature: "sve"))
11124 emitAArch64DeclareSimdFunction(CGM, FD, UserVLEN: VLEN, ParamAttrs, State,
11125 MangledName, ISA: 's', VecRegSize: 128, Fn, SLoc: ExprLoc);
11126 else if (CGM.getTarget().hasFeature(Feature: "neon"))
11127 emitAArch64DeclareSimdFunction(CGM, FD, UserVLEN: VLEN, ParamAttrs, State,
11128 MangledName, ISA: 'n', VecRegSize: 128, Fn, SLoc: ExprLoc);
11129 }
11130 }
11131 FD = FD->getPreviousDecl();
11132 }
11133}
11134
11135namespace {
11136/// Cleanup action for doacross support.
11137class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11138public:
11139 static const int DoacrossFinArgs = 2;
11140
11141private:
11142 llvm::FunctionCallee RTLFn;
11143 llvm::Value *Args[DoacrossFinArgs];
11144
11145public:
11146 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11147 ArrayRef<llvm::Value *> CallArgs)
11148 : RTLFn(RTLFn) {
11149 assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11151 }
11152 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11153 if (!CGF.HaveInsertPoint())
11154 return;
11155 CGF.EmitRuntimeCall(callee: RTLFn, args: Args);
11156 }
11157};
11158} // namespace
11159
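// For example (illustrative), for a doacross loop nest such as
//   #pragma omp for ordered(2)
//   for (int i = 0; i < N; ++i)
//     for (int j = 0; j < M; ++j) { ... }
// this emits a "dims" array with two kmp_dim entries (upper bounds N and M,
// strides 1) and passes it to __kmpc_doacross_init with num_dims == 2.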
11160void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11161 const OMPLoopDirective &D,
11162 ArrayRef<Expr *> NumIterations) {
11163 if (!CGF.HaveInsertPoint())
11164 return;
11165
11166 ASTContext &C = CGM.getContext();
11167 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11168 RecordDecl *RD;
11169 if (KmpDimTy.isNull()) {
11170 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
11171 // kmp_int64 lo; // lower
11172 // kmp_int64 up; // upper
11173 // kmp_int64 st; // stride
11174 // };
11175 RD = C.buildImplicitRecord(Name: "kmp_dim");
11176 RD->startDefinition();
11177 addFieldToRecordDecl(C, DC: RD, FieldTy: Int64Ty);
11178 addFieldToRecordDecl(C, DC: RD, FieldTy: Int64Ty);
11179 addFieldToRecordDecl(C, DC: RD, FieldTy: Int64Ty);
11180 RD->completeDefinition();
11181 KmpDimTy = C.getRecordType(Decl: RD);
11182 } else {
11183 RD = cast<RecordDecl>(Val: KmpDimTy->getAsTagDecl());
11184 }
11185 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11186 QualType ArrayTy = C.getConstantArrayType(EltTy: KmpDimTy, ArySize: Size, SizeExpr: nullptr,
11187 ASM: ArraySizeModifier::Normal, IndexTypeQuals: 0);
11188
11189 Address DimsAddr = CGF.CreateMemTemp(T: ArrayTy, Name: "dims");
11190 CGF.EmitNullInitialization(DestPtr: DimsAddr, Ty: ArrayTy);
11191 enum { LowerFD = 0, UpperFD, StrideFD };
11192 // Fill dims with data.
11193 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11194 LValue DimsLVal = CGF.MakeAddrLValue(
11195 Addr: CGF.Builder.CreateConstArrayGEP(Addr: DimsAddr, Index: I), T: KmpDimTy);
11196 // dims.upper = num_iterations;
11197 LValue UpperLVal = CGF.EmitLValueForField(
11198 Base: DimsLVal, Field: *std::next(x: RD->field_begin(), n: UpperFD));
11199 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11200 Src: CGF.EmitScalarExpr(E: NumIterations[I]), SrcTy: NumIterations[I]->getType(),
11201 DstTy: Int64Ty, Loc: NumIterations[I]->getExprLoc());
11202 CGF.EmitStoreOfScalar(value: NumIterVal, lvalue: UpperLVal);
11203 // dims.stride = 1;
11204 LValue StrideLVal = CGF.EmitLValueForField(
11205 Base: DimsLVal, Field: *std::next(x: RD->field_begin(), n: StrideFD));
11206 CGF.EmitStoreOfScalar(value: llvm::ConstantInt::getSigned(Ty: CGM.Int64Ty, /*V=*/1),
11207 lvalue: StrideLVal);
11208 }
11209
11210 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11211 // kmp_int32 num_dims, struct kmp_dim * dims);
11212 llvm::Value *Args[] = {
11213 emitUpdateLocation(CGF, Loc: D.getBeginLoc()),
11214 getThreadID(CGF, Loc: D.getBeginLoc()),
11215 llvm::ConstantInt::getSigned(Ty: CGM.Int32Ty, V: NumIterations.size()),
11216 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11217 V: CGF.Builder.CreateConstArrayGEP(Addr: DimsAddr, Index: 0).emitRawPointer(CGF),
11218 DestTy: CGM.VoidPtrTy)};
11219
11220 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11221 M&: CGM.getModule(), FnID: OMPRTL___kmpc_doacross_init);
11222 CGF.EmitRuntimeCall(callee: RTLFn, args: Args);
11223 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11224 emitUpdateLocation(CGF, Loc: D.getEndLoc()), getThreadID(CGF, Loc: D.getEndLoc())};
11225 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11226 M&: CGM.getModule(), FnID: OMPRTL___kmpc_doacross_fini);
11227 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(Kind: NormalAndEHCleanup, A: FiniRTLFn,
11228 A: llvm::ArrayRef(FiniArgs));
11229}
11230
11231template <typename T>
11232static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
11233 const T *C, llvm::Value *ULoc,
11234 llvm::Value *ThreadID) {
11235 QualType Int64Ty =
11236 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11237 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11238 QualType ArrayTy = CGM.getContext().getConstantArrayType(
11239 EltTy: Int64Ty, ArySize: Size, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal, IndexTypeQuals: 0);
11240 Address CntAddr = CGF.CreateMemTemp(T: ArrayTy, Name: ".cnt.addr");
11241 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11242 const Expr *CounterVal = C->getLoopData(I);
11243 assert(CounterVal);
11244 llvm::Value *CntVal = CGF.EmitScalarConversion(
11245 Src: CGF.EmitScalarExpr(E: CounterVal), SrcTy: CounterVal->getType(), DstTy: Int64Ty,
11246 Loc: CounterVal->getExprLoc());
11247 CGF.EmitStoreOfScalar(Value: CntVal, Addr: CGF.Builder.CreateConstArrayGEP(Addr: CntAddr, Index: I),
11248 /*Volatile=*/false, Ty: Int64Ty);
11249 }
11250 llvm::Value *Args[] = {
11251 ULoc, ThreadID,
11252 CGF.Builder.CreateConstArrayGEP(Addr: CntAddr, Index: 0).emitRawPointer(CGF)};
11253 llvm::FunctionCallee RTLFn;
11254 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
11255 OMPDoacrossKind<T> ODK;
11256 if (ODK.isSource(C)) {
11257 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(),
11258 FnID: OMPRTL___kmpc_doacross_post);
11259 } else {
11260 assert(ODK.isSink(C) && "Expect sink modifier.");
11261 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(),
11262 FnID: OMPRTL___kmpc_doacross_wait);
11263 }
11264 CGF.EmitRuntimeCall(callee: RTLFn, args: Args);
11265}
11266
11267void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11268 const OMPDependClause *C) {
11269 return EmitDoacrossOrdered<OMPDependClause>(
11270 CGF, CGM, C, ULoc: emitUpdateLocation(CGF, Loc: C->getBeginLoc()),
11271 ThreadID: getThreadID(CGF, Loc: C->getBeginLoc()));
11272}
11273
11274void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11275 const OMPDoacrossClause *C) {
11276 return EmitDoacrossOrdered<OMPDoacrossClause>(
11277 CGF, CGM, C, ULoc: emitUpdateLocation(CGF, Loc: C->getBeginLoc()),
11278 ThreadID: getThreadID(CGF, Loc: C->getBeginLoc()));
11279}
11280
11281void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11282 llvm::FunctionCallee Callee,
11283 ArrayRef<llvm::Value *> Args) const {
11284 assert(Loc.isValid() && "Outlined function call location must be valid.");
11285 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: Loc);
11286
11287 if (auto *Fn = dyn_cast<llvm::Function>(Val: Callee.getCallee())) {
11288 if (Fn->doesNotThrow()) {
11289 CGF.EmitNounwindRuntimeCall(callee: Fn, args: Args);
11290 return;
11291 }
11292 }
11293 CGF.EmitRuntimeCall(callee: Callee, args: Args);
11294}
11295
11296void CGOpenMPRuntime::emitOutlinedFunctionCall(
11297 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11298 ArrayRef<llvm::Value *> Args) const {
11299 emitCall(CGF, Loc, Callee: OutlinedFn, Args);
11300}
11301
11302void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11303 if (const auto *FD = dyn_cast<FunctionDecl>(Val: D))
11304 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD: FD))
11305 HasEmittedDeclareTargetRegion = true;
11306}
11307
11308Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11309 const VarDecl *NativeParam,
11310 const VarDecl *TargetParam) const {
11311 return CGF.GetAddrOfLocalVar(VD: NativeParam);
11312}
11313
/// Return the allocator value from the expression, or a null allocator value
/// (the default) when no allocator is specified.
11316static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11317 const Expr *Allocator) {
11318 llvm::Value *AllocVal;
11319 if (Allocator) {
11320 AllocVal = CGF.EmitScalarExpr(E: Allocator);
    // According to the standard, the original allocator type is an enum
    // (integer). Convert it to a pointer type, if required.
11323 AllocVal = CGF.EmitScalarConversion(Src: AllocVal, SrcTy: Allocator->getType(),
11324 DstTy: CGF.getContext().VoidPtrTy,
11325 Loc: Allocator->getExprLoc());
11326 } else {
11327 // If no allocator specified, it defaults to the null allocator.
11328 AllocVal = llvm::Constant::getNullValue(
11329 Ty: CGF.CGM.getTypes().ConvertType(T: CGF.getContext().VoidPtrTy));
11330 }
11331 return AllocVal;
11332}
11333
11334/// Return the alignment from an allocate directive if present.
11335static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
11336 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
11337
11338 if (!AllocateAlignment)
11339 return nullptr;
11340
11341 return llvm::ConstantInt::get(Ty: CGM.SizeTy, V: AllocateAlignment->getQuantity());
11342}
11343
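// For a declaration such as (illustrative)
//   #pragma omp allocate(x) allocator(a) align(16)
// the code below emits, roughly,
//   void *ptr = __kmpc_aligned_alloc(gtid, 16, sizeof(x), a);
// and registers a matching __kmpc_free(gtid, ptr, a) cleanup.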
11344Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11345 const VarDecl *VD) {
11346 if (!VD)
11347 return Address::invalid();
11348 Address UntiedAddr = Address::invalid();
11349 Address UntiedRealAddr = Address::invalid();
11350 auto It = FunctionToUntiedTaskStackMap.find(Val: CGF.CurFn);
11351 if (It != FunctionToUntiedTaskStackMap.end()) {
11352 const UntiedLocalVarsAddressesMap &UntiedData =
11353 UntiedLocalVarsStack[It->second];
11354 auto I = UntiedData.find(Key: VD);
11355 if (I != UntiedData.end()) {
11356 UntiedAddr = I->second.first;
11357 UntiedRealAddr = I->second.second;
11358 }
11359 }
11360 const VarDecl *CVD = VD->getCanonicalDecl();
11361 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
11362 // Use the default allocation.
11363 if (!isAllocatableDecl(VD))
11364 return UntiedAddr;
11365 llvm::Value *Size;
11366 CharUnits Align = CGM.getContext().getDeclAlign(D: CVD);
11367 if (CVD->getType()->isVariablyModifiedType()) {
11368 Size = CGF.getTypeSize(Ty: CVD->getType());
11369 // Align the size: ((size + align - 1) / align) * align
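      // For example: a size of 10 with align 8 yields ((10 + 7) / 8) * 8 == 16.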
11370 Size = CGF.Builder.CreateNUWAdd(
11371 LHS: Size, RHS: CGM.getSize(numChars: Align - CharUnits::fromQuantity(Quantity: 1)));
11372 Size = CGF.Builder.CreateUDiv(LHS: Size, RHS: CGM.getSize(numChars: Align));
11373 Size = CGF.Builder.CreateNUWMul(LHS: Size, RHS: CGM.getSize(numChars: Align));
11374 } else {
11375 CharUnits Sz = CGM.getContext().getTypeSizeInChars(T: CVD->getType());
11376 Size = CGM.getSize(numChars: Sz.alignTo(Align));
11377 }
11378 llvm::Value *ThreadID = getThreadID(CGF, Loc: CVD->getBeginLoc());
11379 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11380 const Expr *Allocator = AA->getAllocator();
11381 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
11382 llvm::Value *Alignment = getAlignmentValue(CGM, VD: CVD);
11383 SmallVector<llvm::Value *, 4> Args;
11384 Args.push_back(Elt: ThreadID);
11385 if (Alignment)
11386 Args.push_back(Elt: Alignment);
11387 Args.push_back(Elt: Size);
11388 Args.push_back(Elt: AllocVal);
11389 llvm::omp::RuntimeFunction FnID =
11390 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
11391 llvm::Value *Addr = CGF.EmitRuntimeCall(
11392 callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(), FnID), args: Args,
11393 name: getName(Parts: {CVD->getName(), ".void.addr"}));
11394 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11395 M&: CGM.getModule(), FnID: OMPRTL___kmpc_free);
11396 QualType Ty = CGM.getContext().getPointerType(T: CVD->getType());
11397 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11398 V: Addr, DestTy: CGF.ConvertTypeForMem(T: Ty), Name: getName(Parts: {CVD->getName(), ".addr"}));
11399 if (UntiedAddr.isValid())
11400 CGF.EmitStoreOfScalar(Value: Addr, Addr: UntiedAddr, /*Volatile=*/false, Ty);
11401
11402 // Cleanup action for allocate support.
11403 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11404 llvm::FunctionCallee RTLFn;
11405 SourceLocation::UIntTy LocEncoding;
11406 Address Addr;
11407 const Expr *AllocExpr;
11408
11409 public:
11410 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11411 SourceLocation::UIntTy LocEncoding, Address Addr,
11412 const Expr *AllocExpr)
11413 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
11414 AllocExpr(AllocExpr) {}
11415 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11416 if (!CGF.HaveInsertPoint())
11417 return;
11418 llvm::Value *Args[3];
11419 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
11420 CGF, Loc: SourceLocation::getFromRawEncoding(Encoding: LocEncoding));
11421 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11422 V: Addr.emitRawPointer(CGF), DestTy: CGF.VoidPtrTy);
11423 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator: AllocExpr);
11424 Args[2] = AllocVal;
11425 CGF.EmitRuntimeCall(callee: RTLFn, args: Args);
11426 }
11427 };
11428 Address VDAddr =
11429 UntiedRealAddr.isValid()
11430 ? UntiedRealAddr
11431 : Address(Addr, CGF.ConvertTypeForMem(T: CVD->getType()), Align);
11432 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
11433 Kind: NormalAndEHCleanup, A: FiniRTLFn, A: CVD->getLocation().getRawEncoding(),
11434 A: VDAddr, A: Allocator);
11435 if (UntiedRealAddr.isValid())
11436 if (auto *Region =
11437 dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo))
11438 Region->emitUntiedSwitch(CGF);
11439 return VDAddr;
11440 }
11441 return UntiedAddr;
11442}
11443
11444bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11445 const VarDecl *VD) const {
11446 auto It = FunctionToUntiedTaskStackMap.find(Val: CGF.CurFn);
11447 if (It == FunctionToUntiedTaskStackMap.end())
11448 return false;
11449 return UntiedLocalVarsStack[It->second].count(Key: VD) > 0;
11450}
11451
11452CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11453 CodeGenModule &CGM, const OMPLoopDirective &S)
11454 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11455 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11456 if (!NeedToPush)
11457 return;
11458 NontemporalDeclsSet &DS =
11459 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11460 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11461 for (const Stmt *Ref : C->private_refs()) {
11462 const auto *SimpleRefExpr = cast<Expr>(Val: Ref)->IgnoreParenImpCasts();
11463 const ValueDecl *VD;
11464 if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: SimpleRefExpr)) {
11465 VD = DRE->getDecl();
11466 } else {
11467 const auto *ME = cast<MemberExpr>(Val: SimpleRefExpr);
11468 assert((ME->isImplicitCXXThis() ||
11469 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11470 "Expected member of current class.");
11471 VD = ME->getMemberDecl();
11472 }
11473 DS.insert(V: VD);
11474 }
11475 }
11476}
11477
11478CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11479 if (!NeedToPush)
11480 return;
11481 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11482}
11483
11484CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11485 CodeGenFunction &CGF,
11486 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
11487 std::pair<Address, Address>> &LocalVars)
11488 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11489 if (!NeedToPush)
11490 return;
11491 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11492 Key: CGF.CurFn, Args: CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11493 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(Elt: LocalVars);
11494}
11495
11496CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11497 if (!NeedToPush)
11498 return;
11499 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11500}
11501
11502bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11503 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11504
11505 return llvm::any_of(
11506 Range&: CGM.getOpenMPRuntime().NontemporalDeclsStack,
11507 P: [VD](const NontemporalDeclsSet &Set) { return Set.contains(V: VD); });
11508}
11509
11510void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11511 const OMPExecutableDirective &S,
11512 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11513 const {
11514 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11515 // Vars in target/task regions must be excluded completely.
11516 if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()) ||
11517 isOpenMPTaskingDirective(Kind: S.getDirectiveKind())) {
11518 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11519 getOpenMPCaptureRegions(CaptureRegions, DKind: S.getDirectiveKind());
11520 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: CaptureRegions.front());
11521 for (const CapturedStmt::Capture &Cap : CS->captures()) {
11522 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11523 NeedToCheckForLPCs.insert(V: Cap.getCapturedVar());
11524 }
11525 }
11526 // Exclude vars in private clauses.
11527 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11528 for (const Expr *Ref : C->varlist()) {
11529 if (!Ref->getType()->isScalarType())
11530 continue;
11531 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
11532 if (!DRE)
11533 continue;
11534 NeedToCheckForLPCs.insert(V: DRE->getDecl());
11535 }
11536 }
11537 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11538 for (const Expr *Ref : C->varlist()) {
11539 if (!Ref->getType()->isScalarType())
11540 continue;
11541 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
11542 if (!DRE)
11543 continue;
11544 NeedToCheckForLPCs.insert(V: DRE->getDecl());
11545 }
11546 }
11547 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11548 for (const Expr *Ref : C->varlist()) {
11549 if (!Ref->getType()->isScalarType())
11550 continue;
11551 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
11552 if (!DRE)
11553 continue;
11554 NeedToCheckForLPCs.insert(V: DRE->getDecl());
11555 }
11556 }
11557 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11558 for (const Expr *Ref : C->varlist()) {
11559 if (!Ref->getType()->isScalarType())
11560 continue;
11561 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
11562 if (!DRE)
11563 continue;
11564 NeedToCheckForLPCs.insert(V: DRE->getDecl());
11565 }
11566 }
11567 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11568 for (const Expr *Ref : C->varlist()) {
11569 if (!Ref->getType()->isScalarType())
11570 continue;
11571 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
11572 if (!DRE)
11573 continue;
11574 NeedToCheckForLPCs.insert(V: DRE->getDecl());
11575 }
11576 }
11577 for (const Decl *VD : NeedToCheckForLPCs) {
11578 for (const LastprivateConditionalData &Data :
11579 llvm::reverse(C&: CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11580 if (Data.DeclToUniqueName.count(Key: VD) > 0) {
11581 if (!Data.Disabled)
11582 NeedToAddForLPCsAsDisabled.insert(V: VD);
11583 break;
11584 }
11585 }
11586 }
11587}
11588
11589CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11590 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11591 : CGM(CGF.CGM),
11592 Action((CGM.getLangOpts().OpenMP >= 50 &&
11593 llvm::any_of(Range: S.getClausesOfKind<OMPLastprivateClause>(),
11594 P: [](const OMPLastprivateClause *C) {
11595 return C->getKind() ==
11596 OMPC_LASTPRIVATE_conditional;
11597 }))
11598 ? ActionToDo::PushAsLastprivateConditional
11599 : ActionToDo::DoNotPush) {
11600 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11601 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
11602 return;
11603 assert(Action == ActionToDo::PushAsLastprivateConditional &&
11604 "Expected a push action.");
11605 LastprivateConditionalData &Data =
11606 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11607 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11608 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11609 continue;
11610
11611 for (const Expr *Ref : C->varlist()) {
11612 Data.DeclToUniqueName.insert(KV: std::make_pair(
11613 x: cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts())->getDecl(),
11614 y: SmallString<16>(generateUniqueName(CGM, Prefix: "pl_cond", Ref))));
11615 }
11616 }
11617 Data.IVLVal = IVLVal;
11618 Data.Fn = CGF.CurFn;
11619}
11620
11621CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11622 CodeGenFunction &CGF, const OMPExecutableDirective &S)
11623 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11624 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11625 if (CGM.getLangOpts().OpenMP < 50)
11626 return;
11627 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11628 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11629 if (!NeedToAddForLPCsAsDisabled.empty()) {
11630 Action = ActionToDo::DisableLastprivateConditional;
11631 LastprivateConditionalData &Data =
11632 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11633 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11634 Data.DeclToUniqueName.try_emplace(Key: VD);
11635 Data.Fn = CGF.CurFn;
11636 Data.Disabled = true;
11637 }
11638}
11639
11640CGOpenMPRuntime::LastprivateConditionalRAII
11641CGOpenMPRuntime::LastprivateConditionalRAII::disable(
11642 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
11643 return LastprivateConditionalRAII(CGF, S);
11644}
11645
11646CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11647 if (CGM.getLangOpts().OpenMP < 50)
11648 return;
11649 if (Action == ActionToDo::DisableLastprivateConditional) {
11650 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11651 "Expected list of disabled private vars.");
11652 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11653 }
11654 if (Action == ActionToDo::PushAsLastprivateConditional) {
11655 assert(
11656 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11657 "Expected list of lastprivate conditional vars.");
11658 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11659 }
11660}
11661
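// A lastprivate conditional variable is privatized together with a "Fired"
// flag, conceptually (illustrative):
//   struct lastprivate.conditional { <value type> val; char Fired; };
// The flag is zero-initialized here and set to 1 whenever the variable is
// conditionally assigned within the region.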
11662Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
11663 const VarDecl *VD) {
11664 ASTContext &C = CGM.getContext();
11665 auto I = LastprivateConditionalToTypes.try_emplace(Key: CGF.CurFn).first;
11666 QualType NewType;
11667 const FieldDecl *VDField;
11668 const FieldDecl *FiredField;
11669 LValue BaseLVal;
11670 auto VI = I->getSecond().find(Val: VD);
11671 if (VI == I->getSecond().end()) {
    RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
11673 RD->startDefinition();
11674 VDField = addFieldToRecordDecl(C, DC: RD, FieldTy: VD->getType().getNonReferenceType());
11675 FiredField = addFieldToRecordDecl(C, DC: RD, FieldTy: C.CharTy);
11676 RD->completeDefinition();
11677 NewType = C.getRecordType(Decl: RD);
11678 Address Addr = CGF.CreateMemTemp(T: NewType, Align: C.getDeclAlign(D: VD), Name: VD->getName());
11679 BaseLVal = CGF.MakeAddrLValue(Addr, T: NewType, Source: AlignmentSource::Decl);
11680 I->getSecond().try_emplace(Key: VD, Args&: NewType, Args&: VDField, Args&: FiredField, Args&: BaseLVal);
11681 } else {
11682 NewType = std::get<0>(t&: VI->getSecond());
11683 VDField = std::get<1>(t&: VI->getSecond());
11684 FiredField = std::get<2>(t&: VI->getSecond());
11685 BaseLVal = std::get<3>(t&: VI->getSecond());
11686 }
11687 LValue FiredLVal =
11688 CGF.EmitLValueForField(Base: BaseLVal, Field: FiredField);
11689 CGF.EmitStoreOfScalar(
11690 value: llvm::ConstantInt::getNullValue(Ty: CGF.ConvertTypeForMem(T: C.CharTy)),
11691 lvalue: FiredLVal);
11692 return CGF.EmitLValueForField(Base: BaseLVal, Field: VDField).getAddress();
11693}
11694
11695namespace {
11696/// Checks if the lastprivate conditional variable is referenced in LHS.
11697class LastprivateConditionalRefChecker final
11698 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11699 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11700 const Expr *FoundE = nullptr;
11701 const Decl *FoundD = nullptr;
11702 StringRef UniqueDeclName;
11703 LValue IVLVal;
11704 llvm::Function *FoundFn = nullptr;
11705 SourceLocation Loc;
11706
11707public:
11708 bool VisitDeclRefExpr(const DeclRefExpr *E) {
11709 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11710 llvm::reverse(C&: LPM)) {
11711 auto It = D.DeclToUniqueName.find(Key: E->getDecl());
11712 if (It == D.DeclToUniqueName.end())
11713 continue;
11714 if (D.Disabled)
11715 return false;
11716 FoundE = E;
11717 FoundD = E->getDecl()->getCanonicalDecl();
11718 UniqueDeclName = It->second;
11719 IVLVal = D.IVLVal;
11720 FoundFn = D.Fn;
11721 break;
11722 }
11723 return FoundE == E;
11724 }
11725 bool VisitMemberExpr(const MemberExpr *E) {
11726 if (!CodeGenFunction::IsWrappedCXXThis(E: E->getBase()))
11727 return false;
11728 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11729 llvm::reverse(C&: LPM)) {
11730 auto It = D.DeclToUniqueName.find(Key: E->getMemberDecl());
11731 if (It == D.DeclToUniqueName.end())
11732 continue;
11733 if (D.Disabled)
11734 return false;
11735 FoundE = E;
11736 FoundD = E->getMemberDecl()->getCanonicalDecl();
11737 UniqueDeclName = It->second;
11738 IVLVal = D.IVLVal;
11739 FoundFn = D.Fn;
11740 break;
11741 }
11742 return FoundE == E;
11743 }
11744 bool VisitStmt(const Stmt *S) {
11745 for (const Stmt *Child : S->children()) {
11746 if (!Child)
11747 continue;
11748 if (const auto *E = dyn_cast<Expr>(Val: Child))
11749 if (!E->isGLValue())
11750 continue;
11751 if (Visit(S: Child))
11752 return true;
11753 }
11754 return false;
11755 }
11756 explicit LastprivateConditionalRefChecker(
11757 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11758 : LPM(LPM) {}
11759 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11760 getFoundData() const {
11761 return std::make_tuple(args: FoundE, args: FoundD, args: UniqueDeclName, args: IVLVal, args: FoundFn);
11762 }
11763};
11764} // namespace
11765
11766void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
11767 LValue IVLVal,
11768 StringRef UniqueDeclName,
11769 LValue LVal,
11770 SourceLocation Loc) {
11771 // Last updated loop counter for the lastprivate conditional var.
11772 // int<xx> last_iv = 0;
11773 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(T: IVLVal.getType());
11774 llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
11775 Ty: LLIVTy, Name: getName(Parts: {UniqueDeclName, "iv"}));
11776 cast<llvm::GlobalVariable>(Val: LastIV)->setAlignment(
11777 IVLVal.getAlignment().getAsAlign());
11778 LValue LastIVLVal =
11779 CGF.MakeNaturalAlignRawAddrLValue(V: LastIV, T: IVLVal.getType());
11780
11781 // Last value of the lastprivate conditional.
11782 // decltype(priv_a) last_a;
11783 llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
11784 Ty: CGF.ConvertTypeForMem(T: LVal.getType()), Name: UniqueDeclName);
11785 cast<llvm::GlobalVariable>(Val: Last)->setAlignment(
11786 LVal.getAlignment().getAsAlign());
11787 LValue LastLVal =
11788 CGF.MakeRawAddrLValue(V: Last, T: LVal.getType(), Alignment: LVal.getAlignment());
11789
11790 // Global loop counter. Required to handle inner parallel-for regions.
11791 // iv
11792 llvm::Value *IVVal = CGF.EmitLoadOfScalar(lvalue: IVLVal, Loc);
11793
11794 // #pragma omp critical(a)
11795 // if (last_iv <= iv) {
11796 // last_iv = iv;
11797 // last_a = priv_a;
11798 // }
11799 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
11800 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
11801 Action.Enter(CGF);
11802 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(lvalue: LastIVLVal, Loc);
    // Check whether the variable was updated (last_iv <= iv) and, if so,
    // store the new value in the global variable.
11805 llvm::Value *CmpRes;
11806 if (IVLVal.getType()->isSignedIntegerType()) {
11807 CmpRes = CGF.Builder.CreateICmpSLE(LHS: LastIVVal, RHS: IVVal);
11808 } else {
11809 assert(IVLVal.getType()->isUnsignedIntegerType() &&
11810 "Loop iteration variable must be integer.");
11811 CmpRes = CGF.Builder.CreateICmpULE(LHS: LastIVVal, RHS: IVVal);
11812 }
11813 llvm::BasicBlock *ThenBB = CGF.createBasicBlock(name: "lp_cond_then");
11814 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: "lp_cond_exit");
11815 CGF.Builder.CreateCondBr(Cond: CmpRes, True: ThenBB, False: ExitBB);
11816 // {
11817 CGF.EmitBlock(BB: ThenBB);
11818
11819 // last_iv = iv;
11820 CGF.EmitStoreOfScalar(value: IVVal, lvalue: LastIVLVal);
11821
11822 // last_a = priv_a;
11823 switch (CGF.getEvaluationKind(T: LVal.getType())) {
11824 case TEK_Scalar: {
11825 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(lvalue: LVal, Loc);
11826 CGF.EmitStoreOfScalar(value: PrivVal, lvalue: LastLVal);
11827 break;
11828 }
11829 case TEK_Complex: {
11830 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(src: LVal, loc: Loc);
11831 CGF.EmitStoreOfComplex(V: PrivVal, dest: LastLVal, /*isInit=*/false);
11832 break;
11833 }
11834 case TEK_Aggregate:
11835 llvm_unreachable(
11836 "Aggregates are not supported in lastprivate conditional.");
11837 }
11838 // }
11839 CGF.EmitBranch(Block: ExitBB);
    // There is no need to emit a line number for the unconditional branch.
11841 (void)ApplyDebugLocation::CreateEmpty(CGF);
11842 CGF.EmitBlock(BB: ExitBB, /*IsFinished=*/true);
11843 };
11844
11845 if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit a critical region; no parallel region can be present in
    // OpenMP SIMD-only mode.
11847 RegionCodeGenTy ThenRCG(CodeGen);
11848 ThenRCG(CGF);
11849 } else {
11850 emitCriticalRegion(CGF, CriticalName: UniqueDeclName, CriticalOpGen: CodeGen, Loc);
11851 }
11852}
11853
11854void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
11855 const Expr *LHS) {
11856 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11857 return;
11858 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
11859 if (!Checker.Visit(S: LHS))
11860 return;
11861 const Expr *FoundE;
11862 const Decl *FoundD;
11863 StringRef UniqueDeclName;
11864 LValue IVLVal;
11865 llvm::Function *FoundFn;
11866 std::tie(args&: FoundE, args&: FoundD, args&: UniqueDeclName, args&: IVLVal, args&: FoundFn) =
11867 Checker.getFoundData();
11868 if (FoundFn != CGF.CurFn) {
11869 // Special codegen for inner parallel regions.
11870 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
11871 auto It = LastprivateConditionalToTypes[FoundFn].find(Val: FoundD);
11872 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
11873 "Lastprivate conditional is not found in outer region.");
11874 QualType StructTy = std::get<0>(t&: It->getSecond());
    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
11876 LValue PrivLVal = CGF.EmitLValue(E: FoundE);
11877 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11878 Addr: PrivLVal.getAddress(),
11879 Ty: CGF.ConvertTypeForMem(T: CGF.getContext().getPointerType(T: StructTy)),
11880 ElementTy: CGF.ConvertTypeForMem(T: StructTy));
11881 LValue BaseLVal =
11882 CGF.MakeAddrLValue(Addr: StructAddr, T: StructTy, Source: AlignmentSource::Decl);
11883 LValue FiredLVal = CGF.EmitLValueForField(Base: BaseLVal, Field: FiredDecl);
11884 CGF.EmitAtomicStore(rvalue: RValue::get(V: llvm::ConstantInt::get(
11885 Ty: CGF.ConvertTypeForMem(T: FiredDecl->getType()), V: 1)),
11886 lvalue: FiredLVal, AO: llvm::AtomicOrdering::Unordered,
11887 /*IsVolatile=*/true, /*isInit=*/false);
11888 return;
11889 }
11890
11891 // Private address of the lastprivate conditional in the current context.
11892 // priv_a
11893 LValue LVal = CGF.EmitLValue(E: FoundE);
11894 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
11895 Loc: FoundE->getExprLoc());
11896}
11897
11898void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11899 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11900 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11901 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11902 return;
11903 auto Range = llvm::reverse(C&: LastprivateConditionalStack);
11904 auto It = llvm::find_if(
11905 Range, P: [](const LastprivateConditionalData &D) { return !D.Disabled; });
11906 if (It == Range.end() || It->Fn != CGF.CurFn)
11907 return;
11908 auto LPCI = LastprivateConditionalToTypes.find(Val: It->Fn);
11909 assert(LPCI != LastprivateConditionalToTypes.end() &&
11910 "Lastprivates must be registered already.");
11911 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11912 getOpenMPCaptureRegions(CaptureRegions, DKind: D.getDirectiveKind());
11913 const CapturedStmt *CS = D.getCapturedStmt(RegionKind: CaptureRegions.back());
11914 for (const auto &Pair : It->DeclToUniqueName) {
11915 const auto *VD = cast<VarDecl>(Val: Pair.first->getCanonicalDecl());
11916 if (!CS->capturesVariable(Var: VD) || IgnoredDecls.contains(V: VD))
11917 continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
11921 // bool Cmp = priv_a.Fired != 0;
11922 LValue BaseLVal = std::get<3>(t&: I->getSecond());
11923 LValue FiredLVal =
11924 CGF.EmitLValueForField(Base: BaseLVal, Field: std::get<2>(t&: I->getSecond()));
11925 llvm::Value *Res = CGF.EmitLoadOfScalar(lvalue: FiredLVal, Loc: D.getBeginLoc());
11926 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Arg: Res);
11927 llvm::BasicBlock *ThenBB = CGF.createBasicBlock(name: "lpc.then");
11928 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(name: "lpc.done");
11929 // if (Cmp) {
11930 CGF.Builder.CreateCondBr(Cond: Cmp, True: ThenBB, False: DoneBB);
11931 CGF.EmitBlock(BB: ThenBB);
11932 Address Addr = CGF.GetAddrOfLocalVar(VD);
11933 LValue LVal;
11934 if (VD->getType()->isReferenceType())
11935 LVal = CGF.EmitLoadOfReferenceLValue(RefAddr: Addr, RefTy: VD->getType(),
11936 Source: AlignmentSource::Decl);
11937 else
11938 LVal = CGF.MakeAddrLValue(Addr, T: VD->getType().getNonReferenceType(),
11939 Source: AlignmentSource::Decl);
11940 emitLastprivateConditionalUpdate(CGF, IVLVal: It->IVLVal, UniqueDeclName: Pair.second, LVal,
11941 Loc: D.getBeginLoc());
11942 auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11943 CGF.EmitBlock(BB: DoneBB, /*IsFinal=*/IsFinished: true);
11944 // }
11945 }
11946}
11947
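// Final update of a lastprivate conditional: if the unique global copy was
// ever written in the region, load its last value and store it back through
// the private lvalue, roughly:
//   priv_a = last_a;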
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeRawAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}

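// CGOpenMPSIMDRuntime is used when only OpenMP 'simd' support is enabled
// (-fopenmp-simd). No libomp runtime calls may be emitted in this mode, so
// every entry point that would require runtime support traps: codegen is
// expected never to reach these in SIMD-only mode.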
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
    ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
    llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
                                                SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

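// Reductions marked as 'simple' are lowered entirely inline and need no
// runtime calls, so defer to the default lowering in CGOpenMPRuntime.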
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

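// Returning false reports the global as unhandled here, so regular host code
// generation emits it; SIMD-only mode never produces device code.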
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDoacrossClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}