1//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This provides a class for OpenMP runtime code generation.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGOpenMPRuntime.h"
14#include "ABIInfoImpl.h"
15#include "CGCXXABI.h"
16#include "CGCleanup.h"
17#include "CGDebugInfo.h"
18#include "CGRecordLayout.h"
19#include "CodeGenFunction.h"
20#include "TargetInfo.h"
21#include "clang/AST/APValue.h"
22#include "clang/AST/Attr.h"
23#include "clang/AST/Decl.h"
24#include "clang/AST/OpenMPClause.h"
25#include "clang/AST/StmtOpenMP.h"
26#include "clang/AST/StmtVisitor.h"
27#include "clang/Basic/DiagnosticFrontend.h"
28#include "clang/Basic/OpenMPKinds.h"
29#include "clang/Basic/SourceManager.h"
30#include "clang/CodeGen/ConstantInitBuilder.h"
31#include "llvm/ADT/ArrayRef.h"
32#include "llvm/ADT/SmallSet.h"
33#include "llvm/ADT/SmallVector.h"
34#include "llvm/ADT/StringExtras.h"
35#include "llvm/Bitcode/BitcodeReader.h"
36#include "llvm/IR/Constants.h"
37#include "llvm/IR/DerivedTypes.h"
38#include "llvm/IR/GlobalValue.h"
39#include "llvm/IR/InstrTypes.h"
40#include "llvm/IR/Value.h"
41#include "llvm/Support/AtomicOrdering.h"
42#include "llvm/Support/raw_ostream.h"
43#include <cassert>
44#include <cstdint>
45#include <numeric>
46#include <optional>
47
48using namespace clang;
49using namespace CodeGen;
50using namespace llvm::omp;
51
52namespace {
53/// Base class for handling code generation inside OpenMP regions.
54class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
55public:
56 /// Kinds of OpenMP regions used in codegen.
57 enum CGOpenMPRegionKind {
58 /// Region with outlined function for standalone 'parallel'
59 /// directive.
60 ParallelOutlinedRegion,
61 /// Region with outlined function for standalone 'task' directive.
62 TaskOutlinedRegion,
63 /// Region for constructs that do not require function outlining,
64 /// like 'for', 'sections', 'atomic' etc. directives.
65 InlinedRegion,
66 /// Region with outlined function for standalone 'target' directive.
67 TargetRegion,
68 };
69
70 CGOpenMPRegionInfo(const CapturedStmt &CS,
71 const CGOpenMPRegionKind RegionKind,
72 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
73 bool HasCancel)
74 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
75 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
76
77 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
78 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
79 bool HasCancel)
80 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
81 Kind(Kind), HasCancel(HasCancel) {}
82
83 /// Get a variable or parameter for storing global thread id
84 /// inside OpenMP construct.
85 virtual const VarDecl *getThreadIDVariable() const = 0;
86
87 /// Emit the captured statement body.
88 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
89
90 /// Get an LValue for the current ThreadID variable.
91 /// \return LValue for thread id variable. This LValue always has type int32*.
92 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
93
94 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
95
96 CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
97
98 OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
99
100 bool hasCancel() const { return HasCancel; }
101
102 static bool classof(const CGCapturedStmtInfo *Info) {
103 return Info->getKind() == CR_OpenMP;
104 }
105
106 ~CGOpenMPRegionInfo() override = default;
107
108protected:
109 CGOpenMPRegionKind RegionKind;
110 RegionCodeGenTy CodeGen;
111 OpenMPDirectiveKind Kind;
112 bool HasCancel;
113};
114
115/// API for captured statement code generation in OpenMP constructs.
116class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
117public:
118 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
119 const RegionCodeGenTy &CodeGen,
120 OpenMPDirectiveKind Kind, bool HasCancel,
121 StringRef HelperName)
122 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
123 HasCancel),
124 ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
125 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
126 }
127
128 /// Get a variable or parameter for storing global thread id
129 /// inside OpenMP construct.
130 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
131
132 /// Get the name of the capture helper.
133 StringRef getHelperName() const override { return HelperName; }
134
135 static bool classof(const CGCapturedStmtInfo *Info) {
136 return CGOpenMPRegionInfo::classof(Info) &&
137 cast<CGOpenMPRegionInfo>(Val: Info)->getRegionKind() ==
138 ParallelOutlinedRegion;
139 }
140
141private:
142 /// A variable or parameter storing global thread id for OpenMP
143 /// constructs.
144 const VarDecl *ThreadIDVar;
145 StringRef HelperName;
146};
147
148/// API for captured statement code generation in OpenMP constructs.
149class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
150public:
151 class UntiedTaskActionTy final : public PrePostActionTy {
152 bool Untied;
153 const VarDecl *PartIDVar;
154 const RegionCodeGenTy UntiedCodeGen;
155 llvm::SwitchInst *UntiedSwitch = nullptr;
156
157 public:
158 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
159 const RegionCodeGenTy &UntiedCodeGen)
160 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
161 void Enter(CodeGenFunction &CGF) override {
162 if (Untied) {
163 // Emit task switching point.
164 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
165 Ptr: CGF.GetAddrOfLocalVar(VD: PartIDVar),
166 PtrTy: PartIDVar->getType()->castAs<PointerType>());
167 llvm::Value *Res =
168 CGF.EmitLoadOfScalar(lvalue: PartIdLVal, Loc: PartIDVar->getLocation());
169 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(name: ".untied.done.");
170 UntiedSwitch = CGF.Builder.CreateSwitch(V: Res, Dest: DoneBB);
171 CGF.EmitBlock(BB: DoneBB);
172 CGF.EmitBranchThroughCleanup(Dest: CGF.ReturnBlock);
173 CGF.EmitBlock(BB: CGF.createBasicBlock(name: ".untied.jmp."));
174 UntiedSwitch->addCase(OnVal: CGF.Builder.getInt32(C: 0),
175 Dest: CGF.Builder.GetInsertBlock());
176 emitUntiedSwitch(CGF);
177 }
178 }
179 void emitUntiedSwitch(CodeGenFunction &CGF) const {
180 if (Untied) {
181 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
182 Ptr: CGF.GetAddrOfLocalVar(VD: PartIDVar),
183 PtrTy: PartIDVar->getType()->castAs<PointerType>());
184 CGF.EmitStoreOfScalar(value: CGF.Builder.getInt32(C: UntiedSwitch->getNumCases()),
185 lvalue: PartIdLVal);
186 UntiedCodeGen(CGF);
187 CodeGenFunction::JumpDest CurPoint =
188 CGF.getJumpDestInCurrentScope(Name: ".untied.next.");
189 CGF.EmitBranch(Block: CGF.ReturnBlock.getBlock());
190 CGF.EmitBlock(BB: CGF.createBasicBlock(name: ".untied.jmp."));
191 UntiedSwitch->addCase(OnVal: CGF.Builder.getInt32(C: UntiedSwitch->getNumCases()),
192 Dest: CGF.Builder.GetInsertBlock());
193 CGF.EmitBranchThroughCleanup(Dest: CurPoint);
194 CGF.EmitBlock(BB: CurPoint.getBlock());
195 }
196 }
197 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
198 };
199 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
200 const VarDecl *ThreadIDVar,
201 const RegionCodeGenTy &CodeGen,
202 OpenMPDirectiveKind Kind, bool HasCancel,
203 const UntiedTaskActionTy &Action)
204 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
205 ThreadIDVar(ThreadIDVar), Action(Action) {
206 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
207 }
208
209 /// Get a variable or parameter for storing global thread id
210 /// inside OpenMP construct.
211 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
212
213 /// Get an LValue for the current ThreadID variable.
214 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
215
216 /// Get the name of the capture helper.
217 StringRef getHelperName() const override { return ".omp_outlined."; }
218
219 void emitUntiedSwitch(CodeGenFunction &CGF) override {
220 Action.emitUntiedSwitch(CGF);
221 }
222
223 static bool classof(const CGCapturedStmtInfo *Info) {
224 return CGOpenMPRegionInfo::classof(Info) &&
225 cast<CGOpenMPRegionInfo>(Val: Info)->getRegionKind() ==
226 TaskOutlinedRegion;
227 }
228
229private:
230 /// A variable or parameter storing global thread id for OpenMP
231 /// constructs.
232 const VarDecl *ThreadIDVar;
233 /// Action for emitting code for untied tasks.
234 const UntiedTaskActionTy &Action;
235};
236
237/// API for inlined captured statement code generation in OpenMP
238/// constructs.
239class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
240public:
241 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
242 const RegionCodeGenTy &CodeGen,
243 OpenMPDirectiveKind Kind, bool HasCancel)
244 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
245 OldCSI(OldCSI),
246 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(Val: OldCSI)) {}
247
248 // Retrieve the value of the context parameter.
249 llvm::Value *getContextValue() const override {
250 if (OuterRegionInfo)
251 return OuterRegionInfo->getContextValue();
252 llvm_unreachable("No context value for inlined OpenMP region");
253 }
254
255 void setContextValue(llvm::Value *V) override {
256 if (OuterRegionInfo) {
257 OuterRegionInfo->setContextValue(V);
258 return;
259 }
260 llvm_unreachable("No context value for inlined OpenMP region");
261 }
262
263 /// Lookup the captured field decl for a variable.
264 const FieldDecl *lookup(const VarDecl *VD) const override {
265 if (OuterRegionInfo)
266 return OuterRegionInfo->lookup(VD);
267 // If there is no outer outlined region,no need to lookup in a list of
268 // captured variables, we can use the original one.
269 return nullptr;
270 }
271
272 FieldDecl *getThisFieldDecl() const override {
273 if (OuterRegionInfo)
274 return OuterRegionInfo->getThisFieldDecl();
275 return nullptr;
276 }
277
278 /// Get a variable or parameter for storing global thread id
279 /// inside OpenMP construct.
280 const VarDecl *getThreadIDVariable() const override {
281 if (OuterRegionInfo)
282 return OuterRegionInfo->getThreadIDVariable();
283 return nullptr;
284 }
285
286 /// Get an LValue for the current ThreadID variable.
287 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
288 if (OuterRegionInfo)
289 return OuterRegionInfo->getThreadIDVariableLValue(CGF);
290 llvm_unreachable("No LValue for inlined OpenMP construct");
291 }
292
293 /// Get the name of the capture helper.
294 StringRef getHelperName() const override {
295 if (auto *OuterRegionInfo = getOldCSI())
296 return OuterRegionInfo->getHelperName();
297 llvm_unreachable("No helper name for inlined OpenMP construct");
298 }
299
300 void emitUntiedSwitch(CodeGenFunction &CGF) override {
301 if (OuterRegionInfo)
302 OuterRegionInfo->emitUntiedSwitch(CGF);
303 }
304
305 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
306
307 static bool classof(const CGCapturedStmtInfo *Info) {
308 return CGOpenMPRegionInfo::classof(Info) &&
309 cast<CGOpenMPRegionInfo>(Val: Info)->getRegionKind() == InlinedRegion;
310 }
311
312 ~CGOpenMPInlinedRegionInfo() override = default;
313
314private:
315 /// CodeGen info about outer OpenMP region.
316 CodeGenFunction::CGCapturedStmtInfo *OldCSI;
317 CGOpenMPRegionInfo *OuterRegionInfo;
318};
319
320/// API for captured statement code generation in OpenMP target
321/// constructs. For this captures, implicit parameters are used instead of the
322/// captured fields. The name of the target region has to be unique in a given
323/// application so it is provided by the client, because only the client has
324/// the information to generate that.
325class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
326public:
327 CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
328 const RegionCodeGenTy &CodeGen, StringRef HelperName)
329 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
330 /*HasCancel=*/false),
331 HelperName(HelperName) {}
332
333 /// This is unused for target regions because each starts executing
334 /// with a single thread.
335 const VarDecl *getThreadIDVariable() const override { return nullptr; }
336
337 /// Get the name of the capture helper.
338 StringRef getHelperName() const override { return HelperName; }
339
340 static bool classof(const CGCapturedStmtInfo *Info) {
341 return CGOpenMPRegionInfo::classof(Info) &&
342 cast<CGOpenMPRegionInfo>(Val: Info)->getRegionKind() == TargetRegion;
343 }
344
345private:
346 StringRef HelperName;
347};
348
349static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
350 llvm_unreachable("No codegen for expressions");
351}
352/// API for generation of expressions captured in a innermost OpenMP
353/// region.
354class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
355public:
356 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
357 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
358 OMPD_unknown,
359 /*HasCancel=*/false),
360 PrivScope(CGF) {
361 // Make sure the globals captured in the provided statement are local by
362 // using the privatization logic. We assume the same variable is not
363 // captured more than once.
364 for (const auto &C : CS.captures()) {
365 if (!C.capturesVariable() && !C.capturesVariableByCopy())
366 continue;
367
368 const VarDecl *VD = C.getCapturedVar();
369 if (VD->isLocalVarDeclOrParm())
370 continue;
371
372 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
373 /*RefersToEnclosingVariableOrCapture=*/false,
374 VD->getType().getNonReferenceType(), VK_LValue,
375 C.getLocation());
376 PrivScope.addPrivate(LocalVD: VD, Addr: CGF.EmitLValue(E: &DRE).getAddress());
377 }
378 (void)PrivScope.Privatize();
379 }
380
381 /// Lookup the captured field decl for a variable.
382 const FieldDecl *lookup(const VarDecl *VD) const override {
383 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
384 return FD;
385 return nullptr;
386 }
387
388 /// Emit the captured statement body.
389 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
390 llvm_unreachable("No body for expressions");
391 }
392
393 /// Get a variable or parameter for storing global thread id
394 /// inside OpenMP construct.
395 const VarDecl *getThreadIDVariable() const override {
396 llvm_unreachable("No thread id for expressions");
397 }
398
399 /// Get the name of the capture helper.
400 StringRef getHelperName() const override {
401 llvm_unreachable("No helper name for expressions");
402 }
403
404 static bool classof(const CGCapturedStmtInfo *Info) { return false; }
405
406private:
407 /// Private scope to capture global variables.
408 CodeGenFunction::OMPPrivateScope PrivScope;
409};
410
411/// RAII for emitting code of OpenMP constructs.
412class InlinedOpenMPRegionRAII {
413 CodeGenFunction &CGF;
414 llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
415 FieldDecl *LambdaThisCaptureField = nullptr;
416 const CodeGen::CGBlockInfo *BlockInfo = nullptr;
417 bool NoInheritance = false;
418
419public:
420 /// Constructs region for combined constructs.
421 /// \param CodeGen Code generation sequence for combined directives. Includes
422 /// a list of functions used for code generation of implicitly inlined
423 /// regions.
424 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
425 OpenMPDirectiveKind Kind, bool HasCancel,
426 bool NoInheritance = true)
427 : CGF(CGF), NoInheritance(NoInheritance) {
428 // Start emission for the construct.
429 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
430 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
431 if (NoInheritance) {
432 std::swap(a&: CGF.LambdaCaptureFields, b&: LambdaCaptureFields);
433 LambdaThisCaptureField = CGF.LambdaThisCaptureField;
434 CGF.LambdaThisCaptureField = nullptr;
435 BlockInfo = CGF.BlockInfo;
436 CGF.BlockInfo = nullptr;
437 }
438 }
439
440 ~InlinedOpenMPRegionRAII() {
441 // Restore original CapturedStmtInfo only if we're done with code emission.
442 auto *OldCSI =
443 cast<CGOpenMPInlinedRegionInfo>(Val: CGF.CapturedStmtInfo)->getOldCSI();
444 delete CGF.CapturedStmtInfo;
445 CGF.CapturedStmtInfo = OldCSI;
446 if (NoInheritance) {
447 std::swap(a&: CGF.LambdaCaptureFields, b&: LambdaCaptureFields);
448 CGF.LambdaThisCaptureField = LambdaThisCaptureField;
449 CGF.BlockInfo = BlockInfo;
450 }
451 }
452};
453
454/// Values for bit flags used in the ident_t to describe the fields.
455/// All enumeric elements are named and described in accordance with the code
456/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
457enum OpenMPLocationFlags : unsigned {
458 /// Use trampoline for internal microtask.
459 OMP_IDENT_IMD = 0x01,
460 /// Use c-style ident structure.
461 OMP_IDENT_KMPC = 0x02,
462 /// Atomic reduction option for kmpc_reduce.
463 OMP_ATOMIC_REDUCE = 0x10,
464 /// Explicit 'barrier' directive.
465 OMP_IDENT_BARRIER_EXPL = 0x20,
466 /// Implicit barrier in code.
467 OMP_IDENT_BARRIER_IMPL = 0x40,
468 /// Implicit barrier in 'for' directive.
469 OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
470 /// Implicit barrier in 'sections' directive.
471 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
472 /// Implicit barrier in 'single' directive.
473 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
474 /// Call of __kmp_for_static_init for static loop.
475 OMP_IDENT_WORK_LOOP = 0x200,
476 /// Call of __kmp_for_static_init for sections.
477 OMP_IDENT_WORK_SECTIONS = 0x400,
478 /// Call of __kmp_for_static_init for distribute.
479 OMP_IDENT_WORK_DISTRIBUTE = 0x800,
480 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
481};
482
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1 = 0,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags = 1,
  /// Not really used in Fortran any more
  IdentField_Reserved_2 = 2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3 = 3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource = 4
};
523
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  // Default (unordered) versions.
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,

  // 'ordered' versions.
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  /// Alias for the static schedule.
  OMP_sch_default = OMP_sch_static,

  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Fused distribute+for static schedule (entityId = team*nthreads + tid,
  /// num_entities = nteams*nthreads). One for_static_init call, no
  /// surrounding distribute_static_init. Matches
  /// kmp_sched_distr_static_chunk_sched_static_chunkone in the device RTL
  /// (openmp/device/include/DeviceTypes.h).
  OMP_dist_sch_static_chunked_sch_static_chunkone = 93,

  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = 1 << 29,
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = 1 << 30,
};
561
562/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
563/// region.
564class CleanupTy final : public EHScopeStack::Cleanup {
565 PrePostActionTy *Action;
566
567public:
568 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
569 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
570 if (!CGF.HaveInsertPoint())
571 return;
572 Action->Exit(CGF);
573 }
574};
575
576} // anonymous namespace
577
578void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
579 CodeGenFunction::RunCleanupsScope Scope(CGF);
580 if (PrePostAction) {
581 CGF.EHStack.pushCleanup<CleanupTy>(Kind: NormalAndEHCleanup, A: PrePostAction);
582 Callback(CodeGen, CGF, *PrePostAction);
583 } else {
584 PrePostActionTy Action;
585 Callback(CodeGen, CGF, Action);
586 }
587}
588
589/// Check if the combiner is a call to UDR combiner and if it is so return the
590/// UDR decl used for reduction.
591static const OMPDeclareReductionDecl *
592getReductionInit(const Expr *ReductionOp) {
593 if (const auto *CE = dyn_cast<CallExpr>(Val: ReductionOp))
594 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(Val: CE->getCallee()))
595 if (const auto *DRE =
596 dyn_cast<DeclRefExpr>(Val: OVE->getSourceExpr()->IgnoreImpCasts()))
597 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(Val: DRE->getDecl()))
598 return DRD;
599 return nullptr;
600}
601
602static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
603 const OMPDeclareReductionDecl *DRD,
604 const Expr *InitOp,
605 Address Private, Address Original,
606 QualType Ty) {
607 if (DRD->getInitializer()) {
608 std::pair<llvm::Function *, llvm::Function *> Reduction =
609 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(D: DRD);
610 const auto *CE = cast<CallExpr>(Val: InitOp);
611 const auto *OVE = cast<OpaqueValueExpr>(Val: CE->getCallee());
612 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
613 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
614 const auto *LHSDRE =
615 cast<DeclRefExpr>(Val: cast<UnaryOperator>(Val: LHS)->getSubExpr());
616 const auto *RHSDRE =
617 cast<DeclRefExpr>(Val: cast<UnaryOperator>(Val: RHS)->getSubExpr());
618 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
619 PrivateScope.addPrivate(LocalVD: cast<VarDecl>(Val: LHSDRE->getDecl()), Addr: Private);
620 PrivateScope.addPrivate(LocalVD: cast<VarDecl>(Val: RHSDRE->getDecl()), Addr: Original);
621 (void)PrivateScope.Privatize();
622 RValue Func = RValue::get(V: Reduction.second);
623 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
624 CGF.EmitIgnoredExpr(E: InitOp);
625 } else {
626 llvm::Constant *Init = CGF.CGM.EmitNullConstant(T: Ty);
627 std::string Name = CGF.CGM.getOpenMPRuntime().getName(Parts: {"init"});
628 auto *GV = new llvm::GlobalVariable(
629 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
630 llvm::GlobalValue::PrivateLinkage, Init, Name);
631 LValue LV = CGF.MakeNaturalAlignRawAddrLValue(V: GV, T: Ty);
632 RValue InitRVal;
633 switch (CGF.getEvaluationKind(T: Ty)) {
634 case TEK_Scalar:
635 InitRVal = CGF.EmitLoadOfLValue(V: LV, Loc: DRD->getLocation());
636 break;
637 case TEK_Complex:
638 InitRVal =
639 RValue::getComplex(C: CGF.EmitLoadOfComplex(src: LV, loc: DRD->getLocation()));
640 break;
641 case TEK_Aggregate: {
642 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
643 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
644 CGF.EmitAnyExprToMem(E: &OVE, Location: Private, Quals: Ty.getQualifiers(),
645 /*IsInitializer=*/false);
646 return;
647 }
648 }
649 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
650 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
651 CGF.EmitAnyExprToMem(E: &OVE, Location: Private, Quals: Ty.getQualifiers(),
652 /*IsInitializer=*/false);
653 }
654}
655
656/// Emit initialization of arrays of complex types.
657/// \param DestAddr Address of the array.
658/// \param Type Type of array.
659/// \param Init Initial expression of array.
660/// \param SrcAddr Address of the original array.
661static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
662 QualType Type, bool EmitDeclareReductionInit,
663 const Expr *Init,
664 const OMPDeclareReductionDecl *DRD,
665 Address SrcAddr = Address::invalid()) {
666 // Perform element-by-element initialization.
667 QualType ElementTy;
668
669 // Drill down to the base element type on both arrays.
670 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
671 llvm::Value *NumElements = CGF.emitArrayLength(arrayType: ArrayTy, baseType&: ElementTy, addr&: DestAddr);
672 if (DRD)
673 SrcAddr = SrcAddr.withElementType(ElemTy: DestAddr.getElementType());
674
675 llvm::Value *SrcBegin = nullptr;
676 if (DRD)
677 SrcBegin = SrcAddr.emitRawPointer(CGF);
678 llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
679 // Cast from pointer to array type to pointer to single element.
680 llvm::Value *DestEnd =
681 CGF.Builder.CreateGEP(Ty: DestAddr.getElementType(), Ptr: DestBegin, IdxList: NumElements);
682 // The basic structure here is a while-do loop.
683 llvm::BasicBlock *BodyBB = CGF.createBasicBlock(name: "omp.arrayinit.body");
684 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(name: "omp.arrayinit.done");
685 llvm::Value *IsEmpty =
686 CGF.Builder.CreateICmpEQ(LHS: DestBegin, RHS: DestEnd, Name: "omp.arrayinit.isempty");
687 CGF.Builder.CreateCondBr(Cond: IsEmpty, True: DoneBB, False: BodyBB);
688
689 // Enter the loop body, making that address the current address.
690 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
691 CGF.EmitBlock(BB: BodyBB);
692
693 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(T: ElementTy);
694
695 llvm::PHINode *SrcElementPHI = nullptr;
696 Address SrcElementCurrent = Address::invalid();
697 if (DRD) {
698 SrcElementPHI = CGF.Builder.CreatePHI(Ty: SrcBegin->getType(), NumReservedValues: 2,
699 Name: "omp.arraycpy.srcElementPast");
700 SrcElementPHI->addIncoming(V: SrcBegin, BB: EntryBB);
701 SrcElementCurrent =
702 Address(SrcElementPHI, SrcAddr.getElementType(),
703 SrcAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize));
704 }
705 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
706 Ty: DestBegin->getType(), NumReservedValues: 2, Name: "omp.arraycpy.destElementPast");
707 DestElementPHI->addIncoming(V: DestBegin, BB: EntryBB);
708 Address DestElementCurrent =
709 Address(DestElementPHI, DestAddr.getElementType(),
710 DestAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize));
711
712 // Emit copy.
713 {
714 CodeGenFunction::RunCleanupsScope InitScope(CGF);
715 if (EmitDeclareReductionInit) {
716 emitInitWithReductionInitializer(CGF, DRD, InitOp: Init, Private: DestElementCurrent,
717 Original: SrcElementCurrent, Ty: ElementTy);
718 } else
719 CGF.EmitAnyExprToMem(E: Init, Location: DestElementCurrent, Quals: ElementTy.getQualifiers(),
720 /*IsInitializer=*/false);
721 }
722
723 if (DRD) {
724 // Shift the address forward by one element.
725 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
726 Ty: SrcAddr.getElementType(), Ptr: SrcElementPHI, /*Idx0=*/1,
727 Name: "omp.arraycpy.dest.element");
728 SrcElementPHI->addIncoming(V: SrcElementNext, BB: CGF.Builder.GetInsertBlock());
729 }
730
731 // Shift the address forward by one element.
732 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
733 Ty: DestAddr.getElementType(), Ptr: DestElementPHI, /*Idx0=*/1,
734 Name: "omp.arraycpy.dest.element");
735 // Check whether we've reached the end.
736 llvm::Value *Done =
737 CGF.Builder.CreateICmpEQ(LHS: DestElementNext, RHS: DestEnd, Name: "omp.arraycpy.done");
738 CGF.Builder.CreateCondBr(Cond: Done, True: DoneBB, False: BodyBB);
739 DestElementPHI->addIncoming(V: DestElementNext, BB: CGF.Builder.GetInsertBlock());
740
741 // Done.
742 CGF.EmitBlock(BB: DoneBB, /*IsFinished=*/true);
743}
744
745LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
746 return CGF.EmitOMPSharedLValue(E);
747}
748
749LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
750 const Expr *E) {
751 if (const auto *OASE = dyn_cast<ArraySectionExpr>(Val: E))
752 return CGF.EmitArraySectionExpr(E: OASE, /*IsLowerBound=*/false);
753 return LValue();
754}
755
756void ReductionCodeGen::emitAggregateInitialization(
757 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
758 const OMPDeclareReductionDecl *DRD) {
759 // Emit VarDecl with copy init for arrays.
760 // Get the address of the original variable captured in current
761 // captured region.
762 const auto *PrivateVD =
763 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: ClausesData[N].Private)->getDecl());
764 bool EmitDeclareReductionInit =
765 DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
766 EmitOMPAggregateInit(CGF, DestAddr: PrivateAddr, Type: PrivateVD->getType(),
767 EmitDeclareReductionInit,
768 Init: EmitDeclareReductionInit ? ClausesData[N].ReductionOp
769 : PrivateVD->getInit(),
770 DRD, SrcAddr: SharedAddr);
771}
772
773ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
774 ArrayRef<const Expr *> Origs,
775 ArrayRef<const Expr *> Privates,
776 ArrayRef<const Expr *> ReductionOps) {
777 ClausesData.reserve(N: Shareds.size());
778 SharedAddresses.reserve(N: Shareds.size());
779 Sizes.reserve(N: Shareds.size());
780 BaseDecls.reserve(N: Shareds.size());
781 const auto *IOrig = Origs.begin();
782 const auto *IPriv = Privates.begin();
783 const auto *IRed = ReductionOps.begin();
784 for (const Expr *Ref : Shareds) {
785 ClausesData.emplace_back(Args&: Ref, Args: *IOrig, Args: *IPriv, Args: *IRed);
786 std::advance(i&: IOrig, n: 1);
787 std::advance(i&: IPriv, n: 1);
788 std::advance(i&: IRed, n: 1);
789 }
790}
791
792void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
793 assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
794 "Number of generated lvalues must be exactly N.");
795 LValue First = emitSharedLValue(CGF, E: ClausesData[N].Shared);
796 LValue Second = emitSharedLValueUB(CGF, E: ClausesData[N].Shared);
797 SharedAddresses.emplace_back(Args&: First, Args&: Second);
798 if (ClausesData[N].Shared == ClausesData[N].Ref) {
799 OrigAddresses.emplace_back(Args&: First, Args&: Second);
800 } else {
801 LValue First = emitSharedLValue(CGF, E: ClausesData[N].Ref);
802 LValue Second = emitSharedLValueUB(CGF, E: ClausesData[N].Ref);
803 OrigAddresses.emplace_back(Args&: First, Args&: Second);
804 }
805}
806
807void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
808 QualType PrivateType = getPrivateType(N);
809 bool AsArraySection = isa<ArraySectionExpr>(Val: ClausesData[N].Ref);
810 if (!PrivateType->isVariablyModifiedType()) {
811 Sizes.emplace_back(
812 Args: CGF.getTypeSize(Ty: OrigAddresses[N].first.getType().getNonReferenceType()),
813 Args: nullptr);
814 return;
815 }
816 llvm::Value *Size;
817 llvm::Value *SizeInChars;
818 auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
819 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(Ty: ElemType);
820 if (AsArraySection) {
821 Size = CGF.Builder.CreatePtrDiff(ElemTy: ElemType,
822 LHS: OrigAddresses[N].second.getPointer(CGF),
823 RHS: OrigAddresses[N].first.getPointer(CGF));
824 Size = CGF.Builder.CreateZExtOrTrunc(V: Size, DestTy: ElemSizeOf->getType());
825 Size = CGF.Builder.CreateNUWAdd(
826 LHS: Size, RHS: llvm::ConstantInt::get(Ty: Size->getType(), /*V=*/1));
827 SizeInChars = CGF.Builder.CreateNUWMul(LHS: Size, RHS: ElemSizeOf);
828 } else {
829 SizeInChars =
830 CGF.getTypeSize(Ty: OrigAddresses[N].first.getType().getNonReferenceType());
831 Size = CGF.Builder.CreateExactUDiv(LHS: SizeInChars, RHS: ElemSizeOf);
832 }
833 Sizes.emplace_back(Args&: SizeInChars, Args&: Size);
834 CodeGenFunction::OpaqueValueMapping OpaqueMap(
835 CGF,
836 cast<OpaqueValueExpr>(
837 Val: CGF.getContext().getAsVariableArrayType(T: PrivateType)->getSizeExpr()),
838 RValue::get(V: Size));
839 CGF.EmitVariablyModifiedType(Ty: PrivateType);
840}
841
842void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
843 llvm::Value *Size) {
844 QualType PrivateType = getPrivateType(N);
845 if (!PrivateType->isVariablyModifiedType()) {
846 assert(!Size && !Sizes[N].second &&
847 "Size should be nullptr for non-variably modified reduction "
848 "items.");
849 return;
850 }
851 CodeGenFunction::OpaqueValueMapping OpaqueMap(
852 CGF,
853 cast<OpaqueValueExpr>(
854 Val: CGF.getContext().getAsVariableArrayType(T: PrivateType)->getSizeExpr()),
855 RValue::get(V: Size));
856 CGF.EmitVariablyModifiedType(Ty: PrivateType);
857}
858
859void ReductionCodeGen::emitInitialization(
860 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
861 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
862 assert(SharedAddresses.size() > N && "No variable was generated");
863 const auto *PrivateVD =
864 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: ClausesData[N].Private)->getDecl());
865 const OMPDeclareReductionDecl *DRD =
866 getReductionInit(ReductionOp: ClausesData[N].ReductionOp);
867 if (CGF.getContext().getAsArrayType(T: PrivateVD->getType())) {
868 if (DRD && DRD->getInitializer())
869 (void)DefaultInit(CGF);
870 emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
871 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
872 (void)DefaultInit(CGF);
873 QualType SharedType = SharedAddresses[N].first.getType();
874 emitInitWithReductionInitializer(CGF, DRD, InitOp: ClausesData[N].ReductionOp,
875 Private: PrivateAddr, Original: SharedAddr, Ty: SharedType);
876 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
877 !CGF.isTrivialInitializer(Init: PrivateVD->getInit())) {
878 CGF.EmitAnyExprToMem(E: PrivateVD->getInit(), Location: PrivateAddr,
879 Quals: PrivateVD->getType().getQualifiers(),
880 /*IsInitializer=*/false);
881 }
882}
883
884bool ReductionCodeGen::needCleanups(unsigned N) {
885 QualType PrivateType = getPrivateType(N);
886 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
887 return DTorKind != QualType::DK_none;
888}
889
890void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
891 Address PrivateAddr) {
892 QualType PrivateType = getPrivateType(N);
893 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
894 if (needCleanups(N)) {
895 PrivateAddr =
896 PrivateAddr.withElementType(ElemTy: CGF.ConvertTypeForMem(T: PrivateType));
897 CGF.pushDestroy(dtorKind: DTorKind, addr: PrivateAddr, type: PrivateType);
898 }
899}
900
901static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
902 LValue BaseLV) {
903 BaseTy = BaseTy.getNonReferenceType();
904 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
905 !CGF.getContext().hasSameType(T1: BaseTy, T2: ElTy)) {
906 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
907 BaseLV = CGF.EmitLoadOfPointerLValue(Ptr: BaseLV.getAddress(), PtrTy);
908 } else {
909 LValue RefLVal = CGF.MakeAddrLValue(Addr: BaseLV.getAddress(), T: BaseTy);
910 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
911 }
912 BaseTy = BaseTy->getPointeeType();
913 }
914 return CGF.MakeAddrLValue(
915 Addr: BaseLV.getAddress().withElementType(ElemTy: CGF.ConvertTypeForMem(T: ElTy)),
916 T: BaseLV.getType(), BaseInfo: BaseLV.getBaseInfo(),
917 TBAAInfo: CGF.CGM.getTBAAInfoForSubobject(Base: BaseLV, AccessType: BaseLV.getType()));
918}
919
920static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
921 Address OriginalBaseAddress, llvm::Value *Addr) {
922 RawAddress Tmp = RawAddress::invalid();
923 Address TopTmp = Address::invalid();
924 Address MostTopTmp = Address::invalid();
925 BaseTy = BaseTy.getNonReferenceType();
926 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
927 !CGF.getContext().hasSameType(T1: BaseTy, T2: ElTy)) {
928 Tmp = CGF.CreateMemTempWithoutCast(T: BaseTy);
929 if (TopTmp.isValid())
930 CGF.Builder.CreateStore(Val: Tmp.getPointer(), Addr: TopTmp);
931 else
932 MostTopTmp = Tmp;
933 TopTmp = Tmp;
934 BaseTy = BaseTy->getPointeeType();
935 }
936
937 if (Tmp.isValid()) {
938 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
939 V: Addr, DestTy: Tmp.getElementType());
940 CGF.Builder.CreateStore(Val: Addr, Addr: Tmp);
941 return MostTopTmp;
942 }
943
944 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
945 V: Addr, DestTy: OriginalBaseAddress.getType());
946 return OriginalBaseAddress.withPointer(NewPointer: Addr, IsKnownNonNull: NotKnownNonNull);
947}
948
949static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
950 const VarDecl *OrigVD = nullptr;
951 if (const auto *OASE = dyn_cast<ArraySectionExpr>(Val: Ref)) {
952 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
953 while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Val: Base))
954 Base = TempOASE->getBase()->IgnoreParenImpCasts();
955 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Val: Base))
956 Base = TempASE->getBase()->IgnoreParenImpCasts();
957 DE = cast<DeclRefExpr>(Val: Base);
958 OrigVD = cast<VarDecl>(Val: DE->getDecl());
959 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Val: Ref)) {
960 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
961 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Val: Base))
962 Base = TempASE->getBase()->IgnoreParenImpCasts();
963 DE = cast<DeclRefExpr>(Val: Base);
964 OrigVD = cast<VarDecl>(Val: DE->getDecl());
965 }
966 return OrigVD;
967}
968
969Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
970 Address PrivateAddr) {
971 const DeclRefExpr *DE;
972 if (const VarDecl *OrigVD = ::getBaseDecl(Ref: ClausesData[N].Ref, DE)) {
973 BaseDecls.emplace_back(Args&: OrigVD);
974 LValue OriginalBaseLValue = CGF.EmitLValue(E: DE);
975 LValue BaseLValue =
976 loadToBegin(CGF, BaseTy: OrigVD->getType(), ElTy: SharedAddresses[N].first.getType(),
977 BaseLV: OriginalBaseLValue);
978 Address SharedAddr = SharedAddresses[N].first.getAddress();
979 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
980 ElemTy: SharedAddr.getElementType(), LHS: BaseLValue.getPointer(CGF),
981 RHS: SharedAddr.emitRawPointer(CGF));
982 llvm::Value *PrivatePointer =
983 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
984 V: PrivateAddr.emitRawPointer(CGF), DestTy: SharedAddr.getType());
985 llvm::Value *Ptr = CGF.Builder.CreateGEP(
986 Ty: SharedAddr.getElementType(), Ptr: PrivatePointer, IdxList: Adjustment);
987 return castToBase(CGF, BaseTy: OrigVD->getType(),
988 ElTy: SharedAddresses[N].first.getType(),
989 OriginalBaseAddress: OriginalBaseLValue.getAddress(), Addr: Ptr);
990 }
991 BaseDecls.emplace_back(
992 Args: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: ClausesData[N].Ref)->getDecl()));
993 return PrivateAddr;
994}
995
996bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
997 const OMPDeclareReductionDecl *DRD =
998 getReductionInit(ReductionOp: ClausesData[N].ReductionOp);
999 return DRD && DRD->getInitializer();
1000}
1001
1002LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1003 return CGF.EmitLoadOfPointerLValue(
1004 Ptr: CGF.GetAddrOfLocalVar(VD: getThreadIDVariable()),
1005 PtrTy: getThreadIDVariable()->getType()->castAs<PointerType>());
1006}
1007
1008void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
1009 if (!CGF.HaveInsertPoint())
1010 return;
1011 // 1.2.2 OpenMP Language Terminology
1012 // Structured block - An executable statement with a single entry at the
1013 // top and a single exit at the bottom.
1014 // The point of exit cannot be a branch out of the structured block.
1015 // longjmp() and throw() must not violate the entry/exit criteria.
1016 CGF.EHStack.pushTerminate();
1017 if (S)
1018 CGF.incrementProfileCounter(S);
1019 CodeGen(CGF);
1020 CGF.EHStack.popTerminate();
1021}
1022
1023LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1024 CodeGenFunction &CGF) {
1025 return CGF.MakeAddrLValue(Addr: CGF.GetAddrOfLocalVar(VD: getThreadIDVariable()),
1026 T: getThreadIDVariable()->getType(),
1027 Source: AlignmentSource::Decl);
1028}
1029
1030static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1031 QualType FieldTy) {
1032 auto *Field = FieldDecl::Create(
1033 C, DC, StartLoc: SourceLocation(), IdLoc: SourceLocation(), /*Id=*/nullptr, T: FieldTy,
1034 TInfo: C.getTrivialTypeSourceInfo(T: FieldTy, Loc: SourceLocation()),
1035 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1036 Field->setAccess(AS_public);
1037 DC->addDecl(D: Field);
1038 return Field;
1039}
1040
1041CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
1042 : CGM(CGM), OMPBuilder(CGM.getModule()) {
1043 KmpCriticalNameTy = llvm::ArrayType::get(ElementType: CGM.Int32Ty, /*NumElements*/ 8);
1044 llvm::OpenMPIRBuilderConfig Config(
1045 CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
1046 CGM.getLangOpts().OpenMPOffloadMandatory,
1047 /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
1048 hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
1049 Config.setDefaultTargetAS(
1050 CGM.getContext().getTargetInfo().getTargetAddressSpace(AS: LangAS::Default));
1051 Config.setRuntimeCC(CGM.getRuntimeCC());
1052
1053 OMPBuilder.setConfig(Config);
1054 OMPBuilder.initialize();
1055 OMPBuilder.loadOffloadInfoMetadata(VFS&: *CGM.getFileSystem(),
1056 HostFilePath: CGM.getLangOpts().OpenMPIsTargetDevice
1057 ? CGM.getLangOpts().OMPHostIRFile
1058 : StringRef{});
1059
1060 // The user forces the compiler to behave as if omp requires
1061 // unified_shared_memory was given.
1062 if (CGM.getLangOpts().OpenMPForceUSM) {
1063 HasRequiresUnifiedSharedMemory = true;
1064 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
1065 }
1066}
1067
1068void CGOpenMPRuntime::clear() {
1069 InternalVars.clear();
1070 // Clean non-target variable declarations possibly used only in debug info.
1071 for (const auto &Data : EmittedNonTargetVariables) {
1072 if (!Data.getValue().pointsToAliveValue())
1073 continue;
1074 auto *GV = dyn_cast<llvm::GlobalVariable>(Val: Data.getValue());
1075 if (!GV)
1076 continue;
1077 if (!GV->isDeclaration() || GV->getNumUses() > 0)
1078 continue;
1079 GV->eraseFromParent();
1080 }
1081}
1082
1083std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1084 return OMPBuilder.createPlatformSpecificName(Parts);
1085}
1086
1087static llvm::Function *
1088emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
1089 const Expr *CombinerInitializer, const VarDecl *In,
1090 const VarDecl *Out, bool IsCombiner) {
1091 // void .omp_combiner.(Ty *in, Ty *out);
1092 ASTContext &C = CGM.getContext();
1093 QualType PtrTy = C.getPointerType(T: Ty).withRestrict();
1094 auto *OmpOutParm = ImplicitParamDecl::Create(
1095 C, /*DC=*/nullptr, IdLoc: Out->getLocation(),
1096 /*Id=*/nullptr, T: PtrTy, ParamKind: ImplicitParamKind::Other);
1097 auto *OmpInParm = ImplicitParamDecl::Create(
1098 C, /*DC=*/nullptr, IdLoc: In->getLocation(),
1099 /*Id=*/nullptr, T: PtrTy, ParamKind: ImplicitParamKind::Other);
1100 FunctionArgList Args{OmpOutParm, OmpInParm};
1101 const CGFunctionInfo &FnInfo =
1102 CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
1103 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(Info: FnInfo);
1104 std::string Name = CGM.getOpenMPRuntime().getName(
1105 Parts: {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1106 auto *Fn = llvm::Function::Create(Ty: FnTy, Linkage: llvm::GlobalValue::InternalLinkage,
1107 N: Name, M: &CGM.getModule());
1108 CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: FnInfo);
1109 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
1110 Fn->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
1111 if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
1112 Fn->removeFnAttr(Kind: llvm::Attribute::NoInline);
1113 Fn->removeFnAttr(Kind: llvm::Attribute::OptimizeNone);
1114 Fn->addFnAttr(Kind: llvm::Attribute::AlwaysInline);
1115 }
1116 CodeGenFunction CGF(CGM);
1117 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1118 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1119 CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo, Args, Loc: In->getLocation(),
1120 StartLoc: Out->getLocation());
1121 CodeGenFunction::OMPPrivateScope Scope(CGF);
1122 Address AddrIn = CGF.GetAddrOfLocalVar(VD: OmpInParm);
1123 Scope.addPrivate(
1124 LocalVD: In, Addr: CGF.EmitLoadOfPointerLValue(Ptr: AddrIn, PtrTy: PtrTy->castAs<PointerType>())
1125 .getAddress());
1126 Address AddrOut = CGF.GetAddrOfLocalVar(VD: OmpOutParm);
1127 Scope.addPrivate(
1128 LocalVD: Out, Addr: CGF.EmitLoadOfPointerLValue(Ptr: AddrOut, PtrTy: PtrTy->castAs<PointerType>())
1129 .getAddress());
1130 (void)Scope.Privatize();
1131 if (!IsCombiner && Out->hasInit() &&
1132 !CGF.isTrivialInitializer(Init: Out->getInit())) {
1133 CGF.EmitAnyExprToMem(E: Out->getInit(), Location: CGF.GetAddrOfLocalVar(VD: Out),
1134 Quals: Out->getType().getQualifiers(),
1135 /*IsInitializer=*/true);
1136 }
1137 if (CombinerInitializer)
1138 CGF.EmitIgnoredExpr(E: CombinerInitializer);
1139 Scope.ForceCleanup();
1140 CGF.FinishFunction();
1141 return Fn;
1142}
1143
1144void CGOpenMPRuntime::emitUserDefinedReduction(
1145 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1146 if (UDRMap.count(Val: D) > 0)
1147 return;
1148 llvm::Function *Combiner = emitCombinerOrInitializer(
1149 CGM, Ty: D->getType(), CombinerInitializer: D->getCombiner(),
1150 In: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D->getCombinerIn())->getDecl()),
1151 Out: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D->getCombinerOut())->getDecl()),
1152 /*IsCombiner=*/true);
1153 llvm::Function *Initializer = nullptr;
1154 if (const Expr *Init = D->getInitializer()) {
1155 Initializer = emitCombinerOrInitializer(
1156 CGM, Ty: D->getType(),
1157 CombinerInitializer: D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
1158 : nullptr,
1159 In: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D->getInitOrig())->getDecl()),
1160 Out: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D->getInitPriv())->getDecl()),
1161 /*IsCombiner=*/false);
1162 }
1163 UDRMap.try_emplace(Key: D, Args&: Combiner, Args&: Initializer);
1164 if (CGF)
1165 FunctionUDRMap[CGF->CurFn].push_back(Elt: D);
1166}
1167
1168std::pair<llvm::Function *, llvm::Function *>
1169CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1170 auto I = UDRMap.find(Val: D);
1171 if (I != UDRMap.end())
1172 return I->second;
1173 emitUserDefinedReduction(/*CGF=*/nullptr, D);
1174 return UDRMap.lookup(Val: D);
1175}
1176
1177namespace {
1178// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1179// Builder if one is present.
1180struct PushAndPopStackRAII {
1181 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1182 bool HasCancel, llvm::omp::Directive Kind)
1183 : OMPBuilder(OMPBuilder) {
1184 if (!OMPBuilder)
1185 return;
1186
1187 // The following callback is the crucial part of clangs cleanup process.
1188 //
1189 // NOTE:
1190 // Once the OpenMPIRBuilder is used to create parallel regions (and
1191 // similar), the cancellation destination (Dest below) is determined via
1192 // IP. That means if we have variables to finalize we split the block at IP,
1193 // use the new block (=BB) as destination to build a JumpDest (via
1194 // getJumpDestInCurrentScope(BB)) which then is fed to
1195 // EmitBranchThroughCleanup. Furthermore, there will not be the need
1196 // to push & pop an FinalizationInfo object.
1197 // The FiniCB will still be needed but at the point where the
1198 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1199 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1200 assert(IP.getBlock()->end() == IP.getPoint() &&
1201 "Clang CG should cause non-terminated block!");
1202 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1203 CGF.Builder.restoreIP(IP);
1204 CodeGenFunction::JumpDest Dest =
1205 CGF.getOMPCancelDestination(Kind: OMPD_parallel);
1206 CGF.EmitBranchThroughCleanup(Dest);
1207 return llvm::Error::success();
1208 };
1209
1210 // TODO: Remove this once we emit parallel regions through the
1211 // OpenMPIRBuilder as it can do this setup internally.
1212 llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
1213 OMPBuilder->pushFinalizationCB(FI: std::move(FI));
1214 }
1215 ~PushAndPopStackRAII() {
1216 if (OMPBuilder)
1217 OMPBuilder->popFinalizationCB();
1218 }
1219 llvm::OpenMPIRBuilder *OMPBuilder;
1220};
1221} // namespace
1222
1223static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1224 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1225 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1226 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1227 assert(ThreadIDVar->getType()->isPointerType() &&
1228 "thread id variable must be of type kmp_int32 *");
1229 CodeGenFunction CGF(CGM, true);
1230 bool HasCancel = false;
1231 if (const auto *OPD = dyn_cast<OMPParallelDirective>(Val: &D))
1232 HasCancel = OPD->hasCancel();
1233 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(Val: &D))
1234 HasCancel = OPD->hasCancel();
1235 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(Val: &D))
1236 HasCancel = OPSD->hasCancel();
1237 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(Val: &D))
1238 HasCancel = OPFD->hasCancel();
1239 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(Val: &D))
1240 HasCancel = OPFD->hasCancel();
1241 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(Val: &D))
1242 HasCancel = OPFD->hasCancel();
1243 else if (const auto *OPFD =
1244 dyn_cast<OMPTeamsDistributeParallelForDirective>(Val: &D))
1245 HasCancel = OPFD->hasCancel();
1246 else if (const auto *OPFD =
1247 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(Val: &D))
1248 HasCancel = OPFD->hasCancel();
1249
1250 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1251 // parallel region to make cancellation barriers work properly.
1252 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1253 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1254 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1255 HasCancel, OutlinedHelperName);
1256 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1257 return CGF.GenerateOpenMPCapturedStmtFunction(S: *CS, D);
1258}
1259
1260std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
1261 std::string Suffix = getName(Parts: {"omp_outlined"});
1262 return (Name + Suffix).str();
1263}
1264
1265std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
1266 return getOutlinedHelperName(Name: CGF.CurFn->getName());
1267}
1268
1269std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
1270 std::string Suffix = getName(Parts: {"omp", "reduction", "reduction_func"});
1271 return (Name + Suffix).str();
1272}
1273
1274llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1275 CodeGenFunction &CGF, const OMPExecutableDirective &D,
1276 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1277 const RegionCodeGenTy &CodeGen) {
1278 const CapturedStmt *CS = D.getCapturedStmt(RegionKind: OMPD_parallel);
1279 return emitParallelOrTeamsOutlinedFunction(
1280 CGM, D, CS, ThreadIDVar, InnermostKind, OutlinedHelperName: getOutlinedHelperName(CGF),
1281 CodeGen);
1282}
1283
1284llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1285 CodeGenFunction &CGF, const OMPExecutableDirective &D,
1286 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1287 const RegionCodeGenTy &CodeGen) {
1288 const CapturedStmt *CS = D.getCapturedStmt(RegionKind: OMPD_teams);
1289 return emitParallelOrTeamsOutlinedFunction(
1290 CGM, D, CS, ThreadIDVar, InnermostKind, OutlinedHelperName: getOutlinedHelperName(CGF),
1291 CodeGen);
1292}
1293
1294llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1295 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1296 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1297 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1298 bool Tied, unsigned &NumberOfParts) {
1299 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1300 PrePostActionTy &) {
1301 llvm::Value *ThreadID = getThreadID(CGF, Loc: D.getBeginLoc());
1302 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc: D.getBeginLoc());
1303 llvm::Value *TaskArgs[] = {
1304 UpLoc, ThreadID,
1305 CGF.EmitLoadOfPointerLValue(Ptr: CGF.GetAddrOfLocalVar(VD: TaskTVar),
1306 PtrTy: TaskTVar->getType()->castAs<PointerType>())
1307 .getPointer(CGF)};
1308 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
1309 M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_task),
1310 args: TaskArgs);
1311 };
1312 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1313 UntiedCodeGen);
1314 CodeGen.setAction(Action);
1315 assert(!ThreadIDVar->getType()->isPointerType() &&
1316 "thread id variable must be of type kmp_int32 for tasks");
1317 const OpenMPDirectiveKind Region =
1318 isOpenMPTaskLoopDirective(DKind: D.getDirectiveKind()) ? OMPD_taskloop
1319 : OMPD_task;
1320 const CapturedStmt *CS = D.getCapturedStmt(RegionKind: Region);
1321 bool HasCancel = false;
1322 if (const auto *TD = dyn_cast<OMPTaskDirective>(Val: &D))
1323 HasCancel = TD->hasCancel();
1324 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(Val: &D))
1325 HasCancel = TD->hasCancel();
1326 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(Val: &D))
1327 HasCancel = TD->hasCancel();
1328 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(Val: &D))
1329 HasCancel = TD->hasCancel();
1330
1331 CodeGenFunction CGF(CGM, true);
1332 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1333 InnermostKind, HasCancel, Action);
1334 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1335 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(S: *CS);
1336 if (!Tied)
1337 NumberOfParts = Action.getNumberOfParts();
1338 return Res;
1339}
1340
1341void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1342 bool AtCurrentPoint) {
1343 auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1344 assert(!Elem.ServiceInsertPt && "Insert point is set already.");
1345
1346 llvm::Value *Undef = llvm::UndefValue::get(T: CGF.Int32Ty);
1347 if (AtCurrentPoint) {
1348 Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt",
1349 CGF.Builder.GetInsertBlock());
1350 } else {
1351 Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1352 Elem.ServiceInsertPt->insertAfter(InsertPos: CGF.AllocaInsertPt->getIterator());
1353 }
1354}
1355
1356void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1357 auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1358 if (Elem.ServiceInsertPt) {
1359 llvm::Instruction *Ptr = Elem.ServiceInsertPt;
1360 Elem.ServiceInsertPt = nullptr;
1361 Ptr->eraseFromParent();
1362 }
1363}
1364
1365static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1366 SourceLocation Loc,
1367 SmallString<128> &Buffer) {
1368 llvm::raw_svector_ostream OS(Buffer);
1369 // Build debug location
1370 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1371 OS << ";";
1372 if (auto *DbgInfo = CGF.getDebugInfo())
1373 OS << DbgInfo->remapDIPath(PLoc.getFilename());
1374 else
1375 OS << PLoc.getFilename();
1376 OS << ";";
1377 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(Val: CGF.CurFuncDecl))
1378 OS << FD->getQualifiedNameAsString();
1379 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1380 return OS.str();
1381}
1382
1383llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1384 SourceLocation Loc,
1385 unsigned Flags, bool EmitLoc) {
1386 uint32_t SrcLocStrSize;
1387 llvm::Constant *SrcLocStr;
1388 if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
1389 llvm::codegenoptions::NoDebugInfo) ||
1390 Loc.isInvalid()) {
1391 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1392 } else {
1393 std::string FunctionName;
1394 std::string FileName;
1395 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(Val: CGF.CurFuncDecl))
1396 FunctionName = FD->getQualifiedNameAsString();
1397 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1398 if (auto *DbgInfo = CGF.getDebugInfo())
1399 FileName = DbgInfo->remapDIPath(PLoc.getFilename());
1400 else
1401 FileName = PLoc.getFilename();
1402 unsigned Line = PLoc.getLine();
1403 unsigned Column = PLoc.getColumn();
1404 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1405 Column, SrcLocStrSize);
1406 }
1407 unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1408 return OMPBuilder.getOrCreateIdent(
1409 SrcLocStr, SrcLocStrSize, Flags: llvm::omp::IdentFlag(Flags), Reserve2Flags: Reserved2Flags);
1410}
1411
1412llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1413 SourceLocation Loc) {
1414 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1415 // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
1416 // the clang invariants used below might be broken.
1417 if (CGM.getLangOpts().OpenMPIRBuilder) {
1418 SmallString<128> Buffer;
1419 OMPBuilder.updateToLocation(Loc: CGF.Builder.saveIP());
1420 uint32_t SrcLocStrSize;
1421 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1422 LocStr: getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
1423 return OMPBuilder.getOrCreateThreadID(
1424 Ident: OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
1425 }
1426
1427 llvm::Value *ThreadID = nullptr;
1428 // Check whether we've already cached a load of the thread id in this
1429 // function.
1430 auto I = OpenMPLocThreadIDMap.find(Val: CGF.CurFn);
1431 if (I != OpenMPLocThreadIDMap.end()) {
1432 ThreadID = I->second.ThreadID;
1433 if (ThreadID != nullptr)
1434 return ThreadID;
1435 }
1436 // If exceptions are enabled, do not use parameter to avoid possible crash.
1437 if (auto *OMPRegionInfo =
1438 dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo)) {
1439 if (OMPRegionInfo->getThreadIDVariable()) {
1440 // Check if this an outlined function with thread id passed as argument.
1441 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1442 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1443 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1444 !CGF.getLangOpts().CXXExceptions ||
1445 CGF.Builder.GetInsertBlock() == TopBlock ||
1446 !isa<llvm::Instruction>(Val: LVal.getPointer(CGF)) ||
1447 cast<llvm::Instruction>(Val: LVal.getPointer(CGF))->getParent() ==
1448 TopBlock ||
1449 cast<llvm::Instruction>(Val: LVal.getPointer(CGF))->getParent() ==
1450 CGF.Builder.GetInsertBlock()) {
1451 ThreadID = CGF.EmitLoadOfScalar(lvalue: LVal, Loc);
1452 // If value loaded in entry block, cache it and use it everywhere in
1453 // function.
1454 if (CGF.Builder.GetInsertBlock() == TopBlock)
1455 OpenMPLocThreadIDMap[CGF.CurFn].ThreadID = ThreadID;
1456 return ThreadID;
1457 }
1458 }
1459 }
1460
1461 // This is not an outlined function region - need to call __kmpc_int32
1462 // kmpc_global_thread_num(ident_t *loc).
1463 // Generate thread id value and cache this value for use across the
1464 // function.
1465 auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1466 if (!Elem.ServiceInsertPt)
1467 setLocThreadIdInsertPt(CGF);
1468 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1469 CGF.Builder.SetInsertPoint(Elem.ServiceInsertPt);
1470 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: Loc);
1471 llvm::CallInst *Call = CGF.Builder.CreateCall(
1472 Callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(),
1473 FnID: OMPRTL___kmpc_global_thread_num),
1474 Args: emitUpdateLocation(CGF, Loc));
1475 Call->setCallingConv(CGF.getRuntimeCC());
1476 Elem.ThreadID = Call;
1477 return Call;
1478}
1479
1480void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1481 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1482 if (OpenMPLocThreadIDMap.count(Val: CGF.CurFn)) {
1483 clearLocThreadIdInsertPt(CGF);
1484 OpenMPLocThreadIDMap.erase(Val: CGF.CurFn);
1485 }
1486 if (auto I = FunctionUDRMap.find(Val: CGF.CurFn); I != FunctionUDRMap.end()) {
1487 for (const auto *D : I->second)
1488 UDRMap.erase(Val: D);
1489 FunctionUDRMap.erase(I);
1490 }
1491 if (auto I = FunctionUDMMap.find(Val: CGF.CurFn); I != FunctionUDMMap.end()) {
1492 for (const auto *D : I->second)
1493 UDMMap.erase(Val: D);
1494 FunctionUDMMap.erase(I);
1495 }
1496 LastprivateConditionalToTypes.erase(Val: CGF.CurFn);
1497 FunctionToUntiedTaskStackMap.erase(Val: CGF.CurFn);
1498}
1499
1500llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1501 return OMPBuilder.IdentPtr;
1502}
1503
1504static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1505convertDeviceClause(const VarDecl *VD) {
1506 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1507 OMPDeclareTargetDeclAttr::getDeviceType(VD);
1508 if (!DevTy)
1509 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1510
1511 switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1512 case OMPDeclareTargetDeclAttr::DT_Host:
1513 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1514 break;
1515 case OMPDeclareTargetDeclAttr::DT_NoHost:
1516 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1517 break;
1518 case OMPDeclareTargetDeclAttr::DT_Any:
1519 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1520 break;
1521 default:
1522 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1523 break;
1524 }
1525}
1526
1527static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1528convertCaptureClause(const VarDecl *VD) {
1529 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1530 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1531 if (!MapType)
1532 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1533 switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1534 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1535 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1536 break;
1537 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1538 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1539 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1540 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1541 break;
1542 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Local:
1543 // MT_Local variables don't need offload entry (device-local).
1544 llvm_unreachable("MT_Local should not reach convertCaptureClause");
1545 break;
1546 default:
1547 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1548 break;
1549 }
1550}
1551
1552static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1553 CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1554 SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1555
1556 auto FileInfoCallBack = [&]() {
1557 SourceManager &SM = CGM.getContext().getSourceManager();
1558 PresumedLoc PLoc = SM.getPresumedLoc(Loc: BeginLoc);
1559
1560 if (!CGM.getFileSystem()->exists(Path: PLoc.getFilename()))
1561 PLoc = SM.getPresumedLoc(Loc: BeginLoc, /*UseLineDirectives=*/false);
1562
1563 return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1564 };
1565
1566 return OMPBuilder.getTargetEntryUniqueInfo(CallBack: FileInfoCallBack,
1567 VFS&: *CGM.getFileSystem(), ParentName);
1568}
1569
1570ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1571 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(GD: VD); };
1572
1573 auto LinkageForVariable = [&VD, this]() {
1574 return CGM.getLLVMLinkageVarDefinition(VD);
1575 };
1576
1577 std::vector<llvm::GlobalVariable *> GeneratedRefs;
1578
1579 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1580 T: CGM.getContext().getPointerType(T: VD->getType()));
1581 llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1582 CaptureClause: convertCaptureClause(VD), DeviceClause: convertDeviceClause(VD),
1583 IsDeclaration: VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
1584 IsExternallyVisible: VD->isExternallyVisible(),
1585 EntryInfo: getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
1586 BeginLoc: VD->getCanonicalDecl()->getBeginLoc()),
1587 MangledName: CGM.getMangledName(GD: VD), GeneratedRefs, OpenMPSIMD: CGM.getLangOpts().OpenMPSimd,
1588 TargetTriple: CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, GlobalInitializer: AddrOfGlobal,
1589 VariableLinkage: LinkageForVariable);
1590
1591 if (!addr)
1592 return ConstantAddress::invalid();
1593 return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(D: VD));
1594}
1595
1596llvm::Constant *
1597CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1598 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1599 !CGM.getContext().getTargetInfo().isTLSSupported());
1600 // Lookup the entry, lazily creating it if necessary.
1601 std::string Suffix = getName(Parts: {"cache", ""});
1602 return OMPBuilder.getOrCreateInternalVariable(
1603 Ty: CGM.Int8PtrPtrTy, Name: Twine(CGM.getMangledName(GD: VD)).concat(Suffix).str());
1604}
1605
1606Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1607 const VarDecl *VD,
1608 Address VDAddr,
1609 SourceLocation Loc) {
1610 if (CGM.getLangOpts().OpenMPUseTLS &&
1611 CGM.getContext().getTargetInfo().isTLSSupported())
1612 return VDAddr;
1613
1614 llvm::Type *VarTy = VDAddr.getElementType();
1615 llvm::Value *Args[] = {
1616 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1617 CGF.Builder.CreatePointerCast(V: VDAddr.emitRawPointer(CGF), DestTy: CGM.Int8PtrTy),
1618 CGM.getSize(numChars: CGM.GetTargetTypeStoreSize(Ty: VarTy)),
1619 getOrCreateThreadPrivateCache(VD)};
1620 return Address(
1621 CGF.EmitRuntimeCall(
1622 callee: OMPBuilder.getOrCreateRuntimeFunction(
1623 M&: CGM.getModule(), FnID: OMPRTL___kmpc_threadprivate_cached),
1624 args: Args),
1625 CGF.Int8Ty, VDAddr.getAlignment());
1626}
1627
1628void CGOpenMPRuntime::emitThreadPrivateVarInit(
1629 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1630 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1631 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1632 // library.
1633 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1634 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
1635 M&: CGM.getModule(), FnID: OMPRTL___kmpc_global_thread_num),
1636 args: OMPLoc);
1637 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1638 // to register constructor/destructor for variable.
1639 llvm::Value *Args[] = {
1640 OMPLoc,
1641 CGF.Builder.CreatePointerCast(V: VDAddr.emitRawPointer(CGF), DestTy: CGM.VoidPtrTy),
1642 Ctor, CopyCtor, Dtor};
1643 CGF.EmitRuntimeCall(
1644 callee: OMPBuilder.getOrCreateRuntimeFunction(
1645 M&: CGM.getModule(), FnID: OMPRTL___kmpc_threadprivate_register),
1646 args: Args);
1647}
1648
1649llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1650 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1651 bool PerformInit, CodeGenFunction *CGF) {
1652 if (CGM.getLangOpts().OpenMPUseTLS &&
1653 CGM.getContext().getTargetInfo().isTLSSupported())
1654 return nullptr;
1655
1656 VD = VD->getDefinition(C&: CGM.getContext());
1657 if (VD && ThreadPrivateWithDefinition.insert(key: CGM.getMangledName(GD: VD)).second) {
1658 QualType ASTTy = VD->getType();
1659
1660 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1661 const Expr *Init = VD->getAnyInitializer();
1662 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1663 // Generate function that re-emits the declaration's initializer into the
1664 // threadprivate copy of the variable VD
1665 CodeGenFunction CtorCGF(CGM);
1666 auto *Dst = ImplicitParamDecl::Create(
1667 C&: CGM.getContext(), /*DC=*/nullptr, IdLoc: Loc,
1668 /*Id=*/nullptr, T: CGM.getContext().VoidPtrTy, ParamKind: ImplicitParamKind::Other);
1669
1670 FunctionArgList Args{Dst};
1671 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1672 resultType: CGM.getContext().VoidPtrTy, args: Args);
1673 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(Info: FI);
1674 std::string Name = getName(Parts: {"__kmpc_global_ctor_", ""});
1675 llvm::Function *Fn =
1676 CGM.CreateGlobalInitOrCleanUpFunction(ty: FTy, name: Name, FI, Loc);
1677 CtorCGF.StartFunction(GD: GlobalDecl(), RetTy: CGM.getContext().VoidPtrTy, Fn, FnInfo: FI,
1678 Args, Loc, StartLoc: Loc);
1679 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1680 Addr: CtorCGF.GetAddrOfLocalVar(VD: Dst), /*Volatile=*/false,
1681 Ty: CGM.getContext().VoidPtrTy, Loc: Dst->getLocation());
1682 Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(T: ASTTy),
1683 VDAddr.getAlignment());
1684 CtorCGF.EmitAnyExprToMem(E: Init, Location: Arg, Quals: Init->getType().getQualifiers(),
1685 /*IsInitializer=*/true);
1686 ArgVal = CtorCGF.EmitLoadOfScalar(
1687 Addr: CtorCGF.GetAddrOfLocalVar(VD: Dst), /*Volatile=*/false,
1688 Ty: CGM.getContext().VoidPtrTy, Loc: Dst->getLocation());
1689 CtorCGF.Builder.CreateStore(Val: ArgVal, Addr: CtorCGF.ReturnValue);
1690 CtorCGF.FinishFunction();
1691 Ctor = Fn;
1692 }
1693 if (VD->getType().isDestructedType() != QualType::DK_none) {
1694 // Generate function that emits destructor call for the threadprivate copy
1695 // of the variable VD
1696 CodeGenFunction DtorCGF(CGM);
1697 auto *Dst = ImplicitParamDecl::Create(
1698 C&: CGM.getContext(), /*DC=*/nullptr, IdLoc: Loc,
1699 /*Id=*/nullptr, T: CGM.getContext().VoidPtrTy, ParamKind: ImplicitParamKind::Other);
1700
1701 FunctionArgList Args{Dst};
1702 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1703 resultType: CGM.getContext().VoidTy, args: Args);
1704 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(Info: FI);
1705 std::string Name = getName(Parts: {"__kmpc_global_dtor_", ""});
1706 llvm::Function *Fn =
1707 CGM.CreateGlobalInitOrCleanUpFunction(ty: FTy, name: Name, FI, Loc);
1708 auto NL = ApplyDebugLocation::CreateEmpty(CGF&: DtorCGF);
1709 DtorCGF.StartFunction(GD: GlobalDecl(), RetTy: CGM.getContext().VoidTy, Fn, FnInfo: FI, Args,
1710 Loc, StartLoc: Loc);
1711 // Create a scope with an artificial location for the body of this function.
1712 auto AL = ApplyDebugLocation::CreateArtificial(CGF&: DtorCGF);
1713 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1714 Addr: DtorCGF.GetAddrOfLocalVar(VD: Dst),
1715 /*Volatile=*/false, Ty: CGM.getContext().VoidPtrTy, Loc: Dst->getLocation());
1716 DtorCGF.emitDestroy(
1717 addr: Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), type: ASTTy,
1718 destroyer: DtorCGF.getDestroyer(destructionKind: ASTTy.isDestructedType()),
1719 useEHCleanupForArray: DtorCGF.needsEHCleanup(kind: ASTTy.isDestructedType()));
1720 DtorCGF.FinishFunction();
1721 Dtor = Fn;
1722 }
1723 // Do not emit init function if it is not required.
1724 if (!Ctor && !Dtor)
1725 return nullptr;
1726
1727 // Copying constructor for the threadprivate variable.
1728 // Must be NULL - reserved by runtime, but currently it requires that this
1729 // parameter is always NULL. Otherwise it fires assertion.
1730 CopyCtor = llvm::Constant::getNullValue(Ty: CGM.DefaultPtrTy);
1731 if (Ctor == nullptr) {
1732 Ctor = llvm::Constant::getNullValue(Ty: CGM.DefaultPtrTy);
1733 }
1734 if (Dtor == nullptr) {
1735 Dtor = llvm::Constant::getNullValue(Ty: CGM.DefaultPtrTy);
1736 }
1737 if (!CGF) {
1738 auto *InitFunctionTy =
1739 llvm::FunctionType::get(Result: CGM.VoidTy, /*isVarArg*/ false);
1740 std::string Name = getName(Parts: {"__omp_threadprivate_init_", ""});
1741 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1742 ty: InitFunctionTy, name: Name, FI: CGM.getTypes().arrangeNullaryFunction());
1743 CodeGenFunction InitCGF(CGM);
1744 FunctionArgList ArgList;
1745 InitCGF.StartFunction(GD: GlobalDecl(), RetTy: CGM.getContext().VoidTy, Fn: InitFunction,
1746 FnInfo: CGM.getTypes().arrangeNullaryFunction(), Args: ArgList,
1747 Loc, StartLoc: Loc);
1748 emitThreadPrivateVarInit(CGF&: InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1749 InitCGF.FinishFunction();
1750 return InitFunction;
1751 }
1752 emitThreadPrivateVarInit(CGF&: *CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1753 }
1754 return nullptr;
1755}
1756
1757void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
1758 llvm::GlobalValue *GV) {
1759 std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1760 OMPDeclareTargetDeclAttr::getActiveAttr(VD: FD);
1761
1762 // We only need to handle active 'indirect' declare target functions.
1763 if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1764 return;
1765
1766 // Get a mangled name to store the new device global in.
1767 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1768 CGM, OMPBuilder, BeginLoc: FD->getCanonicalDecl()->getBeginLoc(), ParentName: FD->getName());
1769 SmallString<128> Name;
1770 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1771
1772 // We need to generate a new global to hold the address of the indirectly
1773 // called device function. Doing this allows us to keep the visibility and
1774 // linkage of the associated function unchanged while allowing the runtime to
1775 // access its value.
1776 llvm::GlobalValue *Addr = GV;
1777 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1778 llvm::PointerType *FnPtrTy = llvm::PointerType::get(
1779 C&: CGM.getLLVMContext(),
1780 AddressSpace: CGM.getModule().getDataLayout().getProgramAddressSpace());
1781 Addr = new llvm::GlobalVariable(
1782 CGM.getModule(), FnPtrTy,
1783 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
1784 nullptr, llvm::GlobalValue::NotThreadLocal,
1785 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1786 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1787 }
1788
1789 // Register the indirect Vtable:
1790 // This is similar to OMPTargetGlobalVarEntryIndirect, except that the
1791 // size field refers to the size of memory pointed to, not the size of
1792 // the pointer symbol itself (which is implicitly the size of a pointer).
1793 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1794 VarName: Name, Addr, VarSize: CGM.GetTargetTypeStoreSize(Ty: CGM.VoidPtrTy).getQuantity(),
1795 Flags: llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
1796 Linkage: llvm::GlobalValue::WeakODRLinkage);
1797}
1798
1799void CGOpenMPRuntime::registerVTableOffloadEntry(llvm::GlobalVariable *VTable,
1800 const VarDecl *VD) {
1801 // TODO: add logic to avoid duplicate vtable registrations per
1802 // translation unit; though for external linkage, this should no
1803 // longer be an issue - or at least we can avoid the issue by
1804 // checking for an existing offloading entry. But, perhaps the
1805 // better approach is to defer emission of the vtables and offload
1806 // entries until later (by tracking a list of items that need to be
1807 // emitted).
1808
1809 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1810
1811 // Generate a new externally visible global to point to the
1812 // internally visible vtable. Doing this allows us to keep the
1813 // visibility and linkage of the associated vtable unchanged while
1814 // allowing the runtime to access its value. The externally
1815 // visible global var needs to be emitted with a unique mangled
1816 // name that won't conflict with similarly named (internal)
1817 // vtables in other translation units.
1818
1819 // Register vtable with source location of dynamic object in map
1820 // clause.
1821 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1822 CGM, OMPBuilder, BeginLoc: VD->getCanonicalDecl()->getBeginLoc(),
1823 ParentName: VTable->getName());
1824
1825 llvm::GlobalVariable *Addr = VTable;
1826 SmallString<128> AddrName;
1827 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name&: AddrName, EntryInfo);
1828 AddrName.append(RHS: "addr");
1829
1830 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1831 Addr = new llvm::GlobalVariable(
1832 CGM.getModule(), VTable->getType(),
1833 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, VTable,
1834 AddrName,
1835 /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
1836 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1837 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1838 }
1839 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1840 VarName: AddrName, Addr: VTable,
1841 VarSize: CGM.getDataLayout().getTypeAllocSize(Ty: VTable->getInitializer()->getType()),
1842 Flags: llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirectVTable,
1843 Linkage: llvm::GlobalValue::WeakODRLinkage);
1844}
1845
1846void CGOpenMPRuntime::emitAndRegisterVTable(CodeGenModule &CGM,
1847 CXXRecordDecl *CXXRecord,
1848 const VarDecl *VD) {
1849 // Register C++ VTable to OpenMP Offload Entry if it's a new
1850 // CXXRecordDecl.
1851 if (CXXRecord && CXXRecord->isDynamicClass() &&
1852 !CGM.getOpenMPRuntime().VTableDeclMap.contains(Val: CXXRecord)) {
1853 auto Res = CGM.getOpenMPRuntime().VTableDeclMap.try_emplace(Key: CXXRecord, Args&: VD);
1854 if (Res.second) {
1855 CGM.EmitVTable(Class: CXXRecord);
1856 CodeGenVTables VTables = CGM.getVTables();
1857 llvm::GlobalVariable *VTablesAddr = VTables.GetAddrOfVTable(RD: CXXRecord);
1858 assert(VTablesAddr && "Expected non-null VTable address");
1859 CGM.getOpenMPRuntime().registerVTableOffloadEntry(VTable: VTablesAddr, VD);
1860 // Emit VTable for all the fields containing dynamic CXXRecord
1861 for (const FieldDecl *Field : CXXRecord->fields()) {
1862 if (CXXRecordDecl *RecordDecl = Field->getType()->getAsCXXRecordDecl())
1863 emitAndRegisterVTable(CGM, CXXRecord: RecordDecl, VD);
1864 }
1865 // Emit VTable for all dynamic parent class
1866 for (CXXBaseSpecifier &Base : CXXRecord->bases()) {
1867 if (CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl())
1868 emitAndRegisterVTable(CGM, CXXRecord: BaseDecl, VD);
1869 }
1870 }
1871 }
1872}
1873
1874void CGOpenMPRuntime::registerVTable(const OMPExecutableDirective &D) {
1875 // Register VTable by scanning through the map clause of OpenMP target region.
1876 // Get CXXRecordDecl and VarDecl from Expr.
1877 auto GetVTableDecl = [](const Expr *E) {
1878 QualType VDTy = E->getType();
1879 CXXRecordDecl *CXXRecord = nullptr;
1880 if (const auto *RefType = VDTy->getAs<LValueReferenceType>())
1881 VDTy = RefType->getPointeeType();
1882 if (VDTy->isPointerType())
1883 CXXRecord = VDTy->getPointeeType()->getAsCXXRecordDecl();
1884 else
1885 CXXRecord = VDTy->getAsCXXRecordDecl();
1886
1887 const VarDecl *VD = nullptr;
1888 if (auto *DRE = dyn_cast<DeclRefExpr>(Val: E)) {
1889 VD = cast<VarDecl>(Val: DRE->getDecl());
1890 } else if (auto *MRE = dyn_cast<MemberExpr>(Val: E)) {
1891 if (auto *BaseDRE = dyn_cast<DeclRefExpr>(Val: MRE->getBase())) {
1892 if (auto *BaseVD = dyn_cast<VarDecl>(Val: BaseDRE->getDecl()))
1893 VD = BaseVD;
1894 }
1895 }
1896 return std::pair<CXXRecordDecl *, const VarDecl *>(CXXRecord, VD);
1897 };
1898 // Collect VTable from OpenMP map clause.
1899 for (const auto *C : D.getClausesOfKind<OMPMapClause>()) {
1900 for (const auto *E : C->varlist()) {
1901 auto DeclPair = GetVTableDecl(E);
1902 // Ensure VD is not null
1903 if (DeclPair.second)
1904 emitAndRegisterVTable(CGM, CXXRecord: DeclPair.first, VD: DeclPair.second);
1905 }
1906 }
1907}
1908
1909Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1910 QualType VarType,
1911 StringRef Name) {
1912 std::string Suffix = getName(Parts: {"artificial", ""});
1913 llvm::Type *VarLVType = CGF.ConvertTypeForMem(T: VarType);
1914 llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1915 Ty: VarLVType, Name: Twine(Name).concat(Suffix).str());
1916 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1917 CGM.getTarget().isTLSSupported()) {
1918 GAddr->setThreadLocal(/*Val=*/true);
1919 return Address(GAddr, GAddr->getValueType(),
1920 CGM.getContext().getTypeAlignInChars(T: VarType));
1921 }
1922 std::string CacheSuffix = getName(Parts: {"cache", ""});
1923 llvm::Value *Args[] = {
1924 emitUpdateLocation(CGF, Loc: SourceLocation()),
1925 getThreadID(CGF, Loc: SourceLocation()),
1926 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(V: GAddr, DestTy: CGM.VoidPtrTy),
1927 CGF.Builder.CreateIntCast(V: CGF.getTypeSize(Ty: VarType), DestTy: CGM.SizeTy,
1928 /*isSigned=*/false),
1929 OMPBuilder.getOrCreateInternalVariable(
1930 Ty: CGM.VoidPtrPtrTy,
1931 Name: Twine(Name).concat(Suffix).concat(Suffix: CacheSuffix).str())};
1932 return Address(
1933 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1934 V: CGF.EmitRuntimeCall(
1935 callee: OMPBuilder.getOrCreateRuntimeFunction(
1936 M&: CGM.getModule(), FnID: OMPRTL___kmpc_threadprivate_cached),
1937 args: Args),
1938 DestTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
1939 VarLVType, CGM.getContext().getTypeAlignInChars(T: VarType));
1940}
1941
1942void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1943 const RegionCodeGenTy &ThenGen,
1944 const RegionCodeGenTy &ElseGen) {
1945 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1946
1947 // If the condition constant folds and can be elided, try to avoid emitting
1948 // the condition and the dead arm of the if/else.
1949 bool CondConstant;
1950 if (CGF.ConstantFoldsToSimpleInteger(Cond, Result&: CondConstant)) {
1951 if (CondConstant)
1952 ThenGen(CGF);
1953 else
1954 ElseGen(CGF);
1955 return;
1956 }
1957
1958 // Otherwise, the condition did not fold, or we couldn't elide it. Just
1959 // emit the conditional branch.
1960 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock(name: "omp_if.then");
1961 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock(name: "omp_if.else");
1962 llvm::BasicBlock *ContBlock = CGF.createBasicBlock(name: "omp_if.end");
1963 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock: ThenBlock, FalseBlock: ElseBlock, /*TrueCount=*/0);
1964
1965 // Emit the 'then' code.
1966 CGF.EmitBlock(BB: ThenBlock);
1967 ThenGen(CGF);
1968 CGF.EmitBranch(Block: ContBlock);
1969 // Emit the 'else' code if present.
1970 // There is no need to emit line number for unconditional branch.
1971 (void)ApplyDebugLocation::CreateEmpty(CGF);
1972 CGF.EmitBlock(BB: ElseBlock);
1973 ElseGen(CGF);
1974 // There is no need to emit line number for unconditional branch.
1975 (void)ApplyDebugLocation::CreateEmpty(CGF);
1976 CGF.EmitBranch(Block: ContBlock);
1977 // Emit the continuation block for code after the if.
1978 CGF.EmitBlock(BB: ContBlock, /*IsFinished=*/true);
1979}
1980
1981void CGOpenMPRuntime::emitParallelCall(
1982 CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
1983 ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
1984 llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
1985 OpenMPSeverityClauseKind Severity, const Expr *Message) {
1986 if (!CGF.HaveInsertPoint())
1987 return;
1988 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1989 auto &M = CGM.getModule();
1990 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
1991 this](CodeGenFunction &CGF, PrePostActionTy &) {
1992 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1993 llvm::Value *Args[] = {
1994 RTLoc,
1995 CGF.Builder.getInt32(C: CapturedVars.size()), // Number of captured vars
1996 OutlinedFn};
1997 llvm::SmallVector<llvm::Value *, 16> RealArgs;
1998 RealArgs.append(in_start: std::begin(arr&: Args), in_end: std::end(arr&: Args));
1999 RealArgs.append(in_start: CapturedVars.begin(), in_end: CapturedVars.end());
2000
2001 llvm::FunctionCallee RTLFn =
2002 OMPBuilder.getOrCreateRuntimeFunction(M, FnID: OMPRTL___kmpc_fork_call);
2003 CGF.EmitRuntimeCall(callee: RTLFn, args: RealArgs);
2004 };
2005 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2006 this](CodeGenFunction &CGF, PrePostActionTy &) {
2007 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2008 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2009 // Build calls:
2010 // __kmpc_serialized_parallel(&Loc, GTid);
2011 llvm::Value *Args[] = {RTLoc, ThreadID};
2012 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
2013 M, FnID: OMPRTL___kmpc_serialized_parallel),
2014 args: Args);
2015
2016 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2017 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2018 RawAddress ZeroAddrBound =
2019 CGF.CreateDefaultAlignTempAlloca(Ty: CGF.Int32Ty,
2020 /*Name=*/".bound.zero.addr");
2021 CGF.Builder.CreateStore(Val: CGF.Builder.getInt32(/*C*/ 0), Addr: ZeroAddrBound);
2022 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2023 // ThreadId for serialized parallels is 0.
2024 OutlinedFnArgs.push_back(Elt: ThreadIDAddr.emitRawPointer(CGF));
2025 OutlinedFnArgs.push_back(Elt: ZeroAddrBound.getPointer());
2026 OutlinedFnArgs.append(in_start: CapturedVars.begin(), in_end: CapturedVars.end());
2027
2028 // Ensure we do not inline the function. This is trivially true for the ones
2029 // passed to __kmpc_fork_call but the ones called in serialized regions
2030 // could be inlined. This is not a perfect but it is closer to the invariant
2031 // we want, namely, every data environment starts with a new function.
2032 // TODO: We should pass the if condition to the runtime function and do the
2033 // handling there. Much cleaner code.
2034 OutlinedFn->removeFnAttr(Kind: llvm::Attribute::AlwaysInline);
2035 OutlinedFn->addFnAttr(Kind: llvm::Attribute::NoInline);
2036 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, Args: OutlinedFnArgs);
2037
2038 // __kmpc_end_serialized_parallel(&Loc, GTid);
2039 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2040 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
2041 M, FnID: OMPRTL___kmpc_end_serialized_parallel),
2042 args: EndArgs);
2043 };
2044 if (IfCond) {
2045 emitIfClause(CGF, Cond: IfCond, ThenGen, ElseGen);
2046 } else {
2047 RegionCodeGenTy ThenRCG(ThenGen);
2048 ThenRCG(CGF);
2049 }
2050}
2051
2052// If we're inside an (outlined) parallel region, use the region info's
2053// thread-ID variable (it is passed in a first argument of the outlined function
2054// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2055// regular serial code region, get thread ID by calling kmp_int32
2056// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2057// return the address of that temp.
2058Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2059 SourceLocation Loc) {
2060 if (auto *OMPRegionInfo =
2061 dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo))
2062 if (OMPRegionInfo->getThreadIDVariable())
2063 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2064
2065 llvm::Value *ThreadID = getThreadID(CGF, Loc);
2066 QualType Int32Ty =
2067 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2068 Address ThreadIDTemp =
2069 CGF.CreateMemTempWithoutCast(T: Int32Ty, /*Name*/ ".threadid_temp.");
2070 CGF.EmitStoreOfScalar(value: ThreadID,
2071 lvalue: CGF.MakeAddrLValue(Addr: ThreadIDTemp, T: Int32Ty));
2072
2073 return ThreadIDTemp;
2074}
2075
2076llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2077 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2078 std::string Name = getName(Parts: {Prefix, "var"});
2079 llvm::GlobalVariable *GV =
2080 OMPBuilder.getOrCreateInternalVariable(Ty: KmpCriticalNameTy, Name);
2081 CGM.setDSOLocal(GV);
2082 return GV;
2083}
2084
2085namespace {
2086/// Common pre(post)-action for different OpenMP constructs.
2087class CommonActionTy final : public PrePostActionTy {
2088 llvm::FunctionCallee EnterCallee;
2089 ArrayRef<llvm::Value *> EnterArgs;
2090 llvm::FunctionCallee ExitCallee;
2091 ArrayRef<llvm::Value *> ExitArgs;
2092 bool Conditional;
2093 llvm::BasicBlock *ContBlock = nullptr;
2094
2095public:
2096 CommonActionTy(llvm::FunctionCallee EnterCallee,
2097 ArrayRef<llvm::Value *> EnterArgs,
2098 llvm::FunctionCallee ExitCallee,
2099 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2100 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2101 ExitArgs(ExitArgs), Conditional(Conditional) {}
2102 void Enter(CodeGenFunction &CGF) override {
2103 llvm::Value *EnterRes = CGF.EmitRuntimeCall(callee: EnterCallee, args: EnterArgs);
2104 if (Conditional) {
2105 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(Arg: EnterRes);
2106 auto *ThenBlock = CGF.createBasicBlock(name: "omp_if.then");
2107 ContBlock = CGF.createBasicBlock(name: "omp_if.end");
2108 // Generate the branch (If-stmt)
2109 CGF.Builder.CreateCondBr(Cond: CallBool, True: ThenBlock, False: ContBlock);
2110 CGF.EmitBlock(BB: ThenBlock);
2111 }
2112 }
2113 void Done(CodeGenFunction &CGF) {
2114 // Emit the rest of blocks/branches
2115 CGF.EmitBranch(Block: ContBlock);
2116 CGF.EmitBlock(BB: ContBlock, IsFinished: true);
2117 }
2118 void Exit(CodeGenFunction &CGF) override {
2119 CGF.EmitRuntimeCall(callee: ExitCallee, args: ExitArgs);
2120 }
2121};
2122} // anonymous namespace
2123
2124void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2125 StringRef CriticalName,
2126 const RegionCodeGenTy &CriticalOpGen,
2127 SourceLocation Loc, const Expr *Hint) {
2128 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2129 // CriticalOpGen();
2130 // __kmpc_end_critical(ident_t *, gtid, Lock);
2131 // Prepare arguments and build a call to __kmpc_critical
2132 if (!CGF.HaveInsertPoint())
2133 return;
2134 llvm::FunctionCallee RuntimeFcn = OMPBuilder.getOrCreateRuntimeFunction(
2135 M&: CGM.getModule(),
2136 FnID: Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical);
2137 llvm::Value *LockVar = getCriticalRegionLock(CriticalName);
2138 unsigned LockVarArgIdx = 2;
2139 if (cast<llvm::GlobalVariable>(Val: LockVar)->getAddressSpace() !=
2140 RuntimeFcn.getFunctionType()
2141 ->getParamType(i: LockVarArgIdx)
2142 ->getPointerAddressSpace())
2143 LockVar = CGF.Builder.CreateAddrSpaceCast(
2144 V: LockVar, DestTy: RuntimeFcn.getFunctionType()->getParamType(i: LockVarArgIdx));
2145 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2146 LockVar};
2147 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(arr&: Args),
2148 std::end(arr&: Args));
2149 if (Hint) {
2150 EnterArgs.push_back(Elt: CGF.Builder.CreateIntCast(
2151 V: CGF.EmitScalarExpr(E: Hint), DestTy: CGM.Int32Ty, /*isSigned=*/false));
2152 }
2153 CommonActionTy Action(RuntimeFcn, EnterArgs,
2154 OMPBuilder.getOrCreateRuntimeFunction(
2155 M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_critical),
2156 Args);
2157 CriticalOpGen.setAction(Action);
2158 emitInlinedDirective(CGF, InnermostKind: OMPD_critical, CodeGen: CriticalOpGen);
2159}
2160
2161void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2162 const RegionCodeGenTy &MasterOpGen,
2163 SourceLocation Loc) {
2164 if (!CGF.HaveInsertPoint())
2165 return;
2166 // if(__kmpc_master(ident_t *, gtid)) {
2167 // MasterOpGen();
2168 // __kmpc_end_master(ident_t *, gtid);
2169 // }
2170 // Prepare arguments and build a call to __kmpc_master
2171 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2172 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2173 M&: CGM.getModule(), FnID: OMPRTL___kmpc_master),
2174 Args,
2175 OMPBuilder.getOrCreateRuntimeFunction(
2176 M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_master),
2177 Args,
2178 /*Conditional=*/true);
2179 MasterOpGen.setAction(Action);
2180 emitInlinedDirective(CGF, InnermostKind: OMPD_master, CodeGen: MasterOpGen);
2181 Action.Done(CGF);
2182}
2183
2184void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2185 const RegionCodeGenTy &MaskedOpGen,
2186 SourceLocation Loc, const Expr *Filter) {
2187 if (!CGF.HaveInsertPoint())
2188 return;
2189 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2190 // MaskedOpGen();
2191 // __kmpc_end_masked(iden_t *, gtid);
2192 // }
2193 // Prepare arguments and build a call to __kmpc_masked
2194 llvm::Value *FilterVal = Filter
2195 ? CGF.EmitScalarExpr(E: Filter, IgnoreResultAssign: CGF.Int32Ty)
2196 : llvm::ConstantInt::get(Ty: CGM.Int32Ty, /*V=*/0);
2197 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2198 FilterVal};
2199 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2200 getThreadID(CGF, Loc)};
2201 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2202 M&: CGM.getModule(), FnID: OMPRTL___kmpc_masked),
2203 Args,
2204 OMPBuilder.getOrCreateRuntimeFunction(
2205 M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_masked),
2206 ArgsEnd,
2207 /*Conditional=*/true);
2208 MaskedOpGen.setAction(Action);
2209 emitInlinedDirective(CGF, InnermostKind: OMPD_masked, CodeGen: MaskedOpGen);
2210 Action.Done(CGF);
2211}
2212
2213void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2214 SourceLocation Loc) {
2215 if (!CGF.HaveInsertPoint())
2216 return;
2217 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2218 OMPBuilder.createTaskyield(Loc: CGF.Builder);
2219 } else {
2220 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2221 llvm::Value *Args[] = {
2222 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2223 llvm::ConstantInt::get(Ty: CGM.IntTy, /*V=*/0, /*isSigned=*/IsSigned: true)};
2224 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
2225 M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_taskyield),
2226 args: Args);
2227 }
2228
2229 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo))
2230 Region->emitUntiedSwitch(CGF);
2231}
2232
2233void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2234 const RegionCodeGenTy &TaskgroupOpGen,
2235 SourceLocation Loc) {
2236 if (!CGF.HaveInsertPoint())
2237 return;
2238 // __kmpc_taskgroup(ident_t *, gtid);
2239 // TaskgroupOpGen();
2240 // __kmpc_end_taskgroup(ident_t *, gtid);
2241 // Prepare arguments and build a call to __kmpc_taskgroup
2242 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2243 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2244 M&: CGM.getModule(), FnID: OMPRTL___kmpc_taskgroup),
2245 Args,
2246 OMPBuilder.getOrCreateRuntimeFunction(
2247 M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_taskgroup),
2248 Args);
2249 TaskgroupOpGen.setAction(Action);
2250 emitInlinedDirective(CGF, InnermostKind: OMPD_taskgroup, CodeGen: TaskgroupOpGen);
2251}
2252
2253/// Given an array of pointers to variables, project the address of a
2254/// given variable.
2255static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2256 unsigned Index, const VarDecl *Var) {
2257 // Pull out the pointer to the variable.
2258 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Addr: Array, Index);
2259 llvm::Value *Ptr = CGF.Builder.CreateLoad(Addr: PtrAddr);
2260
2261 llvm::Type *ElemTy = CGF.ConvertTypeForMem(T: Var->getType());
2262 return Address(Ptr, ElemTy, CGF.getContext().getDeclAlign(D: Var));
2263}
2264
2265static llvm::Value *emitCopyprivateCopyFunction(
2266 CodeGenModule &CGM, llvm::Type *ArgsElemType,
2267 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2268 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2269 SourceLocation Loc) {
2270 ASTContext &C = CGM.getContext();
2271 // void copy_func(void *LHSArg, void *RHSArg);
2272
2273 auto *LHSArg =
2274 ImplicitParamDecl::Create(C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
2275 T: C.VoidPtrTy, ParamKind: ImplicitParamKind::Other);
2276 auto *RHSArg =
2277 ImplicitParamDecl::Create(C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
2278 T: C.VoidPtrTy, ParamKind: ImplicitParamKind::Other);
2279 FunctionArgList Args{LHSArg, RHSArg};
2280 const auto &CGFI =
2281 CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
2282 std::string Name =
2283 CGM.getOpenMPRuntime().getName(Parts: {"omp", "copyprivate", "copy_func"});
2284 auto *Fn = llvm::Function::Create(Ty: CGM.getTypes().GetFunctionType(Info: CGFI),
2285 Linkage: llvm::GlobalValue::InternalLinkage, N: Name,
2286 M: &CGM.getModule());
2287 CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: CGFI);
2288 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
2289 Fn->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
2290 Fn->setDoesNotRecurse();
2291 CodeGenFunction CGF(CGM);
2292 CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo: CGFI, Args, Loc, StartLoc: Loc);
2293 // Dest = (void*[n])(LHSArg);
2294 // Src = (void*[n])(RHSArg);
2295 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2296 V: CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(VD: LHSArg)),
2297 DestTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
2298 ArgsElemType, CGF.getPointerAlign());
2299 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2300 V: CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(VD: RHSArg)),
2301 DestTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
2302 ArgsElemType, CGF.getPointerAlign());
2303 // *(Type0*)Dst[0] = *(Type0*)Src[0];
2304 // *(Type1*)Dst[1] = *(Type1*)Src[1];
2305 // ...
2306 // *(Typen*)Dst[n] = *(Typen*)Src[n];
2307 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2308 const auto *DestVar =
2309 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: DestExprs[I])->getDecl());
2310 Address DestAddr = emitAddrOfVarFromArray(CGF, Array: LHS, Index: I, Var: DestVar);
2311
2312 const auto *SrcVar =
2313 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: SrcExprs[I])->getDecl());
2314 Address SrcAddr = emitAddrOfVarFromArray(CGF, Array: RHS, Index: I, Var: SrcVar);
2315
2316 const auto *VD = cast<DeclRefExpr>(Val: CopyprivateVars[I])->getDecl();
2317 QualType Type = VD->getType();
2318 CGF.EmitOMPCopy(OriginalType: Type, DestAddr, SrcAddr, DestVD: DestVar, SrcVD: SrcVar, Copy: AssignmentOps[I]);
2319 }
2320 CGF.FinishFunction();
2321 return Fn;
2322}
2323
2324void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2325 const RegionCodeGenTy &SingleOpGen,
2326 SourceLocation Loc,
2327 ArrayRef<const Expr *> CopyprivateVars,
2328 ArrayRef<const Expr *> SrcExprs,
2329 ArrayRef<const Expr *> DstExprs,
2330 ArrayRef<const Expr *> AssignmentOps) {
2331 if (!CGF.HaveInsertPoint())
2332 return;
2333 assert(CopyprivateVars.size() == SrcExprs.size() &&
2334 CopyprivateVars.size() == DstExprs.size() &&
2335 CopyprivateVars.size() == AssignmentOps.size());
2336 ASTContext &C = CGM.getContext();
2337 // int32 did_it = 0;
2338 // if(__kmpc_single(ident_t *, gtid)) {
2339 // SingleOpGen();
2340 // __kmpc_end_single(ident_t *, gtid);
2341 // did_it = 1;
2342 // }
2343 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2344 // <copy_func>, did_it);
2345
2346 Address DidIt = Address::invalid();
2347 if (!CopyprivateVars.empty()) {
2348 // int32 did_it = 0;
2349 QualType KmpInt32Ty =
2350 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2351 DidIt = CGF.CreateMemTempWithoutCast(T: KmpInt32Ty, Name: ".omp.copyprivate.did_it");
2352 CGF.Builder.CreateStore(Val: CGF.Builder.getInt32(C: 0), Addr: DidIt);
2353 }
2354 // Prepare arguments and build a call to __kmpc_single
2355 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2356 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2357 M&: CGM.getModule(), FnID: OMPRTL___kmpc_single),
2358 Args,
2359 OMPBuilder.getOrCreateRuntimeFunction(
2360 M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_single),
2361 Args,
2362 /*Conditional=*/true);
2363 SingleOpGen.setAction(Action);
2364 emitInlinedDirective(CGF, InnermostKind: OMPD_single, CodeGen: SingleOpGen);
2365 if (DidIt.isValid()) {
2366 // did_it = 1;
2367 CGF.Builder.CreateStore(Val: CGF.Builder.getInt32(C: 1), Addr: DidIt);
2368 }
2369 Action.Done(CGF);
2370 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2371 // <copy_func>, did_it);
2372 if (DidIt.isValid()) {
2373 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2374 QualType CopyprivateArrayTy = C.getConstantArrayType(
2375 EltTy: C.VoidPtrTy, ArySize: ArraySize, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal,
2376 /*IndexTypeQuals=*/0);
2377 // Create a list of all private variables for copyprivate.
2378 Address CopyprivateList = CGF.CreateMemTempWithoutCast(
2379 T: CopyprivateArrayTy, Name: ".omp.copyprivate.cpr_list");
2380 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2381 Address Elem = CGF.Builder.CreateConstArrayGEP(Addr: CopyprivateList, Index: I);
2382 CGF.Builder.CreateStore(
2383 Val: CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2384 V: CGF.EmitLValue(E: CopyprivateVars[I]).getPointer(CGF),
2385 DestTy: CGF.VoidPtrTy),
2386 Addr: Elem);
2387 }
2388 // Build function that copies private values from single region to all other
2389 // threads in the corresponding parallel region.
2390 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2391 CGM, ArgsElemType: CGF.ConvertTypeForMem(T: CopyprivateArrayTy), CopyprivateVars,
2392 DestExprs: SrcExprs, SrcExprs: DstExprs, AssignmentOps, Loc);
2393 llvm::Value *BufSize = CGF.getTypeSize(Ty: CopyprivateArrayTy);
2394 Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2395 Addr: CopyprivateList, Ty: CGF.VoidPtrTy, ElementTy: CGF.Int8Ty);
2396 llvm::Value *DidItVal = CGF.Builder.CreateLoad(Addr: DidIt);
2397 llvm::Value *Args[] = {
2398 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2399 getThreadID(CGF, Loc), // i32 <gtid>
2400 BufSize, // size_t <buf_size>
2401 CL.emitRawPointer(CGF), // void *<copyprivate list>
2402 CpyFn, // void (*) (void *, void *) <copy_func>
2403 DidItVal // i32 did_it
2404 };
2405 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
2406 M&: CGM.getModule(), FnID: OMPRTL___kmpc_copyprivate),
2407 args: Args);
2408 }
2409}
2410
2411void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2412 const RegionCodeGenTy &OrderedOpGen,
2413 SourceLocation Loc, bool IsThreads) {
2414 if (!CGF.HaveInsertPoint())
2415 return;
2416 // __kmpc_ordered(ident_t *, gtid);
2417 // OrderedOpGen();
2418 // __kmpc_end_ordered(ident_t *, gtid);
2419 // Prepare arguments and build a call to __kmpc_ordered
2420 if (IsThreads) {
2421 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2422 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2423 M&: CGM.getModule(), FnID: OMPRTL___kmpc_ordered),
2424 Args,
2425 OMPBuilder.getOrCreateRuntimeFunction(
2426 M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_ordered),
2427 Args);
2428 OrderedOpGen.setAction(Action);
2429 emitInlinedDirective(CGF, InnermostKind: OMPD_ordered, CodeGen: OrderedOpGen);
2430 return;
2431 }
2432 emitInlinedDirective(CGF, InnermostKind: OMPD_ordered, CodeGen: OrderedOpGen);
2433}
2434
2435unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2436 unsigned Flags;
2437 if (Kind == OMPD_for)
2438 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2439 else if (Kind == OMPD_sections)
2440 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2441 else if (Kind == OMPD_single)
2442 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2443 else if (Kind == OMPD_barrier)
2444 Flags = OMP_IDENT_BARRIER_EXPL;
2445 else
2446 Flags = OMP_IDENT_BARRIER_IMPL;
2447 return Flags;
2448}
2449
2450void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2451 CodeGenFunction &CGF, const OMPLoopDirective &S,
2452 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2453 // Check if the loop directive is actually a doacross loop directive. In this
2454 // case choose static, 1 schedule.
2455 if (llvm::any_of(
2456 Range: S.getClausesOfKind<OMPOrderedClause>(),
2457 P: [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2458 ScheduleKind = OMPC_SCHEDULE_static;
2459 // Chunk size is 1 in this case.
2460 llvm::APInt ChunkSize(32, 1);
2461 ChunkExpr = IntegerLiteral::Create(
2462 C: CGF.getContext(), V: ChunkSize,
2463 type: CGF.getContext().getIntTypeForBitwidth(DestWidth: 32, /*Signed=*/0),
2464 l: SourceLocation());
2465 }
2466}
2467
2468void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2469 OpenMPDirectiveKind Kind, bool EmitChecks,
2470 bool ForceSimpleCall) {
2471 // Check if we should use the OMPBuilder
2472 auto *OMPRegionInfo =
2473 dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo);
2474 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2475 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
2476 cantFail(ValOrErr: OMPBuilder.createBarrier(Loc: CGF.Builder, Kind, ForceSimpleCall,
2477 CheckCancelFlag: EmitChecks));
2478 CGF.Builder.restoreIP(IP: AfterIP);
2479 return;
2480 }
2481
2482 if (!CGF.HaveInsertPoint())
2483 return;
2484 // Build call __kmpc_cancel_barrier(loc, thread_id);
2485 // Build call __kmpc_barrier(loc, thread_id);
2486 unsigned Flags = getDefaultFlagsForBarriers(Kind);
2487 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2488 // thread_id);
2489 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2490 getThreadID(CGF, Loc)};
2491 if (OMPRegionInfo) {
2492 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2493 llvm::Value *Result = CGF.EmitRuntimeCall(
2494 callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(),
2495 FnID: OMPRTL___kmpc_cancel_barrier),
2496 args: Args);
2497 if (EmitChecks) {
2498 // if (__kmpc_cancel_barrier()) {
2499 // exit from construct;
2500 // }
2501 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: ".cancel.exit");
2502 llvm::BasicBlock *ContBB = CGF.createBasicBlock(name: ".cancel.continue");
2503 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Arg: Result);
2504 CGF.Builder.CreateCondBr(Cond: Cmp, True: ExitBB, False: ContBB);
2505 CGF.EmitBlock(BB: ExitBB);
2506 // exit from construct;
2507 CodeGenFunction::JumpDest CancelDestination =
2508 CGF.getOMPCancelDestination(Kind: OMPRegionInfo->getDirectiveKind());
2509 CGF.EmitBranchThroughCleanup(Dest: CancelDestination);
2510 CGF.EmitBlock(BB: ContBB, /*IsFinished=*/true);
2511 }
2512 return;
2513 }
2514 }
2515 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
2516 M&: CGM.getModule(), FnID: OMPRTL___kmpc_barrier),
2517 args: Args);
2518}
2519
2520void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2521 Expr *ME, bool IsFatal) {
2522 llvm::Value *MVL = ME ? CGF.EmitScalarExpr(E: ME)
2523 : llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
2524 // Build call void __kmpc_error(ident_t *loc, int severity, const char
2525 // *message)
2526 llvm::Value *Args[] = {
2527 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/EmitLoc: true),
2528 llvm::ConstantInt::get(Ty: CGM.Int32Ty, V: IsFatal ? 2 : 1),
2529 CGF.Builder.CreatePointerCast(V: MVL, DestTy: CGM.Int8PtrTy)};
2530 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
2531 M&: CGM.getModule(), FnID: OMPRTL___kmpc_error),
2532 args: Args);
2533}
2534
2535/// Map the OpenMP loop schedule to the runtime enumeration.
2536static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2537 bool Chunked, bool Ordered) {
2538 switch (ScheduleKind) {
2539 case OMPC_SCHEDULE_static:
2540 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2541 : (Ordered ? OMP_ord_static : OMP_sch_static);
2542 case OMPC_SCHEDULE_dynamic:
2543 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2544 case OMPC_SCHEDULE_guided:
2545 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2546 case OMPC_SCHEDULE_runtime:
2547 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2548 case OMPC_SCHEDULE_auto:
2549 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2550 case OMPC_SCHEDULE_unknown:
2551 assert(!Chunked && "chunk was specified but schedule kind not known");
2552 return Ordered ? OMP_ord_static : OMP_sch_static;
2553 }
2554 llvm_unreachable("Unexpected runtime schedule");
2555}
2556
2557/// Map the OpenMP distribute schedule to the runtime enumeration.
2558static OpenMPSchedType
2559getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2560 // only static is allowed for dist_schedule
2561 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2562}
2563
2564bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2565 bool Chunked) const {
2566 OpenMPSchedType Schedule =
2567 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2568 return Schedule == OMP_sch_static;
2569}
2570
2571bool CGOpenMPRuntime::isStaticNonchunked(
2572 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2573 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2574 return Schedule == OMP_dist_sch_static;
2575}
2576
2577bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2578 bool Chunked) const {
2579 OpenMPSchedType Schedule =
2580 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2581 return Schedule == OMP_sch_static_chunked;
2582}
2583
2584bool CGOpenMPRuntime::isStaticChunked(
2585 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2586 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2587 return Schedule == OMP_dist_sch_static_chunked;
2588}
2589
2590bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2591 OpenMPSchedType Schedule =
2592 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2593 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2594 return Schedule != OMP_sch_static;
2595}
2596
2597static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2598 OpenMPScheduleClauseModifier M1,
2599 OpenMPScheduleClauseModifier M2) {
2600 int Modifier = 0;
2601 switch (M1) {
2602 case OMPC_SCHEDULE_MODIFIER_monotonic:
2603 Modifier = OMP_sch_modifier_monotonic;
2604 break;
2605 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2606 Modifier = OMP_sch_modifier_nonmonotonic;
2607 break;
2608 case OMPC_SCHEDULE_MODIFIER_simd:
2609 if (Schedule == OMP_sch_static_chunked)
2610 Schedule = OMP_sch_static_balanced_chunked;
2611 break;
2612 case OMPC_SCHEDULE_MODIFIER_last:
2613 case OMPC_SCHEDULE_MODIFIER_unknown:
2614 break;
2615 }
2616 switch (M2) {
2617 case OMPC_SCHEDULE_MODIFIER_monotonic:
2618 Modifier = OMP_sch_modifier_monotonic;
2619 break;
2620 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2621 Modifier = OMP_sch_modifier_nonmonotonic;
2622 break;
2623 case OMPC_SCHEDULE_MODIFIER_simd:
2624 if (Schedule == OMP_sch_static_chunked)
2625 Schedule = OMP_sch_static_balanced_chunked;
2626 break;
2627 case OMPC_SCHEDULE_MODIFIER_last:
2628 case OMPC_SCHEDULE_MODIFIER_unknown:
2629 break;
2630 }
2631 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2632 // If the static schedule kind is specified or if the ordered clause is
2633 // specified, and if the nonmonotonic modifier is not specified, the effect is
2634 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2635 // modifier is specified, the effect is as if the nonmonotonic modifier is
2636 // specified.
2637 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2638 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2639 Schedule == OMP_sch_static_balanced_chunked ||
2640 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2641 Schedule == OMP_dist_sch_static_chunked ||
2642 Schedule == OMP_dist_sch_static ||
2643 Schedule == OMP_dist_sch_static_chunked_sch_static_chunkone))
2644 Modifier = OMP_sch_modifier_nonmonotonic;
2645 }
2646 return Schedule | Modifier;
2647}
2648
2649void CGOpenMPRuntime::emitForDispatchInit(
2650 CodeGenFunction &CGF, SourceLocation Loc,
2651 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2652 bool Ordered, const DispatchRTInput &DispatchValues) {
2653 if (!CGF.HaveInsertPoint())
2654 return;
2655 OpenMPSchedType Schedule = getRuntimeSchedule(
2656 ScheduleKind: ScheduleKind.Schedule, Chunked: DispatchValues.Chunk != nullptr, Ordered);
2657 assert(Ordered ||
2658 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2659 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2660 Schedule != OMP_sch_static_balanced_chunked));
2661 // Call __kmpc_dispatch_init(
2662 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2663 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2664 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2665
2666 // If the Chunk was not specified in the clause - use default value 1.
2667 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2668 : CGF.Builder.getIntN(N: IVSize, C: 1);
2669 llvm::Value *Args[] = {
2670 emitUpdateLocation(CGF, Loc),
2671 getThreadID(CGF, Loc),
2672 CGF.Builder.getInt32(C: addMonoNonMonoModifier(
2673 CGM, Schedule, M1: ScheduleKind.M1, M2: ScheduleKind.M2)), // Schedule type
2674 DispatchValues.LB, // Lower
2675 DispatchValues.UB, // Upper
2676 CGF.Builder.getIntN(N: IVSize, C: 1), // Stride
2677 Chunk // Chunk
2678 };
2679 CGF.EmitRuntimeCall(callee: OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
2680 args: Args);
2681}
2682
2683void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
2684 SourceLocation Loc) {
2685 if (!CGF.HaveInsertPoint())
2686 return;
2687 // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
2688 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2689 CGF.EmitRuntimeCall(callee: OMPBuilder.createDispatchDeinitFunction(), args: Args);
2690}
2691
2692static void emitForStaticInitCall(
2693 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2694 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2695 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2696 const CGOpenMPRuntime::StaticRTInput &Values) {
2697 if (!CGF.HaveInsertPoint())
2698 return;
2699
2700 assert(!Values.Ordered);
2701 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2702 Schedule == OMP_sch_static_balanced_chunked ||
2703 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2704 Schedule == OMP_dist_sch_static ||
2705 Schedule == OMP_dist_sch_static_chunked ||
2706 Schedule == OMP_dist_sch_static_chunked_sch_static_chunkone);
2707
2708 // Call __kmpc_for_static_init(
2709 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2710 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2711 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2712 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2713 llvm::Value *Chunk = Values.Chunk;
2714 if (Chunk == nullptr) {
2715 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2716 Schedule == OMP_dist_sch_static) &&
2717 "expected static non-chunked schedule");
2718 // If the Chunk was not specified in the clause - use default value 1.
2719 Chunk = CGF.Builder.getIntN(N: Values.IVSize, C: 1);
2720 } else {
2721 assert((Schedule == OMP_sch_static_chunked ||
2722 Schedule == OMP_sch_static_balanced_chunked ||
2723 Schedule == OMP_ord_static_chunked ||
2724 Schedule == OMP_dist_sch_static_chunked ||
2725 Schedule == OMP_dist_sch_static_chunked_sch_static_chunkone) &&
2726 "expected static chunked schedule");
2727 }
2728 llvm::Value *Args[] = {
2729 UpdateLocation,
2730 ThreadId,
2731 CGF.Builder.getInt32(C: addMonoNonMonoModifier(CGM&: CGF.CGM, Schedule, M1,
2732 M2)), // Schedule type
2733 Values.IL.emitRawPointer(CGF), // &isLastIter
2734 Values.LB.emitRawPointer(CGF), // &LB
2735 Values.UB.emitRawPointer(CGF), // &UB
2736 Values.ST.emitRawPointer(CGF), // &Stride
2737 CGF.Builder.getIntN(N: Values.IVSize, C: 1), // Incr
2738 Chunk // Chunk
2739 };
2740 CGF.EmitRuntimeCall(callee: ForStaticInitFunction, args: Args);
2741}
2742
2743void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2744 SourceLocation Loc,
2745 OpenMPDirectiveKind DKind,
2746 const OpenMPScheduleTy &ScheduleKind,
2747 const StaticRTInput &Values) {
2748 OpenMPSchedType ScheduleNum =
2749 ScheduleKind.UseFusedDistChunkSchedule
2750 ? OMP_dist_sch_static_chunked_sch_static_chunkone
2751 : getRuntimeSchedule(ScheduleKind: ScheduleKind.Schedule, Chunked: Values.Chunk != nullptr,
2752 Ordered: Values.Ordered);
2753 assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2754 "Expected loop-based or sections-based directive.");
2755 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2756 Flags: isOpenMPLoopDirective(DKind)
2757 ? OMP_IDENT_WORK_LOOP
2758 : OMP_IDENT_WORK_SECTIONS);
2759 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2760 llvm::FunctionCallee StaticInitFunction =
2761 OMPBuilder.createForStaticInitFunction(IVSize: Values.IVSize, IVSigned: Values.IVSigned,
2762 IsGPUDistribute: false);
2763 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: Loc);
2764 emitForStaticInitCall(CGF, UpdateLocation: UpdatedLocation, ThreadId, ForStaticInitFunction: StaticInitFunction,
2765 Schedule: ScheduleNum, M1: ScheduleKind.M1, M2: ScheduleKind.M2, Values);
2766}
2767
2768void CGOpenMPRuntime::emitDistributeStaticInit(
2769 CodeGenFunction &CGF, SourceLocation Loc,
2770 OpenMPDistScheduleClauseKind SchedKind,
2771 const CGOpenMPRuntime::StaticRTInput &Values) {
2772 OpenMPSchedType ScheduleNum =
2773 getRuntimeSchedule(ScheduleKind: SchedKind, Chunked: Values.Chunk != nullptr);
2774 llvm::Value *UpdatedLocation =
2775 emitUpdateLocation(CGF, Loc, Flags: OMP_IDENT_WORK_DISTRIBUTE);
2776 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2777 llvm::FunctionCallee StaticInitFunction;
2778 bool isGPUDistribute =
2779 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU();
2780 StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2781 IVSize: Values.IVSize, IVSigned: Values.IVSigned, IsGPUDistribute: isGPUDistribute);
2782
2783 emitForStaticInitCall(CGF, UpdateLocation: UpdatedLocation, ThreadId, ForStaticInitFunction: StaticInitFunction,
2784 Schedule: ScheduleNum, M1: OMPC_SCHEDULE_MODIFIER_unknown,
2785 M2: OMPC_SCHEDULE_MODIFIER_unknown, Values);
2786}
2787
2788void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2789 SourceLocation Loc,
2790 OpenMPDirectiveKind DKind) {
2791 assert((DKind == OMPD_distribute || DKind == OMPD_for ||
2792 DKind == OMPD_sections) &&
2793 "Expected distribute, for, or sections directive kind");
2794 if (!CGF.HaveInsertPoint())
2795 return;
2796 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2797 llvm::Value *Args[] = {
2798 emitUpdateLocation(CGF, Loc,
2799 Flags: isOpenMPDistributeDirective(DKind) ||
2800 (DKind == OMPD_target_teams_loop)
2801 ? OMP_IDENT_WORK_DISTRIBUTE
2802 : isOpenMPLoopDirective(DKind)
2803 ? OMP_IDENT_WORK_LOOP
2804 : OMP_IDENT_WORK_SECTIONS),
2805 getThreadID(CGF, Loc)};
2806 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: Loc);
2807 if (isOpenMPDistributeDirective(DKind) &&
2808 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU())
2809 CGF.EmitRuntimeCall(
2810 callee: OMPBuilder.getOrCreateRuntimeFunction(
2811 M&: CGM.getModule(), FnID: OMPRTL___kmpc_distribute_static_fini),
2812 args: Args);
2813 else
2814 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
2815 M&: CGM.getModule(), FnID: OMPRTL___kmpc_for_static_fini),
2816 args: Args);
2817}
2818
2819void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2820 SourceLocation Loc,
2821 unsigned IVSize,
2822 bool IVSigned) {
2823 if (!CGF.HaveInsertPoint())
2824 return;
2825 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2826 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2827 CGF.EmitRuntimeCall(callee: OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2828 args: Args);
2829}
2830
2831llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2832 SourceLocation Loc, unsigned IVSize,
2833 bool IVSigned, Address IL,
2834 Address LB, Address UB,
2835 Address ST) {
2836 // Call __kmpc_dispatch_next(
2837 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2838 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2839 // kmp_int[32|64] *p_stride);
2840 llvm::Value *Args[] = {
2841 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2842 IL.emitRawPointer(CGF), // &isLastIter
2843 LB.emitRawPointer(CGF), // &Lower
2844 UB.emitRawPointer(CGF), // &Upper
2845 ST.emitRawPointer(CGF) // &Stride
2846 };
2847 llvm::Value *Call = CGF.EmitRuntimeCall(
2848 callee: OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), args: Args);
2849 return CGF.EmitScalarConversion(
2850 Src: Call, SrcTy: CGF.getContext().getIntTypeForBitwidth(DestWidth: 32, /*Signed=*/1),
2851 DstTy: CGF.getContext().BoolTy, Loc);
2852}
2853
2854llvm::Value *CGOpenMPRuntime::emitMessageClause(CodeGenFunction &CGF,
2855 const Expr *Message,
2856 SourceLocation Loc) {
2857 if (!Message)
2858 return llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
2859 return CGF.EmitScalarExpr(E: Message);
2860}
2861
2862llvm::Value *
2863CGOpenMPRuntime::emitSeverityClause(OpenMPSeverityClauseKind Severity,
2864 SourceLocation Loc) {
2865 // OpenMP 6.0, 10.4: "If no severity clause is specified then the effect is
2866 // as if sev-level is fatal."
2867 return llvm::ConstantInt::get(Ty: CGM.Int32Ty,
2868 V: Severity == OMPC_SEVERITY_warning ? 1 : 2);
2869}
2870
2871void CGOpenMPRuntime::emitNumThreadsClause(
2872 CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
2873 OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity,
2874 SourceLocation SeverityLoc, const Expr *Message,
2875 SourceLocation MessageLoc) {
2876 if (!CGF.HaveInsertPoint())
2877 return;
2878 llvm::SmallVector<llvm::Value *, 4> Args(
2879 {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2880 CGF.Builder.CreateIntCast(V: NumThreads, DestTy: CGF.Int32Ty, /*isSigned*/ true)});
2881 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2882 // or __kmpc_push_num_threads_strict(&loc, global_tid, num_threads, severity,
2883 // messsage) if strict modifier is used.
2884 RuntimeFunction FnID = OMPRTL___kmpc_push_num_threads;
2885 if (Modifier == OMPC_NUMTHREADS_strict) {
2886 FnID = OMPRTL___kmpc_push_num_threads_strict;
2887 Args.push_back(Elt: emitSeverityClause(Severity, Loc: SeverityLoc));
2888 Args.push_back(Elt: emitMessageClause(CGF, Message, Loc: MessageLoc));
2889 }
2890 CGF.EmitRuntimeCall(
2891 callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(), FnID), args: Args);
2892}
2893
2894void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2895 ProcBindKind ProcBind,
2896 SourceLocation Loc) {
2897 if (!CGF.HaveInsertPoint())
2898 return;
2899 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2900 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2901 llvm::Value *Args[] = {
2902 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2903 llvm::ConstantInt::get(Ty: CGM.IntTy, V: unsigned(ProcBind), /*isSigned=*/IsSigned: true)};
2904 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
2905 M&: CGM.getModule(), FnID: OMPRTL___kmpc_push_proc_bind),
2906 args: Args);
2907}
2908
2909void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2910 SourceLocation Loc, llvm::AtomicOrdering AO) {
2911 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2912 OMPBuilder.createFlush(Loc: CGF.Builder);
2913 } else {
2914 if (!CGF.HaveInsertPoint())
2915 return;
2916 // Build call void __kmpc_flush(ident_t *loc)
2917 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
2918 M&: CGM.getModule(), FnID: OMPRTL___kmpc_flush),
2919 args: emitUpdateLocation(CGF, Loc));
2920 }
2921}
2922
namespace {
/// Indexes of fields for type kmp_task_t.
///
/// The enumerators carry no explicit values, so they number the fields
/// consecutively starting at 0 in the order listed here.
/// NOTE(review): this order presumably has to match the order in which the
/// kmp_task_t record's fields are created elsewhere in this file -- confirm
/// before reordering or inserting entries.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2948
2949void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2950 // If we are in simd mode or there are no entries, we don't need to do
2951 // anything.
2952 if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2953 return;
2954
2955 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2956 [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2957 const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2958 SourceLocation Loc;
2959 if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2960 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2961 E = CGM.getContext().getSourceManager().fileinfo_end();
2962 I != E; ++I) {
2963 if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
2964 I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
2965 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2966 SourceFile: I->getFirst(), Line: EntryInfo.Line, Col: 1);
2967 break;
2968 }
2969 }
2970 }
2971 switch (Kind) {
2972 case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2973 CGM.getDiags().Report(Loc,
2974 DiagID: diag::err_target_region_offloading_entry_incorrect)
2975 << EntryInfo.ParentName;
2976 } break;
2977 case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2978 CGM.getDiags().Report(
2979 Loc, DiagID: diag::err_target_var_offloading_entry_incorrect_with_parent)
2980 << EntryInfo.ParentName;
2981 } break;
2982 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2983 CGM.getDiags().Report(DiagID: diag::err_target_var_offloading_entry_incorrect);
2984 } break;
2985 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR: {
2986 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2987 L: DiagnosticsEngine::Error, FormatString: "Offloading entry for indirect declare "
2988 "target variable is incorrect: the "
2989 "address is invalid.");
2990 CGM.getDiags().Report(DiagID);
2991 } break;
2992 }
2993 };
2994
2995 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFunction&: ErrorReportFn);
2996}
2997
2998void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2999 if (!KmpRoutineEntryPtrTy) {
3000 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3001 ASTContext &C = CGM.getContext();
3002 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3003 FunctionProtoType::ExtProtoInfo EPI;
3004 KmpRoutineEntryPtrQTy = C.getPointerType(
3005 T: C.getFunctionType(ResultTy: KmpInt32Ty, Args: KmpRoutineEntryTyArgs, EPI));
3006 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(T: KmpRoutineEntryPtrQTy);
3007 }
3008}
3009
3010namespace {
3011struct PrivateHelpersTy {
3012 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3013 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3014 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3015 PrivateElemInit(PrivateElemInit) {}
3016 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3017 const Expr *OriginalRef = nullptr;
3018 const VarDecl *Original = nullptr;
3019 const VarDecl *PrivateCopy = nullptr;
3020 const VarDecl *PrivateElemInit = nullptr;
3021 bool isLocalPrivate() const {
3022 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3023 }
3024};
3025typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3026} // anonymous namespace
3027
3028static bool isAllocatableDecl(const VarDecl *VD) {
3029 const VarDecl *CVD = VD->getCanonicalDecl();
3030 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3031 return false;
3032 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3033 // Use the default allocation.
3034 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3035 !AA->getAllocator());
3036}
3037
3038static RecordDecl *
3039createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3040 if (!Privates.empty()) {
3041 ASTContext &C = CGM.getContext();
3042 // Build struct .kmp_privates_t. {
3043 // /* private vars */
3044 // };
3045 RecordDecl *RD = C.buildImplicitRecord(Name: ".kmp_privates.t");
3046 RD->startDefinition();
3047 for (const auto &Pair : Privates) {
3048 const VarDecl *VD = Pair.second.Original;
3049 QualType Type = VD->getType().getNonReferenceType();
3050 // If the private variable is a local variable with lvalue ref type,
3051 // allocate the pointer instead of the pointee type.
3052 if (Pair.second.isLocalPrivate()) {
3053 if (VD->getType()->isLValueReferenceType())
3054 Type = C.getPointerType(T: Type);
3055 if (isAllocatableDecl(VD))
3056 Type = C.getPointerType(T: Type);
3057 }
3058 FieldDecl *FD = addFieldToRecordDecl(C, DC: RD, FieldTy: Type);
3059 if (VD->hasAttrs()) {
3060 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3061 E(VD->getAttrs().end());
3062 I != E; ++I)
3063 FD->addAttr(A: *I);
3064 }
3065 }
3066 RD->completeDefinition();
3067 return RD;
3068 }
3069 return nullptr;
3070}
3071
3072static RecordDecl *
3073createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3074 QualType KmpInt32Ty,
3075 QualType KmpRoutineEntryPointerQTy) {
3076 ASTContext &C = CGM.getContext();
3077 // Build struct kmp_task_t {
3078 // void * shareds;
3079 // kmp_routine_entry_t routine;
3080 // kmp_int32 part_id;
3081 // kmp_cmplrdata_t data1;
3082 // kmp_cmplrdata_t data2;
3083 // For taskloops additional fields:
3084 // kmp_uint64 lb;
3085 // kmp_uint64 ub;
3086 // kmp_int64 st;
3087 // kmp_int32 liter;
3088 // void * reductions;
3089 // };
3090 RecordDecl *UD = C.buildImplicitRecord(Name: "kmp_cmplrdata_t", TK: TagTypeKind::Union);
3091 UD->startDefinition();
3092 addFieldToRecordDecl(C, DC: UD, FieldTy: KmpInt32Ty);
3093 addFieldToRecordDecl(C, DC: UD, FieldTy: KmpRoutineEntryPointerQTy);
3094 UD->completeDefinition();
3095 CanQualType KmpCmplrdataTy = C.getCanonicalTagType(TD: UD);
3096 RecordDecl *RD = C.buildImplicitRecord(Name: "kmp_task_t");
3097 RD->startDefinition();
3098 addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
3099 addFieldToRecordDecl(C, DC: RD, FieldTy: KmpRoutineEntryPointerQTy);
3100 addFieldToRecordDecl(C, DC: RD, FieldTy: KmpInt32Ty);
3101 addFieldToRecordDecl(C, DC: RD, FieldTy: KmpCmplrdataTy);
3102 addFieldToRecordDecl(C, DC: RD, FieldTy: KmpCmplrdataTy);
3103 if (isOpenMPTaskLoopDirective(DKind: Kind)) {
3104 QualType KmpUInt64Ty =
3105 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3106 QualType KmpInt64Ty =
3107 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3108 addFieldToRecordDecl(C, DC: RD, FieldTy: KmpUInt64Ty);
3109 addFieldToRecordDecl(C, DC: RD, FieldTy: KmpUInt64Ty);
3110 addFieldToRecordDecl(C, DC: RD, FieldTy: KmpInt64Ty);
3111 addFieldToRecordDecl(C, DC: RD, FieldTy: KmpInt32Ty);
3112 addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
3113 }
3114 RD->completeDefinition();
3115 return RD;
3116}
3117
3118static RecordDecl *
3119createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3120 ArrayRef<PrivateDataTy> Privates) {
3121 ASTContext &C = CGM.getContext();
3122 // Build struct kmp_task_t_with_privates {
3123 // kmp_task_t task_data;
3124 // .kmp_privates_t. privates;
3125 // };
3126 RecordDecl *RD = C.buildImplicitRecord(Name: "kmp_task_t_with_privates");
3127 RD->startDefinition();
3128 addFieldToRecordDecl(C, DC: RD, FieldTy: KmpTaskTQTy);
3129 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3130 addFieldToRecordDecl(C, DC: RD, FieldTy: C.getCanonicalTagType(TD: PrivateRD));
3131 RD->completeDefinition();
3132 return RD;
3133}
3134
3135/// Emit a proxy function which accepts kmp_task_t as the second
3136/// argument.
3137/// \code
3138/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3139/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3140/// For taskloops:
3141/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3142/// tt->reductions, tt->shareds);
3143/// return 0;
3144/// }
3145/// \endcode
3146static llvm::Function *
3147emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3148 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3149 QualType KmpTaskTWithPrivatesPtrQTy,
3150 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3151 QualType SharedsPtrTy, llvm::Function *TaskFunction,
3152 llvm::Value *TaskPrivatesMap) {
3153 ASTContext &C = CGM.getContext();
3154 auto *GtidArg =
3155 ImplicitParamDecl::Create(C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
3156 T: KmpInt32Ty, ParamKind: ImplicitParamKind::Other);
3157 auto *TaskTypeArg = ImplicitParamDecl::Create(
3158 C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
3159 T: KmpTaskTWithPrivatesPtrQTy.withRestrict(), ParamKind: ImplicitParamKind::Other);
3160 FunctionArgList Args{GtidArg, TaskTypeArg};
3161 const auto &TaskEntryFnInfo =
3162 CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: KmpInt32Ty, args: Args);
3163 llvm::FunctionType *TaskEntryTy =
3164 CGM.getTypes().GetFunctionType(Info: TaskEntryFnInfo);
3165 std::string Name = CGM.getOpenMPRuntime().getName(Parts: {"omp_task_entry", ""});
3166 auto *TaskEntry = llvm::Function::Create(
3167 Ty: TaskEntryTy, Linkage: llvm::GlobalValue::InternalLinkage, N: Name, M: &CGM.getModule());
3168 CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: TaskEntry, FI: TaskEntryFnInfo);
3169 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
3170 TaskEntry->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
3171 TaskEntry->setDoesNotRecurse();
3172 CodeGenFunction CGF(CGM);
3173 CGF.StartFunction(GD: GlobalDecl(), RetTy: KmpInt32Ty, Fn: TaskEntry, FnInfo: TaskEntryFnInfo, Args,
3174 Loc, StartLoc: Loc);
3175
3176 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3177 // tt,
3178 // For taskloops:
3179 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3180 // tt->task_data.shareds);
3181 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3182 Addr: CGF.GetAddrOfLocalVar(VD: GtidArg), /*Volatile=*/false, Ty: KmpInt32Ty, Loc);
3183 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3184 Ptr: CGF.GetAddrOfLocalVar(VD: TaskTypeArg),
3185 PtrTy: KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3186 const auto *KmpTaskTWithPrivatesQTyRD =
3187 KmpTaskTWithPrivatesQTy->castAsRecordDecl();
3188 LValue Base =
3189 CGF.EmitLValueForField(Base: TDBase, Field: *KmpTaskTWithPrivatesQTyRD->field_begin());
3190 const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
3191 auto PartIdFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTPartId);
3192 LValue PartIdLVal = CGF.EmitLValueForField(Base, Field: *PartIdFI);
3193 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3194
3195 auto SharedsFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTShareds);
3196 LValue SharedsLVal = CGF.EmitLValueForField(Base, Field: *SharedsFI);
3197 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3198 V: CGF.EmitLoadOfScalar(lvalue: SharedsLVal, Loc),
3199 DestTy: CGF.ConvertTypeForMem(T: SharedsPtrTy));
3200
3201 auto PrivatesFI = std::next(x: KmpTaskTWithPrivatesQTyRD->field_begin(), n: 1);
3202 llvm::Value *PrivatesParam;
3203 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3204 LValue PrivatesLVal = CGF.EmitLValueForField(Base: TDBase, Field: *PrivatesFI);
3205 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3206 V: PrivatesLVal.getPointer(CGF), DestTy: CGF.VoidPtrTy);
3207 } else {
3208 PrivatesParam = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
3209 }
3210
3211 llvm::Value *CommonArgs[] = {
3212 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3213 CGF.Builder
3214 .CreatePointerBitCastOrAddrSpaceCast(Addr: TDBase.getAddress(),
3215 Ty: CGF.VoidPtrTy, ElementTy: CGF.Int8Ty)
3216 .emitRawPointer(CGF)};
3217 SmallVector<llvm::Value *, 16> CallArgs(std::begin(arr&: CommonArgs),
3218 std::end(arr&: CommonArgs));
3219 if (isOpenMPTaskLoopDirective(DKind: Kind)) {
3220 auto LBFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTLowerBound);
3221 LValue LBLVal = CGF.EmitLValueForField(Base, Field: *LBFI);
3222 llvm::Value *LBParam = CGF.EmitLoadOfScalar(lvalue: LBLVal, Loc);
3223 auto UBFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTUpperBound);
3224 LValue UBLVal = CGF.EmitLValueForField(Base, Field: *UBFI);
3225 llvm::Value *UBParam = CGF.EmitLoadOfScalar(lvalue: UBLVal, Loc);
3226 auto StFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTStride);
3227 LValue StLVal = CGF.EmitLValueForField(Base, Field: *StFI);
3228 llvm::Value *StParam = CGF.EmitLoadOfScalar(lvalue: StLVal, Loc);
3229 auto LIFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTLastIter);
3230 LValue LILVal = CGF.EmitLValueForField(Base, Field: *LIFI);
3231 llvm::Value *LIParam = CGF.EmitLoadOfScalar(lvalue: LILVal, Loc);
3232 auto RFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTReductions);
3233 LValue RLVal = CGF.EmitLValueForField(Base, Field: *RFI);
3234 llvm::Value *RParam = CGF.EmitLoadOfScalar(lvalue: RLVal, Loc);
3235 CallArgs.push_back(Elt: LBParam);
3236 CallArgs.push_back(Elt: UBParam);
3237 CallArgs.push_back(Elt: StParam);
3238 CallArgs.push_back(Elt: LIParam);
3239 CallArgs.push_back(Elt: RParam);
3240 }
3241 CallArgs.push_back(Elt: SharedsParam);
3242
3243 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, OutlinedFn: TaskFunction,
3244 Args: CallArgs);
3245 CGF.EmitStoreThroughLValue(Src: RValue::get(V: CGF.Builder.getInt32(/*C=*/0)),
3246 Dst: CGF.MakeAddrLValue(Addr: CGF.ReturnValue, T: KmpInt32Ty));
3247 CGF.FinishFunction();
3248 return TaskEntry;
3249}
3250
3251static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3252 SourceLocation Loc,
3253 QualType KmpInt32Ty,
3254 QualType KmpTaskTWithPrivatesPtrQTy,
3255 QualType KmpTaskTWithPrivatesQTy) {
3256 ASTContext &C = CGM.getContext();
3257 auto *GtidArg =
3258 ImplicitParamDecl::Create(C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
3259 T: KmpInt32Ty, ParamKind: ImplicitParamKind::Other);
3260 auto *TaskTypeArg = ImplicitParamDecl::Create(
3261 C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
3262 T: KmpTaskTWithPrivatesPtrQTy.withRestrict(), ParamKind: ImplicitParamKind::Other);
3263 FunctionArgList Args{GtidArg, TaskTypeArg};
3264 const auto &DestructorFnInfo =
3265 CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: KmpInt32Ty, args: Args);
3266 llvm::FunctionType *DestructorFnTy =
3267 CGM.getTypes().GetFunctionType(Info: DestructorFnInfo);
3268 std::string Name =
3269 CGM.getOpenMPRuntime().getName(Parts: {"omp_task_destructor", ""});
3270 auto *DestructorFn =
3271 llvm::Function::Create(Ty: DestructorFnTy, Linkage: llvm::GlobalValue::InternalLinkage,
3272 N: Name, M: &CGM.getModule());
3273 CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: DestructorFn,
3274 FI: DestructorFnInfo);
3275 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
3276 DestructorFn->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
3277 DestructorFn->setDoesNotRecurse();
3278 CodeGenFunction CGF(CGM);
3279 CGF.StartFunction(GD: GlobalDecl(), RetTy: KmpInt32Ty, Fn: DestructorFn, FnInfo: DestructorFnInfo,
3280 Args, Loc, StartLoc: Loc);
3281
3282 LValue Base = CGF.EmitLoadOfPointerLValue(
3283 Ptr: CGF.GetAddrOfLocalVar(VD: TaskTypeArg),
3284 PtrTy: KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3285 const auto *KmpTaskTWithPrivatesQTyRD =
3286 KmpTaskTWithPrivatesQTy->castAsRecordDecl();
3287 auto FI = std::next(x: KmpTaskTWithPrivatesQTyRD->field_begin());
3288 Base = CGF.EmitLValueForField(Base, Field: *FI);
3289 for (const auto *Field : FI->getType()->castAsRecordDecl()->fields()) {
3290 if (QualType::DestructionKind DtorKind =
3291 Field->getType().isDestructedType()) {
3292 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3293 CGF.pushDestroy(dtorKind: DtorKind, addr: FieldLValue.getAddress(), type: Field->getType());
3294 }
3295 }
3296 CGF.FinishFunction();
3297 return DestructorFn;
3298}
3299
3300/// Emit a privates mapping function for correct handling of private and
3301/// firstprivate variables.
3302/// \code
3303/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3304/// **noalias priv1,..., <tyn> **noalias privn) {
3305/// *priv1 = &.privates.priv1;
3306/// ...;
3307/// *privn = &.privates.privn;
3308/// }
3309/// \endcode
3310static llvm::Value *
3311emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3312 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3313 ArrayRef<PrivateDataTy> Privates) {
3314 ASTContext &C = CGM.getContext();
3315 FunctionArgList Args;
3316 auto *TaskPrivatesArg = ImplicitParamDecl::Create(
3317 C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
3318 T: C.getPointerType(T: PrivatesQTy).withConst().withRestrict(),
3319 ParamKind: ImplicitParamKind::Other);
3320 Args.push_back(Elt: TaskPrivatesArg);
3321 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3322 unsigned Counter = 1;
3323 for (const Expr *E : Data.PrivateVars) {
3324 Args.push_back(Elt: ImplicitParamDecl::Create(
3325 C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
3326 T: C.getPointerType(T: C.getPointerType(T: E->getType()))
3327 .withConst()
3328 .withRestrict(),
3329 ParamKind: ImplicitParamKind::Other));
3330 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
3331 PrivateVarsPos[VD] = Counter;
3332 ++Counter;
3333 }
3334 for (const Expr *E : Data.FirstprivateVars) {
3335 Args.push_back(Elt: ImplicitParamDecl::Create(
3336 C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
3337 T: C.getPointerType(T: C.getPointerType(T: E->getType()))
3338 .withConst()
3339 .withRestrict(),
3340 ParamKind: ImplicitParamKind::Other));
3341 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
3342 PrivateVarsPos[VD] = Counter;
3343 ++Counter;
3344 }
3345 for (const Expr *E : Data.LastprivateVars) {
3346 Args.push_back(Elt: ImplicitParamDecl::Create(
3347 C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
3348 T: C.getPointerType(T: C.getPointerType(T: E->getType()))
3349 .withConst()
3350 .withRestrict(),
3351 ParamKind: ImplicitParamKind::Other));
3352 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
3353 PrivateVarsPos[VD] = Counter;
3354 ++Counter;
3355 }
3356 for (const VarDecl *VD : Data.PrivateLocals) {
3357 QualType Ty = VD->getType().getNonReferenceType();
3358 if (VD->getType()->isLValueReferenceType())
3359 Ty = C.getPointerType(T: Ty);
3360 if (isAllocatableDecl(VD))
3361 Ty = C.getPointerType(T: Ty);
3362 Args.push_back(Elt: ImplicitParamDecl::Create(
3363 C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
3364 T: C.getPointerType(T: C.getPointerType(T: Ty)).withConst().withRestrict(),
3365 ParamKind: ImplicitParamKind::Other));
3366 PrivateVarsPos[VD] = Counter;
3367 ++Counter;
3368 }
3369 const auto &TaskPrivatesMapFnInfo =
3370 CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
3371 llvm::FunctionType *TaskPrivatesMapTy =
3372 CGM.getTypes().GetFunctionType(Info: TaskPrivatesMapFnInfo);
3373 std::string Name =
3374 CGM.getOpenMPRuntime().getName(Parts: {"omp_task_privates_map", ""});
3375 auto *TaskPrivatesMap = llvm::Function::Create(
3376 Ty: TaskPrivatesMapTy, Linkage: llvm::GlobalValue::InternalLinkage, N: Name,
3377 M: &CGM.getModule());
3378 CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: TaskPrivatesMap,
3379 FI: TaskPrivatesMapFnInfo);
3380 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
3381 TaskPrivatesMap->addFnAttr(Kind: "sample-profile-suffix-elision-policy",
3382 Val: "selected");
3383 if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
3384 TaskPrivatesMap->removeFnAttr(Kind: llvm::Attribute::NoInline);
3385 TaskPrivatesMap->removeFnAttr(Kind: llvm::Attribute::OptimizeNone);
3386 TaskPrivatesMap->addFnAttr(Kind: llvm::Attribute::AlwaysInline);
3387 }
3388 CodeGenFunction CGF(CGM);
3389 CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn: TaskPrivatesMap,
3390 FnInfo: TaskPrivatesMapFnInfo, Args, Loc, StartLoc: Loc);
3391
3392 // *privi = &.privates.privi;
3393 LValue Base = CGF.EmitLoadOfPointerLValue(
3394 Ptr: CGF.GetAddrOfLocalVar(VD: TaskPrivatesArg),
3395 PtrTy: TaskPrivatesArg->getType()->castAs<PointerType>());
3396 const auto *PrivatesQTyRD = PrivatesQTy->castAsRecordDecl();
3397 Counter = 0;
3398 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3399 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3400 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3401 LValue RefLVal =
3402 CGF.MakeAddrLValue(Addr: CGF.GetAddrOfLocalVar(VD), T: VD->getType());
3403 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3404 Ptr: RefLVal.getAddress(), PtrTy: RefLVal.getType()->castAs<PointerType>());
3405 CGF.EmitStoreOfScalar(value: FieldLVal.getPointer(CGF), lvalue: RefLoadLVal);
3406 ++Counter;
3407 }
3408 CGF.FinishFunction();
3409 return TaskPrivatesMap;
3410}
3411
3412/// Emit initialization for private variables in task-based directives.
3413static void emitPrivatesInit(CodeGenFunction &CGF,
3414 const OMPExecutableDirective &D,
3415 Address KmpTaskSharedsPtr, LValue TDBase,
3416 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3417 QualType SharedsTy, QualType SharedsPtrTy,
3418 const OMPTaskDataTy &Data,
3419 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3420 ASTContext &C = CGF.getContext();
3421 auto FI = std::next(x: KmpTaskTWithPrivatesQTyRD->field_begin());
3422 LValue PrivatesBase = CGF.EmitLValueForField(Base: TDBase, Field: *FI);
3423 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(DKind: D.getDirectiveKind())
3424 ? OMPD_taskloop
3425 : OMPD_task;
3426 const CapturedStmt &CS = *D.getCapturedStmt(RegionKind: Kind);
3427 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3428 LValue SrcBase;
3429 bool IsTargetTask =
3430 isOpenMPTargetDataManagementDirective(DKind: D.getDirectiveKind()) ||
3431 isOpenMPTargetExecutionDirective(DKind: D.getDirectiveKind());
3432 // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3433 // PointersArray, SizesArray, and MappersArray. The original variables for
3434 // these arrays are not captured and we get their addresses explicitly.
3435 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3436 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3437 SrcBase = CGF.MakeAddrLValue(
3438 Addr: CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3439 Addr: KmpTaskSharedsPtr, Ty: CGF.ConvertTypeForMem(T: SharedsPtrTy),
3440 ElementTy: CGF.ConvertTypeForMem(T: SharedsTy)),
3441 T: SharedsTy);
3442 }
3443 FI = FI->getType()->castAsRecordDecl()->field_begin();
3444 for (const PrivateDataTy &Pair : Privates) {
3445 // Do not initialize private locals.
3446 if (Pair.second.isLocalPrivate()) {
3447 ++FI;
3448 continue;
3449 }
3450 const VarDecl *VD = Pair.second.PrivateCopy;
3451 const Expr *Init = VD->getAnyInitializer();
3452 if (Init && (!ForDup || (isa<CXXConstructExpr>(Val: Init) &&
3453 !CGF.isTrivialInitializer(Init)))) {
3454 LValue PrivateLValue = CGF.EmitLValueForField(Base: PrivatesBase, Field: *FI);
3455 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3456 const VarDecl *OriginalVD = Pair.second.Original;
3457 // Check if the variable is the target-based BasePointersArray,
3458 // PointersArray, SizesArray, or MappersArray.
3459 LValue SharedRefLValue;
3460 QualType Type = PrivateLValue.getType();
3461 const FieldDecl *SharedField = CapturesInfo.lookup(VD: OriginalVD);
3462 if (IsTargetTask && !SharedField) {
3463 assert(isa<ImplicitParamDecl>(OriginalVD) &&
3464 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3465 cast<CapturedDecl>(OriginalVD->getDeclContext())
3466 ->getNumParams() == 0 &&
3467 isa<TranslationUnitDecl>(
3468 cast<CapturedDecl>(OriginalVD->getDeclContext())
3469 ->getDeclContext()) &&
3470 "Expected artificial target data variable.");
3471 SharedRefLValue =
3472 CGF.MakeAddrLValue(Addr: CGF.GetAddrOfLocalVar(VD: OriginalVD), T: Type);
3473 } else if (ForDup) {
3474 SharedRefLValue = CGF.EmitLValueForField(Base: SrcBase, Field: SharedField);
3475 SharedRefLValue = CGF.MakeAddrLValue(
3476 Addr: SharedRefLValue.getAddress().withAlignment(
3477 NewAlignment: C.getDeclAlign(D: OriginalVD)),
3478 T: SharedRefLValue.getType(), BaseInfo: LValueBaseInfo(AlignmentSource::Decl),
3479 TBAAInfo: SharedRefLValue.getTBAAInfo());
3480 } else if (CGF.LambdaCaptureFields.count(
3481 Val: Pair.second.Original->getCanonicalDecl()) > 0 ||
3482 isa_and_nonnull<BlockDecl>(Val: CGF.CurCodeDecl)) {
3483 SharedRefLValue = CGF.EmitLValue(E: Pair.second.OriginalRef);
3484 } else {
3485 // Processing for implicitly captured variables.
3486 InlinedOpenMPRegionRAII Region(
3487 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3488 /*HasCancel=*/false, /*NoInheritance=*/true);
3489 SharedRefLValue = CGF.EmitLValue(E: Pair.second.OriginalRef);
3490 }
3491 if (Type->isArrayType()) {
3492 // Initialize firstprivate array.
3493 if (!isa<CXXConstructExpr>(Val: Init) || CGF.isTrivialInitializer(Init)) {
3494 // Perform simple memcpy.
3495 CGF.EmitAggregateAssign(Dest: PrivateLValue, Src: SharedRefLValue, EltTy: Type);
3496 } else {
3497 // Initialize firstprivate array using element-by-element
3498 // initialization.
3499 CGF.EmitOMPAggregateAssign(
3500 DestAddr: PrivateLValue.getAddress(), SrcAddr: SharedRefLValue.getAddress(), OriginalType: Type,
3501 CopyGen: [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3502 Address SrcElement) {
3503 // Clean up any temporaries needed by the initialization.
3504 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3505 InitScope.addPrivate(LocalVD: Elem, Addr: SrcElement);
3506 (void)InitScope.Privatize();
3507 // Emit initialization for single element.
3508 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3509 CGF, &CapturesInfo);
3510 CGF.EmitAnyExprToMem(E: Init, Location: DestElement,
3511 Quals: Init->getType().getQualifiers(),
3512 /*IsInitializer=*/false);
3513 });
3514 }
3515 } else {
3516 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3517 InitScope.addPrivate(LocalVD: Elem, Addr: SharedRefLValue.getAddress());
3518 (void)InitScope.Privatize();
3519 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3520 CGF.EmitExprAsInit(init: Init, D: VD, lvalue: PrivateLValue,
3521 /*capturedByInit=*/false);
3522 }
3523 } else {
3524 CGF.EmitExprAsInit(init: Init, D: VD, lvalue: PrivateLValue, /*capturedByInit=*/false);
3525 }
3526 }
3527 ++FI;
3528 }
3529}
3530
3531/// Check if duplication function is required for taskloops.
3532static bool checkInitIsRequired(CodeGenFunction &CGF,
3533 ArrayRef<PrivateDataTy> Privates) {
3534 bool InitRequired = false;
3535 for (const PrivateDataTy &Pair : Privates) {
3536 if (Pair.second.isLocalPrivate())
3537 continue;
3538 const VarDecl *VD = Pair.second.PrivateCopy;
3539 const Expr *Init = VD->getAnyInitializer();
3540 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Val: Init) &&
3541 !CGF.isTrivialInitializer(Init));
3542 if (InitRequired)
3543 break;
3544 }
3545 return InitRequired;
3546}
3547
3548
3549/// Emit task_dup function (for initialization of
3550/// private/firstprivate/lastprivate vars and last_iter flag)
3551/// \code
3552/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3553/// lastpriv) {
3554/// // setup lastprivate flag
3555/// task_dst->last = lastpriv;
3556/// // could be constructor calls here...
3557/// }
3558/// \endcode
3559static llvm::Value *
3560emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3561 const OMPExecutableDirective &D,
3562 QualType KmpTaskTWithPrivatesPtrQTy,
3563 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3564 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3565 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3566 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3567 ASTContext &C = CGM.getContext();
3568 auto *DstArg = ImplicitParamDecl::Create(
3569 C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr, T: KmpTaskTWithPrivatesPtrQTy,
3570 ParamKind: ImplicitParamKind::Other);
3571 auto *SrcArg = ImplicitParamDecl::Create(
3572 C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr, T: KmpTaskTWithPrivatesPtrQTy,
3573 ParamKind: ImplicitParamKind::Other);
3574 auto *LastprivArg =
3575 ImplicitParamDecl::Create(C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr, T: C.IntTy,
3576 ParamKind: ImplicitParamKind::Other);
3577 FunctionArgList Args{DstArg, SrcArg, LastprivArg};
3578 const auto &TaskDupFnInfo =
3579 CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
3580 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(Info: TaskDupFnInfo);
3581 std::string Name = CGM.getOpenMPRuntime().getName(Parts: {"omp_task_dup", ""});
3582 auto *TaskDup = llvm::Function::Create(
3583 Ty: TaskDupTy, Linkage: llvm::GlobalValue::InternalLinkage, N: Name, M: &CGM.getModule());
3584 CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: TaskDup, FI: TaskDupFnInfo);
3585 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
3586 TaskDup->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
3587 TaskDup->setDoesNotRecurse();
3588 CodeGenFunction CGF(CGM);
3589 CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn: TaskDup, FnInfo: TaskDupFnInfo, Args, Loc,
3590 StartLoc: Loc);
3591
3592 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3593 Ptr: CGF.GetAddrOfLocalVar(VD: DstArg),
3594 PtrTy: KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3595 // task_dst->liter = lastpriv;
3596 if (WithLastIter) {
3597 auto LIFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTLastIter);
3598 LValue Base = CGF.EmitLValueForField(
3599 Base: TDBase, Field: *KmpTaskTWithPrivatesQTyRD->field_begin());
3600 LValue LILVal = CGF.EmitLValueForField(Base, Field: *LIFI);
3601 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3602 Addr: CGF.GetAddrOfLocalVar(VD: LastprivArg), /*Volatile=*/false, Ty: C.IntTy, Loc);
3603 CGF.EmitStoreOfScalar(value: Lastpriv, lvalue: LILVal);
3604 }
3605
3606 // Emit initial values for private copies (if any).
3607 assert(!Privates.empty());
3608 Address KmpTaskSharedsPtr = Address::invalid();
3609 if (!Data.FirstprivateVars.empty()) {
3610 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3611 Ptr: CGF.GetAddrOfLocalVar(VD: SrcArg),
3612 PtrTy: KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3613 LValue Base = CGF.EmitLValueForField(
3614 Base: TDBase, Field: *KmpTaskTWithPrivatesQTyRD->field_begin());
3615 KmpTaskSharedsPtr = Address(
3616 CGF.EmitLoadOfScalar(lvalue: CGF.EmitLValueForField(
3617 Base, Field: *std::next(x: KmpTaskTQTyRD->field_begin(),
3618 n: KmpTaskTShareds)),
3619 Loc),
3620 CGF.Int8Ty, CGM.getNaturalTypeAlignment(T: SharedsTy));
3621 }
3622 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3623 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3624 CGF.FinishFunction();
3625 return TaskDup;
3626}
3627
3628/// Checks if destructor function is required to be generated.
3629/// \return true if cleanups are required, false otherwise.
3630static bool
3631checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3632 ArrayRef<PrivateDataTy> Privates) {
3633 for (const PrivateDataTy &P : Privates) {
3634 if (P.second.isLocalPrivate())
3635 continue;
3636 QualType Ty = P.second.Original->getType().getNonReferenceType();
3637 if (Ty.isDestructedType())
3638 return true;
3639 }
3640 return false;
3641}
3642
3643namespace {
3644/// Loop generator for OpenMP iterator expression.
3645class OMPIteratorGeneratorScope final
3646 : public CodeGenFunction::OMPPrivateScope {
3647 CodeGenFunction &CGF;
3648 const OMPIteratorExpr *E = nullptr;
3649 SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3650 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3651 OMPIteratorGeneratorScope() = delete;
3652 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3653
3654public:
3655 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3656 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3657 if (!E)
3658 return;
3659 SmallVector<llvm::Value *, 4> Uppers;
3660 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3661 Uppers.push_back(Elt: CGF.EmitScalarExpr(E: E->getHelper(I).Upper));
3662 const auto *VD = cast<VarDecl>(Val: E->getIteratorDecl(I));
3663 addPrivate(LocalVD: VD, Addr: CGF.CreateMemTemp(T: VD->getType(), Name: VD->getName()));
3664 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3665 addPrivate(
3666 LocalVD: HelperData.CounterVD,
3667 Addr: CGF.CreateMemTemp(T: HelperData.CounterVD->getType(), Name: "counter.addr"));
3668 }
3669 Privatize();
3670
3671 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3672 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3673 LValue CLVal =
3674 CGF.MakeAddrLValue(Addr: CGF.GetAddrOfLocalVar(VD: HelperData.CounterVD),
3675 T: HelperData.CounterVD->getType());
3676 // Counter = 0;
3677 CGF.EmitStoreOfScalar(
3678 value: llvm::ConstantInt::get(Ty: CLVal.getAddress().getElementType(), V: 0),
3679 lvalue: CLVal);
3680 CodeGenFunction::JumpDest &ContDest =
3681 ContDests.emplace_back(Args: CGF.getJumpDestInCurrentScope(Name: "iter.cont"));
3682 CodeGenFunction::JumpDest &ExitDest =
3683 ExitDests.emplace_back(Args: CGF.getJumpDestInCurrentScope(Name: "iter.exit"));
3684 // N = <number-of_iterations>;
3685 llvm::Value *N = Uppers[I];
3686 // cont:
3687 // if (Counter < N) goto body; else goto exit;
3688 CGF.EmitBlock(BB: ContDest.getBlock());
3689 auto *CVal =
3690 CGF.EmitLoadOfScalar(lvalue: CLVal, Loc: HelperData.CounterVD->getLocation());
3691 llvm::Value *Cmp =
3692 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3693 ? CGF.Builder.CreateICmpSLT(LHS: CVal, RHS: N)
3694 : CGF.Builder.CreateICmpULT(LHS: CVal, RHS: N);
3695 llvm::BasicBlock *BodyBB = CGF.createBasicBlock(name: "iter.body");
3696 CGF.Builder.CreateCondBr(Cond: Cmp, True: BodyBB, False: ExitDest.getBlock());
3697 // body:
3698 CGF.EmitBlock(BB: BodyBB);
3699 // Iteri = Begini + Counter * Stepi;
3700 CGF.EmitIgnoredExpr(E: HelperData.Update);
3701 }
3702 }
3703 ~OMPIteratorGeneratorScope() {
3704 if (!E)
3705 return;
3706 for (unsigned I = E->numOfIterators(); I > 0; --I) {
3707 // Counter = Counter + 1;
3708 const OMPIteratorHelperData &HelperData = E->getHelper(I: I - 1);
3709 CGF.EmitIgnoredExpr(E: HelperData.CounterUpdate);
3710 // goto cont;
3711 CGF.EmitBranchThroughCleanup(Dest: ContDests[I - 1]);
3712 // exit:
3713 CGF.EmitBlock(BB: ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3714 }
3715 }
3716};
3717} // namespace
3718
3719static std::pair<llvm::Value *, llvm::Value *>
3720getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
3721 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(Val: E);
3722 llvm::Value *Addr;
3723 if (OASE) {
3724 const Expr *Base = OASE->getBase();
3725 Addr = CGF.EmitScalarExpr(E: Base);
3726 } else {
3727 Addr = CGF.EmitLValue(E).getPointer(CGF);
3728 }
3729 llvm::Value *SizeVal;
3730 QualType Ty = E->getType();
3731 if (OASE) {
3732 SizeVal = CGF.getTypeSize(Ty: OASE->getBase()->getType()->getPointeeType());
3733 for (const Expr *SE : OASE->getDimensions()) {
3734 llvm::Value *Sz = CGF.EmitScalarExpr(E: SE);
3735 Sz = CGF.EmitScalarConversion(
3736 Src: Sz, SrcTy: SE->getType(), DstTy: CGF.getContext().getSizeType(), Loc: SE->getExprLoc());
3737 SizeVal = CGF.Builder.CreateNUWMul(LHS: SizeVal, RHS: Sz);
3738 }
3739 } else if (const auto *ASE =
3740 dyn_cast<ArraySectionExpr>(Val: E->IgnoreParenImpCasts())) {
3741 LValue UpAddrLVal = CGF.EmitArraySectionExpr(E: ASE, /*IsLowerBound=*/false);
3742 Address UpAddrAddress = UpAddrLVal.getAddress();
3743 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3744 Ty: UpAddrAddress.getElementType(), Ptr: UpAddrAddress.emitRawPointer(CGF),
3745 /*Idx0=*/1);
3746 SizeVal = CGF.Builder.CreatePtrDiff(LHS: UpAddr, RHS: Addr, Name: "", /*IsNUW=*/true);
3747 } else {
3748 SizeVal = CGF.getTypeSize(Ty);
3749 }
3750 return std::make_pair(x&: Addr, y&: SizeVal);
3751}
3752
3753/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
3754static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3755 QualType FlagsTy = C.getIntTypeForBitwidth(DestWidth: 32, /*Signed=*/false);
3756 if (KmpTaskAffinityInfoTy.isNull()) {
3757 RecordDecl *KmpAffinityInfoRD =
3758 C.buildImplicitRecord(Name: "kmp_task_affinity_info_t");
3759 KmpAffinityInfoRD->startDefinition();
3760 addFieldToRecordDecl(C, DC: KmpAffinityInfoRD, FieldTy: C.getIntPtrType());
3761 addFieldToRecordDecl(C, DC: KmpAffinityInfoRD, FieldTy: C.getSizeType());
3762 addFieldToRecordDecl(C, DC: KmpAffinityInfoRD, FieldTy: FlagsTy);
3763 KmpAffinityInfoRD->completeDefinition();
3764 KmpTaskAffinityInfoTy = C.getCanonicalTagType(TD: KmpAffinityInfoRD);
3765 }
3766}
3767
3768CGOpenMPRuntime::TaskResultTy
3769CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3770 const OMPExecutableDirective &D,
3771 llvm::Function *TaskFunction, QualType SharedsTy,
3772 Address Shareds, const OMPTaskDataTy &Data) {
3773 ASTContext &C = CGM.getContext();
3774 llvm::SmallVector<PrivateDataTy, 4> Privates;
3775 // Aggregate privates and sort them by the alignment.
3776 const auto *I = Data.PrivateCopies.begin();
3777 for (const Expr *E : Data.PrivateVars) {
3778 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
3779 Privates.emplace_back(
3780 Args: C.getDeclAlign(D: VD),
3781 Args: PrivateHelpersTy(E, VD, cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *I)->getDecl()),
3782 /*PrivateElemInit=*/nullptr));
3783 ++I;
3784 }
3785 I = Data.FirstprivateCopies.begin();
3786 const auto *IElemInitRef = Data.FirstprivateInits.begin();
3787 for (const Expr *E : Data.FirstprivateVars) {
3788 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
3789 Privates.emplace_back(
3790 Args: C.getDeclAlign(D: VD),
3791 Args: PrivateHelpersTy(
3792 E, VD, cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *I)->getDecl()),
3793 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IElemInitRef)->getDecl())));
3794 ++I;
3795 ++IElemInitRef;
3796 }
3797 I = Data.LastprivateCopies.begin();
3798 for (const Expr *E : Data.LastprivateVars) {
3799 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
3800 Privates.emplace_back(
3801 Args: C.getDeclAlign(D: VD),
3802 Args: PrivateHelpersTy(E, VD, cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *I)->getDecl()),
3803 /*PrivateElemInit=*/nullptr));
3804 ++I;
3805 }
3806 for (const VarDecl *VD : Data.PrivateLocals) {
3807 if (isAllocatableDecl(VD))
3808 Privates.emplace_back(Args: CGM.getPointerAlign(), Args: PrivateHelpersTy(VD));
3809 else
3810 Privates.emplace_back(Args: C.getDeclAlign(D: VD), Args: PrivateHelpersTy(VD));
3811 }
3812 llvm::stable_sort(Range&: Privates,
3813 C: [](const PrivateDataTy &L, const PrivateDataTy &R) {
3814 return L.first > R.first;
3815 });
3816 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3817 // Build type kmp_routine_entry_t (if not built yet).
3818 emitKmpRoutineEntryT(KmpInt32Ty);
3819 // Build type kmp_task_t (if not built yet).
3820 if (isOpenMPTaskLoopDirective(DKind: D.getDirectiveKind())) {
3821 if (SavedKmpTaskloopTQTy.isNull()) {
3822 SavedKmpTaskloopTQTy = C.getCanonicalTagType(TD: createKmpTaskTRecordDecl(
3823 CGM, Kind: D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPointerQTy: KmpRoutineEntryPtrQTy));
3824 }
3825 KmpTaskTQTy = SavedKmpTaskloopTQTy;
3826 } else {
3827 assert((D.getDirectiveKind() == OMPD_task ||
3828 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3829 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3830 "Expected taskloop, task or target directive");
3831 if (SavedKmpTaskTQTy.isNull()) {
3832 SavedKmpTaskTQTy = C.getCanonicalTagType(TD: createKmpTaskTRecordDecl(
3833 CGM, Kind: D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPointerQTy: KmpRoutineEntryPtrQTy));
3834 }
3835 KmpTaskTQTy = SavedKmpTaskTQTy;
3836 }
3837 const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
3838 // Build particular struct kmp_task_t for the given task.
3839 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3840 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3841 CanQualType KmpTaskTWithPrivatesQTy =
3842 C.getCanonicalTagType(TD: KmpTaskTWithPrivatesQTyRD);
3843 QualType KmpTaskTWithPrivatesPtrQTy =
3844 C.getPointerType(T: KmpTaskTWithPrivatesQTy);
3845 llvm::Type *KmpTaskTWithPrivatesPtrTy = CGF.Builder.getPtrTy(AddrSpace: 0);
3846 llvm::Value *KmpTaskTWithPrivatesTySize =
3847 CGF.getTypeSize(Ty: KmpTaskTWithPrivatesQTy);
3848 QualType SharedsPtrTy = C.getPointerType(T: SharedsTy);
3849
3850 // Emit initial values for private copies (if any).
3851 llvm::Value *TaskPrivatesMap = nullptr;
3852 llvm::Type *TaskPrivatesMapTy =
3853 std::next(x: TaskFunction->arg_begin(), n: 3)->getType();
3854 if (!Privates.empty()) {
3855 auto FI = std::next(x: KmpTaskTWithPrivatesQTyRD->field_begin());
3856 TaskPrivatesMap =
3857 emitTaskPrivateMappingFunction(CGM, Loc, Data, PrivatesQTy: FI->getType(), Privates);
3858 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3859 V: TaskPrivatesMap, DestTy: TaskPrivatesMapTy);
3860 } else {
3861 TaskPrivatesMap = llvm::ConstantPointerNull::get(
3862 T: cast<llvm::PointerType>(Val: TaskPrivatesMapTy));
3863 }
3864 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3865 // kmp_task_t *tt);
3866 llvm::Function *TaskEntry = emitProxyTaskFunction(
3867 CGM, Loc, Kind: D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3868 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3869 TaskPrivatesMap);
3870
3871 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3872 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3873 // kmp_routine_entry_t *task_entry);
3874 // Task flags. Format is taken from
3875 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3876 // description of kmp_tasking_flags struct.
3877 enum {
3878 TiedFlag = 0x1,
3879 FinalFlag = 0x2,
3880 DestructorsFlag = 0x8,
3881 PriorityFlag = 0x20,
3882 DetachableFlag = 0x40,
3883 FreeAgentFlag = 0x80,
3884 TransparentFlag = 0x100,
3885 };
3886 unsigned Flags = Data.Tied ? TiedFlag : 0;
3887 bool NeedsCleanup = false;
3888 if (!Privates.empty()) {
3889 NeedsCleanup =
3890 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3891 if (NeedsCleanup)
3892 Flags = Flags | DestructorsFlag;
3893 }
3894 if (const auto *Clause = D.getSingleClause<OMPThreadsetClause>()) {
3895 OpenMPThreadsetKind Kind = Clause->getThreadsetKind();
3896 if (Kind == OMPC_THREADSET_omp_pool)
3897 Flags = Flags | FreeAgentFlag;
3898 }
3899 if (D.getSingleClause<OMPTransparentClause>())
3900 Flags |= TransparentFlag;
3901
3902 if (Data.Priority.getInt())
3903 Flags = Flags | PriorityFlag;
3904 if (D.hasClausesOfKind<OMPDetachClause>())
3905 Flags = Flags | DetachableFlag;
3906 llvm::Value *TaskFlags =
3907 Data.Final.getPointer()
3908 ? CGF.Builder.CreateSelect(C: Data.Final.getPointer(),
3909 True: CGF.Builder.getInt32(C: FinalFlag),
3910 False: CGF.Builder.getInt32(/*C=*/0))
3911 : CGF.Builder.getInt32(C: Data.Final.getInt() ? FinalFlag : 0);
3912 TaskFlags = CGF.Builder.CreateOr(LHS: TaskFlags, RHS: CGF.Builder.getInt32(C: Flags));
3913 llvm::Value *SharedsSize = CGM.getSize(numChars: C.getTypeSizeInChars(T: SharedsTy));
3914 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3915 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3916 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3917 V: TaskEntry, DestTy: KmpRoutineEntryPtrTy)};
3918 llvm::Value *NewTask;
3919 if (D.hasClausesOfKind<OMPNowaitClause>()) {
3920 // Check if we have any device clause associated with the directive.
3921 const Expr *Device = nullptr;
3922 if (auto *C = D.getSingleClause<OMPDeviceClause>())
3923 Device = C->getDevice();
3924 // Emit device ID if any otherwise use default value.
3925 llvm::Value *DeviceID;
3926 if (Device)
3927 DeviceID = CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: Device),
3928 DestTy: CGF.Int64Ty, /*isSigned=*/true);
3929 else
3930 DeviceID = CGF.Builder.getInt64(C: OMP_DEVICEID_UNDEF);
3931 AllocArgs.push_back(Elt: DeviceID);
3932 NewTask = CGF.EmitRuntimeCall(
3933 callee: OMPBuilder.getOrCreateRuntimeFunction(
3934 M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_target_task_alloc),
3935 args: AllocArgs);
3936 } else {
3937 NewTask =
3938 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
3939 M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_task_alloc),
3940 args: AllocArgs);
3941 }
3942 // Emit detach clause initialization.
3943 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3944 // task_descriptor);
3945 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3946 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3947 LValue EvtLVal = CGF.EmitLValue(E: Evt);
3948
3949 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3950 // int gtid, kmp_task_t *task);
3951 llvm::Value *Loc = emitUpdateLocation(CGF, Loc: DC->getBeginLoc());
3952 llvm::Value *Tid = getThreadID(CGF, Loc: DC->getBeginLoc());
3953 Tid = CGF.Builder.CreateIntCast(V: Tid, DestTy: CGF.IntTy, /*isSigned=*/false);
3954 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3955 callee: OMPBuilder.getOrCreateRuntimeFunction(
3956 M&: CGM.getModule(), FnID: OMPRTL___kmpc_task_allow_completion_event),
3957 args: {Loc, Tid, NewTask});
3958 EvtVal = CGF.EmitScalarConversion(Src: EvtVal, SrcTy: C.VoidPtrTy, DstTy: Evt->getType(),
3959 Loc: Evt->getExprLoc());
3960 CGF.EmitStoreOfScalar(value: EvtVal, lvalue: EvtLVal);
3961 }
3962 // Process affinity clauses.
3963 if (D.hasClausesOfKind<OMPAffinityClause>()) {
3964 // Process list of affinity data.
3965 ASTContext &C = CGM.getContext();
3966 Address AffinitiesArray = Address::invalid();
3967 // Calculate number of elements to form the array of affinity data.
3968 llvm::Value *NumOfElements = nullptr;
3969 unsigned NumAffinities = 0;
3970 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3971 if (const Expr *Modifier = C->getModifier()) {
3972 const auto *IE = cast<OMPIteratorExpr>(Val: Modifier->IgnoreParenImpCasts());
3973 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3974 llvm::Value *Sz = CGF.EmitScalarExpr(E: IE->getHelper(I).Upper);
3975 Sz = CGF.Builder.CreateIntCast(V: Sz, DestTy: CGF.SizeTy, /*isSigned=*/false);
3976 NumOfElements =
3977 NumOfElements ? CGF.Builder.CreateNUWMul(LHS: NumOfElements, RHS: Sz) : Sz;
3978 }
3979 } else {
3980 NumAffinities += C->varlist_size();
3981 }
3982 }
3983 getKmpAffinityType(C&: CGM.getContext(), KmpTaskAffinityInfoTy);
3984 // Fields ids in kmp_task_affinity_info record.
3985 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3986
3987 QualType KmpTaskAffinityInfoArrayTy;
3988 if (NumOfElements) {
3989 NumOfElements = CGF.Builder.CreateNUWAdd(
3990 LHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: NumAffinities), RHS: NumOfElements);
3991 auto *OVE = new (C) OpaqueValueExpr(
3992 Loc,
3993 C.getIntTypeForBitwidth(DestWidth: C.getTypeSize(T: C.getSizeType()), /*Signed=*/0),
3994 VK_PRValue);
3995 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3996 RValue::get(V: NumOfElements));
3997 KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
3998 EltTy: KmpTaskAffinityInfoTy, NumElts: OVE, ASM: ArraySizeModifier::Normal,
3999 /*IndexTypeQuals=*/0);
4000 // Properly emit variable-sized array.
4001 auto *PD = ImplicitParamDecl::Create(C, T: KmpTaskAffinityInfoArrayTy,
4002 ParamKind: ImplicitParamKind::Other);
4003 CGF.EmitVarDecl(D: *PD);
4004 AffinitiesArray = CGF.GetAddrOfLocalVar(VD: PD);
4005 NumOfElements = CGF.Builder.CreateIntCast(V: NumOfElements, DestTy: CGF.Int32Ty,
4006 /*isSigned=*/false);
4007 } else {
4008 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
4009 EltTy: KmpTaskAffinityInfoTy,
4010 ArySize: llvm::APInt(C.getTypeSize(T: C.getSizeType()), NumAffinities), SizeExpr: nullptr,
4011 ASM: ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4012 AffinitiesArray = CGF.CreateMemTempWithoutCast(T: KmpTaskAffinityInfoArrayTy,
4013 Name: ".affs.arr.addr");
4014 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(Addr: AffinitiesArray, Index: 0);
4015 NumOfElements = llvm::ConstantInt::get(Ty: CGM.Int32Ty, V: NumAffinities,
4016 /*isSigned=*/IsSigned: false);
4017 }
4018
4019 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4020 // Fill array by elements without iterators.
4021 unsigned Pos = 0;
4022 bool HasIterator = false;
4023 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4024 if (C->getModifier()) {
4025 HasIterator = true;
4026 continue;
4027 }
4028 for (const Expr *E : C->varlist()) {
4029 llvm::Value *Addr;
4030 llvm::Value *Size;
4031 std::tie(args&: Addr, args&: Size) = getPointerAndSize(CGF, E);
4032 LValue Base =
4033 CGF.MakeAddrLValue(Addr: CGF.Builder.CreateConstGEP(Addr: AffinitiesArray, Index: Pos),
4034 T: KmpTaskAffinityInfoTy);
4035 // affs[i].base_addr = &<Affinities[i].second>;
4036 LValue BaseAddrLVal = CGF.EmitLValueForField(
4037 Base, Field: *std::next(x: KmpAffinityInfoRD->field_begin(), n: BaseAddr));
4038 CGF.EmitStoreOfScalar(value: CGF.Builder.CreatePtrToInt(V: Addr, DestTy: CGF.IntPtrTy),
4039 lvalue: BaseAddrLVal);
4040 // affs[i].len = sizeof(<Affinities[i].second>);
4041 LValue LenLVal = CGF.EmitLValueForField(
4042 Base, Field: *std::next(x: KmpAffinityInfoRD->field_begin(), n: Len));
4043 CGF.EmitStoreOfScalar(value: Size, lvalue: LenLVal);
4044 ++Pos;
4045 }
4046 }
4047 LValue PosLVal;
4048 if (HasIterator) {
4049 PosLVal = CGF.MakeAddrLValue(
4050 Addr: CGF.CreateMemTempWithoutCast(T: C.getSizeType(), Name: "affs.counter.addr"),
4051 T: C.getSizeType());
4052 CGF.EmitStoreOfScalar(value: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: Pos), lvalue: PosLVal);
4053 }
4054 // Process elements with iterators.
4055 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4056 const Expr *Modifier = C->getModifier();
4057 if (!Modifier)
4058 continue;
4059 OMPIteratorGeneratorScope IteratorScope(
4060 CGF, cast_or_null<OMPIteratorExpr>(Val: Modifier->IgnoreParenImpCasts()));
4061 for (const Expr *E : C->varlist()) {
4062 llvm::Value *Addr;
4063 llvm::Value *Size;
4064 std::tie(args&: Addr, args&: Size) = getPointerAndSize(CGF, E);
4065 llvm::Value *Idx = CGF.EmitLoadOfScalar(lvalue: PosLVal, Loc: E->getExprLoc());
4066 LValue Base =
4067 CGF.MakeAddrLValue(Addr: CGF.Builder.CreateGEP(CGF, Addr: AffinitiesArray, Index: Idx),
4068 T: KmpTaskAffinityInfoTy);
4069 // affs[i].base_addr = &<Affinities[i].second>;
4070 LValue BaseAddrLVal = CGF.EmitLValueForField(
4071 Base, Field: *std::next(x: KmpAffinityInfoRD->field_begin(), n: BaseAddr));
4072 CGF.EmitStoreOfScalar(value: CGF.Builder.CreatePtrToInt(V: Addr, DestTy: CGF.IntPtrTy),
4073 lvalue: BaseAddrLVal);
4074 // affs[i].len = sizeof(<Affinities[i].second>);
4075 LValue LenLVal = CGF.EmitLValueForField(
4076 Base, Field: *std::next(x: KmpAffinityInfoRD->field_begin(), n: Len));
4077 CGF.EmitStoreOfScalar(value: Size, lvalue: LenLVal);
4078 Idx = CGF.Builder.CreateNUWAdd(
4079 LHS: Idx, RHS: llvm::ConstantInt::get(Ty: Idx->getType(), V: 1));
4080 CGF.EmitStoreOfScalar(value: Idx, lvalue: PosLVal);
4081 }
4082 }
4083 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4084 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4085 // naffins, kmp_task_affinity_info_t *affin_list);
4086 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4087 llvm::Value *GTid = getThreadID(CGF, Loc);
4088 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4089 V: AffinitiesArray.emitRawPointer(CGF), DestTy: CGM.VoidPtrTy);
4090 // FIXME: Emit the function and ignore its result for now unless the
4091 // runtime function is properly implemented.
4092 (void)CGF.EmitRuntimeCall(
4093 callee: OMPBuilder.getOrCreateRuntimeFunction(
4094 M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_reg_task_with_affinity),
4095 args: {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4096 }
4097 llvm::Value *NewTaskNewTaskTTy =
4098 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4099 V: NewTask, DestTy: KmpTaskTWithPrivatesPtrTy);
4100 LValue Base = CGF.MakeNaturalAlignRawAddrLValue(V: NewTaskNewTaskTTy,
4101 T: KmpTaskTWithPrivatesQTy);
4102 LValue TDBase =
4103 CGF.EmitLValueForField(Base, Field: *KmpTaskTWithPrivatesQTyRD->field_begin());
4104 // Fill the data in the resulting kmp_task_t record.
4105 // Copy shareds if there are any.
4106 Address KmpTaskSharedsPtr = Address::invalid();
4107 if (!SharedsTy->castAsRecordDecl()->field_empty()) {
4108 KmpTaskSharedsPtr = Address(
4109 CGF.EmitLoadOfScalar(
4110 lvalue: CGF.EmitLValueForField(
4111 Base: TDBase,
4112 Field: *std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTShareds)),
4113 Loc),
4114 CGF.Int8Ty, CGM.getNaturalTypeAlignment(T: SharedsTy));
4115 LValue Dest = CGF.MakeAddrLValue(Addr: KmpTaskSharedsPtr, T: SharedsTy);
4116 LValue Src = CGF.MakeAddrLValue(Addr: Shareds, T: SharedsTy);
4117 CGF.EmitAggregateCopy(Dest, Src, EltTy: SharedsTy, MayOverlap: AggValueSlot::DoesNotOverlap);
4118 }
4119 // Emit initial values for private copies (if any).
4120 TaskResultTy Result;
4121 if (!Privates.empty()) {
4122 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase: Base, KmpTaskTWithPrivatesQTyRD,
4123 SharedsTy, SharedsPtrTy, Data, Privates,
4124 /*ForDup=*/false);
4125 if (isOpenMPTaskLoopDirective(DKind: D.getDirectiveKind()) &&
4126 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4127 Result.TaskDupFn = emitTaskDupFunction(
4128 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4129 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4130 /*WithLastIter=*/!Data.LastprivateVars.empty());
4131 }
4132 }
4133 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4134 enum { Priority = 0, Destructors = 1 };
4135 // Provide pointer to function with destructors for privates.
4136 auto FI = std::next(x: KmpTaskTQTyRD->field_begin(), n: Data1);
4137 const auto *KmpCmplrdataUD = (*FI)->getType()->castAsRecordDecl();
4138 assert(KmpCmplrdataUD->isUnion());
4139 if (NeedsCleanup) {
4140 llvm::Value *DestructorFn = emitDestructorsFunction(
4141 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4142 KmpTaskTWithPrivatesQTy);
4143 LValue Data1LV = CGF.EmitLValueForField(Base: TDBase, Field: *FI);
4144 LValue DestructorsLV = CGF.EmitLValueForField(
4145 Base: Data1LV, Field: *std::next(x: KmpCmplrdataUD->field_begin(), n: Destructors));
4146 CGF.EmitStoreOfScalar(value: CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4147 V: DestructorFn, DestTy: KmpRoutineEntryPtrTy),
4148 lvalue: DestructorsLV);
4149 }
4150 // Set priority.
4151 if (Data.Priority.getInt()) {
4152 LValue Data2LV = CGF.EmitLValueForField(
4153 Base: TDBase, Field: *std::next(x: KmpTaskTQTyRD->field_begin(), n: Data2));
4154 LValue PriorityLV = CGF.EmitLValueForField(
4155 Base: Data2LV, Field: *std::next(x: KmpCmplrdataUD->field_begin(), n: Priority));
4156 CGF.EmitStoreOfScalar(value: Data.Priority.getPointer(), lvalue: PriorityLV);
4157 }
4158 Result.NewTask = NewTask;
4159 Result.TaskEntry = TaskEntry;
4160 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4161 Result.TDBase = TDBase;
4162 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4163 return Result;
4164}
4165
4166/// Translates internal dependency kind into the runtime kind.
4167static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4168 RTLDependenceKindTy DepKind;
4169 switch (K) {
4170 case OMPC_DEPEND_in:
4171 DepKind = RTLDependenceKindTy::DepIn;
4172 break;
4173 // Out and InOut dependencies must use the same code.
4174 case OMPC_DEPEND_out:
4175 case OMPC_DEPEND_inout:
4176 DepKind = RTLDependenceKindTy::DepInOut;
4177 break;
4178 case OMPC_DEPEND_mutexinoutset:
4179 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4180 break;
4181 case OMPC_DEPEND_inoutset:
4182 DepKind = RTLDependenceKindTy::DepInOutSet;
4183 break;
4184 case OMPC_DEPEND_outallmemory:
4185 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4186 break;
4187 case OMPC_DEPEND_source:
4188 case OMPC_DEPEND_sink:
4189 case OMPC_DEPEND_depobj:
4190 case OMPC_DEPEND_inoutallmemory:
4191 case OMPC_DEPEND_unknown:
4192 llvm_unreachable("Unknown task dependence type");
4193 }
4194 return DepKind;
4195}
4196
4197/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4198static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4199 QualType &FlagsTy) {
4200 FlagsTy = C.getIntTypeForBitwidth(DestWidth: C.getTypeSize(T: C.BoolTy), /*Signed=*/false);
4201 if (KmpDependInfoTy.isNull()) {
4202 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord(Name: "kmp_depend_info");
4203 KmpDependInfoRD->startDefinition();
4204 addFieldToRecordDecl(C, DC: KmpDependInfoRD, FieldTy: C.getIntPtrType());
4205 addFieldToRecordDecl(C, DC: KmpDependInfoRD, FieldTy: C.getSizeType());
4206 addFieldToRecordDecl(C, DC: KmpDependInfoRD, FieldTy: FlagsTy);
4207 KmpDependInfoRD->completeDefinition();
4208 KmpDependInfoTy = C.getCanonicalTagType(TD: KmpDependInfoRD);
4209 }
4210}
4211
4212std::pair<llvm::Value *, LValue>
4213CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4214 SourceLocation Loc) {
4215 ASTContext &C = CGM.getContext();
4216 QualType FlagsTy;
4217 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4218 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4219 QualType KmpDependInfoPtrTy = C.getPointerType(T: KmpDependInfoTy);
4220 LValue Base = CGF.EmitLoadOfPointerLValue(
4221 Ptr: DepobjLVal.getAddress().withElementType(
4222 ElemTy: CGF.ConvertTypeForMem(T: KmpDependInfoPtrTy)),
4223 PtrTy: KmpDependInfoPtrTy->castAs<PointerType>());
4224 Address DepObjAddr = CGF.Builder.CreateGEP(
4225 CGF, Addr: Base.getAddress(),
4226 Index: llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: -1, /*isSigned=*/IsSigned: true));
4227 LValue NumDepsBase = CGF.MakeAddrLValue(
4228 Addr: DepObjAddr, T: KmpDependInfoTy, BaseInfo: Base.getBaseInfo(), TBAAInfo: Base.getTBAAInfo());
4229 // NumDeps = deps[i].base_addr;
4230 LValue BaseAddrLVal = CGF.EmitLValueForField(
4231 Base: NumDepsBase,
4232 Field: *std::next(x: KmpDependInfoRD->field_begin(),
4233 n: static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4234 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(lvalue: BaseAddrLVal, Loc);
4235 return std::make_pair(x&: NumDeps, y&: Base);
4236}
4237
4238static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4239 llvm::PointerUnion<unsigned *, LValue *> Pos,
4240 const OMPTaskDataTy::DependData &Data,
4241 Address DependenciesArray) {
4242 CodeGenModule &CGM = CGF.CGM;
4243 ASTContext &C = CGM.getContext();
4244 QualType FlagsTy;
4245 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4246 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4247 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(T: FlagsTy);
4248
4249 OMPIteratorGeneratorScope IteratorScope(
4250 CGF, cast_or_null<OMPIteratorExpr>(
4251 Val: Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4252 : nullptr));
4253 for (const Expr *E : Data.DepExprs) {
4254 llvm::Value *Addr;
4255 llvm::Value *Size;
4256
4257 // The expression will be a nullptr in the 'omp_all_memory' case.
4258 if (E) {
4259 std::tie(args&: Addr, args&: Size) = getPointerAndSize(CGF, E);
4260 Addr = CGF.Builder.CreatePtrToInt(V: Addr, DestTy: CGF.IntPtrTy);
4261 } else {
4262 Addr = llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: 0);
4263 Size = llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 0);
4264 }
4265 LValue Base;
4266 if (unsigned *P = dyn_cast<unsigned *>(Val&: Pos)) {
4267 Base = CGF.MakeAddrLValue(
4268 Addr: CGF.Builder.CreateConstGEP(Addr: DependenciesArray, Index: *P), T: KmpDependInfoTy);
4269 } else {
4270 assert(E && "Expected a non-null expression");
4271 LValue &PosLVal = *cast<LValue *>(Val&: Pos);
4272 llvm::Value *Idx = CGF.EmitLoadOfScalar(lvalue: PosLVal, Loc: E->getExprLoc());
4273 Base = CGF.MakeAddrLValue(
4274 Addr: CGF.Builder.CreateGEP(CGF, Addr: DependenciesArray, Index: Idx), T: KmpDependInfoTy);
4275 }
4276 // deps[i].base_addr = &<Dependencies[i].second>;
4277 LValue BaseAddrLVal = CGF.EmitLValueForField(
4278 Base,
4279 Field: *std::next(x: KmpDependInfoRD->field_begin(),
4280 n: static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4281 CGF.EmitStoreOfScalar(value: Addr, lvalue: BaseAddrLVal);
4282 // deps[i].len = sizeof(<Dependencies[i].second>);
4283 LValue LenLVal = CGF.EmitLValueForField(
4284 Base, Field: *std::next(x: KmpDependInfoRD->field_begin(),
4285 n: static_cast<unsigned int>(RTLDependInfoFields::Len)));
4286 CGF.EmitStoreOfScalar(value: Size, lvalue: LenLVal);
4287 // deps[i].flags = <Dependencies[i].first>;
4288 RTLDependenceKindTy DepKind = translateDependencyKind(K: Data.DepKind);
4289 LValue FlagsLVal = CGF.EmitLValueForField(
4290 Base,
4291 Field: *std::next(x: KmpDependInfoRD->field_begin(),
4292 n: static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4293 CGF.EmitStoreOfScalar(
4294 value: llvm::ConstantInt::get(Ty: LLVMFlagsTy, V: static_cast<unsigned int>(DepKind)),
4295 lvalue: FlagsLVal);
4296 if (unsigned *P = dyn_cast<unsigned *>(Val&: Pos)) {
4297 ++(*P);
4298 } else {
4299 LValue &PosLVal = *cast<LValue *>(Val&: Pos);
4300 llvm::Value *Idx = CGF.EmitLoadOfScalar(lvalue: PosLVal, Loc: E->getExprLoc());
4301 Idx = CGF.Builder.CreateNUWAdd(LHS: Idx,
4302 RHS: llvm::ConstantInt::get(Ty: Idx->getType(), V: 1));
4303 CGF.EmitStoreOfScalar(value: Idx, lvalue: PosLVal);
4304 }
4305 }
4306}
4307
4308SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
4309 CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4310 const OMPTaskDataTy::DependData &Data) {
4311 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4312 "Expected depobj dependency kind.");
4313 SmallVector<llvm::Value *, 4> Sizes;
4314 SmallVector<LValue, 4> SizeLVals;
4315 ASTContext &C = CGF.getContext();
4316 {
4317 OMPIteratorGeneratorScope IteratorScope(
4318 CGF, cast_or_null<OMPIteratorExpr>(
4319 Val: Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4320 : nullptr));
4321 for (const Expr *E : Data.DepExprs) {
4322 llvm::Value *NumDeps;
4323 LValue Base;
4324 LValue DepobjLVal = CGF.EmitLValue(E: E->IgnoreParenImpCasts());
4325 std::tie(args&: NumDeps, args&: Base) =
4326 getDepobjElements(CGF, DepobjLVal, Loc: E->getExprLoc());
4327 LValue NumLVal = CGF.MakeAddrLValue(
4328 Addr: CGF.CreateMemTempWithoutCast(T: C.getUIntPtrType(), Name: "depobj.size.addr"),
4329 T: C.getUIntPtrType());
4330 CGF.Builder.CreateStore(Val: llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: 0),
4331 Addr: NumLVal.getAddress());
4332 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(lvalue: NumLVal, Loc: E->getExprLoc());
4333 llvm::Value *Add = CGF.Builder.CreateNUWAdd(LHS: PrevVal, RHS: NumDeps);
4334 CGF.EmitStoreOfScalar(value: Add, lvalue: NumLVal);
4335 SizeLVals.push_back(Elt: NumLVal);
4336 }
4337 }
4338 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4339 llvm::Value *Size =
4340 CGF.EmitLoadOfScalar(lvalue: SizeLVals[I], Loc: Data.DepExprs[I]->getExprLoc());
4341 Sizes.push_back(Elt: Size);
4342 }
4343 return Sizes;
4344}
4345
4346void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
4347 QualType &KmpDependInfoTy,
4348 LValue PosLVal,
4349 const OMPTaskDataTy::DependData &Data,
4350 Address DependenciesArray) {
4351 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4352 "Expected depobj dependency kind.");
4353 llvm::Value *ElSize = CGF.getTypeSize(Ty: KmpDependInfoTy);
4354 {
4355 OMPIteratorGeneratorScope IteratorScope(
4356 CGF, cast_or_null<OMPIteratorExpr>(
4357 Val: Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4358 : nullptr));
4359 for (const Expr *E : Data.DepExprs) {
4360 llvm::Value *NumDeps;
4361 LValue Base;
4362 LValue DepobjLVal = CGF.EmitLValue(E: E->IgnoreParenImpCasts());
4363 std::tie(args&: NumDeps, args&: Base) =
4364 getDepobjElements(CGF, DepobjLVal, Loc: E->getExprLoc());
4365
4366 // memcopy dependency data.
4367 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4368 LHS: ElSize,
4369 RHS: CGF.Builder.CreateIntCast(V: NumDeps, DestTy: CGF.SizeTy, /*isSigned=*/false));
4370 llvm::Value *Pos = CGF.EmitLoadOfScalar(lvalue: PosLVal, Loc: E->getExprLoc());
4371 Address DepAddr = CGF.Builder.CreateGEP(CGF, Addr: DependenciesArray, Index: Pos);
4372 CGF.Builder.CreateMemCpy(Dest: DepAddr, Src: Base.getAddress(), Size);
4373
4374 // Increase pos.
4375 // pos += size;
4376 llvm::Value *Add = CGF.Builder.CreateNUWAdd(LHS: Pos, RHS: NumDeps);
4377 CGF.EmitStoreOfScalar(value: Add, lvalue: PosLVal);
4378 }
4379 }
4380}
4381
4382std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4383 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4384 SourceLocation Loc) {
4385 if (llvm::all_of(Range&: Dependencies, P: [](const OMPTaskDataTy::DependData &D) {
4386 return D.DepExprs.empty();
4387 }))
4388 return std::make_pair(x: nullptr, y: Address::invalid());
4389 // Process list of dependencies.
4390 ASTContext &C = CGM.getContext();
4391 Address DependenciesArray = Address::invalid();
4392 llvm::Value *NumOfElements = nullptr;
4393 unsigned NumDependencies = std::accumulate(
4394 first: Dependencies.begin(), last: Dependencies.end(), init: 0,
4395 binary_op: [](unsigned V, const OMPTaskDataTy::DependData &D) {
4396 return D.DepKind == OMPC_DEPEND_depobj
4397 ? V
4398 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4399 });
4400 QualType FlagsTy;
4401 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4402 bool HasDepobjDeps = false;
4403 bool HasRegularWithIterators = false;
4404 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: 0);
4405 llvm::Value *NumOfRegularWithIterators =
4406 llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: 0);
4407 // Calculate number of depobj dependencies and regular deps with the
4408 // iterators.
4409 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4410 if (D.DepKind == OMPC_DEPEND_depobj) {
4411 SmallVector<llvm::Value *, 4> Sizes =
4412 emitDepobjElementsSizes(CGF, KmpDependInfoTy, Data: D);
4413 for (llvm::Value *Size : Sizes) {
4414 NumOfDepobjElements =
4415 CGF.Builder.CreateNUWAdd(LHS: NumOfDepobjElements, RHS: Size);
4416 }
4417 HasDepobjDeps = true;
4418 continue;
4419 }
4420 // Include number of iterations, if any.
4421
4422 if (const auto *IE = cast_or_null<OMPIteratorExpr>(Val: D.IteratorExpr)) {
4423 llvm::Value *ClauseIteratorSpace =
4424 llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: 1);
4425 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4426 llvm::Value *Sz = CGF.EmitScalarExpr(E: IE->getHelper(I).Upper);
4427 Sz = CGF.Builder.CreateIntCast(V: Sz, DestTy: CGF.IntPtrTy, /*isSigned=*/false);
4428 ClauseIteratorSpace = CGF.Builder.CreateNUWMul(LHS: Sz, RHS: ClauseIteratorSpace);
4429 }
4430 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4431 LHS: ClauseIteratorSpace,
4432 RHS: llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: D.DepExprs.size()));
4433 NumOfRegularWithIterators =
4434 CGF.Builder.CreateNUWAdd(LHS: NumOfRegularWithIterators, RHS: NumClauseDeps);
4435 HasRegularWithIterators = true;
4436 continue;
4437 }
4438 }
4439
4440 QualType KmpDependInfoArrayTy;
4441 if (HasDepobjDeps || HasRegularWithIterators) {
4442 NumOfElements = llvm::ConstantInt::get(Ty: CGM.IntPtrTy, V: NumDependencies,
4443 /*isSigned=*/IsSigned: false);
4444 if (HasDepobjDeps) {
4445 NumOfElements =
4446 CGF.Builder.CreateNUWAdd(LHS: NumOfDepobjElements, RHS: NumOfElements);
4447 }
4448 if (HasRegularWithIterators) {
4449 NumOfElements =
4450 CGF.Builder.CreateNUWAdd(LHS: NumOfRegularWithIterators, RHS: NumOfElements);
4451 }
4452 auto *OVE = new (C) OpaqueValueExpr(
4453 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4454 VK_PRValue);
4455 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4456 RValue::get(V: NumOfElements));
4457 KmpDependInfoArrayTy =
4458 C.getVariableArrayType(EltTy: KmpDependInfoTy, NumElts: OVE, ASM: ArraySizeModifier::Normal,
4459 /*IndexTypeQuals=*/0);
4460 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4461 // Properly emit variable-sized array.
4462 auto *PD = ImplicitParamDecl::Create(C, T: KmpDependInfoArrayTy,
4463 ParamKind: ImplicitParamKind::Other);
4464 CGF.EmitVarDecl(D: *PD);
4465 DependenciesArray = CGF.GetAddrOfLocalVar(VD: PD);
4466 NumOfElements = CGF.Builder.CreateIntCast(V: NumOfElements, DestTy: CGF.Int32Ty,
4467 /*isSigned=*/false);
4468 } else {
4469 KmpDependInfoArrayTy = C.getConstantArrayType(
4470 EltTy: KmpDependInfoTy, ArySize: llvm::APInt(/*numBits=*/64, NumDependencies), SizeExpr: nullptr,
4471 ASM: ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4472 DependenciesArray =
4473 CGF.CreateMemTempWithoutCast(T: KmpDependInfoArrayTy, Name: ".dep.arr.addr");
4474 DependenciesArray = CGF.Builder.CreateConstArrayGEP(Addr: DependenciesArray, Index: 0);
4475 NumOfElements = llvm::ConstantInt::get(Ty: CGM.Int32Ty, V: NumDependencies,
4476 /*isSigned=*/IsSigned: false);
4477 }
4478 unsigned Pos = 0;
4479 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4480 if (Dep.DepKind == OMPC_DEPEND_depobj || Dep.IteratorExpr)
4481 continue;
4482 emitDependData(CGF, KmpDependInfoTy, Pos: &Pos, Data: Dep, DependenciesArray);
4483 }
4484 // Copy regular dependencies with iterators.
4485 LValue PosLVal = CGF.MakeAddrLValue(
4486 Addr: CGF.CreateMemTempWithoutCast(T: C.getSizeType(), Name: "dep.counter.addr"),
4487 T: C.getSizeType());
4488 CGF.EmitStoreOfScalar(value: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: Pos), lvalue: PosLVal);
4489 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4490 if (Dep.DepKind == OMPC_DEPEND_depobj || !Dep.IteratorExpr)
4491 continue;
4492 emitDependData(CGF, KmpDependInfoTy, Pos: &PosLVal, Data: Dep, DependenciesArray);
4493 }
4494 // Copy final depobj arrays without iterators.
4495 if (HasDepobjDeps) {
4496 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4497 if (Dep.DepKind != OMPC_DEPEND_depobj)
4498 continue;
4499 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Data: Dep, DependenciesArray);
4500 }
4501 }
4502 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4503 Addr: DependenciesArray, Ty: CGF.VoidPtrTy, ElementTy: CGF.Int8Ty);
4504 return std::make_pair(x&: NumOfElements, y&: DependenciesArray);
4505}
4506
4507Address CGOpenMPRuntime::emitDepobjDependClause(
4508 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4509 SourceLocation Loc) {
4510 if (Dependencies.DepExprs.empty())
4511 return Address::invalid();
4512 // Process list of dependencies.
4513 ASTContext &C = CGM.getContext();
4514 Address DependenciesArray = Address::invalid();
4515 unsigned NumDependencies = Dependencies.DepExprs.size();
4516 QualType FlagsTy;
4517 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4518 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4519
4520 llvm::Value *Size;
4521 // Define type kmp_depend_info[<Dependencies.size()>];
4522 // For depobj reserve one extra element to store the number of elements.
4523 // It is required to handle depobj(x) update(in) construct.
4524 // kmp_depend_info[<Dependencies.size()>] deps;
4525 llvm::Value *NumDepsVal;
4526 CharUnits Align = C.getTypeAlignInChars(T: KmpDependInfoTy);
4527 if (const auto *IE =
4528 cast_or_null<OMPIteratorExpr>(Val: Dependencies.IteratorExpr)) {
4529 NumDepsVal = llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1);
4530 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4531 llvm::Value *Sz = CGF.EmitScalarExpr(E: IE->getHelper(I).Upper);
4532 Sz = CGF.Builder.CreateIntCast(V: Sz, DestTy: CGF.SizeTy, /*isSigned=*/false);
4533 NumDepsVal = CGF.Builder.CreateNUWMul(LHS: NumDepsVal, RHS: Sz);
4534 }
4535 Size = CGF.Builder.CreateNUWAdd(LHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1),
4536 RHS: NumDepsVal);
4537 CharUnits SizeInBytes =
4538 C.getTypeSizeInChars(T: KmpDependInfoTy).alignTo(Align);
4539 llvm::Value *RecSize = CGM.getSize(numChars: SizeInBytes);
4540 Size = CGF.Builder.CreateNUWMul(LHS: Size, RHS: RecSize);
4541 NumDepsVal =
4542 CGF.Builder.CreateIntCast(V: NumDepsVal, DestTy: CGF.IntPtrTy, /*isSigned=*/false);
4543 } else {
4544 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4545 EltTy: KmpDependInfoTy, ArySize: llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4546 SizeExpr: nullptr, ASM: ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4547 CharUnits Sz = C.getTypeSizeInChars(T: KmpDependInfoArrayTy);
4548 Size = CGM.getSize(numChars: Sz.alignTo(Align));
4549 NumDepsVal = llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: NumDependencies);
4550 }
4551 // Need to allocate on the dynamic memory.
4552 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4553 // Use default allocator.
4554 llvm::Value *Allocator = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
4555 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4556
4557 llvm::Value *Addr =
4558 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
4559 M&: CGM.getModule(), FnID: OMPRTL___kmpc_alloc),
4560 args: Args, name: ".dep.arr.addr");
4561 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(T: KmpDependInfoTy);
4562 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4563 V: Addr, DestTy: CGF.Builder.getPtrTy(AddrSpace: 0));
4564 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4565 // Write number of elements in the first element of array for depobj.
4566 LValue Base = CGF.MakeAddrLValue(Addr: DependenciesArray, T: KmpDependInfoTy);
4567 // deps[i].base_addr = NumDependencies;
4568 LValue BaseAddrLVal = CGF.EmitLValueForField(
4569 Base,
4570 Field: *std::next(x: KmpDependInfoRD->field_begin(),
4571 n: static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4572 CGF.EmitStoreOfScalar(value: NumDepsVal, lvalue: BaseAddrLVal);
4573 llvm::PointerUnion<unsigned *, LValue *> Pos;
4574 unsigned Idx = 1;
4575 LValue PosLVal;
4576 if (Dependencies.IteratorExpr) {
4577 PosLVal = CGF.MakeAddrLValue(
4578 Addr: CGF.CreateMemTempWithoutCast(T: C.getSizeType(), Name: "iterator.counter.addr"),
4579 T: C.getSizeType());
4580 CGF.EmitStoreOfScalar(value: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: Idx), lvalue: PosLVal,
4581 /*IsInit=*/isInit: true);
4582 Pos = &PosLVal;
4583 } else {
4584 Pos = &Idx;
4585 }
4586 emitDependData(CGF, KmpDependInfoTy, Pos, Data: Dependencies, DependenciesArray);
4587 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4588 Addr: CGF.Builder.CreateConstGEP(Addr: DependenciesArray, Index: 1), Ty: CGF.VoidPtrTy,
4589 ElementTy: CGF.Int8Ty);
4590 return DependenciesArray;
4591}
4592
4593void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4594 SourceLocation Loc) {
4595 ASTContext &C = CGM.getContext();
4596 QualType FlagsTy;
4597 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4598 LValue Base = CGF.EmitLoadOfPointerLValue(Ptr: DepobjLVal.getAddress(),
4599 PtrTy: C.VoidPtrTy.castAs<PointerType>());
4600 QualType KmpDependInfoPtrTy = C.getPointerType(T: KmpDependInfoTy);
4601 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4602 Addr: Base.getAddress(), Ty: CGF.ConvertTypeForMem(T: KmpDependInfoPtrTy),
4603 ElementTy: CGF.ConvertTypeForMem(T: KmpDependInfoTy));
4604 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4605 Ty: Addr.getElementType(), Ptr: Addr.emitRawPointer(CGF),
4606 IdxList: llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: -1, /*isSigned=*/IsSigned: true));
4607 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(V: DepObjAddr,
4608 DestTy: CGF.VoidPtrTy);
4609 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4610 // Use default allocator.
4611 llvm::Value *Allocator = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
4612 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4613
4614 // _kmpc_free(gtid, addr, nullptr);
4615 (void)CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
4616 M&: CGM.getModule(), FnID: OMPRTL___kmpc_free),
4617 args: Args);
4618}
4619
4620void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4621 OpenMPDependClauseKind NewDepKind,
4622 SourceLocation Loc) {
4623 ASTContext &C = CGM.getContext();
4624 QualType FlagsTy;
4625 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4626 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4627 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(T: FlagsTy);
4628 llvm::Value *NumDeps;
4629 LValue Base;
4630 std::tie(args&: NumDeps, args&: Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4631
4632 Address Begin = Base.getAddress();
4633 // Cast from pointer to array type to pointer to single element.
4634 llvm::Value *End = CGF.Builder.CreateGEP(Ty: Begin.getElementType(),
4635 Ptr: Begin.emitRawPointer(CGF), IdxList: NumDeps);
4636 // The basic structure here is a while-do loop.
4637 llvm::BasicBlock *BodyBB = CGF.createBasicBlock(name: "omp.body");
4638 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(name: "omp.done");
4639 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4640 CGF.EmitBlock(BB: BodyBB);
4641 llvm::PHINode *ElementPHI =
4642 CGF.Builder.CreatePHI(Ty: Begin.getType(), NumReservedValues: 2, Name: "omp.elementPast");
4643 ElementPHI->addIncoming(V: Begin.emitRawPointer(CGF), BB: EntryBB);
4644 Begin = Begin.withPointer(NewPointer: ElementPHI, IsKnownNonNull: KnownNonNull);
4645 Base = CGF.MakeAddrLValue(Addr: Begin, T: KmpDependInfoTy, BaseInfo: Base.getBaseInfo(),
4646 TBAAInfo: Base.getTBAAInfo());
4647 // deps[i].flags = NewDepKind;
4648 RTLDependenceKindTy DepKind = translateDependencyKind(K: NewDepKind);
4649 LValue FlagsLVal = CGF.EmitLValueForField(
4650 Base, Field: *std::next(x: KmpDependInfoRD->field_begin(),
4651 n: static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4652 CGF.EmitStoreOfScalar(
4653 value: llvm::ConstantInt::get(Ty: LLVMFlagsTy, V: static_cast<unsigned int>(DepKind)),
4654 lvalue: FlagsLVal);
4655
4656 // Shift the address forward by one element.
4657 llvm::Value *ElementNext =
4658 CGF.Builder.CreateConstGEP(Addr: Begin, /*Index=*/1, Name: "omp.elementNext")
4659 .emitRawPointer(CGF);
4660 ElementPHI->addIncoming(V: ElementNext, BB: CGF.Builder.GetInsertBlock());
4661 llvm::Value *IsEmpty =
4662 CGF.Builder.CreateICmpEQ(LHS: ElementNext, RHS: End, Name: "omp.isempty");
4663 CGF.Builder.CreateCondBr(Cond: IsEmpty, True: DoneBB, False: BodyBB);
4664 // Done.
4665 CGF.EmitBlock(BB: DoneBB, /*IsFinished=*/true);
4666}
4667
4668void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4669 const OMPExecutableDirective &D,
4670 llvm::Function *TaskFunction,
4671 QualType SharedsTy, Address Shareds,
4672 const Expr *IfCond,
4673 const OMPTaskDataTy &Data) {
4674 if (!CGF.HaveInsertPoint())
4675 return;
4676
4677 TaskResultTy Result =
4678 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4679 llvm::Value *NewTask = Result.NewTask;
4680 llvm::Function *TaskEntry = Result.TaskEntry;
4681 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4682 LValue TDBase = Result.TDBase;
4683 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4684 // Process list of dependences.
4685 Address DependenciesArray = Address::invalid();
4686 llvm::Value *NumOfElements;
4687 std::tie(args&: NumOfElements, args&: DependenciesArray) =
4688 emitDependClause(CGF, Dependencies: Data.Dependences, Loc);
4689
4690 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4691 // libcall.
4692 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4693 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4694 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4695 // list is not empty
4696 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4697 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4698 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4699 llvm::Value *DepTaskArgs[7];
4700 if (!Data.Dependences.empty()) {
4701 DepTaskArgs[0] = UpLoc;
4702 DepTaskArgs[1] = ThreadID;
4703 DepTaskArgs[2] = NewTask;
4704 DepTaskArgs[3] = NumOfElements;
4705 DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
4706 DepTaskArgs[5] = CGF.Builder.getInt32(C: 0);
4707 DepTaskArgs[6] = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
4708 }
4709 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4710 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4711 if (!Data.Tied) {
4712 auto PartIdFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTPartId);
4713 LValue PartIdLVal = CGF.EmitLValueForField(Base: TDBase, Field: *PartIdFI);
4714 CGF.EmitStoreOfScalar(value: CGF.Builder.getInt32(C: 0), lvalue: PartIdLVal);
4715 }
4716 if (!Data.Dependences.empty()) {
4717 CGF.EmitRuntimeCall(
4718 callee: OMPBuilder.getOrCreateRuntimeFunction(
4719 M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_task_with_deps),
4720 args: DepTaskArgs);
4721 } else {
4722 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
4723 M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_task),
4724 args: TaskArgs);
4725 }
4726 // Check if parent region is untied and build return for untied task;
4727 if (auto *Region =
4728 dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo))
4729 Region->emitUntiedSwitch(CGF);
4730 };
4731
4732 llvm::Value *DepWaitTaskArgs[7];
4733 if (!Data.Dependences.empty()) {
4734 DepWaitTaskArgs[0] = UpLoc;
4735 DepWaitTaskArgs[1] = ThreadID;
4736 DepWaitTaskArgs[2] = NumOfElements;
4737 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
4738 DepWaitTaskArgs[4] = CGF.Builder.getInt32(C: 0);
4739 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
4740 DepWaitTaskArgs[6] =
4741 llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: Data.HasNowaitClause);
4742 }
4743 auto &M = CGM.getModule();
4744 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4745 TaskEntry, &Data, &DepWaitTaskArgs,
4746 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4747 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4748 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4749 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4750 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4751 // is specified.
4752 if (!Data.Dependences.empty())
4753 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
4754 M, FnID: OMPRTL___kmpc_omp_taskwait_deps_51),
4755 args: DepWaitTaskArgs);
4756 // Call proxy_task_entry(gtid, new_task);
4757 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4758 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4759 Action.Enter(CGF);
4760 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4761 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, OutlinedFn: TaskEntry,
4762 Args: OutlinedFnArgs);
4763 };
4764
4765 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4766 // kmp_task_t *new_task);
4767 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4768 // kmp_task_t *new_task);
4769 RegionCodeGenTy RCG(CodeGen);
4770 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4771 M, FnID: OMPRTL___kmpc_omp_task_begin_if0),
4772 TaskArgs,
4773 OMPBuilder.getOrCreateRuntimeFunction(
4774 M, FnID: OMPRTL___kmpc_omp_task_complete_if0),
4775 TaskArgs);
4776 RCG.setAction(Action);
4777 RCG(CGF);
4778 };
4779
4780 if (IfCond) {
4781 emitIfClause(CGF, Cond: IfCond, ThenGen: ThenCodeGen, ElseGen: ElseCodeGen);
4782 } else {
4783 RegionCodeGenTy ThenRCG(ThenCodeGen);
4784 ThenRCG(CGF);
4785 }
4786}
4787
4788void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4789 const OMPLoopDirective &D,
4790 llvm::Function *TaskFunction,
4791 QualType SharedsTy, Address Shareds,
4792 const Expr *IfCond,
4793 const OMPTaskDataTy &Data) {
4794 if (!CGF.HaveInsertPoint())
4795 return;
4796 TaskResultTy Result =
4797 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4798 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4799 // libcall.
4800 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4801 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4802 // sched, kmp_uint64 grainsize, void *task_dup);
4803 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4804 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4805 llvm::Value *IfVal;
4806 if (IfCond) {
4807 IfVal = CGF.Builder.CreateIntCast(V: CGF.EvaluateExprAsBool(E: IfCond), DestTy: CGF.IntTy,
4808 /*isSigned=*/true);
4809 } else {
4810 IfVal = llvm::ConstantInt::getSigned(Ty: CGF.IntTy, /*V=*/1);
4811 }
4812
4813 LValue LBLVal = CGF.EmitLValueForField(
4814 Base: Result.TDBase,
4815 Field: *std::next(x: Result.KmpTaskTQTyRD->field_begin(), n: KmpTaskTLowerBound));
4816 const auto *LBVar =
4817 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D.getLowerBoundVariable())->getDecl());
4818 CGF.EmitAnyExprToMem(E: LBVar->getInit(), Location: LBLVal.getAddress(), Quals: LBLVal.getQuals(),
4819 /*IsInitializer=*/true);
4820 LValue UBLVal = CGF.EmitLValueForField(
4821 Base: Result.TDBase,
4822 Field: *std::next(x: Result.KmpTaskTQTyRD->field_begin(), n: KmpTaskTUpperBound));
4823 const auto *UBVar =
4824 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D.getUpperBoundVariable())->getDecl());
4825 CGF.EmitAnyExprToMem(E: UBVar->getInit(), Location: UBLVal.getAddress(), Quals: UBLVal.getQuals(),
4826 /*IsInitializer=*/true);
4827 LValue StLVal = CGF.EmitLValueForField(
4828 Base: Result.TDBase,
4829 Field: *std::next(x: Result.KmpTaskTQTyRD->field_begin(), n: KmpTaskTStride));
4830 const auto *StVar =
4831 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D.getStrideVariable())->getDecl());
4832 CGF.EmitAnyExprToMem(E: StVar->getInit(), Location: StLVal.getAddress(), Quals: StLVal.getQuals(),
4833 /*IsInitializer=*/true);
4834 // Store reductions address.
4835 LValue RedLVal = CGF.EmitLValueForField(
4836 Base: Result.TDBase,
4837 Field: *std::next(x: Result.KmpTaskTQTyRD->field_begin(), n: KmpTaskTReductions));
4838 if (Data.Reductions) {
4839 CGF.EmitStoreOfScalar(value: Data.Reductions, lvalue: RedLVal);
4840 } else {
4841 CGF.EmitNullInitialization(DestPtr: RedLVal.getAddress(),
4842 Ty: CGF.getContext().VoidPtrTy);
4843 }
4844 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4845 llvm::SmallVector<llvm::Value *, 12> TaskArgs{
4846 UpLoc,
4847 ThreadID,
4848 Result.NewTask,
4849 IfVal,
4850 LBLVal.getPointer(CGF),
4851 UBLVal.getPointer(CGF),
4852 CGF.EmitLoadOfScalar(lvalue: StLVal, Loc),
4853 llvm::ConstantInt::getSigned(
4854 Ty: CGF.IntTy, V: 1), // Always 1 because taskgroup emitted by the compiler
4855 llvm::ConstantInt::getSigned(
4856 Ty: CGF.IntTy, V: Data.Schedule.getPointer()
4857 ? Data.Schedule.getInt() ? NumTasks : Grainsize
4858 : NoSchedule),
4859 Data.Schedule.getPointer()
4860 ? CGF.Builder.CreateIntCast(V: Data.Schedule.getPointer(), DestTy: CGF.Int64Ty,
4861 /*isSigned=*/false)
4862 : llvm::ConstantInt::get(Ty: CGF.Int64Ty, /*V=*/0)};
4863 if (Data.HasModifier)
4864 TaskArgs.push_back(Elt: llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: 1));
4865
4866 TaskArgs.push_back(Elt: Result.TaskDupFn
4867 ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4868 V: Result.TaskDupFn, DestTy: CGF.VoidPtrTy)
4869 : llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy));
4870 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
4871 M&: CGM.getModule(), FnID: Data.HasModifier
4872 ? OMPRTL___kmpc_taskloop_5
4873 : OMPRTL___kmpc_taskloop),
4874 args: TaskArgs);
4875}
4876
4877/// Emit reduction operation for each element of array (required for
4878/// array sections) LHS op = RHS.
4879/// \param Type Type of array.
4880/// \param LHSVar Variable on the left side of the reduction operation
4881/// (references element of array in original variable).
4882/// \param RHSVar Variable on the right side of the reduction operation
4883/// (references element of array in original variable).
4884/// \param RedOpGen Generator of reduction operation with use of LHSVar and
4885/// RHSVar.
4886static void EmitOMPAggregateReduction(
4887 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4888 const VarDecl *RHSVar,
4889 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4890 const Expr *, const Expr *)> &RedOpGen,
4891 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4892 const Expr *UpExpr = nullptr) {
4893 // Perform element-by-element initialization.
4894 QualType ElementTy;
4895 Address LHSAddr = CGF.GetAddrOfLocalVar(VD: LHSVar);
4896 Address RHSAddr = CGF.GetAddrOfLocalVar(VD: RHSVar);
4897
4898 // Drill down to the base element type on both arrays.
4899 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4900 llvm::Value *NumElements = CGF.emitArrayLength(arrayType: ArrayTy, baseType&: ElementTy, addr&: LHSAddr);
4901
4902 llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
4903 llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
4904 // Cast from pointer to array type to pointer to single element.
4905 llvm::Value *LHSEnd =
4906 CGF.Builder.CreateGEP(Ty: LHSAddr.getElementType(), Ptr: LHSBegin, IdxList: NumElements);
4907 // The basic structure here is a while-do loop.
4908 llvm::BasicBlock *BodyBB = CGF.createBasicBlock(name: "omp.arraycpy.body");
4909 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(name: "omp.arraycpy.done");
4910 llvm::Value *IsEmpty =
4911 CGF.Builder.CreateICmpEQ(LHS: LHSBegin, RHS: LHSEnd, Name: "omp.arraycpy.isempty");
4912 CGF.Builder.CreateCondBr(Cond: IsEmpty, True: DoneBB, False: BodyBB);
4913
4914 // Enter the loop body, making that address the current address.
4915 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4916 CGF.EmitBlock(BB: BodyBB);
4917
4918 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(T: ElementTy);
4919
4920 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4921 Ty: RHSBegin->getType(), NumReservedValues: 2, Name: "omp.arraycpy.srcElementPast");
4922 RHSElementPHI->addIncoming(V: RHSBegin, BB: EntryBB);
4923 Address RHSElementCurrent(
4924 RHSElementPHI, RHSAddr.getElementType(),
4925 RHSAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize));
4926
4927 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4928 Ty: LHSBegin->getType(), NumReservedValues: 2, Name: "omp.arraycpy.destElementPast");
4929 LHSElementPHI->addIncoming(V: LHSBegin, BB: EntryBB);
4930 Address LHSElementCurrent(
4931 LHSElementPHI, LHSAddr.getElementType(),
4932 LHSAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize));
4933
4934 // Emit copy.
4935 CodeGenFunction::OMPPrivateScope Scope(CGF);
4936 Scope.addPrivate(LocalVD: LHSVar, Addr: LHSElementCurrent);
4937 Scope.addPrivate(LocalVD: RHSVar, Addr: RHSElementCurrent);
4938 Scope.Privatize();
4939 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4940 Scope.ForceCleanup();
4941
4942 // Shift the address forward by one element.
4943 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4944 Ty: LHSAddr.getElementType(), Ptr: LHSElementPHI, /*Idx0=*/1,
4945 Name: "omp.arraycpy.dest.element");
4946 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4947 Ty: RHSAddr.getElementType(), Ptr: RHSElementPHI, /*Idx0=*/1,
4948 Name: "omp.arraycpy.src.element");
4949 // Check whether we've reached the end.
4950 llvm::Value *Done =
4951 CGF.Builder.CreateICmpEQ(LHS: LHSElementNext, RHS: LHSEnd, Name: "omp.arraycpy.done");
4952 CGF.Builder.CreateCondBr(Cond: Done, True: DoneBB, False: BodyBB);
4953 LHSElementPHI->addIncoming(V: LHSElementNext, BB: CGF.Builder.GetInsertBlock());
4954 RHSElementPHI->addIncoming(V: RHSElementNext, BB: CGF.Builder.GetInsertBlock());
4955
4956 // Done.
4957 CGF.EmitBlock(BB: DoneBB, /*IsFinished=*/true);
4958}
4959
4960/// Emit reduction combiner. If the combiner is a simple expression emit it as
4961/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4962/// UDR combiner function.
4963static void emitReductionCombiner(CodeGenFunction &CGF,
4964 const Expr *ReductionOp) {
4965 if (const auto *CE = dyn_cast<CallExpr>(Val: ReductionOp))
4966 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(Val: CE->getCallee()))
4967 if (const auto *DRE =
4968 dyn_cast<DeclRefExpr>(Val: OVE->getSourceExpr()->IgnoreImpCasts()))
4969 if (const auto *DRD =
4970 dyn_cast<OMPDeclareReductionDecl>(Val: DRE->getDecl())) {
4971 std::pair<llvm::Function *, llvm::Function *> Reduction =
4972 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(D: DRD);
4973 RValue Func = RValue::get(V: Reduction.first);
4974 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4975 CGF.EmitIgnoredExpr(E: ReductionOp);
4976 return;
4977 }
4978 CGF.EmitIgnoredExpr(E: ReductionOp);
4979}
4980
4981llvm::Function *CGOpenMPRuntime::emitReductionFunction(
4982 StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
4983 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
4984 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
4985 ASTContext &C = CGM.getContext();
4986
4987 // void reduction_func(void *LHSArg, void *RHSArg);
4988 auto *LHSArg =
4989 ImplicitParamDecl::Create(C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
4990 T: C.VoidPtrTy, ParamKind: ImplicitParamKind::Other);
4991 auto *RHSArg =
4992 ImplicitParamDecl::Create(C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
4993 T: C.VoidPtrTy, ParamKind: ImplicitParamKind::Other);
4994 FunctionArgList Args{LHSArg, RHSArg};
4995 const auto &CGFI =
4996 CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
4997 std::string Name = getReductionFuncName(Name: ReducerName);
4998 auto *Fn = llvm::Function::Create(Ty: CGM.getTypes().GetFunctionType(Info: CGFI),
4999 Linkage: llvm::GlobalValue::InternalLinkage, N: Name,
5000 M: &CGM.getModule());
5001 CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: CGFI);
5002 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
5003 Fn->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
5004 Fn->setDoesNotRecurse();
5005 CodeGenFunction CGF(CGM);
5006 CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo: CGFI, Args, Loc, StartLoc: Loc);
5007
5008 // Dst = (void*[n])(LHSArg);
5009 // Src = (void*[n])(RHSArg);
5010 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5011 V: CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(VD: LHSArg)),
5012 DestTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
5013 ArgsElemType, CGF.getPointerAlign());
5014 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5015 V: CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(VD: RHSArg)),
5016 DestTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
5017 ArgsElemType, CGF.getPointerAlign());
5018
5019 // ...
5020 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5021 // ...
5022 CodeGenFunction::OMPPrivateScope Scope(CGF);
5023 const auto *IPriv = Privates.begin();
5024 unsigned Idx = 0;
5025 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5026 const auto *RHSVar =
5027 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSExprs[I])->getDecl());
5028 Scope.addPrivate(LocalVD: RHSVar, Addr: emitAddrOfVarFromArray(CGF, Array: RHS, Index: Idx, Var: RHSVar));
5029 const auto *LHSVar =
5030 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSExprs[I])->getDecl());
5031 Scope.addPrivate(LocalVD: LHSVar, Addr: emitAddrOfVarFromArray(CGF, Array: LHS, Index: Idx, Var: LHSVar));
5032 QualType PrivTy = (*IPriv)->getType();
5033 if (PrivTy->isVariablyModifiedType()) {
5034 // Get array size and emit VLA type.
5035 ++Idx;
5036 Address Elem = CGF.Builder.CreateConstArrayGEP(Addr: LHS, Index: Idx);
5037 llvm::Value *Ptr = CGF.Builder.CreateLoad(Addr: Elem);
5038 const VariableArrayType *VLA =
5039 CGF.getContext().getAsVariableArrayType(T: PrivTy);
5040 const auto *OVE = cast<OpaqueValueExpr>(Val: VLA->getSizeExpr());
5041 CodeGenFunction::OpaqueValueMapping OpaqueMap(
5042 CGF, OVE, RValue::get(V: CGF.Builder.CreatePtrToInt(V: Ptr, DestTy: CGF.SizeTy)));
5043 CGF.EmitVariablyModifiedType(Ty: PrivTy);
5044 }
5045 }
5046 Scope.Privatize();
5047 IPriv = Privates.begin();
5048 const auto *ILHS = LHSExprs.begin();
5049 const auto *IRHS = RHSExprs.begin();
5050 for (const Expr *E : ReductionOps) {
5051 if ((*IPriv)->getType()->isArrayType()) {
5052 // Emit reduction for array section.
5053 const auto *LHSVar = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl());
5054 const auto *RHSVar = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl());
5055 EmitOMPAggregateReduction(
5056 CGF, Type: (*IPriv)->getType(), LHSVar, RHSVar,
5057 RedOpGen: [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5058 emitReductionCombiner(CGF, ReductionOp: E);
5059 });
5060 } else {
5061 // Emit reduction for array subscript or single variable.
5062 emitReductionCombiner(CGF, ReductionOp: E);
5063 }
5064 ++IPriv;
5065 ++ILHS;
5066 ++IRHS;
5067 }
5068 Scope.ForceCleanup();
5069 CGF.FinishFunction();
5070 return Fn;
5071}
5072
5073void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5074 const Expr *ReductionOp,
5075 const Expr *PrivateRef,
5076 const DeclRefExpr *LHS,
5077 const DeclRefExpr *RHS) {
5078 if (PrivateRef->getType()->isArrayType()) {
5079 // Emit reduction for array section.
5080 const auto *LHSVar = cast<VarDecl>(Val: LHS->getDecl());
5081 const auto *RHSVar = cast<VarDecl>(Val: RHS->getDecl());
5082 EmitOMPAggregateReduction(
5083 CGF, Type: PrivateRef->getType(), LHSVar, RHSVar,
5084 RedOpGen: [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5085 emitReductionCombiner(CGF, ReductionOp);
5086 });
5087 } else {
5088 // Emit reduction for array subscript or single variable.
5089 emitReductionCombiner(CGF, ReductionOp);
5090 }
5091}
5092
5093static std::string generateUniqueName(CodeGenModule &CGM,
5094 llvm::StringRef Prefix, const Expr *Ref);
5095
5096void CGOpenMPRuntime::emitPrivateReduction(
5097 CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates,
5098 const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps) {
5099
5100 // Create a shared global variable (__shared_reduction_var) to accumulate the
5101 // final result.
5102 //
5103 // Call __kmpc_barrier to synchronize threads before initialization.
5104 //
5105 // The master thread (thread_id == 0) initializes __shared_reduction_var
5106 // with the identity value or initializer.
5107 //
5108 // Call __kmpc_barrier to synchronize before combining.
5109 // For each i:
5110 // - Thread enters critical section.
5111 // - Reads its private value from LHSExprs[i].
5112 // - Updates __shared_reduction_var[i] = RedOp_i(__shared_reduction_var[i],
5113 // Privates[i]).
5114 // - Exits critical section.
5115 //
5116 // Call __kmpc_barrier after combining.
5117 //
5118 // Each thread copies __shared_reduction_var[i] back to RHSExprs[i].
5119 //
5120 // Final __kmpc_barrier to synchronize after broadcasting
5121 QualType PrivateType = Privates->getType();
5122 llvm::Type *LLVMType = CGF.ConvertTypeForMem(T: PrivateType);
5123
5124 const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOp: ReductionOps);
5125 std::string ReductionVarNameStr;
5126 if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: Privates->IgnoreParenCasts()))
5127 ReductionVarNameStr =
5128 generateUniqueName(CGM, Prefix: DRE->getDecl()->getNameAsString(), Ref: Privates);
5129 else
5130 ReductionVarNameStr = "unnamed_priv_var";
5131
5132 // Create an internal shared variable
5133 std::string SharedName =
5134 CGM.getOpenMPRuntime().getName(Parts: {"internal_pivate_", ReductionVarNameStr});
5135 llvm::GlobalVariable *SharedVar = OMPBuilder.getOrCreateInternalVariable(
5136 Ty: LLVMType, Name: ".omp.reduction." + SharedName);
5137
5138 SharedVar->setAlignment(
5139 llvm::MaybeAlign(CGF.getContext().getTypeAlign(T: PrivateType) / 8));
5140
5141 Address SharedResult =
5142 CGF.MakeNaturalAlignRawAddrLValue(V: SharedVar, T: PrivateType).getAddress();
5143
5144 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5145 llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, Flags: OMP_ATOMIC_REDUCE);
5146 llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
5147
5148 llvm::BasicBlock *InitBB = CGF.createBasicBlock(name: "init");
5149 llvm::BasicBlock *InitEndBB = CGF.createBasicBlock(name: "init.end");
5150
5151 llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
5152 LHS: ThreadId, RHS: llvm::ConstantInt::get(Ty: ThreadId->getType(), V: 0));
5153 CGF.Builder.CreateCondBr(Cond: IsWorker, True: InitBB, False: InitEndBB);
5154
5155 CGF.EmitBlock(BB: InitBB);
5156
5157 auto EmitSharedInit = [&]() {
5158 if (UDR) { // Check if it's a User-Defined Reduction
5159 if (const Expr *UDRInitExpr = UDR->getInitializer()) {
5160 std::pair<llvm::Function *, llvm::Function *> FnPair =
5161 getUserDefinedReduction(D: UDR);
5162 llvm::Function *InitializerFn = FnPair.second;
5163 if (InitializerFn) {
5164 if (const auto *CE =
5165 dyn_cast<CallExpr>(Val: UDRInitExpr->IgnoreParenImpCasts())) {
5166 const auto *OutDRE = cast<DeclRefExpr>(
5167 Val: cast<UnaryOperator>(Val: CE->getArg(Arg: 0)->IgnoreParenImpCasts())
5168 ->getSubExpr());
5169 const VarDecl *OutVD = cast<VarDecl>(Val: OutDRE->getDecl());
5170
5171 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5172 LocalScope.addPrivate(LocalVD: OutVD, Addr: SharedResult);
5173
5174 (void)LocalScope.Privatize();
5175 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(
5176 Val: CE->getCallee()->IgnoreParenImpCasts())) {
5177 CodeGenFunction::OpaqueValueMapping OpaqueMap(
5178 CGF, OVE, RValue::get(V: InitializerFn));
5179 CGF.EmitIgnoredExpr(E: CE);
5180 } else {
5181 CGF.EmitAnyExprToMem(E: UDRInitExpr, Location: SharedResult,
5182 Quals: PrivateType.getQualifiers(),
5183 /*IsInitializer=*/true);
5184 }
5185 } else {
5186 CGF.EmitAnyExprToMem(E: UDRInitExpr, Location: SharedResult,
5187 Quals: PrivateType.getQualifiers(),
5188 /*IsInitializer=*/true);
5189 }
5190 } else {
5191 CGF.EmitAnyExprToMem(E: UDRInitExpr, Location: SharedResult,
5192 Quals: PrivateType.getQualifiers(),
5193 /*IsInitializer=*/true);
5194 }
5195 } else {
5196 // EmitNullInitialization handles default construction for C++ classes
5197 // and zeroing for scalars, which is a reasonable default.
5198 CGF.EmitNullInitialization(DestPtr: SharedResult, Ty: PrivateType);
5199 }
5200 return; // UDR initialization handled
5201 }
5202 if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: Privates)) {
5203 if (const auto *VD = dyn_cast<VarDecl>(Val: DRE->getDecl())) {
5204 if (const Expr *InitExpr = VD->getInit()) {
5205 CGF.EmitAnyExprToMem(E: InitExpr, Location: SharedResult,
5206 Quals: PrivateType.getQualifiers(), IsInitializer: true);
5207 return;
5208 }
5209 }
5210 }
5211 CGF.EmitNullInitialization(DestPtr: SharedResult, Ty: PrivateType);
5212 };
5213 EmitSharedInit();
5214 CGF.Builder.CreateBr(Dest: InitEndBB);
5215 CGF.EmitBlock(BB: InitEndBB);
5216
5217 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
5218 M&: CGM.getModule(), FnID: OMPRTL___kmpc_barrier),
5219 args: BarrierArgs);
5220
5221 const Expr *ReductionOp = ReductionOps;
5222 const OMPDeclareReductionDecl *CurrentUDR = getReductionInit(ReductionOp);
5223 LValue SharedLV = CGF.MakeAddrLValue(Addr: SharedResult, T: PrivateType);
5224 LValue LHSLV = CGF.EmitLValue(E: Privates);
5225
5226 auto EmitCriticalReduction = [&](auto ReductionGen) {
5227 std::string CriticalName = getName(Parts: {"reduction_critical"});
5228 emitCriticalRegion(CGF, CriticalName, CriticalOpGen: ReductionGen, Loc);
5229 };
5230
5231 if (CurrentUDR) {
5232 // Handle user-defined reduction.
5233 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5234 Action.Enter(CGF);
5235 std::pair<llvm::Function *, llvm::Function *> FnPair =
5236 getUserDefinedReduction(D: CurrentUDR);
5237 if (FnPair.first) {
5238 if (const auto *CE = dyn_cast<CallExpr>(Val: ReductionOp)) {
5239 const auto *OutDRE = cast<DeclRefExpr>(
5240 Val: cast<UnaryOperator>(Val: CE->getArg(Arg: 0)->IgnoreParenImpCasts())
5241 ->getSubExpr());
5242 const auto *InDRE = cast<DeclRefExpr>(
5243 Val: cast<UnaryOperator>(Val: CE->getArg(Arg: 1)->IgnoreParenImpCasts())
5244 ->getSubExpr());
5245 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5246 LocalScope.addPrivate(LocalVD: cast<VarDecl>(Val: OutDRE->getDecl()),
5247 Addr: SharedLV.getAddress());
5248 LocalScope.addPrivate(LocalVD: cast<VarDecl>(Val: InDRE->getDecl()),
5249 Addr: LHSLV.getAddress());
5250 (void)LocalScope.Privatize();
5251 emitReductionCombiner(CGF, ReductionOp);
5252 }
5253 }
5254 };
5255 EmitCriticalReduction(ReductionGen);
5256 } else {
5257 // Handle built-in reduction operations.
5258#ifndef NDEBUG
5259 const Expr *ReductionClauseExpr = ReductionOp->IgnoreParenCasts();
5260 if (const auto *Cleanup = dyn_cast<ExprWithCleanups>(ReductionClauseExpr))
5261 ReductionClauseExpr = Cleanup->getSubExpr()->IgnoreParenCasts();
5262
5263 const Expr *AssignRHS = nullptr;
5264 if (const auto *BinOp = dyn_cast<BinaryOperator>(ReductionClauseExpr)) {
5265 if (BinOp->getOpcode() == BO_Assign)
5266 AssignRHS = BinOp->getRHS();
5267 } else if (const auto *OpCall =
5268 dyn_cast<CXXOperatorCallExpr>(ReductionClauseExpr)) {
5269 if (OpCall->getOperator() == OO_Equal)
5270 AssignRHS = OpCall->getArg(1);
5271 }
5272
5273 assert(AssignRHS &&
5274 "Private Variable Reduction : Invalid ReductionOp expression");
5275#endif
5276
5277 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5278 Action.Enter(CGF);
5279 const auto *OmpOutDRE =
5280 dyn_cast<DeclRefExpr>(Val: LHSExprs->IgnoreParenImpCasts());
5281 const auto *OmpInDRE =
5282 dyn_cast<DeclRefExpr>(Val: RHSExprs->IgnoreParenImpCasts());
5283 assert(
5284 OmpOutDRE && OmpInDRE &&
5285 "Private Variable Reduction : LHSExpr/RHSExpr must be DeclRefExprs");
5286 const VarDecl *OmpOutVD = cast<VarDecl>(Val: OmpOutDRE->getDecl());
5287 const VarDecl *OmpInVD = cast<VarDecl>(Val: OmpInDRE->getDecl());
5288 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5289 LocalScope.addPrivate(LocalVD: OmpOutVD, Addr: SharedLV.getAddress());
5290 LocalScope.addPrivate(LocalVD: OmpInVD, Addr: LHSLV.getAddress());
5291 (void)LocalScope.Privatize();
5292 // Emit the actual reduction operation
5293 CGF.EmitIgnoredExpr(E: ReductionOp);
5294 };
5295 EmitCriticalReduction(ReductionGen);
5296 }
5297
5298 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
5299 M&: CGM.getModule(), FnID: OMPRTL___kmpc_barrier),
5300 args: BarrierArgs);
5301
5302 // Broadcast final result
5303 bool IsAggregate = PrivateType->isAggregateType();
5304 LValue SharedLV1 = CGF.MakeAddrLValue(Addr: SharedResult, T: PrivateType);
5305 llvm::Value *FinalResultVal = nullptr;
5306 Address FinalResultAddr = Address::invalid();
5307
5308 if (IsAggregate)
5309 FinalResultAddr = SharedResult;
5310 else
5311 FinalResultVal = CGF.EmitLoadOfScalar(lvalue: SharedLV1, Loc);
5312
5313 LValue TargetLHSLV = CGF.EmitLValue(E: RHSExprs);
5314 if (IsAggregate) {
5315 CGF.EmitAggregateCopy(Dest: TargetLHSLV,
5316 Src: CGF.MakeAddrLValue(Addr: FinalResultAddr, T: PrivateType),
5317 EltTy: PrivateType, MayOverlap: AggValueSlot::DoesNotOverlap, isVolatile: false);
5318 } else {
5319 CGF.EmitStoreOfScalar(value: FinalResultVal, lvalue: TargetLHSLV);
5320 }
5321 // Final synchronization barrier
5322 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
5323 M&: CGM.getModule(), FnID: OMPRTL___kmpc_barrier),
5324 args: BarrierArgs);
5325
5326 // Combiner with original list item
5327 auto OriginalListCombiner = [&](CodeGenFunction &CGF,
5328 PrePostActionTy &Action) {
5329 Action.Enter(CGF);
5330 emitSingleReductionCombiner(CGF, ReductionOp: ReductionOps, PrivateRef: Privates,
5331 LHS: cast<DeclRefExpr>(Val: LHSExprs),
5332 RHS: cast<DeclRefExpr>(Val: RHSExprs));
5333 };
5334 EmitCriticalReduction(OriginalListCombiner);
5335}
5336
5337void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5338 ArrayRef<const Expr *> OrgPrivates,
5339 ArrayRef<const Expr *> OrgLHSExprs,
5340 ArrayRef<const Expr *> OrgRHSExprs,
5341 ArrayRef<const Expr *> OrgReductionOps,
5342 ReductionOptionsTy Options) {
5343 if (!CGF.HaveInsertPoint())
5344 return;
5345
5346 bool WithNowait = Options.WithNowait;
5347 bool SimpleReduction = Options.SimpleReduction;
5348
5349 // Next code should be emitted for reduction:
5350 //
5351 // static kmp_critical_name lock = { 0 };
5352 //
5353 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5354 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5355 // ...
5356 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5357 // *(Type<n>-1*)rhs[<n>-1]);
5358 // }
5359 //
5360 // ...
5361 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5362 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5363 // RedList, reduce_func, &<lock>)) {
5364 // case 1:
5365 // ...
5366 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5367 // ...
5368 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5369 // break;
5370 // case 2:
5371 // ...
5372 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5373 // ...
5374 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5375 // break;
5376 // default:;
5377 // }
5378 //
5379 // if SimpleReduction is true, only the next code is generated:
5380 // ...
5381 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5382 // ...
5383
5384 ASTContext &C = CGM.getContext();
5385
5386 if (SimpleReduction) {
5387 CodeGenFunction::RunCleanupsScope Scope(CGF);
5388 const auto *IPriv = OrgPrivates.begin();
5389 const auto *ILHS = OrgLHSExprs.begin();
5390 const auto *IRHS = OrgRHSExprs.begin();
5391 for (const Expr *E : OrgReductionOps) {
5392 emitSingleReductionCombiner(CGF, ReductionOp: E, PrivateRef: *IPriv, LHS: cast<DeclRefExpr>(Val: *ILHS),
5393 RHS: cast<DeclRefExpr>(Val: *IRHS));
5394 ++IPriv;
5395 ++ILHS;
5396 ++IRHS;
5397 }
5398 return;
5399 }
5400
5401 // Filter out shared reduction variables based on IsPrivateVarReduction flag.
5402 // Only keep entries where the corresponding variable is not private.
5403 SmallVector<const Expr *> FilteredPrivates, FilteredLHSExprs,
5404 FilteredRHSExprs, FilteredReductionOps;
5405 for (unsigned I : llvm::seq<unsigned>(
5406 Size: std::min(a: OrgReductionOps.size(), b: OrgLHSExprs.size()))) {
5407 if (!Options.IsPrivateVarReduction[I]) {
5408 FilteredPrivates.emplace_back(Args: OrgPrivates[I]);
5409 FilteredLHSExprs.emplace_back(Args: OrgLHSExprs[I]);
5410 FilteredRHSExprs.emplace_back(Args: OrgRHSExprs[I]);
5411 FilteredReductionOps.emplace_back(Args: OrgReductionOps[I]);
5412 }
5413 }
5414 // Wrap filtered vectors in ArrayRef for downstream shared reduction
5415 // processing.
5416 ArrayRef<const Expr *> Privates = FilteredPrivates;
5417 ArrayRef<const Expr *> LHSExprs = FilteredLHSExprs;
5418 ArrayRef<const Expr *> RHSExprs = FilteredRHSExprs;
5419 ArrayRef<const Expr *> ReductionOps = FilteredReductionOps;
5420
5421 // 1. Build a list of reduction variables.
5422 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5423 auto Size = RHSExprs.size();
5424 for (const Expr *E : Privates) {
5425 if (E->getType()->isVariablyModifiedType())
5426 // Reserve place for array size.
5427 ++Size;
5428 }
5429 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5430 QualType ReductionArrayTy = C.getConstantArrayType(
5431 EltTy: C.VoidPtrTy, ArySize: ArraySize, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal,
5432 /*IndexTypeQuals=*/0);
5433 RawAddress ReductionList =
5434 CGF.CreateMemTemp(T: ReductionArrayTy, Name: ".omp.reduction.red_list");
5435 const auto *IPriv = Privates.begin();
5436 unsigned Idx = 0;
5437 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5438 Address Elem = CGF.Builder.CreateConstArrayGEP(Addr: ReductionList, Index: Idx);
5439 CGF.Builder.CreateStore(
5440 Val: CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5441 V: CGF.EmitLValue(E: RHSExprs[I]).getPointer(CGF), DestTy: CGF.VoidPtrTy),
5442 Addr: Elem);
5443 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5444 // Store array size.
5445 ++Idx;
5446 Elem = CGF.Builder.CreateConstArrayGEP(Addr: ReductionList, Index: Idx);
5447 llvm::Value *Size = CGF.Builder.CreateIntCast(
5448 V: CGF.getVLASize(
5449 vla: CGF.getContext().getAsVariableArrayType(T: (*IPriv)->getType()))
5450 .NumElts,
5451 DestTy: CGF.SizeTy, /*isSigned=*/false);
5452 CGF.Builder.CreateStore(Val: CGF.Builder.CreateIntToPtr(V: Size, DestTy: CGF.VoidPtrTy),
5453 Addr: Elem);
5454 }
5455 }
5456
5457 // 2. Emit reduce_func().
5458 llvm::Function *ReductionFn = emitReductionFunction(
5459 ReducerName: CGF.CurFn->getName(), Loc, ArgsElemType: CGF.ConvertTypeForMem(T: ReductionArrayTy),
5460 Privates, LHSExprs, RHSExprs, ReductionOps);
5461
5462 // 3. Create static kmp_critical_name lock = { 0 };
5463 std::string Name = getName(Parts: {"reduction"});
5464 llvm::Value *Lock = getCriticalRegionLock(CriticalName: Name);
5465
5466 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5467 // RedList, reduce_func, &<lock>);
5468 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, Flags: OMP_ATOMIC_REDUCE);
5469 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5470 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(Ty: ReductionArrayTy);
5471 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5472 V: ReductionList.getPointer(), DestTy: CGF.VoidPtrTy);
5473 llvm::Value *Args[] = {
5474 IdentTLoc, // ident_t *<loc>
5475 ThreadId, // i32 <gtid>
5476 CGF.Builder.getInt32(C: RHSExprs.size()), // i32 <n>
5477 ReductionArrayTySize, // size_type sizeof(RedList)
5478 RL, // void *RedList
5479 ReductionFn, // void (*) (void *, void *) <reduce_func>
5480 Lock // kmp_critical_name *&<lock>
5481 };
5482 llvm::Value *Res = CGF.EmitRuntimeCall(
5483 callee: OMPBuilder.getOrCreateRuntimeFunction(
5484 M&: CGM.getModule(),
5485 FnID: WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5486 args: Args);
5487
5488 // 5. Build switch(res)
5489 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(name: ".omp.reduction.default");
5490 llvm::SwitchInst *SwInst =
5491 CGF.Builder.CreateSwitch(V: Res, Dest: DefaultBB, /*NumCases=*/2);
5492
5493 // 6. Build case 1:
5494 // ...
5495 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5496 // ...
5497 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5498 // break;
5499 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(name: ".omp.reduction.case1");
5500 SwInst->addCase(OnVal: CGF.Builder.getInt32(C: 1), Dest: Case1BB);
5501 CGF.EmitBlock(BB: Case1BB);
5502
5503 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5504 llvm::Value *EndArgs[] = {
5505 IdentTLoc, // ident_t *<loc>
5506 ThreadId, // i32 <gtid>
5507 Lock // kmp_critical_name *&<lock>
5508 };
5509 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5510 CodeGenFunction &CGF, PrePostActionTy &Action) {
5511 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5512 const auto *IPriv = Privates.begin();
5513 const auto *ILHS = LHSExprs.begin();
5514 const auto *IRHS = RHSExprs.begin();
5515 for (const Expr *E : ReductionOps) {
5516 RT.emitSingleReductionCombiner(CGF, ReductionOp: E, PrivateRef: *IPriv, LHS: cast<DeclRefExpr>(Val: *ILHS),
5517 RHS: cast<DeclRefExpr>(Val: *IRHS));
5518 ++IPriv;
5519 ++ILHS;
5520 ++IRHS;
5521 }
5522 };
5523 RegionCodeGenTy RCG(CodeGen);
5524 CommonActionTy Action(
5525 nullptr, {},
5526 OMPBuilder.getOrCreateRuntimeFunction(
5527 M&: CGM.getModule(), FnID: WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5528 : OMPRTL___kmpc_end_reduce),
5529 EndArgs);
5530 RCG.setAction(Action);
5531 RCG(CGF);
5532
5533 CGF.EmitBranch(Block: DefaultBB);
5534
5535 // 7. Build case 2:
5536 // ...
5537 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5538 // ...
5539 // break;
5540 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(name: ".omp.reduction.case2");
5541 SwInst->addCase(OnVal: CGF.Builder.getInt32(C: 2), Dest: Case2BB);
5542 CGF.EmitBlock(BB: Case2BB);
5543
5544 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5545 CodeGenFunction &CGF, PrePostActionTy &Action) {
5546 const auto *ILHS = LHSExprs.begin();
5547 const auto *IRHS = RHSExprs.begin();
5548 const auto *IPriv = Privates.begin();
5549 for (const Expr *E : ReductionOps) {
5550 const Expr *XExpr = nullptr;
5551 const Expr *EExpr = nullptr;
5552 const Expr *UpExpr = nullptr;
5553 BinaryOperatorKind BO = BO_Comma;
5554 if (const auto *BO = dyn_cast<BinaryOperator>(Val: E)) {
5555 if (BO->getOpcode() == BO_Assign) {
5556 XExpr = BO->getLHS();
5557 UpExpr = BO->getRHS();
5558 }
5559 }
5560 // Try to emit update expression as a simple atomic.
5561 const Expr *RHSExpr = UpExpr;
5562 if (RHSExpr) {
5563 // Analyze RHS part of the whole expression.
5564 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5565 Val: RHSExpr->IgnoreParenImpCasts())) {
5566 // If this is a conditional operator, analyze its condition for
5567 // min/max reduction operator.
5568 RHSExpr = ACO->getCond();
5569 }
5570 if (const auto *BORHS =
5571 dyn_cast<BinaryOperator>(Val: RHSExpr->IgnoreParenImpCasts())) {
5572 EExpr = BORHS->getRHS();
5573 BO = BORHS->getOpcode();
5574 }
5575 }
5576 if (XExpr) {
5577 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl());
5578 auto &&AtomicRedGen = [BO, VD,
5579 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5580 const Expr *EExpr, const Expr *UpExpr) {
5581 LValue X = CGF.EmitLValue(E: XExpr);
5582 RValue E;
5583 if (EExpr)
5584 E = CGF.EmitAnyExpr(E: EExpr);
5585 CGF.EmitOMPAtomicSimpleUpdateExpr(
5586 X, E, BO, /*IsXLHSInRHSPart=*/true,
5587 AO: llvm::AtomicOrdering::Monotonic, Loc,
5588 CommonGen: [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5589 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5590 Address LHSTemp = CGF.CreateMemTemp(T: VD->getType());
5591 CGF.emitOMPSimpleStore(
5592 LVal: CGF.MakeAddrLValue(Addr: LHSTemp, T: VD->getType()), RVal: XRValue,
5593 RValTy: VD->getType().getNonReferenceType(), Loc);
5594 PrivateScope.addPrivate(LocalVD: VD, Addr: LHSTemp);
5595 (void)PrivateScope.Privatize();
5596 return CGF.EmitAnyExpr(E: UpExpr);
5597 });
5598 };
5599 if ((*IPriv)->getType()->isArrayType()) {
5600 // Emit atomic reduction for array section.
5601 const auto *RHSVar =
5602 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl());
5603 EmitOMPAggregateReduction(CGF, Type: (*IPriv)->getType(), LHSVar: VD, RHSVar,
5604 RedOpGen: AtomicRedGen, XExpr, EExpr, UpExpr);
5605 } else {
5606 // Emit atomic reduction for array subscript or single variable.
5607 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5608 }
5609 } else {
5610 // Emit as a critical region.
5611 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5612 const Expr *, const Expr *) {
5613 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5614 std::string Name = RT.getName(Parts: {"atomic_reduction"});
5615 RT.emitCriticalRegion(
5616 CGF, CriticalName: Name,
5617 CriticalOpGen: [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5618 Action.Enter(CGF);
5619 emitReductionCombiner(CGF, ReductionOp: E);
5620 },
5621 Loc);
5622 };
5623 if ((*IPriv)->getType()->isArrayType()) {
5624 const auto *LHSVar =
5625 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl());
5626 const auto *RHSVar =
5627 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl());
5628 EmitOMPAggregateReduction(CGF, Type: (*IPriv)->getType(), LHSVar, RHSVar,
5629 RedOpGen: CritRedGen);
5630 } else {
5631 CritRedGen(CGF, nullptr, nullptr, nullptr);
5632 }
5633 }
5634 ++ILHS;
5635 ++IRHS;
5636 ++IPriv;
5637 }
5638 };
5639 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5640 if (!WithNowait) {
5641 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5642 llvm::Value *EndArgs[] = {
5643 IdentTLoc, // ident_t *<loc>
5644 ThreadId, // i32 <gtid>
5645 Lock // kmp_critical_name *&<lock>
5646 };
5647 CommonActionTy Action(nullptr, {},
5648 OMPBuilder.getOrCreateRuntimeFunction(
5649 M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_reduce),
5650 EndArgs);
5651 AtomicRCG.setAction(Action);
5652 AtomicRCG(CGF);
5653 } else {
5654 AtomicRCG(CGF);
5655 }
5656
5657 CGF.EmitBranch(Block: DefaultBB);
5658 CGF.EmitBlock(BB: DefaultBB, /*IsFinished=*/true);
5659 assert(OrgLHSExprs.size() == OrgPrivates.size() &&
5660 "PrivateVarReduction: Privates size mismatch");
5661 assert(OrgLHSExprs.size() == OrgReductionOps.size() &&
5662 "PrivateVarReduction: ReductionOps size mismatch");
5663 for (unsigned I : llvm::seq<unsigned>(
5664 Size: std::min(a: OrgReductionOps.size(), b: OrgLHSExprs.size()))) {
5665 if (Options.IsPrivateVarReduction[I])
5666 emitPrivateReduction(CGF, Loc, Privates: OrgPrivates[I], LHSExprs: OrgLHSExprs[I],
5667 RHSExprs: OrgRHSExprs[I], ReductionOps: OrgReductionOps[I]);
5668 }
5669}
5670
5671/// Generates unique name for artificial threadprivate variables.
5672/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5673static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5674 const Expr *Ref) {
5675 SmallString<256> Buffer;
5676 llvm::raw_svector_ostream Out(Buffer);
5677 const clang::DeclRefExpr *DE;
5678 const VarDecl *D = ::getBaseDecl(Ref, DE);
5679 if (!D)
5680 D = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: Ref)->getDecl());
5681 D = D->getCanonicalDecl();
5682 std::string Name = CGM.getOpenMPRuntime().getName(
5683 Parts: {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(GD: D)});
5684 Out << Prefix << Name << "_"
5685 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5686 return std::string(Out.str());
5687}
5688
5689/// Emits reduction initializer function:
5690/// \code
5691/// void @.red_init(void* %arg, void* %orig) {
5692/// %0 = bitcast void* %arg to <type>*
5693/// store <type> <init>, <type>* %0
5694/// ret void
5695/// }
5696/// \endcode
5697static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5698 SourceLocation Loc,
5699 ReductionCodeGen &RCG, unsigned N) {
5700 ASTContext &C = CGM.getContext();
5701 QualType VoidPtrTy = C.VoidPtrTy;
5702 VoidPtrTy.addRestrict();
5703 FunctionArgList Args;
5704 auto *Param =
5705 ImplicitParamDecl::Create(C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
5706 T: VoidPtrTy, ParamKind: ImplicitParamKind::Other);
5707 auto *ParamOrig =
5708 ImplicitParamDecl::Create(C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
5709 T: VoidPtrTy, ParamKind: ImplicitParamKind::Other);
5710 Args.emplace_back(Args&: Param);
5711 Args.emplace_back(Args&: ParamOrig);
5712 const auto &FnInfo =
5713 CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
5714 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(Info: FnInfo);
5715 std::string Name = CGM.getOpenMPRuntime().getName(Parts: {"red_init", ""});
5716 auto *Fn = llvm::Function::Create(Ty: FnTy, Linkage: llvm::GlobalValue::InternalLinkage,
5717 N: Name, M: &CGM.getModule());
5718 CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: FnInfo);
5719 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
5720 Fn->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
5721 Fn->setDoesNotRecurse();
5722 CodeGenFunction CGF(CGM);
5723 CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo, Args, Loc, StartLoc: Loc);
5724 QualType PrivateType = RCG.getPrivateType(N);
5725 Address PrivateAddr = CGF.EmitLoadOfPointer(
5726 Ptr: CGF.GetAddrOfLocalVar(VD: Param).withElementType(ElemTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
5727 PtrTy: C.getPointerType(T: PrivateType)->castAs<PointerType>());
5728 llvm::Value *Size = nullptr;
5729 // If the size of the reduction item is non-constant, load it from global
5730 // threadprivate variable.
5731 if (RCG.getSizes(N).second) {
5732 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5733 CGF, VarType: CGM.getContext().getSizeType(),
5734 Name: generateUniqueName(CGM, Prefix: "reduction_size", Ref: RCG.getRefExpr(N)));
5735 Size = CGF.EmitLoadOfScalar(Addr: SizeAddr, /*Volatile=*/false,
5736 Ty: CGM.getContext().getSizeType(), Loc);
5737 }
5738 RCG.emitAggregateType(CGF, N, Size);
5739 Address OrigAddr = Address::invalid();
5740 // If initializer uses initializer from declare reduction construct, emit a
5741 // pointer to the address of the original reduction item (reuired by reduction
5742 // initializer)
5743 if (RCG.usesReductionInitializer(N)) {
5744 Address SharedAddr = CGF.GetAddrOfLocalVar(VD: ParamOrig);
5745 OrigAddr = CGF.EmitLoadOfPointer(
5746 Ptr: SharedAddr,
5747 PtrTy: CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5748 }
5749 // Emit the initializer:
5750 // %0 = bitcast void* %arg to <type>*
5751 // store <type> <init>, <type>* %0
5752 RCG.emitInitialization(CGF, N, PrivateAddr, SharedAddr: OrigAddr,
5753 DefaultInit: [](CodeGenFunction &) { return false; });
5754 CGF.FinishFunction();
5755 return Fn;
5756}
5757
5758/// Emits reduction combiner function:
5759/// \code
5760/// void @.red_comb(void* %arg0, void* %arg1) {
5761/// %lhs = bitcast void* %arg0 to <type>*
5762/// %rhs = bitcast void* %arg1 to <type>*
5763/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5764/// store <type> %2, <type>* %lhs
5765/// ret void
5766/// }
5767/// \endcode
5768static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5769 SourceLocation Loc,
5770 ReductionCodeGen &RCG, unsigned N,
5771 const Expr *ReductionOp,
5772 const Expr *LHS, const Expr *RHS,
5773 const Expr *PrivateRef) {
5774 ASTContext &C = CGM.getContext();
5775 const auto *LHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHS)->getDecl());
5776 const auto *RHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHS)->getDecl());
5777 FunctionArgList Args;
5778 auto *ParamInOut =
5779 ImplicitParamDecl::Create(C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
5780 T: C.VoidPtrTy, ParamKind: ImplicitParamKind::Other);
5781 auto *ParamIn =
5782 ImplicitParamDecl::Create(C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
5783 T: C.VoidPtrTy, ParamKind: ImplicitParamKind::Other);
5784 Args.emplace_back(Args&: ParamInOut);
5785 Args.emplace_back(Args&: ParamIn);
5786 const auto &FnInfo =
5787 CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
5788 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(Info: FnInfo);
5789 std::string Name = CGM.getOpenMPRuntime().getName(Parts: {"red_comb", ""});
5790 auto *Fn = llvm::Function::Create(Ty: FnTy, Linkage: llvm::GlobalValue::InternalLinkage,
5791 N: Name, M: &CGM.getModule());
5792 CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: FnInfo);
5793 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
5794 Fn->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
5795 Fn->setDoesNotRecurse();
5796 CodeGenFunction CGF(CGM);
5797 CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo, Args, Loc, StartLoc: Loc);
5798 llvm::Value *Size = nullptr;
5799 // If the size of the reduction item is non-constant, load it from global
5800 // threadprivate variable.
5801 if (RCG.getSizes(N).second) {
5802 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5803 CGF, VarType: CGM.getContext().getSizeType(),
5804 Name: generateUniqueName(CGM, Prefix: "reduction_size", Ref: RCG.getRefExpr(N)));
5805 Size = CGF.EmitLoadOfScalar(Addr: SizeAddr, /*Volatile=*/false,
5806 Ty: CGM.getContext().getSizeType(), Loc);
5807 }
5808 RCG.emitAggregateType(CGF, N, Size);
5809 // Remap lhs and rhs variables to the addresses of the function arguments.
5810 // %lhs = bitcast void* %arg0 to <type>*
5811 // %rhs = bitcast void* %arg1 to <type>*
5812 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5813 PrivateScope.addPrivate(
5814 LocalVD: LHSVD,
5815 // Pull out the pointer to the variable.
5816 Addr: CGF.EmitLoadOfPointer(
5817 Ptr: CGF.GetAddrOfLocalVar(VD: ParamInOut)
5818 .withElementType(ElemTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
5819 PtrTy: C.getPointerType(T: LHSVD->getType())->castAs<PointerType>()));
5820 PrivateScope.addPrivate(
5821 LocalVD: RHSVD,
5822 // Pull out the pointer to the variable.
5823 Addr: CGF.EmitLoadOfPointer(
5824 Ptr: CGF.GetAddrOfLocalVar(VD: ParamIn).withElementType(
5825 ElemTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
5826 PtrTy: C.getPointerType(T: RHSVD->getType())->castAs<PointerType>()));
5827 PrivateScope.Privatize();
5828 // Emit the combiner body:
5829 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5830 // store <type> %2, <type>* %lhs
5831 CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5832 CGF, ReductionOp, PrivateRef, LHS: cast<DeclRefExpr>(Val: LHS),
5833 RHS: cast<DeclRefExpr>(Val: RHS));
5834 CGF.FinishFunction();
5835 return Fn;
5836}
5837
5838/// Emits reduction finalizer function:
5839/// \code
5840/// void @.red_fini(void* %arg) {
5841/// %0 = bitcast void* %arg to <type>*
5842/// <destroy>(<type>* %0)
5843/// ret void
5844/// }
5845/// \endcode
5846static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5847 SourceLocation Loc,
5848 ReductionCodeGen &RCG, unsigned N) {
5849 if (!RCG.needCleanups(N))
5850 return nullptr;
5851 ASTContext &C = CGM.getContext();
5852 FunctionArgList Args;
5853 auto *Param =
5854 ImplicitParamDecl::Create(C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
5855 T: C.VoidPtrTy, ParamKind: ImplicitParamKind::Other);
5856 Args.emplace_back(Args&: Param);
5857 const auto &FnInfo =
5858 CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
5859 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(Info: FnInfo);
5860 std::string Name = CGM.getOpenMPRuntime().getName(Parts: {"red_fini", ""});
5861 auto *Fn = llvm::Function::Create(Ty: FnTy, Linkage: llvm::GlobalValue::InternalLinkage,
5862 N: Name, M: &CGM.getModule());
5863 CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: FnInfo);
5864 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
5865 Fn->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
5866 Fn->setDoesNotRecurse();
5867 CodeGenFunction CGF(CGM);
5868 CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo, Args, Loc, StartLoc: Loc);
5869 Address PrivateAddr = CGF.EmitLoadOfPointer(
5870 Ptr: CGF.GetAddrOfLocalVar(VD: Param), PtrTy: C.VoidPtrTy.castAs<PointerType>());
5871 llvm::Value *Size = nullptr;
5872 // If the size of the reduction item is non-constant, load it from global
5873 // threadprivate variable.
5874 if (RCG.getSizes(N).second) {
5875 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5876 CGF, VarType: CGM.getContext().getSizeType(),
5877 Name: generateUniqueName(CGM, Prefix: "reduction_size", Ref: RCG.getRefExpr(N)));
5878 Size = CGF.EmitLoadOfScalar(Addr: SizeAddr, /*Volatile=*/false,
5879 Ty: CGM.getContext().getSizeType(), Loc);
5880 }
5881 RCG.emitAggregateType(CGF, N, Size);
5882 // Emit the finalizer body:
5883 // <destroy>(<type>* %0)
5884 RCG.emitCleanups(CGF, N, PrivateAddr);
5885 CGF.FinishFunction(EndLoc: Loc);
5886 return Fn;
5887}
5888
5889llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5890 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5891 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5892 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5893 return nullptr;
5894
5895 // Build typedef struct:
5896 // kmp_taskred_input {
5897 // void *reduce_shar; // shared reduction item
5898 // void *reduce_orig; // original reduction item used for initialization
5899 // size_t reduce_size; // size of data item
5900 // void *reduce_init; // data initialization routine
5901 // void *reduce_fini; // data finalization routine
5902 // void *reduce_comb; // data combiner routine
5903 // kmp_task_red_flags_t flags; // flags for additional info from compiler
5904 // } kmp_taskred_input_t;
5905 ASTContext &C = CGM.getContext();
5906 RecordDecl *RD = C.buildImplicitRecord(Name: "kmp_taskred_input_t");
5907 RD->startDefinition();
5908 const FieldDecl *SharedFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
5909 const FieldDecl *OrigFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
5910 const FieldDecl *SizeFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.getSizeType());
5911 const FieldDecl *InitFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
5912 const FieldDecl *FiniFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
5913 const FieldDecl *CombFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
5914 const FieldDecl *FlagsFD = addFieldToRecordDecl(
5915 C, DC: RD, FieldTy: C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5916 RD->completeDefinition();
5917 CanQualType RDType = C.getCanonicalTagType(TD: RD);
5918 unsigned Size = Data.ReductionVars.size();
5919 llvm::APInt ArraySize(/*numBits=*/64, Size);
5920 QualType ArrayRDType =
5921 C.getConstantArrayType(EltTy: RDType, ArySize: ArraySize, SizeExpr: nullptr,
5922 ASM: ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
5923 // kmp_task_red_input_t .rd_input.[Size];
5924 RawAddress TaskRedInput = CGF.CreateMemTemp(T: ArrayRDType, Name: ".rd_input.");
5925 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5926 Data.ReductionCopies, Data.ReductionOps);
5927 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5928 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5929 llvm::Value *Idxs[] = {llvm::ConstantInt::get(Ty: CGM.SizeTy, /*V=*/0),
5930 llvm::ConstantInt::get(Ty: CGM.SizeTy, V: Cnt)};
5931 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5932 ElemTy: TaskRedInput.getElementType(), Ptr: TaskRedInput.getPointer(), IdxList: Idxs,
5933 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5934 Name: ".rd_input.gep.");
5935 LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(V: GEP, T: RDType);
5936 // ElemLVal.reduce_shar = &Shareds[Cnt];
5937 LValue SharedLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: SharedFD);
5938 RCG.emitSharedOrigLValue(CGF, N: Cnt);
5939 llvm::Value *Shared = RCG.getSharedLValue(N: Cnt).getPointer(CGF);
5940 CGF.EmitStoreOfScalar(value: Shared, lvalue: SharedLVal);
5941 // ElemLVal.reduce_orig = &Origs[Cnt];
5942 LValue OrigLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: OrigFD);
5943 llvm::Value *Orig = RCG.getOrigLValue(N: Cnt).getPointer(CGF);
5944 CGF.EmitStoreOfScalar(value: Orig, lvalue: OrigLVal);
5945 RCG.emitAggregateType(CGF, N: Cnt);
5946 llvm::Value *SizeValInChars;
5947 llvm::Value *SizeVal;
5948 std::tie(args&: SizeValInChars, args&: SizeVal) = RCG.getSizes(N: Cnt);
5949 // We use delayed creation/initialization for VLAs and array sections. It is
5950 // required because runtime does not provide the way to pass the sizes of
5951 // VLAs/array sections to initializer/combiner/finalizer functions. Instead
5952 // threadprivate global variables are used to store these values and use
5953 // them in the functions.
5954 bool DelayedCreation = !!SizeVal;
5955 SizeValInChars = CGF.Builder.CreateIntCast(V: SizeValInChars, DestTy: CGM.SizeTy,
5956 /*isSigned=*/false);
5957 LValue SizeLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: SizeFD);
5958 CGF.EmitStoreOfScalar(value: SizeValInChars, lvalue: SizeLVal);
5959 // ElemLVal.reduce_init = init;
5960 LValue InitLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: InitFD);
5961 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, N: Cnt);
5962 CGF.EmitStoreOfScalar(value: InitAddr, lvalue: InitLVal);
5963 // ElemLVal.reduce_fini = fini;
5964 LValue FiniLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: FiniFD);
5965 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, N: Cnt);
5966 llvm::Value *FiniAddr =
5967 Fini ? Fini : llvm::ConstantPointerNull::get(T: CGM.VoidPtrTy);
5968 CGF.EmitStoreOfScalar(value: FiniAddr, lvalue: FiniLVal);
5969 // ElemLVal.reduce_comb = comb;
5970 LValue CombLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: CombFD);
5971 llvm::Value *CombAddr = emitReduceCombFunction(
5972 CGM, Loc, RCG, N: Cnt, ReductionOp: Data.ReductionOps[Cnt], LHS: LHSExprs[Cnt],
5973 RHS: RHSExprs[Cnt], PrivateRef: Data.ReductionCopies[Cnt]);
5974 CGF.EmitStoreOfScalar(value: CombAddr, lvalue: CombLVal);
5975 // ElemLVal.flags = 0;
5976 LValue FlagsLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: FlagsFD);
5977 if (DelayedCreation) {
5978 CGF.EmitStoreOfScalar(
5979 value: llvm::ConstantInt::get(Ty: CGM.Int32Ty, /*V=*/1, /*isSigned=*/IsSigned: true),
5980 lvalue: FlagsLVal);
5981 } else
5982 CGF.EmitNullInitialization(DestPtr: FlagsLVal.getAddress(), Ty: FlagsLVal.getType());
5983 }
5984 if (Data.IsReductionWithTaskMod) {
5985 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5986 // is_ws, int num, void *data);
5987 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5988 llvm::Value *GTid = CGF.Builder.CreateIntCast(V: getThreadID(CGF, Loc),
5989 DestTy: CGM.IntTy, /*isSigned=*/true);
5990 llvm::Value *Args[] = {
5991 IdentTLoc, GTid,
5992 llvm::ConstantInt::get(Ty: CGM.IntTy, V: Data.IsWorksharingReduction ? 1 : 0,
5993 /*isSigned=*/IsSigned: true),
5994 llvm::ConstantInt::get(Ty: CGM.IntTy, V: Size, /*isSigned=*/IsSigned: true),
5995 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5996 V: TaskRedInput.getPointer(), DestTy: CGM.VoidPtrTy)};
5997 return CGF.EmitRuntimeCall(
5998 callee: OMPBuilder.getOrCreateRuntimeFunction(
5999 M&: CGM.getModule(), FnID: OMPRTL___kmpc_taskred_modifier_init),
6000 args: Args);
6001 }
6002 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
6003 llvm::Value *Args[] = {
6004 CGF.Builder.CreateIntCast(V: getThreadID(CGF, Loc), DestTy: CGM.IntTy,
6005 /*isSigned=*/true),
6006 llvm::ConstantInt::get(Ty: CGM.IntTy, V: Size, /*isSigned=*/IsSigned: true),
6007 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(V: TaskRedInput.getPointer(),
6008 DestTy: CGM.VoidPtrTy)};
6009 return CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
6010 M&: CGM.getModule(), FnID: OMPRTL___kmpc_taskred_init),
6011 args: Args);
6012}
6013
6014void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6015 SourceLocation Loc,
6016 bool IsWorksharingReduction) {
6017 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6018 // is_ws, int num, void *data);
6019 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6020 llvm::Value *GTid = CGF.Builder.CreateIntCast(V: getThreadID(CGF, Loc),
6021 DestTy: CGM.IntTy, /*isSigned=*/true);
6022 llvm::Value *Args[] = {IdentTLoc, GTid,
6023 llvm::ConstantInt::get(Ty: CGM.IntTy,
6024 V: IsWorksharingReduction ? 1 : 0,
6025 /*isSigned=*/IsSigned: true)};
6026 (void)CGF.EmitRuntimeCall(
6027 callee: OMPBuilder.getOrCreateRuntimeFunction(
6028 M&: CGM.getModule(), FnID: OMPRTL___kmpc_task_reduction_modifier_fini),
6029 args: Args);
6030}
6031
6032void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6033 SourceLocation Loc,
6034 ReductionCodeGen &RCG,
6035 unsigned N) {
6036 auto Sizes = RCG.getSizes(N);
6037 // Emit threadprivate global variable if the type is non-constant
6038 // (Sizes.second = nullptr).
6039 if (Sizes.second) {
6040 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(V: Sizes.second, DestTy: CGM.SizeTy,
6041 /*isSigned=*/false);
6042 Address SizeAddr = getAddrOfArtificialThreadPrivate(
6043 CGF, VarType: CGM.getContext().getSizeType(),
6044 Name: generateUniqueName(CGM, Prefix: "reduction_size", Ref: RCG.getRefExpr(N)));
6045 CGF.Builder.CreateStore(Val: SizeVal, Addr: SizeAddr, /*IsVolatile=*/false);
6046 }
6047}
6048
6049Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6050 SourceLocation Loc,
6051 llvm::Value *ReductionsPtr,
6052 LValue SharedLVal) {
6053 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6054 // *d);
6055 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(V: getThreadID(CGF, Loc),
6056 DestTy: CGM.IntTy,
6057 /*isSigned=*/true),
6058 ReductionsPtr,
6059 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6060 V: SharedLVal.getPointer(CGF), DestTy: CGM.VoidPtrTy)};
6061 return Address(
6062 CGF.EmitRuntimeCall(
6063 callee: OMPBuilder.getOrCreateRuntimeFunction(
6064 M&: CGM.getModule(), FnID: OMPRTL___kmpc_task_reduction_get_th_data),
6065 args: Args),
6066 CGF.Int8Ty, SharedLVal.getAlignment());
6067}
6068
6069void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
6070 const OMPTaskDataTy &Data) {
6071 if (!CGF.HaveInsertPoint())
6072 return;
6073
6074 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
6075 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
6076 OMPBuilder.createTaskwait(Loc: CGF.Builder);
6077 } else {
6078 llvm::Value *ThreadID = getThreadID(CGF, Loc);
6079 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
6080 auto &M = CGM.getModule();
6081 Address DependenciesArray = Address::invalid();
6082 llvm::Value *NumOfElements;
6083 std::tie(args&: NumOfElements, args&: DependenciesArray) =
6084 emitDependClause(CGF, Dependencies: Data.Dependences, Loc);
6085 if (!Data.Dependences.empty()) {
6086 llvm::Value *DepWaitTaskArgs[7];
6087 DepWaitTaskArgs[0] = UpLoc;
6088 DepWaitTaskArgs[1] = ThreadID;
6089 DepWaitTaskArgs[2] = NumOfElements;
6090 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
6091 DepWaitTaskArgs[4] = CGF.Builder.getInt32(C: 0);
6092 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
6093 DepWaitTaskArgs[6] =
6094 llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: Data.HasNowaitClause);
6095
6096 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
6097
6098 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
6099 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
6100 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
6101 // kmp_int32 has_no_wait); if dependence info is specified.
6102 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
6103 M, FnID: OMPRTL___kmpc_omp_taskwait_deps_51),
6104 args: DepWaitTaskArgs);
6105
6106 } else {
6107
6108 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6109 // global_tid);
6110 llvm::Value *Args[] = {UpLoc, ThreadID};
6111 // Ignore return result until untied tasks are supported.
6112 CGF.EmitRuntimeCall(
6113 callee: OMPBuilder.getOrCreateRuntimeFunction(M, FnID: OMPRTL___kmpc_omp_taskwait),
6114 args: Args);
6115 }
6116 }
6117
6118 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo))
6119 Region->emitUntiedSwitch(CGF);
6120}
6121
6122void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6123 OpenMPDirectiveKind InnerKind,
6124 const RegionCodeGenTy &CodeGen,
6125 bool HasCancel) {
6126 if (!CGF.HaveInsertPoint())
6127 return;
6128 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6129 InnerKind != OMPD_critical &&
6130 InnerKind != OMPD_master &&
6131 InnerKind != OMPD_masked);
6132 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6133}
6134
namespace {
/// Cancellation kinds. getCancellationKind() maps a directive kind onto one of
/// these, and the numeric value is emitted as the 'cncl_kind' argument of the
/// __kmpc_cancel / __kmpc_cancellationpoint runtime calls, so the explicit
/// values must be kept as they are.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace
6144
6145static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6146 RTCancelKind CancelKind = CancelNoreq;
6147 if (CancelRegion == OMPD_parallel)
6148 CancelKind = CancelParallel;
6149 else if (CancelRegion == OMPD_for)
6150 CancelKind = CancelLoop;
6151 else if (CancelRegion == OMPD_sections)
6152 CancelKind = CancelSections;
6153 else {
6154 assert(CancelRegion == OMPD_taskgroup);
6155 CancelKind = CancelTaskgroup;
6156 }
6157 return CancelKind;
6158}
6159
6160void CGOpenMPRuntime::emitCancellationPointCall(
6161 CodeGenFunction &CGF, SourceLocation Loc,
6162 OpenMPDirectiveKind CancelRegion) {
6163 if (!CGF.HaveInsertPoint())
6164 return;
6165 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6166 // global_tid, kmp_int32 cncl_kind);
6167 if (auto *OMPRegionInfo =
6168 dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo)) {
6169 // For 'cancellation point taskgroup', the task region info may not have a
6170 // cancel. This may instead happen in another adjacent task.
6171 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6172 llvm::Value *Args[] = {
6173 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6174 CGF.Builder.getInt32(C: getCancellationKind(CancelRegion))};
6175 // Ignore return result until untied tasks are supported.
6176 llvm::Value *Result = CGF.EmitRuntimeCall(
6177 callee: OMPBuilder.getOrCreateRuntimeFunction(
6178 M&: CGM.getModule(), FnID: OMPRTL___kmpc_cancellationpoint),
6179 args: Args);
6180 // if (__kmpc_cancellationpoint()) {
6181 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6182 // exit from construct;
6183 // }
6184 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: ".cancel.exit");
6185 llvm::BasicBlock *ContBB = CGF.createBasicBlock(name: ".cancel.continue");
6186 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Arg: Result);
6187 CGF.Builder.CreateCondBr(Cond: Cmp, True: ExitBB, False: ContBB);
6188 CGF.EmitBlock(BB: ExitBB);
6189 if (CancelRegion == OMPD_parallel)
6190 emitBarrierCall(CGF, Loc, Kind: OMPD_unknown, /*EmitChecks=*/false);
6191 // exit from construct;
6192 CodeGenFunction::JumpDest CancelDest =
6193 CGF.getOMPCancelDestination(Kind: OMPRegionInfo->getDirectiveKind());
6194 CGF.EmitBranchThroughCleanup(Dest: CancelDest);
6195 CGF.EmitBlock(BB: ContBB, /*IsFinished=*/true);
6196 }
6197 }
6198}
6199
6200void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6201 const Expr *IfCond,
6202 OpenMPDirectiveKind CancelRegion) {
6203 if (!CGF.HaveInsertPoint())
6204 return;
6205 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6206 // kmp_int32 cncl_kind);
6207 auto &M = CGM.getModule();
6208 if (auto *OMPRegionInfo =
6209 dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo)) {
6210 auto &&ThenGen = [this, &M, Loc, CancelRegion,
6211 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6212 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6213 llvm::Value *Args[] = {
6214 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6215 CGF.Builder.getInt32(C: getCancellationKind(CancelRegion))};
6216 // Ignore return result until untied tasks are supported.
6217 llvm::Value *Result = CGF.EmitRuntimeCall(
6218 callee: OMPBuilder.getOrCreateRuntimeFunction(M, FnID: OMPRTL___kmpc_cancel), args: Args);
6219 // if (__kmpc_cancel()) {
6220 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6221 // exit from construct;
6222 // }
6223 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: ".cancel.exit");
6224 llvm::BasicBlock *ContBB = CGF.createBasicBlock(name: ".cancel.continue");
6225 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Arg: Result);
6226 CGF.Builder.CreateCondBr(Cond: Cmp, True: ExitBB, False: ContBB);
6227 CGF.EmitBlock(BB: ExitBB);
6228 if (CancelRegion == OMPD_parallel)
6229 RT.emitBarrierCall(CGF, Loc, Kind: OMPD_unknown, /*EmitChecks=*/false);
6230 // exit from construct;
6231 CodeGenFunction::JumpDest CancelDest =
6232 CGF.getOMPCancelDestination(Kind: OMPRegionInfo->getDirectiveKind());
6233 CGF.EmitBranchThroughCleanup(Dest: CancelDest);
6234 CGF.EmitBlock(BB: ContBB, /*IsFinished=*/true);
6235 };
6236 if (IfCond) {
6237 emitIfClause(CGF, Cond: IfCond, ThenGen,
6238 ElseGen: [](CodeGenFunction &, PrePostActionTy &) {});
6239 } else {
6240 RegionCodeGenTy ThenRCG(ThenGen);
6241 ThenRCG(CGF);
6242 }
6243 }
6244}
6245
6246namespace {
6247/// Cleanup action for uses_allocators support.
6248class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6249 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6250
6251public:
6252 OMPUsesAllocatorsActionTy(
6253 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6254 : Allocators(Allocators) {}
6255 void Enter(CodeGenFunction &CGF) override {
6256 if (!CGF.HaveInsertPoint())
6257 return;
6258 for (const auto &AllocatorData : Allocators) {
6259 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6260 CGF, Allocator: AllocatorData.first, AllocatorTraits: AllocatorData.second);
6261 }
6262 }
6263 void Exit(CodeGenFunction &CGF) override {
6264 if (!CGF.HaveInsertPoint())
6265 return;
6266 for (const auto &AllocatorData : Allocators) {
6267 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6268 Allocator: AllocatorData.first);
6269 }
6270 }
6271};
6272} // namespace
6273
6274void CGOpenMPRuntime::emitTargetOutlinedFunction(
6275 const OMPExecutableDirective &D, StringRef ParentName,
6276 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6277 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6278 assert(!ParentName.empty() && "Invalid target entry parent name!");
6279 HasEmittedTargetRegion = true;
6280 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6281 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6282 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6283 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6284 if (!D.AllocatorTraits)
6285 continue;
6286 Allocators.emplace_back(Args: D.Allocator, Args: D.AllocatorTraits);
6287 }
6288 }
6289 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6290 CodeGen.setAction(UsesAllocatorAction);
6291 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6292 IsOffloadEntry, CodeGen);
6293}
6294
6295void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6296 const Expr *Allocator,
6297 const Expr *AllocatorTraits) {
6298 llvm::Value *ThreadId = getThreadID(CGF, Loc: Allocator->getExprLoc());
6299 ThreadId = CGF.Builder.CreateIntCast(V: ThreadId, DestTy: CGF.IntTy, /*isSigned=*/true);
6300 // Use default memspace handle.
6301 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
6302 llvm::Value *NumTraits = llvm::ConstantInt::get(
6303 Ty: CGF.IntTy, V: cast<ConstantArrayType>(
6304 Val: AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6305 ->getSize()
6306 .getLimitedValue());
6307 LValue AllocatorTraitsLVal = CGF.EmitLValue(E: AllocatorTraits);
6308 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6309 Addr: AllocatorTraitsLVal.getAddress(), Ty: CGF.VoidPtrPtrTy, ElementTy: CGF.VoidPtrTy);
6310 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, T: CGF.getContext().VoidPtrTy,
6311 BaseInfo: AllocatorTraitsLVal.getBaseInfo(),
6312 TBAAInfo: AllocatorTraitsLVal.getTBAAInfo());
6313 llvm::Value *Traits = Addr.emitRawPointer(CGF);
6314
6315 llvm::Value *AllocatorVal =
6316 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
6317 M&: CGM.getModule(), FnID: OMPRTL___kmpc_init_allocator),
6318 args: {ThreadId, MemSpaceHandle, NumTraits, Traits});
6319 // Store to allocator.
6320 CGF.EmitAutoVarAlloca(var: *cast<VarDecl>(
6321 Val: cast<DeclRefExpr>(Val: Allocator->IgnoreParenImpCasts())->getDecl()));
6322 LValue AllocatorLVal = CGF.EmitLValue(E: Allocator->IgnoreParenImpCasts());
6323 AllocatorVal =
6324 CGF.EmitScalarConversion(Src: AllocatorVal, SrcTy: CGF.getContext().VoidPtrTy,
6325 DstTy: Allocator->getType(), Loc: Allocator->getExprLoc());
6326 CGF.EmitStoreOfScalar(value: AllocatorVal, lvalue: AllocatorLVal);
6327}
6328
6329void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6330 const Expr *Allocator) {
6331 llvm::Value *ThreadId = getThreadID(CGF, Loc: Allocator->getExprLoc());
6332 ThreadId = CGF.Builder.CreateIntCast(V: ThreadId, DestTy: CGF.IntTy, /*isSigned=*/true);
6333 LValue AllocatorLVal = CGF.EmitLValue(E: Allocator->IgnoreParenImpCasts());
6334 llvm::Value *AllocatorVal =
6335 CGF.EmitLoadOfScalar(lvalue: AllocatorLVal, Loc: Allocator->getExprLoc());
6336 AllocatorVal = CGF.EmitScalarConversion(Src: AllocatorVal, SrcTy: Allocator->getType(),
6337 DstTy: CGF.getContext().VoidPtrTy,
6338 Loc: Allocator->getExprLoc());
6339 (void)CGF.EmitRuntimeCall(
6340 callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(),
6341 FnID: OMPRTL___kmpc_destroy_allocator),
6342 args: {ThreadId, AllocatorVal});
6343}
6344
6345void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
6346 const OMPExecutableDirective &D, CodeGenFunction &CGF,
6347 llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
6348 assert(Attrs.MaxTeams.size() == 1 && Attrs.MaxThreads.size() == 1 &&
6349 "invalid default attrs structure");
6350 int32_t &MaxTeamsVal = Attrs.MaxTeams.front();
6351 int32_t &MaxThreadsVal = Attrs.MaxThreads.front();
6352
6353 getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal&: Attrs.MinTeams, MaxTeamsVal);
6354 getNumThreadsExprForTargetDirective(CGF, D, UpperBound&: MaxThreadsVal,
6355 /*UpperBoundOnly=*/true);
6356
6357 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6358 for (auto *A : C->getAttrs()) {
6359 int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
6360 int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
6361 if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(Val: A))
6362 CGM.handleCUDALaunchBoundsAttr(F: nullptr, A: Attr, MaxThreadsVal: &AttrMaxThreadsVal,
6363 MinBlocksVal: &AttrMinBlocksVal, MaxClusterRankVal: &AttrMaxBlocksVal);
6364 else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(Val: A))
6365 CGM.handleAMDGPUFlatWorkGroupSizeAttr(
6366 F: nullptr, A: Attr, /*ReqdWGS=*/nullptr, MinThreadsVal: &AttrMinThreadsVal,
6367 MaxThreadsVal: &AttrMaxThreadsVal);
6368 else
6369 continue;
6370
6371 Attrs.MinThreads = std::max(a: Attrs.MinThreads, b: AttrMinThreadsVal);
6372 if (AttrMaxThreadsVal > 0)
6373 MaxThreadsVal = MaxThreadsVal > 0
6374 ? std::min(a: MaxThreadsVal, b: AttrMaxThreadsVal)
6375 : AttrMaxThreadsVal;
6376 Attrs.MinTeams = std::max(a: Attrs.MinTeams, b: AttrMinBlocksVal);
6377 if (AttrMaxBlocksVal > 0)
6378 MaxTeamsVal = MaxTeamsVal > 0 ? std::min(a: MaxTeamsVal, b: AttrMaxBlocksVal)
6379 : AttrMaxBlocksVal;
6380 }
6381 }
6382}
6383
6384void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6385 const OMPExecutableDirective &D, StringRef ParentName,
6386 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6387 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6388
6389 llvm::TargetRegionEntryInfo EntryInfo =
6390 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, BeginLoc: D.getBeginLoc(), ParentName);
6391
6392 CodeGenFunction CGF(CGM, true);
6393 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
6394 [&CGF, &D, &CodeGen, this](StringRef EntryFnName) {
6395 const CapturedStmt &CS = *D.getCapturedStmt(RegionKind: OMPD_target);
6396
6397 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6398 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6399 if (CGM.getLangOpts().OpenMPIsTargetDevice && !isGPU())
6400 return CGF.GenerateOpenMPCapturedStmtFunctionAggregate(S: CS, D);
6401 return CGF.GenerateOpenMPCapturedStmtFunction(S: CS, D);
6402 };
6403
6404 cantFail(Err: OMPBuilder.emitTargetRegionFunction(
6405 EntryInfo, GenerateFunctionCallback&: GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
6406 OutlinedFnID));
6407
6408 if (!OutlinedFn)
6409 return;
6410
6411 CGM.getTargetCodeGenInfo().setTargetAttributes(D: nullptr, GV: OutlinedFn, M&: CGM);
6412
6413 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6414 for (auto *A : C->getAttrs()) {
6415 if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(Val: A))
6416 CGM.handleAMDGPUWavesPerEUAttr(F: OutlinedFn, A: Attr);
6417 }
6418 }
6419 registerVTable(D);
6420}
6421
6422/// Checks if the expression is constant or does not have non-trivial function
6423/// calls.
6424static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6425 // We can skip constant expressions.
6426 // We can skip expressions with trivial calls or simple expressions.
6427 return (E->isEvaluatable(Ctx, AllowSideEffects: Expr::SE_AllowUndefinedBehavior) ||
6428 !E->hasNonTrivialCall(Ctx)) &&
6429 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6430}
6431
6432const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6433 const Stmt *Body) {
6434 const Stmt *Child = Body->IgnoreContainers();
6435 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Val: Child)) {
6436 Child = nullptr;
6437 for (const Stmt *S : C->body()) {
6438 if (const auto *E = dyn_cast<Expr>(Val: S)) {
6439 if (isTrivial(Ctx, E))
6440 continue;
6441 }
6442 // Some of the statements can be ignored.
6443 if (isa<AsmStmt>(Val: S) || isa<NullStmt>(Val: S) || isa<OMPFlushDirective>(Val: S) ||
6444 isa<OMPBarrierDirective>(Val: S) || isa<OMPTaskyieldDirective>(Val: S))
6445 continue;
6446 // Analyze declarations.
6447 if (const auto *DS = dyn_cast<DeclStmt>(Val: S)) {
6448 if (llvm::all_of(Range: DS->decls(), P: [](const Decl *D) {
6449 if (isa<EmptyDecl>(Val: D) || isa<DeclContext>(Val: D) ||
6450 isa<TypeDecl>(Val: D) || isa<PragmaCommentDecl>(Val: D) ||
6451 isa<PragmaDetectMismatchDecl>(Val: D) || isa<UsingDecl>(Val: D) ||
6452 isa<UsingDirectiveDecl>(Val: D) ||
6453 isa<OMPDeclareReductionDecl>(Val: D) ||
6454 isa<OMPThreadPrivateDecl>(Val: D) || isa<OMPAllocateDecl>(Val: D))
6455 return true;
6456 const auto *VD = dyn_cast<VarDecl>(Val: D);
6457 if (!VD)
6458 return false;
6459 return VD->hasGlobalStorage() || !VD->isUsed();
6460 }))
6461 continue;
6462 }
6463 // Found multiple children - cannot get the one child only.
6464 if (Child)
6465 return nullptr;
6466 Child = S;
6467 }
6468 if (Child)
6469 Child = Child->IgnoreContainers();
6470 }
6471 return Child;
6472}
6473
6474const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6475 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
6476 int32_t &MaxTeamsVal) {
6477
6478 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6479 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6480 "Expected target-based executable directive.");
6481 switch (DirectiveKind) {
6482 case OMPD_target: {
6483 const auto *CS = D.getInnermostCapturedStmt();
6484 const auto *Body =
6485 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6486 const Stmt *ChildStmt =
6487 CGOpenMPRuntime::getSingleCompoundChild(Ctx&: CGF.getContext(), Body);
6488 if (const auto *NestedDir =
6489 dyn_cast_or_null<OMPExecutableDirective>(Val: ChildStmt)) {
6490 if (isOpenMPTeamsDirective(DKind: NestedDir->getDirectiveKind())) {
6491 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6492 const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()
6493 ->getNumTeams()
6494 .front();
6495 if (NumTeams->isIntegerConstantExpr(Ctx: CGF.getContext()))
6496 if (auto Constant =
6497 NumTeams->getIntegerConstantExpr(Ctx: CGF.getContext()))
6498 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6499 return NumTeams;
6500 }
6501 MinTeamsVal = MaxTeamsVal = 0;
6502 return nullptr;
6503 }
6504 MinTeamsVal = MaxTeamsVal = 1;
6505 return nullptr;
6506 }
6507 // A value of -1 is used to check if we need to emit no teams region
6508 MinTeamsVal = MaxTeamsVal = -1;
6509 return nullptr;
6510 }
6511 case OMPD_target_teams_loop:
6512 case OMPD_target_teams:
6513 case OMPD_target_teams_distribute:
6514 case OMPD_target_teams_distribute_simd:
6515 case OMPD_target_teams_distribute_parallel_for:
6516 case OMPD_target_teams_distribute_parallel_for_simd: {
6517 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6518 const Expr *NumTeams =
6519 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams().front();
6520 if (NumTeams->isIntegerConstantExpr(Ctx: CGF.getContext()))
6521 if (auto Constant = NumTeams->getIntegerConstantExpr(Ctx: CGF.getContext()))
6522 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6523 return NumTeams;
6524 }
6525 MinTeamsVal = MaxTeamsVal = 0;
6526 return nullptr;
6527 }
6528 case OMPD_target_parallel:
6529 case OMPD_target_parallel_for:
6530 case OMPD_target_parallel_for_simd:
6531 case OMPD_target_parallel_loop:
6532 case OMPD_target_simd:
6533 MinTeamsVal = MaxTeamsVal = 1;
6534 return nullptr;
6535 case OMPD_parallel:
6536 case OMPD_for:
6537 case OMPD_parallel_for:
6538 case OMPD_parallel_loop:
6539 case OMPD_parallel_master:
6540 case OMPD_parallel_sections:
6541 case OMPD_for_simd:
6542 case OMPD_parallel_for_simd:
6543 case OMPD_cancel:
6544 case OMPD_cancellation_point:
6545 case OMPD_ordered:
6546 case OMPD_threadprivate:
6547 case OMPD_allocate:
6548 case OMPD_task:
6549 case OMPD_simd:
6550 case OMPD_tile:
6551 case OMPD_unroll:
6552 case OMPD_sections:
6553 case OMPD_section:
6554 case OMPD_single:
6555 case OMPD_master:
6556 case OMPD_critical:
6557 case OMPD_taskyield:
6558 case OMPD_barrier:
6559 case OMPD_taskwait:
6560 case OMPD_taskgroup:
6561 case OMPD_atomic:
6562 case OMPD_flush:
6563 case OMPD_depobj:
6564 case OMPD_scan:
6565 case OMPD_teams:
6566 case OMPD_target_data:
6567 case OMPD_target_exit_data:
6568 case OMPD_target_enter_data:
6569 case OMPD_distribute:
6570 case OMPD_distribute_simd:
6571 case OMPD_distribute_parallel_for:
6572 case OMPD_distribute_parallel_for_simd:
6573 case OMPD_teams_distribute:
6574 case OMPD_teams_distribute_simd:
6575 case OMPD_teams_distribute_parallel_for:
6576 case OMPD_teams_distribute_parallel_for_simd:
6577 case OMPD_target_update:
6578 case OMPD_declare_simd:
6579 case OMPD_declare_variant:
6580 case OMPD_begin_declare_variant:
6581 case OMPD_end_declare_variant:
6582 case OMPD_declare_target:
6583 case OMPD_end_declare_target:
6584 case OMPD_declare_reduction:
6585 case OMPD_declare_mapper:
6586 case OMPD_taskloop:
6587 case OMPD_taskloop_simd:
6588 case OMPD_master_taskloop:
6589 case OMPD_master_taskloop_simd:
6590 case OMPD_parallel_master_taskloop:
6591 case OMPD_parallel_master_taskloop_simd:
6592 case OMPD_requires:
6593 case OMPD_metadirective:
6594 case OMPD_unknown:
6595 break;
6596 default:
6597 break;
6598 }
6599 llvm_unreachable("Unexpected directive kind.");
6600}
6601
6602llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6603 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6604 assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6605 "Clauses associated with the teams directive expected to be emitted "
6606 "only for the host!");
6607 CGBuilderTy &Bld = CGF.Builder;
6608 int32_t MinNT = -1, MaxNT = -1;
6609 const Expr *NumTeams =
6610 getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal&: MinNT, MaxTeamsVal&: MaxNT);
6611 if (NumTeams != nullptr) {
6612 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6613
6614 switch (DirectiveKind) {
6615 case OMPD_target: {
6616 const auto *CS = D.getInnermostCapturedStmt();
6617 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6618 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6619 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(E: NumTeams,
6620 /*IgnoreResultAssign*/ true);
6621 return Bld.CreateIntCast(V: NumTeamsVal, DestTy: CGF.Int32Ty,
6622 /*isSigned=*/true);
6623 }
6624 case OMPD_target_teams:
6625 case OMPD_target_teams_distribute:
6626 case OMPD_target_teams_distribute_simd:
6627 case OMPD_target_teams_distribute_parallel_for:
6628 case OMPD_target_teams_distribute_parallel_for_simd: {
6629 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6630 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(E: NumTeams,
6631 /*IgnoreResultAssign*/ true);
6632 return Bld.CreateIntCast(V: NumTeamsVal, DestTy: CGF.Int32Ty,
6633 /*isSigned=*/true);
6634 }
6635 default:
6636 break;
6637 }
6638 }
6639
6640 assert(MinNT == MaxNT && "Num threads ranges require handling here.");
6641 return llvm::ConstantInt::getSigned(Ty: CGF.Int32Ty, V: MinNT);
6642}
6643
6644/// Check for a num threads constant value (stored in \p DefaultVal), or
6645/// expression (stored in \p E). If the value is conditional (via an if-clause),
6646/// store the condition in \p CondVal. If \p E, and \p CondVal respectively, are
6647/// nullptr, no expression evaluation is perfomed.
6648static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6649 const Expr **E, int32_t &UpperBound,
6650 bool UpperBoundOnly, llvm::Value **CondVal) {
6651 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6652 Ctx&: CGF.getContext(), Body: CS->getCapturedStmt());
6653 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child);
6654 if (!Dir)
6655 return;
6656
6657 if (isOpenMPParallelDirective(DKind: Dir->getDirectiveKind())) {
6658 // Handle if clause. If if clause present, the number of threads is
6659 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6660 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6661 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6662 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6663 const OMPIfClause *IfClause = nullptr;
6664 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6665 if (C->getNameModifier() == OMPD_unknown ||
6666 C->getNameModifier() == OMPD_parallel) {
6667 IfClause = C;
6668 break;
6669 }
6670 }
6671 if (IfClause) {
6672 const Expr *CondExpr = IfClause->getCondition();
6673 bool Result;
6674 if (CondExpr->EvaluateAsBooleanCondition(Result, Ctx: CGF.getContext())) {
6675 if (!Result) {
6676 UpperBound = 1;
6677 return;
6678 }
6679 } else {
6680 CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
6681 if (const auto *PreInit =
6682 cast_or_null<DeclStmt>(Val: IfClause->getPreInitStmt())) {
6683 for (const auto *I : PreInit->decls()) {
6684 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6685 CGF.EmitVarDecl(D: cast<VarDecl>(Val: *I));
6686 } else {
6687 CodeGenFunction::AutoVarEmission Emission =
6688 CGF.EmitAutoVarAlloca(var: cast<VarDecl>(Val: *I));
6689 CGF.EmitAutoVarCleanups(emission: Emission);
6690 }
6691 }
6692 *CondVal = CGF.EvaluateExprAsBool(E: CondExpr);
6693 }
6694 }
6695 }
6696 }
6697 // Check the value of num_threads clause iff if clause was not specified
6698 // or is not evaluated to false.
6699 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6700 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6701 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6702 const auto *NumThreadsClause =
6703 Dir->getSingleClause<OMPNumThreadsClause>();
6704 const Expr *NTExpr = NumThreadsClause->getNumThreads();
6705 if (NTExpr->isIntegerConstantExpr(Ctx: CGF.getContext()))
6706 if (auto Constant = NTExpr->getIntegerConstantExpr(Ctx: CGF.getContext()))
6707 UpperBound =
6708 UpperBound
6709 ? Constant->getZExtValue()
6710 : std::min(a: UpperBound,
6711 b: static_cast<int32_t>(Constant->getZExtValue()));
6712 // If we haven't found a upper bound, remember we saw a thread limiting
6713 // clause.
6714 if (UpperBound == -1)
6715 UpperBound = 0;
6716 if (!E)
6717 return;
6718 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6719 if (const auto *PreInit =
6720 cast_or_null<DeclStmt>(Val: NumThreadsClause->getPreInitStmt())) {
6721 for (const auto *I : PreInit->decls()) {
6722 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6723 CGF.EmitVarDecl(D: cast<VarDecl>(Val: *I));
6724 } else {
6725 CodeGenFunction::AutoVarEmission Emission =
6726 CGF.EmitAutoVarAlloca(var: cast<VarDecl>(Val: *I));
6727 CGF.EmitAutoVarCleanups(emission: Emission);
6728 }
6729 }
6730 }
6731 *E = NTExpr;
6732 }
6733 return;
6734 }
6735 if (isOpenMPSimdDirective(DKind: Dir->getDirectiveKind()))
6736 UpperBound = 1;
6737}
6738
6739const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6740 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6741 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6742 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6743 "Clauses associated with the teams directive expected to be emitted "
6744 "only for the host!");
6745 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6746 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6747 "Expected target-based executable directive.");
6748
6749 const Expr *NT = nullptr;
6750 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6751
6752 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6753 if (E->isIntegerConstantExpr(Ctx: CGF.getContext())) {
6754 if (auto Constant = E->getIntegerConstantExpr(Ctx: CGF.getContext()))
6755 UpperBound = UpperBound ? Constant->getZExtValue()
6756 : std::min(a: UpperBound,
6757 b: int32_t(Constant->getZExtValue()));
6758 }
6759 // If we haven't found a upper bound, remember we saw a thread limiting
6760 // clause.
6761 if (UpperBound == -1)
6762 UpperBound = 0;
6763 if (EPtr)
6764 *EPtr = E;
6765 };
6766
6767 auto ReturnSequential = [&]() {
6768 UpperBound = 1;
6769 return NT;
6770 };
6771
6772 switch (DirectiveKind) {
6773 case OMPD_target: {
6774 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6775 getNumThreads(CGF, CS, E: NTPtr, UpperBound, UpperBoundOnly, CondVal);
6776 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6777 Ctx&: CGF.getContext(), Body: CS->getCapturedStmt());
6778 // TODO: The standard is not clear how to resolve two thread limit clauses,
6779 // let's pick the teams one if it's present, otherwise the target one.
6780 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6781 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child)) {
6782 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6783 ThreadLimitClause = TLC;
6784 if (ThreadLimitExpr) {
6785 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6786 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6787 CodeGenFunction::LexicalScope Scope(
6788 CGF,
6789 ThreadLimitClause->getThreadLimit().front()->getSourceRange());
6790 if (const auto *PreInit =
6791 cast_or_null<DeclStmt>(Val: ThreadLimitClause->getPreInitStmt())) {
6792 for (const auto *I : PreInit->decls()) {
6793 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6794 CGF.EmitVarDecl(D: cast<VarDecl>(Val: *I));
6795 } else {
6796 CodeGenFunction::AutoVarEmission Emission =
6797 CGF.EmitAutoVarAlloca(var: cast<VarDecl>(Val: *I));
6798 CGF.EmitAutoVarCleanups(emission: Emission);
6799 }
6800 }
6801 }
6802 }
6803 }
6804 }
6805 if (ThreadLimitClause)
6806 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6807 ThreadLimitExpr);
6808 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child)) {
6809 if (isOpenMPTeamsDirective(DKind: Dir->getDirectiveKind()) &&
6810 !isOpenMPDistributeDirective(DKind: Dir->getDirectiveKind())) {
6811 CS = Dir->getInnermostCapturedStmt();
6812 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6813 Ctx&: CGF.getContext(), Body: CS->getCapturedStmt());
6814 Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child);
6815 }
6816 if (Dir && isOpenMPParallelDirective(DKind: Dir->getDirectiveKind())) {
6817 CS = Dir->getInnermostCapturedStmt();
6818 getNumThreads(CGF, CS, E: NTPtr, UpperBound, UpperBoundOnly, CondVal);
6819 } else if (Dir && isOpenMPSimdDirective(DKind: Dir->getDirectiveKind()))
6820 return ReturnSequential();
6821 }
6822 return NT;
6823 }
6824 case OMPD_target_teams: {
6825 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6826 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6827 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6828 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6829 ThreadLimitExpr);
6830 }
6831 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6832 getNumThreads(CGF, CS, E: NTPtr, UpperBound, UpperBoundOnly, CondVal);
6833 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6834 Ctx&: CGF.getContext(), Body: CS->getCapturedStmt());
6835 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child)) {
6836 if (Dir->getDirectiveKind() == OMPD_distribute) {
6837 CS = Dir->getInnermostCapturedStmt();
6838 getNumThreads(CGF, CS, E: NTPtr, UpperBound, UpperBoundOnly, CondVal);
6839 }
6840 }
6841 return NT;
6842 }
6843 case OMPD_target_teams_distribute:
6844 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6845 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6846 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6847 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6848 ThreadLimitExpr);
6849 }
6850 getNumThreads(CGF, CS: D.getInnermostCapturedStmt(), E: NTPtr, UpperBound,
6851 UpperBoundOnly, CondVal);
6852 return NT;
6853 case OMPD_target_teams_loop:
6854 case OMPD_target_parallel_loop:
6855 case OMPD_target_parallel:
6856 case OMPD_target_parallel_for:
6857 case OMPD_target_parallel_for_simd:
6858 case OMPD_target_teams_distribute_parallel_for:
6859 case OMPD_target_teams_distribute_parallel_for_simd: {
6860 if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6861 const OMPIfClause *IfClause = nullptr;
6862 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6863 if (C->getNameModifier() == OMPD_unknown ||
6864 C->getNameModifier() == OMPD_parallel) {
6865 IfClause = C;
6866 break;
6867 }
6868 }
6869 if (IfClause) {
6870 const Expr *Cond = IfClause->getCondition();
6871 bool Result;
6872 if (Cond->EvaluateAsBooleanCondition(Result, Ctx: CGF.getContext())) {
6873 if (!Result)
6874 return ReturnSequential();
6875 } else {
6876 CodeGenFunction::RunCleanupsScope Scope(CGF);
6877 *CondVal = CGF.EvaluateExprAsBool(E: Cond);
6878 }
6879 }
6880 }
6881 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6882 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6883 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6884 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6885 ThreadLimitExpr);
6886 }
6887 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6888 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6889 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6890 CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6891 return NumThreadsClause->getNumThreads();
6892 }
6893 return NT;
6894 }
6895 case OMPD_target_teams_distribute_simd:
6896 case OMPD_target_simd:
6897 return ReturnSequential();
6898 default:
6899 break;
6900 }
6901 llvm_unreachable("Unsupported directive kind.");
6902}
6903
6904llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6905 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6906 llvm::Value *NumThreadsVal = nullptr;
6907 llvm::Value *CondVal = nullptr;
6908 llvm::Value *ThreadLimitVal = nullptr;
6909 const Expr *ThreadLimitExpr = nullptr;
6910 int32_t UpperBound = -1;
6911
6912 const Expr *NT = getNumThreadsExprForTargetDirective(
6913 CGF, D, UpperBound, /* UpperBoundOnly */ false, CondVal: &CondVal,
6914 ThreadLimitExpr: &ThreadLimitExpr);
6915
6916 // Thread limit expressions are used below, emit them.
6917 if (ThreadLimitExpr) {
6918 ThreadLimitVal =
6919 CGF.EmitScalarExpr(E: ThreadLimitExpr, /*IgnoreResultAssign=*/true);
6920 ThreadLimitVal = CGF.Builder.CreateIntCast(V: ThreadLimitVal, DestTy: CGF.Int32Ty,
6921 /*isSigned=*/false);
6922 }
6923
6924 // Generate the num teams expression.
6925 if (UpperBound == 1) {
6926 NumThreadsVal = CGF.Builder.getInt32(C: UpperBound);
6927 } else if (NT) {
6928 NumThreadsVal = CGF.EmitScalarExpr(E: NT, /*IgnoreResultAssign=*/true);
6929 NumThreadsVal = CGF.Builder.CreateIntCast(V: NumThreadsVal, DestTy: CGF.Int32Ty,
6930 /*isSigned=*/false);
6931 } else if (ThreadLimitVal) {
6932 // If we do not have a num threads value but a thread limit, replace the
6933 // former with the latter. We know handled the thread limit expression.
6934 NumThreadsVal = ThreadLimitVal;
6935 ThreadLimitVal = nullptr;
6936 } else {
6937 // Default to "0" which means runtime choice.
6938 assert(!ThreadLimitVal && "Default not applicable with thread limit value");
6939 NumThreadsVal = CGF.Builder.getInt32(C: 0);
6940 }
6941
6942 // Handle if clause. If if clause present, the number of threads is
6943 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6944 if (CondVal) {
6945 CodeGenFunction::RunCleanupsScope Scope(CGF);
6946 NumThreadsVal = CGF.Builder.CreateSelect(C: CondVal, True: NumThreadsVal,
6947 False: CGF.Builder.getInt32(C: 1));
6948 }
6949
6950 // If the thread limit and num teams expression were present, take the
6951 // minimum.
6952 if (ThreadLimitVal) {
6953 NumThreadsVal = CGF.Builder.CreateSelect(
6954 C: CGF.Builder.CreateICmpULT(LHS: ThreadLimitVal, RHS: NumThreadsVal),
6955 True: ThreadLimitVal, False: NumThreadsVal);
6956 }
6957
6958 return NumThreadsVal;
6959}
6960
6961namespace {
6962LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6963
6964// Utility to handle information from clauses associated with a given
6965// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6966// It provides a convenient interface to obtain the information and generate
6967// code for that information.
6968class MappableExprsHandler {
6969public:
6970 /// Custom comparator for attach-pointer expressions that compares them by
6971 /// complexity (i.e. their component-depth) first, then by the order in which
6972 /// they were computed by collectAttachPtrExprInfo(), if they are semantically
6973 /// different.
6974 struct AttachPtrExprComparator {
6975 const MappableExprsHandler &Handler;
6976 // Cache of previous equality comparison results.
6977 mutable llvm::DenseMap<std::pair<const Expr *, const Expr *>, bool>
6978 CachedEqualityComparisons;
6979
6980 AttachPtrExprComparator(const MappableExprsHandler &H) : Handler(H) {}
6981 AttachPtrExprComparator() = delete;
6982
6983 // Return true iff LHS is "less than" RHS.
6984 bool operator()(const Expr *LHS, const Expr *RHS) const {
6985 if (LHS == RHS)
6986 return false;
6987
6988 // First, compare by complexity (depth)
6989 const auto ItLHS = Handler.AttachPtrComponentDepthMap.find(Val: LHS);
6990 const auto ItRHS = Handler.AttachPtrComponentDepthMap.find(Val: RHS);
6991
6992 std::optional<size_t> DepthLHS =
6993 (ItLHS != Handler.AttachPtrComponentDepthMap.end()) ? ItLHS->second
6994 : std::nullopt;
6995 std::optional<size_t> DepthRHS =
6996 (ItRHS != Handler.AttachPtrComponentDepthMap.end()) ? ItRHS->second
6997 : std::nullopt;
6998
6999 // std::nullopt (no attach pointer) has lowest complexity
7000 if (!DepthLHS.has_value() && !DepthRHS.has_value()) {
7001 // Both have same complexity, now check semantic equality
7002 if (areEqual(LHS, RHS))
7003 return false;
7004 // Different semantically, compare by computation order
7005 return wasComputedBefore(LHS, RHS);
7006 }
7007 if (!DepthLHS.has_value())
7008 return true; // LHS has lower complexity
7009 if (!DepthRHS.has_value())
7010 return false; // RHS has lower complexity
7011
7012 // Both have values, compare by depth (lower depth = lower complexity)
7013 if (DepthLHS.value() != DepthRHS.value())
7014 return DepthLHS.value() < DepthRHS.value();
7015
7016 // Same complexity, now check semantic equality
7017 if (areEqual(LHS, RHS))
7018 return false;
7019 // Different semantically, compare by computation order
7020 return wasComputedBefore(LHS, RHS);
7021 }
7022
7023 public:
7024 /// Return true if \p LHS and \p RHS are semantically equal. Uses pre-cached
7025 /// results, if available, otherwise does a recursive semantic comparison.
7026 bool areEqual(const Expr *LHS, const Expr *RHS) const {
7027 // Check cache first for faster lookup
7028 const auto CachedResultIt = CachedEqualityComparisons.find(Val: {LHS, RHS});
7029 if (CachedResultIt != CachedEqualityComparisons.end())
7030 return CachedResultIt->second;
7031
7032 bool ComparisonResult = areSemanticallyEqual(LHS, RHS);
7033
7034 // Cache the result for future lookups (both orders since semantic
7035 // equality is commutative)
7036 CachedEqualityComparisons[{LHS, RHS}] = ComparisonResult;
7037 CachedEqualityComparisons[{RHS, LHS}] = ComparisonResult;
7038 return ComparisonResult;
7039 }
7040
7041 /// Compare the two attach-ptr expressions by their computation order.
7042 /// Returns true iff LHS was computed before RHS by
7043 /// collectAttachPtrExprInfo().
7044 bool wasComputedBefore(const Expr *LHS, const Expr *RHS) const {
7045 const size_t &OrderLHS = Handler.AttachPtrComputationOrderMap.at(Val: LHS);
7046 const size_t &OrderRHS = Handler.AttachPtrComputationOrderMap.at(Val: RHS);
7047
7048 return OrderLHS < OrderRHS;
7049 }
7050
7051 private:
7052 /// Helper function to compare attach-pointer expressions semantically.
7053 /// This function handles various expression types that can be part of an
7054 /// attach-pointer.
7055 /// TODO: Not urgent, but we should ideally return true when comparing
7056 /// `p[10]`, `*(p + 10)`, `*(p + 5 + 5)`, `p[10:1]` etc.
7057 bool areSemanticallyEqual(const Expr *LHS, const Expr *RHS) const {
7058 if (LHS == RHS)
7059 return true;
7060
7061 // If only one is null, they aren't equal
7062 if (!LHS || !RHS)
7063 return false;
7064
7065 ASTContext &Ctx = Handler.CGF.getContext();
7066 // Strip away parentheses and no-op casts to get to the core expression
7067 LHS = LHS->IgnoreParenNoopCasts(Ctx);
7068 RHS = RHS->IgnoreParenNoopCasts(Ctx);
7069
7070 // Direct pointer comparison of the underlying expressions
7071 if (LHS == RHS)
7072 return true;
7073
7074 // Check if the expression classes match
7075 if (LHS->getStmtClass() != RHS->getStmtClass())
7076 return false;
7077
7078 // Handle DeclRefExpr (variable references)
7079 if (const auto *LD = dyn_cast<DeclRefExpr>(Val: LHS)) {
7080 const auto *RD = dyn_cast<DeclRefExpr>(Val: RHS);
7081 if (!RD)
7082 return false;
7083 return LD->getDecl()->getCanonicalDecl() ==
7084 RD->getDecl()->getCanonicalDecl();
7085 }
7086
7087 // Handle ArraySubscriptExpr (array indexing like a[i])
7088 if (const auto *LA = dyn_cast<ArraySubscriptExpr>(Val: LHS)) {
7089 const auto *RA = dyn_cast<ArraySubscriptExpr>(Val: RHS);
7090 if (!RA)
7091 return false;
7092 return areSemanticallyEqual(LHS: LA->getBase(), RHS: RA->getBase()) &&
7093 areSemanticallyEqual(LHS: LA->getIdx(), RHS: RA->getIdx());
7094 }
7095
7096 // Handle MemberExpr (member access like s.m or p->m)
7097 if (const auto *LM = dyn_cast<MemberExpr>(Val: LHS)) {
7098 const auto *RM = dyn_cast<MemberExpr>(Val: RHS);
7099 if (!RM)
7100 return false;
7101 if (LM->getMemberDecl()->getCanonicalDecl() !=
7102 RM->getMemberDecl()->getCanonicalDecl())
7103 return false;
7104 return areSemanticallyEqual(LHS: LM->getBase(), RHS: RM->getBase());
7105 }
7106
7107 // Handle UnaryOperator (unary operations like *p, &x, etc.)
7108 if (const auto *LU = dyn_cast<UnaryOperator>(Val: LHS)) {
7109 const auto *RU = dyn_cast<UnaryOperator>(Val: RHS);
7110 if (!RU)
7111 return false;
7112 if (LU->getOpcode() != RU->getOpcode())
7113 return false;
7114 return areSemanticallyEqual(LHS: LU->getSubExpr(), RHS: RU->getSubExpr());
7115 }
7116
7117 // Handle BinaryOperator (binary operations like p + offset)
7118 if (const auto *LB = dyn_cast<BinaryOperator>(Val: LHS)) {
7119 const auto *RB = dyn_cast<BinaryOperator>(Val: RHS);
7120 if (!RB)
7121 return false;
7122 if (LB->getOpcode() != RB->getOpcode())
7123 return false;
7124 return areSemanticallyEqual(LHS: LB->getLHS(), RHS: RB->getLHS()) &&
7125 areSemanticallyEqual(LHS: LB->getRHS(), RHS: RB->getRHS());
7126 }
7127
7128 // Handle ArraySectionExpr (array sections like a[0:1])
7129 // Attach pointers should not contain array-sections, but currently we
7130 // don't emit an error.
7131 if (const auto *LAS = dyn_cast<ArraySectionExpr>(Val: LHS)) {
7132 const auto *RAS = dyn_cast<ArraySectionExpr>(Val: RHS);
7133 if (!RAS)
7134 return false;
7135 return areSemanticallyEqual(LHS: LAS->getBase(), RHS: RAS->getBase()) &&
7136 areSemanticallyEqual(LHS: LAS->getLowerBound(),
7137 RHS: RAS->getLowerBound()) &&
7138 areSemanticallyEqual(LHS: LAS->getLength(), RHS: RAS->getLength());
7139 }
7140
7141 // Handle CastExpr (explicit casts)
7142 if (const auto *LC = dyn_cast<CastExpr>(Val: LHS)) {
7143 const auto *RC = dyn_cast<CastExpr>(Val: RHS);
7144 if (!RC)
7145 return false;
7146 if (LC->getCastKind() != RC->getCastKind())
7147 return false;
7148 return areSemanticallyEqual(LHS: LC->getSubExpr(), RHS: RC->getSubExpr());
7149 }
7150
7151 // Handle CXXThisExpr (this pointer)
7152 if (isa<CXXThisExpr>(Val: LHS) && isa<CXXThisExpr>(Val: RHS))
7153 return true;
7154
7155 // Handle IntegerLiteral (integer constants)
7156 if (const auto *LI = dyn_cast<IntegerLiteral>(Val: LHS)) {
7157 const auto *RI = dyn_cast<IntegerLiteral>(Val: RHS);
7158 if (!RI)
7159 return false;
7160 return LI->getValue() == RI->getValue();
7161 }
7162
7163 // Handle CharacterLiteral (character constants)
7164 if (const auto *LC = dyn_cast<CharacterLiteral>(Val: LHS)) {
7165 const auto *RC = dyn_cast<CharacterLiteral>(Val: RHS);
7166 if (!RC)
7167 return false;
7168 return LC->getValue() == RC->getValue();
7169 }
7170
7171 // Handle FloatingLiteral (floating point constants)
7172 if (const auto *LF = dyn_cast<FloatingLiteral>(Val: LHS)) {
7173 const auto *RF = dyn_cast<FloatingLiteral>(Val: RHS);
7174 if (!RF)
7175 return false;
7176 // Use bitwise comparison for floating point literals
7177 return LF->getValue().bitwiseIsEqual(RHS: RF->getValue());
7178 }
7179
7180 // Handle StringLiteral (string constants)
7181 if (const auto *LS = dyn_cast<StringLiteral>(Val: LHS)) {
7182 const auto *RS = dyn_cast<StringLiteral>(Val: RHS);
7183 if (!RS)
7184 return false;
7185 return LS->getString() == RS->getString();
7186 }
7187
7188 // Handle CXXNullPtrLiteralExpr (nullptr)
7189 if (isa<CXXNullPtrLiteralExpr>(Val: LHS) && isa<CXXNullPtrLiteralExpr>(Val: RHS))
7190 return true;
7191
7192 // Handle CXXBoolLiteralExpr (true/false)
7193 if (const auto *LB = dyn_cast<CXXBoolLiteralExpr>(Val: LHS)) {
7194 const auto *RB = dyn_cast<CXXBoolLiteralExpr>(Val: RHS);
7195 if (!RB)
7196 return false;
7197 return LB->getValue() == RB->getValue();
7198 }
7199
7200 // Fallback for other forms - use the existing comparison method
7201 return Expr::isSameComparisonOperand(E1: LHS, E2: RHS);
7202 }
7203 };
7204
7205 /// Get the offset of the OMP_MAP_MEMBER_OF field.
7206 static unsigned getFlagMemberOffset() {
7207 unsigned Offset = 0;
7208 for (uint64_t Remain =
7209 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
7210 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
7211 !(Remain & 1); Remain = Remain >> 1)
7212 Offset++;
7213 return Offset;
7214 }
7215
7216 /// Class that holds debugging information for a data mapping to be passed to
7217 /// the runtime library.
7218 class MappingExprInfo {
7219 /// The variable declaration used for the data mapping.
7220 const ValueDecl *MapDecl = nullptr;
7221 /// The original expression used in the map clause, or null if there is
7222 /// none.
7223 const Expr *MapExpr = nullptr;
7224
7225 public:
7226 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7227 : MapDecl(MapDecl), MapExpr(MapExpr) {}
7228
7229 const ValueDecl *getMapDecl() const { return MapDecl; }
7230 const Expr *getMapExpr() const { return MapExpr; }
7231 };
7232
7233 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
7234 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
7235 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
7236 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
7237 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
7238 using MapNonContiguousArrayTy =
7239 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
7240 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
7241 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
7242 using MapData =
7243 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
7244 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>,
7245 bool /*IsImplicit*/, const ValueDecl *, const Expr *>;
7246 using MapDataArrayTy = SmallVector<MapData, 4>;
7247
7248 /// This structure contains combined information generated for mappable
7249 /// clauses, including base pointers, pointers, sizes, map types, user-defined
7250 /// mappers, and non-contiguous information.
7251 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
7252 MapExprsArrayTy Exprs;
7253 MapValueDeclsArrayTy Mappers;
7254 MapValueDeclsArrayTy DevicePtrDecls;
7255
7256 /// Append arrays in \a CurInfo.
7257 void append(MapCombinedInfoTy &CurInfo) {
7258 Exprs.append(in_start: CurInfo.Exprs.begin(), in_end: CurInfo.Exprs.end());
7259 DevicePtrDecls.append(in_start: CurInfo.DevicePtrDecls.begin(),
7260 in_end: CurInfo.DevicePtrDecls.end());
7261 Mappers.append(in_start: CurInfo.Mappers.begin(), in_end: CurInfo.Mappers.end());
7262 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
7263 }
7264 };
7265
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    // NOTE(review): presumably map entries collected for the struct before its
    // final entries are emitted - confirm against the users of this field.
    MapCombinedInfoTy PreliminaryMapData;
    // Field index and address of the lowest mapped element (LE above).
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    // Field index and address of the highest mapped element (HE above).
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    // NOTE(review): presumably the base address of the struct - confirm.
    Address Base = Address::invalid();
    // NOTE(review): presumably a lower-bound address - confirm.
    Address LB = Address::invalid();
    // Defaults to false. NOTE(review): presumably set when the struct is
    // mapped through an array section - confirm.
    bool IsArraySection = false;
    // Defaults to false. NOTE(review): presumably set when the whole record
    // is mapped - confirm.
    bool HasCompleteRecord = false;
  };
7281
  /// A struct to store the attach pointer and pointee information, to be used
  /// when emitting an attach entry. All members default to invalid/null.
  struct AttachInfoTy {
    // Address of the attach pointer.
    Address AttachPtrAddr = Address::invalid();
    // Address of the attach pointee.
    Address AttachPteeAddr = Address::invalid();
    // NOTE(review): presumably the declaration of the attach pointer - confirm.
    const ValueDecl *AttachPtrDecl = nullptr;
    // NOTE(review): presumably the map expression the attach entry is emitted
    // for - confirm.
    const Expr *AttachMapExpr = nullptr;

    /// Returns true iff both the attach pointer address and the attach pointee
    /// address are valid.
    bool isValid() const {
      return AttachPtrAddr.isValid() && AttachPteeAddr.isValid();
    }
  };
7294
7295 /// Check if there's any component list where the attach pointer expression
7296 /// matches the given captured variable.
7297 bool hasAttachEntryForCapturedVar(const ValueDecl *VD) const {
7298 for (const auto &AttachEntry : AttachPtrExprMap) {
7299 if (AttachEntry.second) {
7300 // Check if the attach pointer expression is a DeclRefExpr that
7301 // references the captured variable
7302 if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: AttachEntry.second))
7303 if (DRE->getDecl() == VD)
7304 return true;
7305 }
7306 }
7307 return false;
7308 }
7309
7310 /// Get the previously-cached attach pointer for a component list, if-any.
7311 const Expr *getAttachPtrExpr(
7312 OMPClauseMappableExprCommon::MappableExprComponentListRef Components)
7313 const {
7314 const auto It = AttachPtrExprMap.find(Val: Components);
7315 if (It != AttachPtrExprMap.end())
7316 return It->second;
7317
7318 return nullptr;
7319 }
7320
7321private:
  /// Information recorded for one mappable-expression component list: the
  /// list itself together with the map type, the modifiers and the flags that
  /// accompany it.
  struct MapInfo {
    // The component list this information belongs to.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    // Kind of the mapping; OMPC_MAP_unknown unless set by the constructor.
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    // Map-type modifiers and motion modifiers; these are non-owning views.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    // NOTE(review): presumably requests that the device pointer is returned
    // for this entry - confirm against the users of this flag.
    bool ReturnDevicePointer = false;
    // NOTE(review): presumably true for implicitly generated mappings -
    // confirm.
    bool IsImplicit = false;
    // NOTE(review): presumably the user-defined mapper declaration, if any -
    // confirm.
    const ValueDecl *Mapper = nullptr;
    // NOTE(review): presumably the original expression referencing the mapped
    // variable - confirm.
    const Expr *VarRef = nullptr;
    // NOTE(review): presumably distinguishes device-address entries from
    // device-pointer entries - confirm.
    bool ForDeviceAddr = false;
    // NOTE(review): meaning is not evident from this block - confirm.
    bool HasUdpFbNullify = false;

    MapInfo() = default;
    /// Initializes every member from the argument of the same name.
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false, bool HasUdpFbNullify = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr),
          HasUdpFbNullify(HasUdpFbNullify) {}
  };
7350
7351 /// The target directive from where the mappable clauses were extracted. It
7352 /// is either a executable directive or a user-defined mapper directive.
7353 llvm::PointerUnion<const OMPExecutableDirective *,
7354 const OMPDeclareMapperDecl *>
7355 CurDir;
7356
7357 /// Function the directive is being generated for.
7358 CodeGenFunction &CGF;
7359
7360 /// Set of all first private variables in the current directive.
7361 /// bool data is set to true if the variable is implicitly marked as
7362 /// firstprivate, false otherwise.
7363 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7364
7365 /// Set of defaultmap clause kinds that use firstprivate behavior.
7366 llvm::SmallSet<OpenMPDefaultmapClauseKind, 4> DefaultmapFirstprivateKinds;
7367
7368 /// Map between device pointer declarations and their expression components.
7369 /// The key value for declarations in 'this' is null.
7370 llvm::DenseMap<
7371 const ValueDecl *,
7372 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7373 DevPointersMap;
7374
7375 /// Map between device addr declarations and their expression components.
7376 /// The key value for declarations in 'this' is null.
7377 llvm::DenseMap<
7378 const ValueDecl *,
7379 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7380 HasDevAddrsMap;
7381
7382 /// Map between lambda declarations and their map type.
7383 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7384
7385 /// Map from component lists to their attach pointer expressions.
7386 llvm::DenseMap<OMPClauseMappableExprCommon::MappableExprComponentListRef,
7387 const Expr *>
7388 AttachPtrExprMap;
7389
/// Map from attach pointer expressions to their component depth.
/// The component-depth of `nullptr` (i.e. no attach-ptr) is `std::nullopt`.
/// This can be used to order attach-ptr expressions with
/// increasing/decreasing depth.
/// TODO: Not urgent, but we should ideally use the number of pointer
/// dereferences in an expr as an indicator of its complexity, instead of the
/// component-depth. That would be needed for us to treat `p[1]`, `*(p + 10)`,
/// `*(p + 5 + 5)` together.
7398 llvm::DenseMap<const Expr *, std::optional<size_t>>
7399 AttachPtrComponentDepthMap = {{nullptr, std::nullopt}};
7400
7401 /// Map from attach pointer expressions to the order they were computed in, in
7402 /// collectAttachPtrExprInfo().
7403 llvm::DenseMap<const Expr *, size_t> AttachPtrComputationOrderMap = {
7404 {nullptr, 0}};
7405
7406 /// An instance of attach-ptr-expr comparator that can be used throughout the
7407 /// lifetime of this handler.
7408 AttachPtrExprComparator AttachPtrComparator;
7409
7410 llvm::Value *getExprTypeSize(const Expr *E) const {
7411 QualType ExprTy = E->getType().getCanonicalType();
7412
7413 // Calculate the size for array shaping expression.
7414 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(Val: E)) {
7415 llvm::Value *Size =
7416 CGF.getTypeSize(Ty: OAE->getBase()->getType()->getPointeeType());
7417 for (const Expr *SE : OAE->getDimensions()) {
7418 llvm::Value *Sz = CGF.EmitScalarExpr(E: SE);
7419 Sz = CGF.EmitScalarConversion(Src: Sz, SrcTy: SE->getType(),
7420 DstTy: CGF.getContext().getSizeType(),
7421 Loc: SE->getExprLoc());
7422 Size = CGF.Builder.CreateNUWMul(LHS: Size, RHS: Sz);
7423 }
7424 return Size;
7425 }
7426
7427 // Reference types are ignored for mapping purposes.
7428 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7429 ExprTy = RefTy->getPointeeType().getCanonicalType();
7430
7431 // Given that an array section is considered a built-in type, we need to
7432 // do the calculation based on the length of the section instead of relying
7433 // on CGF.getTypeSize(E->getType()).
7434 if (const auto *OAE = dyn_cast<ArraySectionExpr>(Val: E)) {
7435 QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
7436 Base: OAE->getBase()->IgnoreParenImpCasts())
7437 .getCanonicalType();
7438
7439 // If there is no length associated with the expression and lower bound is
7440 // not specified too, that means we are using the whole length of the
7441 // base.
7442 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7443 !OAE->getLowerBound())
7444 return CGF.getTypeSize(Ty: BaseTy);
7445
7446 llvm::Value *ElemSize;
7447 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7448 ElemSize = CGF.getTypeSize(Ty: PTy->getPointeeType().getCanonicalType());
7449 } else {
7450 const auto *ATy = cast<ArrayType>(Val: BaseTy.getTypePtr());
7451 assert(ATy && "Expecting array type if not a pointer type.");
7452 ElemSize = CGF.getTypeSize(Ty: ATy->getElementType().getCanonicalType());
7453 }
7454
7455 // If we don't have a length at this point, that is because we have an
7456 // array section with a single element.
7457 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7458 return ElemSize;
7459
7460 if (const Expr *LenExpr = OAE->getLength()) {
7461 llvm::Value *LengthVal = CGF.EmitScalarExpr(E: LenExpr);
7462 LengthVal = CGF.EmitScalarConversion(Src: LengthVal, SrcTy: LenExpr->getType(),
7463 DstTy: CGF.getContext().getSizeType(),
7464 Loc: LenExpr->getExprLoc());
7465 return CGF.Builder.CreateNUWMul(LHS: LengthVal, RHS: ElemSize);
7466 }
7467 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7468 OAE->getLowerBound() && "expected array_section[lb:].");
7469 // Size = sizetype - lb * elemtype;
7470 llvm::Value *LengthVal = CGF.getTypeSize(Ty: BaseTy);
7471 llvm::Value *LBVal = CGF.EmitScalarExpr(E: OAE->getLowerBound());
7472 LBVal = CGF.EmitScalarConversion(Src: LBVal, SrcTy: OAE->getLowerBound()->getType(),
7473 DstTy: CGF.getContext().getSizeType(),
7474 Loc: OAE->getLowerBound()->getExprLoc());
7475 LBVal = CGF.Builder.CreateNUWMul(LHS: LBVal, RHS: ElemSize);
7476 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LHS: LengthVal, RHS: LBVal);
7477 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LHS: LengthVal, RHS: LBVal);
7478 LengthVal = CGF.Builder.CreateSelect(
7479 C: Cmp, True: TrueVal, False: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 0));
7480 return LengthVal;
7481 }
7482 return CGF.getTypeSize(Ty: ExprTy);
7483 }
7484
7485 /// Return the corresponding bits for a given map clause modifier. Add
7486 /// a flag marking the map as a pointer if requested. Add a flag marking the
7487 /// map as the first one of a series of maps that relate to the same map
7488 /// expression.
7489 OpenMPOffloadMappingFlags getMapTypeBits(
7490 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7491 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7492 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7493 OpenMPOffloadMappingFlags Bits =
7494 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
7495 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7496 switch (MapType) {
7497 case OMPC_MAP_alloc:
7498 case OMPC_MAP_release:
7499 // alloc and release is the default behavior in the runtime library, i.e.
7500 // if we don't pass any bits alloc/release that is what the runtime is
7501 // going to do. Therefore, we don't need to signal anything for these two
7502 // type modifiers.
7503 break;
7504 case OMPC_MAP_to:
7505 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
7506 break;
7507 case OMPC_MAP_from:
7508 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7509 break;
7510 case OMPC_MAP_tofrom:
7511 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
7512 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7513 break;
7514 case OMPC_MAP_delete:
7515 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
7516 break;
7517 case OMPC_MAP_unknown:
7518 llvm_unreachable("Unexpected map type!");
7519 }
7520 if (AddPtrFlag)
7521 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7522 if (AddIsTargetParamFlag)
7523 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
7524 if (llvm::is_contained(Range&: MapModifiers, Element: OMPC_MAP_MODIFIER_always))
7525 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
7526 if (llvm::is_contained(Range&: MapModifiers, Element: OMPC_MAP_MODIFIER_close))
7527 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
7528 if (llvm::is_contained(Range&: MapModifiers, Element: OMPC_MAP_MODIFIER_present) ||
7529 llvm::is_contained(Range&: MotionModifiers, Element: OMPC_MOTION_MODIFIER_present))
7530 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
7531 if (llvm::is_contained(Range&: MapModifiers, Element: OMPC_MAP_MODIFIER_ompx_hold))
7532 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
7533 if (IsNonContiguous)
7534 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
7535 return Bits;
7536 }
7537
7538 /// Return true if the provided expression is a final array section. A
7539 /// final array section, is one whose length can't be proved to be one.
7540 bool isFinalArraySectionExpression(const Expr *E) const {
7541 const auto *OASE = dyn_cast<ArraySectionExpr>(Val: E);
7542
7543 // It is not an array section and therefore not a unity-size one.
7544 if (!OASE)
7545 return false;
7546
7547 // An array section with no colon always refer to a single element.
7548 if (OASE->getColonLocFirst().isInvalid())
7549 return false;
7550
7551 const Expr *Length = OASE->getLength();
7552
7553 // If we don't have a length we have to check if the array has size 1
7554 // for this dimension. Also, we should always expect a length if the
7555 // base type is pointer.
7556 if (!Length) {
7557 QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
7558 Base: OASE->getBase()->IgnoreParenImpCasts())
7559 .getCanonicalType();
7560 if (const auto *ATy = dyn_cast<ConstantArrayType>(Val: BaseQTy.getTypePtr()))
7561 return ATy->getSExtSize() != 1;
7562 // If we don't have a constant dimension length, we have to consider
7563 // the current section as having any size, so it is not necessarily
7564 // unitary. If it happen to be unity size, that's user fault.
7565 return true;
7566 }
7567
7568 // Check if the length evaluates to 1.
7569 Expr::EvalResult Result;
7570 if (!Length->EvaluateAsInt(Result, Ctx: CGF.getContext()))
7571 return true; // Can have more that size 1.
7572
7573 llvm::APSInt ConstLength = Result.Val.getInt();
7574 return ConstLength.getSExtValue() != 1;
7575 }
7576
7577 /// Emit an attach entry into \p CombinedInfo, using the information from \p
7578 /// AttachInfo. For example, for a map of form `int *p; ... map(p[1:10])`,
7579 /// an attach entry has the following form:
7580 /// &p, &p[1], sizeof(void*), ATTACH
7581 void emitAttachEntry(CodeGenFunction &CGF, MapCombinedInfoTy &CombinedInfo,
7582 const AttachInfoTy &AttachInfo) const {
7583 assert(AttachInfo.isValid() &&
7584 "Expected valid attach pointer/pointee information!");
7585
7586 // Size is the size of the pointer itself - use pointer size, not BaseDecl
7587 // size
7588 llvm::Value *PointerSize = CGF.Builder.CreateIntCast(
7589 V: llvm::ConstantInt::get(
7590 Ty: CGF.CGM.SizeTy, V: CGF.getContext()
7591 .getTypeSizeInChars(T: CGF.getContext().VoidPtrTy)
7592 .getQuantity()),
7593 DestTy: CGF.Int64Ty, /*isSigned=*/true);
7594
7595 CombinedInfo.Exprs.emplace_back(Args: AttachInfo.AttachPtrDecl,
7596 Args: AttachInfo.AttachMapExpr);
7597 CombinedInfo.BasePointers.push_back(
7598 Elt: AttachInfo.AttachPtrAddr.emitRawPointer(CGF));
7599 CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
7600 CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
7601 CombinedInfo.Pointers.push_back(
7602 Elt: AttachInfo.AttachPteeAddr.emitRawPointer(CGF));
7603 CombinedInfo.Sizes.push_back(Elt: PointerSize);
7604 CombinedInfo.Types.push_back(Elt: OpenMPOffloadMappingFlags::OMP_MAP_ATTACH);
7605 CombinedInfo.Mappers.push_back(Elt: nullptr);
7606 CombinedInfo.NonContigInfo.Dims.push_back(Elt: 1);
7607 }
7608
7609 /// A helper class to copy structures with overlapped elements, i.e. those
7610 /// which have mappings of both "s" and "s.mem". Consecutive elements that
7611 /// are not explicitly copied have mapping nodes synthesized for them,
7612 /// taking care to avoid generating zero-sized copies.
7613 class CopyOverlappedEntryGaps {
7614 CodeGenFunction &CGF;
7615 MapCombinedInfoTy &CombinedInfo;
7616 OpenMPOffloadMappingFlags Flags = OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7617 const ValueDecl *MapDecl = nullptr;
7618 const Expr *MapExpr = nullptr;
7619 Address BP = Address::invalid();
7620 bool IsNonContiguous = false;
7621 uint64_t DimSize = 0;
7622 // These elements track the position as the struct is iterated over
7623 // (in order of increasing element address).
7624 const RecordDecl *LastParent = nullptr;
7625 uint64_t Cursor = 0;
7626 unsigned LastIndex = -1u;
7627 Address LB = Address::invalid();
7628
7629 public:
7630 CopyOverlappedEntryGaps(CodeGenFunction &CGF,
7631 MapCombinedInfoTy &CombinedInfo,
7632 OpenMPOffloadMappingFlags Flags,
7633 const ValueDecl *MapDecl, const Expr *MapExpr,
7634 Address BP, Address LB, bool IsNonContiguous,
7635 uint64_t DimSize)
7636 : CGF(CGF), CombinedInfo(CombinedInfo), Flags(Flags), MapDecl(MapDecl),
7637 MapExpr(MapExpr), BP(BP), IsNonContiguous(IsNonContiguous),
7638 DimSize(DimSize), LB(LB) {}
7639
7640 void processField(
7641 const OMPClauseMappableExprCommon::MappableComponent &MC,
7642 const FieldDecl *FD,
7643 llvm::function_ref<LValue(CodeGenFunction &, const MemberExpr *)>
7644 EmitMemberExprBase) {
7645 const RecordDecl *RD = FD->getParent();
7646 const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(D: RD);
7647 uint64_t FieldOffset = RL.getFieldOffset(FieldNo: FD->getFieldIndex());
7648 uint64_t FieldSize =
7649 CGF.getContext().getTypeSize(T: FD->getType().getCanonicalType());
7650 Address ComponentLB = Address::invalid();
7651
7652 if (FD->getType()->isLValueReferenceType()) {
7653 const auto *ME = cast<MemberExpr>(Val: MC.getAssociatedExpression());
7654 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7655 ComponentLB =
7656 CGF.EmitLValueForFieldInitialization(Base: BaseLVal, Field: FD).getAddress();
7657 } else {
7658 ComponentLB =
7659 CGF.EmitOMPSharedLValue(E: MC.getAssociatedExpression()).getAddress();
7660 }
7661
7662 if (!LastParent)
7663 LastParent = RD;
7664 if (FD->getParent() == LastParent) {
7665 if (FD->getFieldIndex() != LastIndex + 1)
7666 copyUntilField(FD, ComponentLB);
7667 } else {
7668 LastParent = FD->getParent();
7669 if (((int64_t)FieldOffset - (int64_t)Cursor) > 0)
7670 copyUntilField(FD, ComponentLB);
7671 }
7672 Cursor = FieldOffset + FieldSize;
7673 LastIndex = FD->getFieldIndex();
7674 LB = CGF.Builder.CreateConstGEP(Addr: ComponentLB, Index: 1);
7675 }
7676
7677 void copyUntilField(const FieldDecl *FD, Address ComponentLB) {
7678 llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
7679 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7680 llvm::Value *Size = CGF.Builder.CreatePtrDiff(LHS: ComponentLBPtr, RHS: LBPtr);
7681 copySizedChunk(Base: LBPtr, Size);
7682 }
7683
7684 void copyUntilEnd(Address HB) {
7685 if (LastParent) {
7686 const ASTRecordLayout &RL =
7687 CGF.getContext().getASTRecordLayout(D: LastParent);
7688 if ((uint64_t)CGF.getContext().toBits(CharSize: RL.getSize()) <= Cursor)
7689 return;
7690 }
7691 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7692 llvm::Value *Size = CGF.Builder.CreatePtrDiff(
7693 LHS: CGF.Builder.CreateConstGEP(Addr: HB, Index: 1).emitRawPointer(CGF), RHS: LBPtr);
7694 copySizedChunk(Base: LBPtr, Size);
7695 }
7696
7697 void copySizedChunk(llvm::Value *Base, llvm::Value *Size) {
7698 CombinedInfo.Exprs.emplace_back(Args&: MapDecl, Args&: MapExpr);
7699 CombinedInfo.BasePointers.push_back(Elt: BP.emitRawPointer(CGF));
7700 CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
7701 CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
7702 CombinedInfo.Pointers.push_back(Elt: Base);
7703 CombinedInfo.Sizes.push_back(
7704 Elt: CGF.Builder.CreateIntCast(V: Size, DestTy: CGF.Int64Ty, /*isSigned=*/false));
7705 CombinedInfo.Types.push_back(Elt: Flags);
7706 CombinedInfo.Mappers.push_back(Elt: nullptr);
7707 CombinedInfo.NonContigInfo.Dims.push_back(Elt: IsNonContiguous ? DimSize : 1);
7708 }
7709 };
7710
7711 /// Generate the base pointers, section pointers, sizes, map type bits, and
7712 /// user-defined mappers (all included in \a CombinedInfo) for the provided
7713 /// map type, map or motion modifiers, and expression components.
/// \a IsFirstComponentList should be set to true if the provided set of
/// components is the first associated with a capture.
7716 void generateInfoForComponentList(
7717 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7718 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7719 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7720 MapCombinedInfoTy &CombinedInfo,
7721 MapCombinedInfoTy &StructBaseCombinedInfo,
7722 StructRangeInfoTy &PartialStruct, AttachInfoTy &AttachInfo,
7723 bool IsFirstComponentList, bool IsImplicit,
7724 bool GenerateAllInfoForClauses, const ValueDecl *Mapper = nullptr,
7725 bool ForDeviceAddr = false, const ValueDecl *BaseDecl = nullptr,
7726 const Expr *MapExpr = nullptr,
7727 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7728 OverlappedElements = {}) const {
7729
7730 // The following summarizes what has to be generated for each map and the
7731 // types below. The generated information is expressed in this order:
7732 // base pointer, section pointer, size, flags
7733 // (to add to the ones that come from the map type and modifier).
7734 // Entries annotated with (+) are only generated for "target" constructs,
7735 // and only if the variable at the beginning of the expression is used in
7736 // the region.
7737 //
7738 // double d;
7739 // int i[100];
7740 // float *p;
7741 // int **a = &i;
7742 //
7743 // struct S1 {
7744 // int i;
7745 // float f[50];
7746 // }
7747 // struct S2 {
7748 // int i;
7749 // float f[50];
7750 // S1 s;
7751 // double *p;
7752 // double *&pref;
7753 // struct S2 *ps;
7754 // int &ref;
7755 // }
7756 // S2 s;
7757 // S2 *ps;
7758 //
7759 // map(d)
7760 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7761 //
7762 // map(i)
7763 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7764 //
7765 // map(i[1:23])
7766 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7767 //
7768 // map(p)
7769 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7770 //
7771 // map(p[1:24])
7772 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM // map pointee
7773 // &p, &p[1], sizeof(void*), ATTACH // attach pointer/pointee, if both
7774 // // are present, and either is new
7775 //
7776 // map(([22])p)
7777 // p, p, 22*sizeof(float), TARGET_PARAM | TO | FROM
7778 // &p, p, sizeof(void*), ATTACH
7779 //
7780 // map((*a)[0:3])
7781 // a, a, 0, TARGET_PARAM | IMPLICIT // (+)
7782 // (*a)[0], &(*a)[0], 3 * sizeof(int), TO | FROM
7783 // &(*a), &(*a)[0], sizeof(void*), ATTACH
7784 // (+) Only on target, if a is used in the region
7785 // Note: Since the attach base-pointer is `*a`, which is not a scalar
7786 // variable, it doesn't determine the clause on `a`. `a` is mapped using
7787 // a zero-length-array-section map by generateDefaultMapInfo, if it is
7788 // referenced in the target region, because it is a pointer.
7789 //
7790 // map(**a)
7791 // a, a, 0, TARGET_PARAM | IMPLICIT // (+)
7792 // &(*a)[0], &(*a)[0], sizeof(int), TO | FROM
7793 // &(*a), &(*a)[0], sizeof(void*), ATTACH
7794 // (+) Only on target, if a is used in the region
7795 //
7796 // map(s)
7797 // FIXME: This needs to also imply map(ref_ptr_ptee: s.ref), since the
7798 // effect is supposed to be same as if the user had a map for every element
7799 // of the struct. We currently do a shallow-map of s.
7800 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7801 //
7802 // map(s.i)
7803 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7804 //
7805 // map(s.s.f)
7806 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7807 //
7808 // map(s.p)
7809 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7810 //
7811 // map(to: s.p[:22])
7812 // &s, &(s.p), sizeof(double*), TARGET_PARAM | IMPLICIT // (+)
// &(s.p[0]), &(s.p[0]), 22 * sizeof(double), TO
7814 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7815 //
7816 // map(to: s.ref)
7817 // &s, &(ptr(s.ref)), sizeof(int*), TARGET_PARAM (*)
7818 // &s, &(ptee(s.ref)), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7819 // (*) alloc space for struct members, only this is a target parameter.
7820 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7821 // optimizes this entry out, same in the examples below)
7822 // (***) map the pointee (map: to)
7823 // Note: ptr(s.ref) represents the referring pointer of s.ref
7824 // ptee(s.ref) represents the referenced pointee of s.ref
7825 //
7826 // map(to: s.pref)
7827 // &s, &(ptr(s.pref)), sizeof(double**), TARGET_PARAM
7828 // &s, &(ptee(s.pref)), sizeof(double*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7829 //
7830 // map(to: s.pref[:22])
7831 // &s, &(ptr(s.pref)), sizeof(double**), TARGET_PARAM | IMPLICIT // (+)
7832 // &s, &(ptee(s.pref)), sizeof(double*), MEMBER_OF(1) | PTR_AND_OBJ | TO |
7833 // FROM | IMPLICIT // (+)
7834 // &(ptee(s.pref)[0]), &(ptee(s.pref)[0]), 22 * sizeof(double), TO
7835 // &(ptee(s.pref)), &(ptee(s.pref)[0]), sizeof(void*), ATTACH
7836 //
7837 // map(s.ps)
7838 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7839 //
7840 // map(from: s.ps->s.i)
7841 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7842 // &(s.ps[0]), &(s.ps->s.i), sizeof(int), FROM
7843 // &(s.ps), &(s.ps->s.i), sizeof(void*), ATTACH
7844 //
7845 // map(to: s.ps->ps)
7846 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7847 // &(s.ps[0]), &(s.ps->ps), sizeof(S2*), TO
7848 // &(s.ps), &(s.ps->ps), sizeof(void*), ATTACH
7849 //
7850 // map(s.ps->ps->ps)
7851 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7852 // &(s.ps->ps[0]), &(s.ps->ps->ps), sizeof(S2*), TO
7853 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(void*), ATTACH
7854 //
7855 // map(to: s.ps->ps->s.f[:22])
7856 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7857 // &(s.ps->ps[0]), &(s.ps->ps->s.f[0]), 22*sizeof(float), TO
7858 // &(s.ps->ps), &(s.ps->ps->s.f[0]), sizeof(void*), ATTACH
7859 //
7860 // map(ps)
7861 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7862 //
7863 // map(ps->i)
7864 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7865 // &ps, &(ps->i), sizeof(void*), ATTACH
7866 //
7867 // map(ps->s.f)
7868 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7869 // &ps, &(ps->s.f[0]), sizeof(ps), ATTACH
7870 //
7871 // map(from: ps->p)
7872 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7873 // &ps, &(ps->p), sizeof(ps), ATTACH
7874 //
7875 // map(to: ps->p[:22])
7876 // ps, &(ps[0]), 0, TARGET_PARAM | IMPLICIT // (+)
7877 // &(ps->p[0]), &(ps->p[0]), 22*sizeof(double), TO
7878 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7879 //
7880 // map(ps->ps)
7881 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7882 // &ps, &(ps->ps), sizeof(ps), ATTACH
7883 //
7884 // map(from: ps->ps->s.i)
7885 // ps, &(ps[0]), 0, TARGET_PARAM | IMPLICIT // (+)
7886 // &(ps->ps[0]), &(ps->ps->s.i), sizeof(int), FROM
7887 // &(ps->ps), &(ps->ps->s.i), sizeof(void*), ATTACH
7888 //
7889 // map(from: ps->ps->ps)
7890 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7891 // &(ps->ps[0]), &(ps->ps->ps), sizeof(S2*), FROM
7892 // &(ps->ps), &(ps->ps->ps), sizeof(void*), ATTACH
7893 //
7894 // map(ps->ps->ps->ps)
7895 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7896 // &(ps->ps->ps[0]), &(ps->ps->ps->ps), sizeof(S2*), FROM
7897 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(void*), ATTACH
7898 //
7899 // map(to: ps->ps->ps->s.f[:22])
7900 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7901 // &(ps->ps->ps[0]), &(ps->ps->ps->s.f[0]), 22*sizeof(float), TO
7902 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), sizeof(void*), ATTACH
7903 //
7904 // map(to: s.f[:22]) map(from: s.p[:33])
7905 // On target, and if s is used in the region:
7906 //
7907 // &s, &(s.f[0]), 50*sizeof(float) +
7908 // sizeof(struct S1) +
7909 // sizeof(double*) (**), TARGET_PARAM
7910 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7911 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) | TO |
7912 // FROM | IMPLICIT
7913 // &(s.p[0]), &(s.p[0]), 33*sizeof(double), FROM
7914 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7915 // (**) allocate contiguous space needed to fit all mapped members even if
7916 // we allocate space for members not mapped (in this example,
7917 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7918 // them as well because they fall between &s.f[0] and &s.p)
7919 //
7920 // On other constructs, and, if s is not used in the region, on target:
7921 // &s, &(s.f[0]), 22*sizeof(float), TO
7922 // &(s.p[0]), &(s.p[0]), 33*sizeof(double), FROM
7923 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7924 //
7925 // map(from: s.f[:22]) map(to: ps->p[:33])
7926 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7927 // &ps[0], &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7928 // &(ps->p[0]), &(ps->p[0]), 33*sizeof(double), TO
7929 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7930 //
7931 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7932 // &s, &(s.f[0]), 50*sizeof(float) +
7933 // sizeof(struct S1), TARGET_PARAM
7934 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7935 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7936 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7937 // &(ps->p[0]), &(ps->p[0]), 33*sizeof(double), TO
7938 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7939 //
7940 // map(p[:100], p)
7941 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7942 // p, &p[0], 100*sizeof(float), TO | FROM
7943 // &p, &p[0], sizeof(float*), ATTACH
7944
7945 // Track if the map information being generated is the first for a capture.
7946 bool IsCaptureFirstInfo = IsFirstComponentList;
7947 // When the variable is on a declare target link or in a to clause with
7948 // unified memory, a reference is needed to hold the host/device address
7949 // of the variable.
7950 bool RequiresReference = false;
7951
7952 // Scan the components from the base to the complete expression.
7953 auto CI = Components.rbegin();
7954 auto CE = Components.rend();
7955 auto I = CI;
7956
7957 // Track if the map information being generated is the first for a list of
7958 // components.
7959 bool IsExpressionFirstInfo = true;
7960 bool FirstPointerInComplexData = false;
7961 Address BP = Address::invalid();
7962 Address FinalLowestElem = Address::invalid();
7963 const Expr *AssocExpr = I->getAssociatedExpression();
7964 const auto *AE = dyn_cast<ArraySubscriptExpr>(Val: AssocExpr);
7965 const auto *OASE = dyn_cast<ArraySectionExpr>(Val: AssocExpr);
7966 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(Val: AssocExpr);
7967
7968 // Get the pointer-attachment base-pointer for the given list, if any.
7969 const Expr *AttachPtrExpr = getAttachPtrExpr(Components);
7970 auto [AttachPtrAddr, AttachPteeBaseAddr] =
7971 getAttachPtrAddrAndPteeBaseAddr(AttachPtrExpr, CGF);
7972
7973 bool HasAttachPtr = AttachPtrExpr != nullptr;
7974 bool FirstComponentIsForAttachPtr = AssocExpr == AttachPtrExpr;
7975 bool SeenAttachPtr = FirstComponentIsForAttachPtr;
7976
7977 if (FirstComponentIsForAttachPtr) {
7978 // No need to process AttachPtr here. It will be processed at the end
7979 // after we have computed the pointee's address.
7980 ++I;
7981 } else if (isa<MemberExpr>(Val: AssocExpr)) {
7982 // The base is the 'this' pointer. The content of the pointer is going
7983 // to be the base of the field being mapped.
7984 BP = CGF.LoadCXXThisAddress();
7985 } else if ((AE && isa<CXXThisExpr>(Val: AE->getBase()->IgnoreParenImpCasts())) ||
7986 (OASE &&
7987 isa<CXXThisExpr>(Val: OASE->getBase()->IgnoreParenImpCasts()))) {
7988 BP = CGF.EmitOMPSharedLValue(E: AssocExpr).getAddress();
7989 } else if (OAShE &&
7990 isa<CXXThisExpr>(Val: OAShE->getBase()->IgnoreParenCasts())) {
7991 BP = Address(
7992 CGF.EmitScalarExpr(E: OAShE->getBase()),
7993 CGF.ConvertTypeForMem(T: OAShE->getBase()->getType()->getPointeeType()),
7994 CGF.getContext().getTypeAlignInChars(T: OAShE->getBase()->getType()));
7995 } else {
7996 // The base is the reference to the variable.
7997 // BP = &Var.
7998 BP = CGF.EmitOMPSharedLValue(E: AssocExpr).getAddress();
7999 if (const auto *VD =
8000 dyn_cast_or_null<VarDecl>(Val: I->getAssociatedDeclaration())) {
8001 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8002 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
8003 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
8004 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
8005 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
8006 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
8007 RequiresReference = true;
8008 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
8009 }
8010 }
8011 }
8012
8013 // If the variable is a pointer and is being dereferenced (i.e. is not
8014 // the last component), the base has to be the pointer itself, not its
8015 // reference. References are ignored for mapping purposes.
8016 QualType Ty =
8017 I->getAssociatedDeclaration()->getType().getNonReferenceType();
8018 if (Ty->isAnyPointerType() && std::next(x: I) != CE) {
8019 // No need to generate individual map information for the pointer, it
8020 // can be associated with the combined storage if shared memory mode is
8021 // active or the base declaration is not global variable.
8022 const auto *VD = dyn_cast<VarDecl>(Val: I->getAssociatedDeclaration());
8023 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8024 !VD || VD->hasLocalStorage() || HasAttachPtr)
8025 BP = CGF.EmitLoadOfPointer(Ptr: BP, PtrTy: Ty->castAs<PointerType>());
8026 else
8027 FirstPointerInComplexData = true;
8028 ++I;
8029 }
8030 }
8031
8032 // Track whether a component of the list should be marked as MEMBER_OF some
8033 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
8034 // in a component list should be marked as MEMBER_OF, all subsequent entries
8035 // do not belong to the base struct. E.g.
8036 // struct S2 s;
8037 // s.ps->ps->ps->f[:]
8038 // (1) (2) (3) (4)
8039 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
8040 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
8041 // is the pointee of ps(2) which is not member of struct s, so it should not
8042 // be marked as such (it is still PTR_AND_OBJ).
8043 // The variable is initialized to false so that PTR_AND_OBJ entries which
8044 // are not struct members are not considered (e.g. array of pointers to
8045 // data).
8046 bool ShouldBeMemberOf = false;
8047
8048 // Variable keeping track of whether or not we have encountered a component
8049 // in the component list which is a member expression. Useful when we have a
8050 // pointer or a final array section, in which case it is the previous
8051 // component in the list which tells us whether we have a member expression.
8052 // E.g. X.f[:]
8053 // While processing the final array section "[:]" it is "f" which tells us
8054 // whether we are dealing with a member of a declared struct.
8055 const MemberExpr *EncounteredME = nullptr;
8056
8057 // Track for the total number of dimension. Start from one for the dummy
8058 // dimension.
8059 uint64_t DimSize = 1;
8060
8061 // Detects non-contiguous updates due to strided accesses.
8062 // Sets the 'IsNonContiguous' flag so that the 'MapType' bits are set
8063 // correctly when generating information to be passed to the runtime. The
8064 // flag is set to true if any array section has a stride not equal to 1, or
8065 // if the stride is not a constant expression (conservatively assumed
8066 // non-contiguous).
8067 bool IsNonContiguous =
8068 CombinedInfo.NonContigInfo.IsNonContiguous ||
8069 any_of(Range&: Components, P: [&](const auto &Component) {
8070 const auto *OASE =
8071 dyn_cast<ArraySectionExpr>(Component.getAssociatedExpression());
8072 if (!OASE)
8073 return false;
8074
8075 const Expr *StrideExpr = OASE->getStride();
8076 if (!StrideExpr)
8077 return false;
8078
8079 assert(StrideExpr->getType()->isIntegerType() &&
8080 "Stride expression must be of integer type");
8081
8082 // If stride is not evaluatable as a constant, treat as
8083 // non-contiguous.
8084 const auto Constant =
8085 StrideExpr->getIntegerConstantExpr(Ctx: CGF.getContext());
8086 if (!Constant)
8087 return true;
8088
8089 // Treat non-unitary strides as non-contiguous.
8090 return !Constant->isOne();
8091 });
8092
8093 bool IsPrevMemberReference = false;
8094
8095 bool IsPartialMapped =
8096 !PartialStruct.PreliminaryMapData.BasePointers.empty();
8097
8098 // We need to check if we will be encountering any MEs. If we do not
8099 // encounter any ME expression it means we will be mapping the whole struct.
8100 // In that case we need to skip adding an entry for the struct to the
8101 // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
8102 // list only when generating all info for clauses.
8103 bool IsMappingWholeStruct = true;
8104 if (!GenerateAllInfoForClauses) {
8105 IsMappingWholeStruct = false;
8106 } else {
8107 for (auto TempI = I; TempI != CE; ++TempI) {
8108 const MemberExpr *PossibleME =
8109 dyn_cast<MemberExpr>(Val: TempI->getAssociatedExpression());
8110 if (PossibleME) {
8111 IsMappingWholeStruct = false;
8112 break;
8113 }
8114 }
8115 }
8116
8117 bool SeenFirstNonBinOpExprAfterAttachPtr = false;
8118 for (; I != CE; ++I) {
8119 // If we have a valid attach-ptr, we skip processing all components until
8120 // after the attach-ptr.
8121 if (HasAttachPtr && !SeenAttachPtr) {
8122 SeenAttachPtr = I->getAssociatedExpression() == AttachPtrExpr;
8123 continue;
8124 }
8125
8126 // After finding the attach pointer, skip binary-ops, to skip past
8127 // expressions like (p + 10), for a map like map(*(p + 10)), where p is
8128 // the attach-ptr.
8129 if (HasAttachPtr && !SeenFirstNonBinOpExprAfterAttachPtr) {
8130 const auto *BO = dyn_cast<BinaryOperator>(Val: I->getAssociatedExpression());
8131 if (BO)
8132 continue;
8133
8134 // Found the first non-binary-operator component after attach
8135 SeenFirstNonBinOpExprAfterAttachPtr = true;
8136 BP = AttachPteeBaseAddr;
8137 }
8138
8139 // If the current component is member of a struct (parent struct) mark it.
8140 if (!EncounteredME) {
8141 EncounteredME = dyn_cast<MemberExpr>(Val: I->getAssociatedExpression());
8142 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
8143 // as MEMBER_OF the parent struct.
8144 if (EncounteredME) {
8145 ShouldBeMemberOf = true;
8146 // Do not emit as complex pointer if this is actually not array-like
8147 // expression.
8148 if (FirstPointerInComplexData) {
8149 QualType Ty = std::prev(x: I)
8150 ->getAssociatedDeclaration()
8151 ->getType()
8152 .getNonReferenceType();
8153 BP = CGF.EmitLoadOfPointer(Ptr: BP, PtrTy: Ty->castAs<PointerType>());
8154 FirstPointerInComplexData = false;
8155 }
8156 }
8157 }
8158
8159 auto Next = std::next(x: I);
8160
8161 // We need to generate the addresses and sizes if this is the last
8162 // component, if the component is a pointer or if it is an array section
8163 // whose length can't be proved to be one. If this is a pointer, it
8164 // becomes the base address for the following components.
8165
8166 // A final array section, is one whose length can't be proved to be one.
8167 // If the map item is non-contiguous then we don't treat any array section
8168 // as final array section.
8169 bool IsFinalArraySection =
8170 !IsNonContiguous &&
8171 isFinalArraySectionExpression(E: I->getAssociatedExpression());
8172
8173 // If we have a declaration for the mapping use that, otherwise use
8174 // the base declaration of the map clause.
8175 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
8176 ? I->getAssociatedDeclaration()
8177 : BaseDecl;
8178 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
8179 : MapExpr;
8180
8181 // Get information on whether the element is a pointer. Have to do a
8182 // special treatment for array sections given that they are built-in
8183 // types.
8184 const auto *OASE =
8185 dyn_cast<ArraySectionExpr>(Val: I->getAssociatedExpression());
8186 const auto *OAShE =
8187 dyn_cast<OMPArrayShapingExpr>(Val: I->getAssociatedExpression());
8188 const auto *UO = dyn_cast<UnaryOperator>(Val: I->getAssociatedExpression());
8189 const auto *BO = dyn_cast<BinaryOperator>(Val: I->getAssociatedExpression());
8190 bool IsPointer =
8191 OAShE ||
8192 (OASE && ArraySectionExpr::getBaseOriginalType(Base: OASE)
8193 .getCanonicalType()
8194 ->isAnyPointerType()) ||
8195 I->getAssociatedExpression()->getType()->isAnyPointerType();
8196 bool IsMemberReference = isa<MemberExpr>(Val: I->getAssociatedExpression()) &&
8197 MapDecl &&
8198 MapDecl->getType()->isLValueReferenceType();
8199 bool IsNonDerefPointer = IsPointer &&
8200 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
8201 !IsNonContiguous;
8202
8203 if (OASE)
8204 ++DimSize;
8205
8206 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
8207 IsFinalArraySection) {
8208 // If this is not the last component, we expect the pointer to be
8209 // associated with an array expression or member expression.
8210 assert((Next == CE ||
8211 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8212 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8213 isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
8214 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
8215 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8216 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8217 "Unexpected expression");
8218
8219 Address LB = Address::invalid();
8220 Address LowestElem = Address::invalid();
8221 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8222 const MemberExpr *E) {
8223 const Expr *BaseExpr = E->getBase();
8224 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
8225 // scalar.
8226 LValue BaseLV;
8227 if (E->isArrow()) {
8228 LValueBaseInfo BaseInfo;
8229 TBAAAccessInfo TBAAInfo;
8230 Address Addr =
8231 CGF.EmitPointerWithAlignment(Addr: BaseExpr, BaseInfo: &BaseInfo, TBAAInfo: &TBAAInfo);
8232 QualType PtrTy = BaseExpr->getType()->getPointeeType();
8233 BaseLV = CGF.MakeAddrLValue(Addr, T: PtrTy, BaseInfo, TBAAInfo);
8234 } else {
8235 BaseLV = CGF.EmitOMPSharedLValue(E: BaseExpr);
8236 }
8237 return BaseLV;
8238 };
8239 if (OAShE) {
8240 LowestElem = LB =
8241 Address(CGF.EmitScalarExpr(E: OAShE->getBase()),
8242 CGF.ConvertTypeForMem(
8243 T: OAShE->getBase()->getType()->getPointeeType()),
8244 CGF.getContext().getTypeAlignInChars(
8245 T: OAShE->getBase()->getType()));
8246 } else if (IsMemberReference) {
8247 const auto *ME = cast<MemberExpr>(Val: I->getAssociatedExpression());
8248 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8249 LowestElem = CGF.EmitLValueForFieldInitialization(
8250 Base: BaseLVal, Field: cast<FieldDecl>(Val: MapDecl))
8251 .getAddress();
8252 LB = CGF.EmitLoadOfReferenceLValue(RefAddr: LowestElem, RefTy: MapDecl->getType())
8253 .getAddress();
8254 } else {
8255 LowestElem = LB =
8256 CGF.EmitOMPSharedLValue(E: I->getAssociatedExpression())
8257 .getAddress();
8258 }
8259
8260 // Save the final LowestElem, to use it as the pointee in attach maps,
8261 // if emitted.
8262 if (Next == CE)
8263 FinalLowestElem = LowestElem;
8264
8265 // If this component is a pointer inside the base struct then we don't
8266 // need to create any entry for it - it will be combined with the object
8267 // it is pointing to into a single PTR_AND_OBJ entry.
8268 bool IsMemberPointerOrAddr =
8269 EncounteredME &&
8270 (((IsPointer || ForDeviceAddr) &&
8271 I->getAssociatedExpression() == EncounteredME) ||
8272 (IsPrevMemberReference && !IsPointer) ||
8273 (IsMemberReference && Next != CE &&
8274 !Next->getAssociatedExpression()->getType()->isPointerType()));
8275 if (!OverlappedElements.empty() && Next == CE) {
8276 // Handle base element with the info for overlapped elements.
8277 assert(!PartialStruct.Base.isValid() && "The base element is set.");
8278 assert(!IsPointer &&
8279 "Unexpected base element with the pointer type.");
8280 // Mark the whole struct as the struct that requires allocation on the
8281 // device.
8282 PartialStruct.LowestElem = {0, LowestElem};
8283 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8284 T: I->getAssociatedExpression()->getType());
8285 Address HB = CGF.Builder.CreateConstGEP(
8286 Addr: CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8287 Addr: LowestElem, Ty: CGF.VoidPtrTy, ElementTy: CGF.Int8Ty),
8288 Index: TypeSize.getQuantity() - 1);
8289 PartialStruct.HighestElem = {
8290 std::numeric_limits<decltype(
8291 PartialStruct.HighestElem.first)>::max(),
8292 HB};
8293 PartialStruct.Base = BP;
8294 PartialStruct.LB = LB;
8295 assert(
8296 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8297 "Overlapped elements must be used only once for the variable.");
8298 std::swap(a&: PartialStruct.PreliminaryMapData, b&: CombinedInfo);
8299 // Emit data for non-overlapped data.
8300 OpenMPOffloadMappingFlags Flags =
8301 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8302 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8303 /*AddPtrFlag=*/false,
8304 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8305 CopyOverlappedEntryGaps CopyGaps(CGF, CombinedInfo, Flags, MapDecl,
8306 MapExpr, BP, LB, IsNonContiguous,
8307 DimSize);
8308 // Do bitcopy of all non-overlapped structure elements.
8309 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8310 Component : OverlappedElements) {
8311 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8312 Component) {
8313 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8314 if (const auto *FD = dyn_cast<FieldDecl>(Val: VD)) {
8315 CopyGaps.processField(MC, FD, EmitMemberExprBase);
8316 }
8317 }
8318 }
8319 }
8320 CopyGaps.copyUntilEnd(HB);
8321 break;
8322 }
8323 llvm::Value *Size = getExprTypeSize(E: I->getAssociatedExpression());
8324 // Skip adding an entry in the CurInfo of this combined entry if the
8325 // whole struct is currently being mapped. The struct needs to be added
8326 // in the first position before any data internal to the struct is being
8327 // mapped.
8328 // Skip adding an entry in the CurInfo of this combined entry if the
8329 // PartialStruct.PreliminaryMapData.BasePointers has been mapped.
8330 if ((!IsMemberPointerOrAddr && !IsPartialMapped) ||
8331 (Next == CE && MapType != OMPC_MAP_unknown)) {
8332 if (!IsMappingWholeStruct) {
8333 CombinedInfo.Exprs.emplace_back(Args&: MapDecl, Args&: MapExpr);
8334 CombinedInfo.BasePointers.push_back(Elt: BP.emitRawPointer(CGF));
8335 CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
8336 CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
8337 CombinedInfo.Pointers.push_back(Elt: LB.emitRawPointer(CGF));
8338 CombinedInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
8339 V: Size, DestTy: CGF.Int64Ty, /*isSigned=*/true));
8340 CombinedInfo.NonContigInfo.Dims.push_back(Elt: IsNonContiguous ? DimSize
8341 : 1);
8342 } else {
8343 StructBaseCombinedInfo.Exprs.emplace_back(Args&: MapDecl, Args&: MapExpr);
8344 StructBaseCombinedInfo.BasePointers.push_back(
8345 Elt: BP.emitRawPointer(CGF));
8346 StructBaseCombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
8347 StructBaseCombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
8348 StructBaseCombinedInfo.Pointers.push_back(Elt: LB.emitRawPointer(CGF));
8349 StructBaseCombinedInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
8350 V: Size, DestTy: CGF.Int64Ty, /*isSigned=*/true));
8351 StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
8352 Elt: IsNonContiguous ? DimSize : 1);
8353 }
8354
8355 // If Mapper is valid, the last component inherits the mapper.
8356 bool HasMapper = Mapper && Next == CE;
8357 if (!IsMappingWholeStruct)
8358 CombinedInfo.Mappers.push_back(Elt: HasMapper ? Mapper : nullptr);
8359 else
8360 StructBaseCombinedInfo.Mappers.push_back(Elt: HasMapper ? Mapper
8361 : nullptr);
8362
8363 // We need to add a pointer flag for each map that comes from the
8364 // same expression except for the first one. We also need to signal
8365 // this map is the first one that relates with the current capture
8366 // (there is a set of entries for each capture).
8367 OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8368 MapType, MapModifiers, MotionModifiers, IsImplicit,
8369 AddPtrFlag: !IsExpressionFirstInfo || RequiresReference ||
8370 FirstPointerInComplexData || IsMemberReference,
8371 AddIsTargetParamFlag: IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8372
8373 if (!IsExpressionFirstInfo || IsMemberReference) {
8374 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8375 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8376 if (IsPointer || (IsMemberReference && Next != CE))
8377 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8378 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
8379 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
8380 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
8381 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
8382
8383 if (ShouldBeMemberOf) {
8384 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8385 // should be later updated with the correct value of MEMBER_OF.
8386 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
8387 // From now on, all subsequent PTR_AND_OBJ entries should not be
8388 // marked as MEMBER_OF.
8389 ShouldBeMemberOf = false;
8390 }
8391 }
8392
8393 if (!IsMappingWholeStruct)
8394 CombinedInfo.Types.push_back(Elt: Flags);
8395 else
8396 StructBaseCombinedInfo.Types.push_back(Elt: Flags);
8397 }
8398
8399 // If we have encountered a member expression so far, keep track of the
8400 // mapped member. If the parent is "*this", then the value declaration
8401 // is nullptr.
8402 if (EncounteredME) {
8403 const auto *FD = cast<FieldDecl>(Val: EncounteredME->getMemberDecl());
8404 unsigned FieldIndex = FD->getFieldIndex();
8405
8406 // Update info about the lowest and highest elements for this struct
8407 if (!PartialStruct.Base.isValid()) {
8408 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8409 if (IsFinalArraySection && OASE) {
8410 Address HB =
8411 CGF.EmitArraySectionExpr(E: OASE, /*IsLowerBound=*/false)
8412 .getAddress();
8413 PartialStruct.HighestElem = {FieldIndex, HB};
8414 } else {
8415 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8416 }
8417 PartialStruct.Base = BP;
8418 PartialStruct.LB = BP;
8419 } else if (FieldIndex < PartialStruct.LowestElem.first) {
8420 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8421 } else if (FieldIndex > PartialStruct.HighestElem.first) {
8422 if (IsFinalArraySection && OASE) {
8423 Address HB =
8424 CGF.EmitArraySectionExpr(E: OASE, /*IsLowerBound=*/false)
8425 .getAddress();
8426 PartialStruct.HighestElem = {FieldIndex, HB};
8427 } else {
8428 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8429 }
8430 }
8431 }
8432
8433 // Need to emit combined struct for array sections.
8434 if (IsFinalArraySection || IsNonContiguous)
8435 PartialStruct.IsArraySection = true;
8436
8437 // If we have a final array section, we are done with this expression.
8438 if (IsFinalArraySection)
8439 break;
8440
8441 // The pointer becomes the base for the next element.
8442 if (Next != CE)
8443 BP = IsMemberReference ? LowestElem : LB;
8444 if (!IsPartialMapped)
8445 IsExpressionFirstInfo = false;
8446 IsCaptureFirstInfo = false;
8447 FirstPointerInComplexData = false;
8448 IsPrevMemberReference = IsMemberReference;
8449 } else if (FirstPointerInComplexData) {
8450 QualType Ty = Components.rbegin()
8451 ->getAssociatedDeclaration()
8452 ->getType()
8453 .getNonReferenceType();
8454 BP = CGF.EmitLoadOfPointer(Ptr: BP, PtrTy: Ty->castAs<PointerType>());
8455 FirstPointerInComplexData = false;
8456 }
8457 }
8458 // If ran into the whole component - allocate the space for the whole
8459 // record.
8460 if (!EncounteredME)
8461 PartialStruct.HasCompleteRecord = true;
8462
8463 // Populate ATTACH information for later processing by emitAttachEntry.
8464 if (shouldEmitAttachEntry(PointerExpr: AttachPtrExpr, MapBaseDecl: BaseDecl, CGF, CurDir)) {
8465 AttachInfo.AttachPtrAddr = AttachPtrAddr;
8466 AttachInfo.AttachPteeAddr = FinalLowestElem;
8467 AttachInfo.AttachPtrDecl = BaseDecl;
8468 AttachInfo.AttachMapExpr = MapExpr;
8469 }
8470
8471 if (!IsNonContiguous)
8472 return;
8473
8474 const ASTContext &Context = CGF.getContext();
8475
8476 // For supporting stride in array section, we need to initialize the first
8477 // dimension size as 1, first offset as 0, and first count as 1
8478 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(Ty: CGF.CGM.Int64Ty, V: 0)};
8479 MapValuesArrayTy CurCounts;
8480 MapValuesArrayTy CurStrides = {llvm::ConstantInt::get(Ty: CGF.CGM.Int64Ty, V: 1)};
8481 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(Ty: CGF.CGM.Int64Ty, V: 1)};
8482 uint64_t ElementTypeSize;
8483
8484 // Collect Size information for each dimension and get the element size as
8485 // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8486 // should be [10, 10] and the first stride is 4 bytes.
8487 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8488 Components) {
8489 const Expr *AssocExpr = Component.getAssociatedExpression();
8490 const auto *OASE = dyn_cast<ArraySectionExpr>(Val: AssocExpr);
8491
8492 if (!OASE)
8493 continue;
8494
8495 QualType Ty = ArraySectionExpr::getBaseOriginalType(Base: OASE->getBase());
8496 auto *CAT = Context.getAsConstantArrayType(T: Ty);
8497 auto *VAT = Context.getAsVariableArrayType(T: Ty);
8498
8499 // We need all the dimension size except for the last dimension.
8500 assert((VAT || CAT || &Component == &*Components.begin()) &&
8501 "Should be either ConstantArray or VariableArray if not the "
8502 "first Component");
8503
8504 // Get element size if CurCounts is empty.
8505 if (CurCounts.empty()) {
8506 const Type *ElementType = nullptr;
8507 if (CAT)
8508 ElementType = CAT->getElementType().getTypePtr();
8509 else if (VAT)
8510 ElementType = VAT->getElementType().getTypePtr();
8511 else if (&Component == &*Components.begin()) {
8512 // If the base is a raw pointer (e.g. T *data with data[a:b:c]),
8513 // there was no earlier CAT/VAT/array handling to establish
8514 // ElementType. Capture the pointee type now so that subsequent
8515 // components (offset/length/stride) have a concrete element type to
8516 // work with. This makes pointer-backed sections behave consistently
8517 // with CAT/VAT/array bases.
8518 if (const auto *PtrType = Ty->getAs<PointerType>())
8519 ElementType = PtrType->getPointeeType().getTypePtr();
8520 } else {
8521 // Any component after the first should never have a raw pointer
8522 // type; by this point, ElementType must already be known (set above
8523 // or in prior array / CAT / VAT handling).
8524 assert(!Ty->isPointerType() &&
8525 "Non-first components should not be raw pointers");
8526 }
8527
8528 // At this stage, if ElementType was a base pointer and we are in the
8529 // first iteration, it has been computed.
8530 if (ElementType) {
8531 // For the case that having pointer as base, we need to remove one
8532 // level of indirection.
8533 if (&Component != &*Components.begin())
8534 ElementType = ElementType->getPointeeOrArrayElementType();
8535 ElementTypeSize =
8536 Context.getTypeSizeInChars(T: ElementType).getQuantity();
8537 CurCounts.push_back(
8538 Elt: llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: ElementTypeSize));
8539 }
8540 }
8541 // Get dimension value except for the last dimension since we don't need
8542 // it.
8543 if (DimSizes.size() < Components.size() - 1) {
8544 if (CAT)
8545 DimSizes.push_back(
8546 Elt: llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: CAT->getZExtSize()));
8547 else if (VAT)
8548 DimSizes.push_back(Elt: CGF.Builder.CreateIntCast(
8549 V: CGF.EmitScalarExpr(E: VAT->getSizeExpr()), DestTy: CGF.Int64Ty,
8550 /*IsSigned=*/isSigned: false));
8551 }
8552 }
8553
8554 // Skip the dummy dimension since we already have its information.
8555 auto *DI = DimSizes.begin() + 1;
8556 // Product of dimension.
8557 llvm::Value *DimProd =
8558 llvm::ConstantInt::get(Ty: CGF.CGM.Int64Ty, V: ElementTypeSize);
8559
8560 // Collect info for non-contiguous. Notice that offset, count, and stride
8561 // are only meaningful for array-section, so we insert a null for anything
8562 // other than array-section.
8563 // Also, the size of offset, count, and stride are not the same as
8564 // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8565 // count, and stride are the same as the number of non-contiguous
8566 // declaration in target update to/from clause.
8567 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8568 Components) {
8569 const Expr *AssocExpr = Component.getAssociatedExpression();
8570
8571 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(Val: AssocExpr)) {
8572 llvm::Value *Offset = CGF.Builder.CreateIntCast(
8573 V: CGF.EmitScalarExpr(E: AE->getIdx()), DestTy: CGF.Int64Ty,
8574 /*isSigned=*/false);
8575 CurOffsets.push_back(Elt: Offset);
8576 CurCounts.push_back(Elt: llvm::ConstantInt::get(Ty: CGF.Int64Ty, /*V=*/1));
8577 CurStrides.push_back(Elt: CurStrides.back());
8578 continue;
8579 }
8580
8581 const auto *OASE = dyn_cast<ArraySectionExpr>(Val: AssocExpr);
8582
8583 if (!OASE)
8584 continue;
8585
8586 // Offset
8587 const Expr *OffsetExpr = OASE->getLowerBound();
8588 llvm::Value *Offset = nullptr;
8589 if (!OffsetExpr) {
8590 // If offset is absent, then we just set it to zero.
8591 Offset = llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: 0);
8592 } else {
8593 Offset = CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: OffsetExpr),
8594 DestTy: CGF.Int64Ty,
8595 /*isSigned=*/false);
8596 }
8597
8598 // Count
8599 const Expr *CountExpr = OASE->getLength();
8600 llvm::Value *Count = nullptr;
8601 if (!CountExpr) {
8602 // In Clang, once a high dimension is an array section, we construct all
8603 // the lower dimension as array section, however, for case like
8604 // arr[0:2][2], Clang construct the inner dimension as an array section
8605 // but it actually is not in an array section form according to spec.
8606 if (!OASE->getColonLocFirst().isValid() &&
8607 !OASE->getColonLocSecond().isValid()) {
8608 Count = llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: 1);
8609 } else {
8610 // OpenMP 5.0, 2.1.5 Array Sections, Description.
8611 // When the length is absent it defaults to ⌈(size −
8612 // lower-bound)/stride⌉, where size is the size of the array
8613 // dimension.
8614 const Expr *StrideExpr = OASE->getStride();
8615 llvm::Value *Stride =
8616 StrideExpr
8617 ? CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: StrideExpr),
8618 DestTy: CGF.Int64Ty, /*isSigned=*/false)
8619 : nullptr;
8620 if (Stride)
8621 Count = CGF.Builder.CreateUDiv(
8622 LHS: CGF.Builder.CreateNUWSub(LHS: *DI, RHS: Offset), RHS: Stride);
8623 else
8624 Count = CGF.Builder.CreateNUWSub(LHS: *DI, RHS: Offset);
8625 }
8626 } else {
8627 Count = CGF.EmitScalarExpr(E: CountExpr);
8628 }
8629 Count = CGF.Builder.CreateIntCast(V: Count, DestTy: CGF.Int64Ty, /*isSigned=*/false);
8630 CurCounts.push_back(Elt: Count);
8631
8632 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8633 // Offset_n' = Offset_n * (D_0 * D_1 ... * D_n-1) * Unit size
8634 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8635 // Offset Count Stride
8636 // D0 0 4 1 (int) <- dummy dimension
8637 // D1 0 2 8 (2 * (1) * 4)
8638 // D2 100 2 20 (1 * (1 * 5) * 4)
8639 // D3 0 2 200 (2 * (1 * 5 * 4) * 4)
8640 const Expr *StrideExpr = OASE->getStride();
8641 llvm::Value *Stride =
8642 StrideExpr
8643 ? CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: StrideExpr),
8644 DestTy: CGF.Int64Ty, /*isSigned=*/false)
8645 : nullptr;
8646 DimProd = CGF.Builder.CreateNUWMul(LHS: DimProd, RHS: *(DI - 1));
8647 if (Stride)
8648 CurStrides.push_back(Elt: CGF.Builder.CreateNUWMul(LHS: DimProd, RHS: Stride));
8649 else
8650 CurStrides.push_back(Elt: DimProd);
8651
8652 Offset = CGF.Builder.CreateNUWMul(LHS: DimProd, RHS: Offset);
8653 CurOffsets.push_back(Elt: Offset);
8654
8655 if (DI != DimSizes.end())
8656 ++DI;
8657 }
8658
8659 CombinedInfo.NonContigInfo.Offsets.push_back(Elt: CurOffsets);
8660 CombinedInfo.NonContigInfo.Counts.push_back(Elt: CurCounts);
8661 CombinedInfo.NonContigInfo.Strides.push_back(Elt: CurStrides);
8662 }
8663
8664 /// Return the adjusted map modifiers if the declaration a capture refers to
8665 /// appears in a first-private clause. This is expected to be used only with
8666 /// directives that start with 'target'.
8667 OpenMPOffloadMappingFlags
8668 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8669 assert(Cap.capturesVariable() && "Expected capture by reference only!");
8670
8671 // A first private variable captured by reference will use only the
8672 // 'private ptr' and 'map to' flag. Return the right flags if the captured
8673 // declaration is known as first-private in this handler.
8674 if (FirstPrivateDecls.count(Val: Cap.getCapturedVar())) {
8675 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8676 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8677 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
8678 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
8679 OpenMPOffloadMappingFlags::OMP_MAP_TO;
8680 }
8681 auto I = LambdasMap.find(Val: Cap.getCapturedVar()->getCanonicalDecl());
8682 if (I != LambdasMap.end())
8683 // for map(to: lambda): using user specified map type.
8684 return getMapTypeBits(
8685 MapType: I->getSecond()->getMapType(), MapModifiers: I->getSecond()->getMapTypeModifiers(),
8686 /*MotionModifiers=*/{}, IsImplicit: I->getSecond()->isImplicit(),
8687 /*AddPtrFlag=*/false,
8688 /*AddIsTargetParamFlag=*/false,
8689 /*isNonContiguous=*/IsNonContiguous: false);
8690 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8691 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
8692 }
8693
8694 void getPlainLayout(const CXXRecordDecl *RD,
8695 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8696 bool AsBase) const {
8697 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8698
8699 llvm::StructType *St =
8700 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8701
8702 unsigned NumElements = St->getNumElements();
8703 llvm::SmallVector<
8704 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8705 RecordLayout(NumElements);
8706
8707 // Fill bases.
8708 for (const auto &I : RD->bases()) {
8709 if (I.isVirtual())
8710 continue;
8711
8712 QualType BaseTy = I.getType();
8713 const auto *Base = BaseTy->getAsCXXRecordDecl();
8714 // Ignore empty bases.
8715 if (isEmptyRecordForLayout(Context: CGF.getContext(), T: BaseTy) ||
8716 CGF.getContext()
8717 .getASTRecordLayout(D: Base)
8718 .getNonVirtualSize()
8719 .isZero())
8720 continue;
8721
8722 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(RD: Base);
8723 RecordLayout[FieldIndex] = Base;
8724 }
8725 // Fill in virtual bases.
8726 for (const auto &I : RD->vbases()) {
8727 QualType BaseTy = I.getType();
8728 // Ignore empty bases.
8729 if (isEmptyRecordForLayout(Context: CGF.getContext(), T: BaseTy))
8730 continue;
8731
8732 const auto *Base = BaseTy->getAsCXXRecordDecl();
8733 unsigned FieldIndex = RL.getVirtualBaseIndex(base: Base);
8734 if (RecordLayout[FieldIndex])
8735 continue;
8736 RecordLayout[FieldIndex] = Base;
8737 }
8738 // Fill in all the fields.
8739 assert(!RD->isUnion() && "Unexpected union.");
8740 for (const auto *Field : RD->fields()) {
8741 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8742 // will fill in later.)
8743 if (!Field->isBitField() &&
8744 !isEmptyFieldForLayout(Context: CGF.getContext(), FD: Field)) {
8745 unsigned FieldIndex = RL.getLLVMFieldNo(FD: Field);
8746 RecordLayout[FieldIndex] = Field;
8747 }
8748 }
8749 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8750 &Data : RecordLayout) {
8751 if (Data.isNull())
8752 continue;
8753 if (const auto *Base = dyn_cast<const CXXRecordDecl *>(Val: Data))
8754 getPlainLayout(RD: Base, Layout, /*AsBase=*/true);
8755 else
8756 Layout.push_back(Elt: cast<const FieldDecl *>(Val: Data));
8757 }
8758 }
8759
8760 /// Returns the address corresponding to \p PointerExpr.
8761 static Address getAttachPtrAddr(const Expr *PointerExpr,
8762 CodeGenFunction &CGF) {
8763 assert(PointerExpr && "Cannot get addr from null attach-ptr expr");
8764 Address AttachPtrAddr = Address::invalid();
8765
8766 if (auto *DRE = dyn_cast<DeclRefExpr>(Val: PointerExpr)) {
8767 // If the pointer is a variable, we can use its address directly.
8768 AttachPtrAddr = CGF.EmitLValue(E: DRE).getAddress();
8769 } else if (auto *OASE = dyn_cast<ArraySectionExpr>(Val: PointerExpr)) {
8770 AttachPtrAddr =
8771 CGF.EmitArraySectionExpr(E: OASE, /*IsLowerBound=*/true).getAddress();
8772 } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(Val: PointerExpr)) {
8773 AttachPtrAddr = CGF.EmitLValue(E: ASE).getAddress();
8774 } else if (auto *ME = dyn_cast<MemberExpr>(Val: PointerExpr)) {
8775 AttachPtrAddr = CGF.EmitMemberExpr(E: ME).getAddress();
8776 } else if (auto *UO = dyn_cast<UnaryOperator>(Val: PointerExpr)) {
8777 assert(UO->getOpcode() == UO_Deref &&
8778 "Unexpected unary-operator on attach-ptr-expr");
8779 AttachPtrAddr = CGF.EmitLValue(E: UO).getAddress();
8780 }
8781 assert(AttachPtrAddr.isValid() &&
8782 "Failed to get address for attach pointer expression");
8783 return AttachPtrAddr;
8784 }
8785
8786 /// Get the address of the attach pointer, and a load from it, to get the
8787 /// pointee base address.
8788 /// \return A pair containing AttachPtrAddr and AttachPteeBaseAddr. The pair
8789 /// contains invalid addresses if \p AttachPtrExpr is null.
8790 static std::pair<Address, Address>
8791 getAttachPtrAddrAndPteeBaseAddr(const Expr *AttachPtrExpr,
8792 CodeGenFunction &CGF) {
8793
8794 if (!AttachPtrExpr)
8795 return {Address::invalid(), Address::invalid()};
8796
8797 Address AttachPtrAddr = getAttachPtrAddr(PointerExpr: AttachPtrExpr, CGF);
8798 assert(AttachPtrAddr.isValid() && "Invalid attach pointer addr");
8799
8800 QualType AttachPtrType =
8801 OMPClauseMappableExprCommon::getComponentExprElementType(Exp: AttachPtrExpr)
8802 .getCanonicalType();
8803
8804 Address AttachPteeBaseAddr = CGF.EmitLoadOfPointer(
8805 Ptr: AttachPtrAddr, PtrTy: AttachPtrType->castAs<PointerType>());
8806 assert(AttachPteeBaseAddr.isValid() && "Invalid attach pointee base addr");
8807
8808 return {AttachPtrAddr, AttachPteeBaseAddr};
8809 }
8810
8811 /// Returns whether an attach entry should be emitted for a map on
8812 /// \p MapBaseDecl on the directive \p CurDir.
8813 static bool
8814 shouldEmitAttachEntry(const Expr *PointerExpr, const ValueDecl *MapBaseDecl,
8815 CodeGenFunction &CGF,
8816 llvm::PointerUnion<const OMPExecutableDirective *,
8817 const OMPDeclareMapperDecl *>
8818 CurDir) {
8819 if (!PointerExpr)
8820 return false;
8821
8822 // Pointer attachment is needed at map-entering time or for declare
8823 // mappers.
8824 return isa<const OMPDeclareMapperDecl *>(Val: CurDir) ||
8825 isOpenMPTargetMapEnteringDirective(
8826 DKind: cast<const OMPExecutableDirective *>(Val&: CurDir)
8827 ->getDirectiveKind());
8828 }
8829
8830 /// Computes the attach-ptr expr for \p Components, and updates various maps
8831 /// with the information.
8832 /// It internally calls OMPClauseMappableExprCommon::findAttachPtrExpr()
8833 /// with the OpenMPDirectiveKind extracted from \p CurDir.
8834 /// It updates AttachPtrComputationOrderMap, AttachPtrComponentDepthMap, and
8835 /// AttachPtrExprMap.
8836 void collectAttachPtrExprInfo(
8837 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
8838 llvm::PointerUnion<const OMPExecutableDirective *,
8839 const OMPDeclareMapperDecl *>
8840 CurDir) {
8841
8842 OpenMPDirectiveKind CurDirectiveID =
8843 isa<const OMPDeclareMapperDecl *>(Val: CurDir)
8844 ? OMPD_declare_mapper
8845 : cast<const OMPExecutableDirective *>(Val&: CurDir)->getDirectiveKind();
8846
8847 const auto &[AttachPtrExpr, Depth] =
8848 OMPClauseMappableExprCommon::findAttachPtrExpr(Components,
8849 CurDirKind: CurDirectiveID);
8850
8851 AttachPtrComputationOrderMap.try_emplace(
8852 Key: AttachPtrExpr, Args: AttachPtrComputationOrderMap.size());
8853 AttachPtrComponentDepthMap.try_emplace(Key: AttachPtrExpr, Args: Depth);
8854 AttachPtrExprMap.try_emplace(Key: Components, Args: AttachPtrExpr);
8855 }
8856
 /// Generate all the base pointers, section pointers, sizes, map types, and
 /// mappers for the extracted mappable expressions (all included in \a
 /// CombinedInfo). Also, for each item that relates with a device pointer, a
 /// pair of the relevant declaration and index where it occurs is appended to
 /// the device pointers info array.
 ///
 /// The work is done in three phases:
 ///  1. The component lists of all map, to and from clauses are bucketed per
 ///     declaration (and, within a declaration, per MapKind).
 ///  2. Each use_device_ptr / use_device_addr operand either tags a matching
 ///     bucketed entry as returning a device pointer/address, or gets a
 ///     standalone zero-size RETURN_PARAM entry.
 ///  3. For every declaration, the bucketed entries are grouped by their
 ///     attach-ptr expression, emitted, and appended to \p CombinedInfo. The
 ///     standalone use_device_* entries are appended last.
 ///
 /// \param Clauses Clauses to scan. Only map, to, from, use_device_ptr and
 /// use_device_addr clauses are looked at.
 /// \param CombinedInfo Receives every generated entry.
 /// \param OMPBuilder Forwarded to emitCombinedEntry().
 /// \param SkipVarSet Declarations whose map/to/from component lists are
 /// ignored.
 void generateAllInfoForClauses(
 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
 llvm::OpenMPIRBuilder &OMPBuilder,
 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
 // We have to process the component lists that relate with the same
 // declaration in a single chunk so that we can generate the map flags
 // correctly. Therefore, we organize all lists in a map.
 // 'Total' is not a real kind: it is the number of buckets and is used to
 // size the per-declaration vector when a declaration is first seen.
 enum MapKind { Present, Allocs, Other, Total };
 llvm::MapVector<CanonicalDeclPtr<const Decl>,
 SmallVector<SmallVector<MapInfo, 8>, 4>>
 Info;

 // Helper function to fill the information map for the different supported
 // clauses. Declarations contained in SkipVarSet are dropped here.
 auto &&InfoGen =
 [&Info, &SkipVarSet](
 const ValueDecl *D, MapKind Kind,
 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
 OpenMPMapClauseKind MapType,
 ArrayRef<OpenMPMapModifierKind> MapModifiers,
 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
 if (SkipVarSet.contains(V: D))
 return;
 auto It = Info.try_emplace(Key: D, Args: Total).first;
 It->second[Kind].emplace_back(
 Args&: L, Args&: MapType, Args&: MapModifiers, Args&: MotionModifiers, Args&: ReturnDevicePointer,
 Args&: IsImplicit, Args&: Mapper, Args&: VarRef, Args&: ForDeviceAddr);
 };

 // Phase 1a: map clauses. The bucket is 'Present' if the clause has the
 // present modifier, otherwise 'Allocs' for map type alloc, otherwise
 // 'Other'.
 for (const auto *Cl : Clauses) {
 const auto *C = dyn_cast<OMPMapClause>(Val: Cl);
 if (!C)
 continue;
 MapKind Kind = Other;
 if (llvm::is_contained(Range: C->getMapTypeModifiers(),
 Element: OMPC_MAP_MODIFIER_present))
 Kind = Present;
 else if (C->getMapType() == OMPC_MAP_alloc)
 Kind = Allocs;
 // EI walks the clause's variable references in lock-step with its
 // component lists.
 const auto *EI = C->getVarRefs().begin();
 for (const auto L : C->component_lists()) {
 // The variable reference is only passed on when the clause has a valid
 // map location.
 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
 InfoGen(std::get<0>(t: L), Kind, std::get<1>(t: L), C->getMapType(),
 C->getMapTypeModifiers(), {},
 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(t: L),
 E);
 ++EI;
 }
 }
 // Phase 1b: 'to' motion clauses, recorded with map type 'to'.
 for (const auto *Cl : Clauses) {
 const auto *C = dyn_cast<OMPToClause>(Val: Cl);
 if (!C)
 continue;
 MapKind Kind = Other;
 if (llvm::is_contained(Range: C->getMotionModifiers(),
 Element: OMPC_MOTION_MODIFIER_present))
 Kind = Present;
 // If the clause carries an iterator modifier, emit the declaration of its
 // first iterator variable.
 if (llvm::is_contained(Range: C->getMotionModifiers(),
 Element: OMPC_MOTION_MODIFIER_iterator)) {
 if (auto *IteratorExpr = dyn_cast<OMPIteratorExpr>(
 Val: C->getIteratorModifier()->IgnoreParenImpCasts())) {
 const auto *VD = cast<VarDecl>(Val: IteratorExpr->getIteratorDecl(I: 0));
 CGF.EmitVarDecl(D: *VD);
 }
 }

 const auto *EI = C->getVarRefs().begin();
 for (const auto L : C->component_lists()) {
 InfoGen(std::get<0>(t: L), Kind, std::get<1>(t: L), OMPC_MAP_to, {},
 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
 C->isImplicit(), std::get<2>(t: L), *EI);
 ++EI;
 }
 }
 // Phase 1c: 'from' motion clauses, recorded with map type 'from'. Handled
 // the same way as the 'to' clauses above.
 for (const auto *Cl : Clauses) {
 const auto *C = dyn_cast<OMPFromClause>(Val: Cl);
 if (!C)
 continue;
 MapKind Kind = Other;
 if (llvm::is_contained(Range: C->getMotionModifiers(),
 Element: OMPC_MOTION_MODIFIER_present))
 Kind = Present;
 if (llvm::is_contained(Range: C->getMotionModifiers(),
 Element: OMPC_MOTION_MODIFIER_iterator)) {
 if (auto *IteratorExpr = dyn_cast<OMPIteratorExpr>(
 Val: C->getIteratorModifier()->IgnoreParenImpCasts())) {
 const auto *VD = cast<VarDecl>(Val: IteratorExpr->getIteratorDecl(I: 0));
 CGF.EmitVarDecl(D: *VD);
 }
 }

 const auto *EI = C->getVarRefs().begin();
 for (const auto L : C->component_lists()) {
 InfoGen(std::get<0>(t: L), Kind, std::get<1>(t: L), OMPC_MAP_from, {},
 C->getMotionModifiers(),
 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(t: L),
 *EI);
 ++EI;
 }
 }

 // Look at the use_device_ptr and use_device_addr clauses information and
 // mark the existing map entries as such. If there is no map information for
 // an entry in the use_device_ptr and use_device_addr list, we create one
 // with map type 'return_param' and zero size section. It is the user's
 // fault if that was not mapped before. If there is no map information, then
 // we defer the emission of that entry until all the maps for the same VD
 // have been handled.
 MapCombinedInfoTy UseDeviceDataCombinedInfo;

 // Appends one standalone use_device_ptr/addr entry: base pointer and
 // pointer are both Ptr, the size is zero, and the type is RETURN_PARAM
 // (plus FB_NULLIFY when HasUdpFbNullify is set).
 auto &&UseDeviceDataCombinedInfoGen =
 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
 CodeGenFunction &CGF, bool IsDevAddr,
 bool HasUdpFbNullify = false) {
 UseDeviceDataCombinedInfo.Exprs.push_back(Elt: VD);
 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Args&: Ptr);
 UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(Args&: VD);
 UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
 Args: IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
 // FIXME: For use_device_addr on array-sections, this should
 // be the starting address of the section.
 // e.g. int *p;
 // ... use_device_addr(p[3])
 // &p[0], &p[3], /*size=*/0, RETURN_PARAM
 UseDeviceDataCombinedInfo.Pointers.push_back(Elt: Ptr);
 UseDeviceDataCombinedInfo.Sizes.push_back(
 Elt: llvm::Constant::getNullValue(Ty: CGF.Int64Ty));
 OpenMPOffloadMappingFlags Flags =
 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
 if (HasUdpFbNullify)
 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_FB_NULLIFY;
 UseDeviceDataCombinedInfo.Types.push_back(Elt: Flags);
 UseDeviceDataCombinedInfo.Mappers.push_back(Elt: nullptr);
 };

 // Computes the pointer value for an unmatched use_device_ptr/addr operand
 // and records a standalone entry for it.
 auto &&MapInfoGen =
 [&UseDeviceDataCombinedInfoGen](
 CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
 OMPClauseMappableExprCommon::MappableExprComponentListRef
 Components,
 bool IsDevAddr, bool IEIsAttachPtrForDevAddr = false,
 bool HasUdpFbNullify = false) {
 // We didn't find any match in our map information - generate a zero
 // size array section.
 // For a device address that is not an attach-ptr, use the address of
 // the expression (or its scalar value if it is not a glvalue). In all
 // other cases, load the pointer value stored in the expression.
 llvm::Value *Ptr;
 if (IsDevAddr && !IEIsAttachPtrForDevAddr) {
 if (IE->isGLValue())
 Ptr = CGF.EmitLValue(E: IE).getPointer(CGF);
 else
 Ptr = CGF.EmitScalarExpr(E: IE);
 } else {
 Ptr = CGF.EmitLoadOfScalar(lvalue: CGF.EmitLValue(E: IE), Loc: IE->getExprLoc());
 }
 bool TreatDevAddrAsDevPtr = IEIsAttachPtrForDevAddr;
 // For the purpose of address-translation, treat something like the
 // following:
 // int *p;
 // ... use_device_addr(p[1])
 // equivalent to
 // ... use_device_ptr(p)
 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, /*IsDevAddr=*/IsDevAddr &&
 !TreatDevAddrAsDevPtr,
 HasUdpFbNullify);
 };

 // Returns true if an already bucketed map entry for VD, whose attach-ptr
 // equals DesiredAttachPtrExpr, was found and tagged as returning the device
 // pointer/address. Returns false if nothing was tagged.
 auto &&IsMapInfoExist =
 [&Info, this](CodeGenFunction &CGF, const ValueDecl *VD, const Expr *IE,
 const Expr *DesiredAttachPtrExpr, bool IsDevAddr,
 bool HasUdpFbNullify = false) -> bool {
 // We potentially have map information for this declaration already.
 // Look for the first set of components that refer to it. If found,
 // return true.
 // If the first component is a member expression, we have to look into
 // 'this', which maps to null in the map of map information. Otherwise
 // look directly for the information.
 auto It = Info.find(Key: isa<MemberExpr>(Val: IE) ? nullptr : VD);
 if (It != Info.end()) {
 bool Found = false;
 // Visit the MapKind buckets in order; at most one candidate per bucket
 // is considered.
 for (auto &Data : It->second) {
 MapInfo *CI = nullptr;
 // We potentially have multiple maps for the same decl. We need to
 // only consider those for which the attach-ptr matches the desired
 // attach-ptr.
 auto *It = llvm::find_if(Range&: Data, P: [&](const MapInfo &MI) {
 if (MI.Components.back().getAssociatedDeclaration() != VD)
 return false;

 const Expr *MapAttachPtr = getAttachPtrExpr(Components: MI.Components);
 bool Match = AttachPtrComparator.areEqual(LHS: MapAttachPtr,
 RHS: DesiredAttachPtrExpr);
 return Match;
 });

 if (It != Data.end())
 CI = &*It;

 if (CI) {
 if (IsDevAddr) {
 // use_device_addr: a matching entry is always tagged.
 CI->ForDeviceAddr = true;
 CI->ReturnDevicePointer = true;
 CI->HasUdpFbNullify = HasUdpFbNullify;
 Found = true;
 break;
 } else {
 // use_device_ptr: the matching entry is tagged only if at least one
 // of the following holds: unified shared memory is required; IE is
 // a member expression; VD's non-reference type is not a pointer;
 // the component list has a single component; the second-to-last
 // component's expression is a member expression; VD is not a
 // VarDecl; VD has local storage; or the map's attach-ptr is a
 // DeclRefExpr referring to VD itself. Otherwise the search moves on
 // to the next bucket.
 auto PrevCI = std::next(x: CI->Components.rbegin());
 const auto *VarD = dyn_cast<VarDecl>(Val: VD);
 const Expr *AttachPtrExpr = getAttachPtrExpr(Components: CI->Components);
 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
 isa<MemberExpr>(Val: IE) ||
 !VD->getType().getNonReferenceType()->isPointerType() ||
 PrevCI == CI->Components.rend() ||
 isa<MemberExpr>(Val: PrevCI->getAssociatedExpression()) || !VarD ||
 VarD->hasLocalStorage() ||
 (isa_and_nonnull<DeclRefExpr>(Val: AttachPtrExpr) &&
 VD == cast<DeclRefExpr>(Val: AttachPtrExpr)->getDecl())) {
 CI->ForDeviceAddr = IsDevAddr;
 CI->ReturnDevicePointer = true;
 CI->HasUdpFbNullify = HasUdpFbNullify;
 Found = true;
 break;
 }
 }
 }
 }
 return Found;
 }
 return false;
 };

 // Look at the use_device_ptr clause information and mark the existing map
 // entries as such. If there is no map information for an entry in the
 // use_device_ptr list, we create one with map type 'return_param' and zero
 // size section. It is the user fault if that was not mapped before. Such
 // standalone entries are collected separately and only appended to
 // CombinedInfo after all map information has been emitted.
 for (const auto *Cl : Clauses) {
 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Val: Cl);
 if (!C)
 continue;
 bool HasUdpFbNullify =
 C->getFallbackModifier() == OMPC_USE_DEVICE_PTR_FALLBACK_fb_nullify;
 for (const auto L : C->component_lists()) {
 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
 std::get<1>(t: L);
 assert(!Components.empty() &&
 "Not expecting empty list of components!");
 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
 VD = cast<ValueDecl>(Val: VD->getCanonicalDecl());
 const Expr *IE = Components.back().getAssociatedExpression();
 // For use_device_ptr, we match an existing map clause if its attach-ptr
 // is same as the use_device_ptr operand. e.g.
 // map expr | use_device_ptr expr | current behavior
 // ---------|---------------------|-----------------
 // p[1] | p | match
 // ps->a | ps | match
 // p | p | no match
 const Expr *UDPOperandExpr =
 Components.front().getAssociatedExpression();
 if (IsMapInfoExist(CGF, VD, IE,
 /*DesiredAttachPtrExpr=*/UDPOperandExpr,
 /*IsDevAddr=*/false, HasUdpFbNullify))
 continue;
 MapInfoGen(CGF, IE, VD, Components, /*IsDevAddr=*/false,
 /*IEIsAttachPtrForDevAddr=*/false, HasUdpFbNullify);
 }
 }

 // Same for use_device_addr. 'Processed' makes sure each declaration is
 // handled only once, even if it appears in several component lists.
 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
 for (const auto *Cl : Clauses) {
 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Val: Cl);
 if (!C)
 continue;
 for (const auto L : C->component_lists()) {
 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
 std::get<1>(t: L);
 assert(!std::get<1>(L).empty() &&
 "Not expecting empty list of components!");
 const ValueDecl *VD = std::get<1>(t: L).back().getAssociatedDeclaration();
 if (!Processed.insert(V: VD).second)
 continue;
 VD = cast<ValueDecl>(Val: VD->getCanonicalDecl());
 // For use_device_addr, we match an existing map clause if the
 // use_device_addr operand's attach-ptr matches the map operand's
 // attach-ptr.
 // We could also restrict to only match cases when there is a full
 // match between the map/use_device_addr clause exprs, but that may be
 // unnecessary.
 //
 // map expr | use_device_addr expr | current | possible restrictive/
 // | | behavior | safer behavior
 // ---------|----------------------|-----------|-----------------------
 // p | p | match | match
 // p[0] | p[0] | match | match
 // p[0:1] | p[0] | match | no match
 // p[0:1] | p[2:1] | match | no match
 // p[1] | p[0] | match | no match
 // ps->a | ps->b | match | no match
 // p | p[0] | no match | no match
 // pp | pp[0][0] | no match | no match
 const Expr *UDAAttachPtrExpr = getAttachPtrExpr(Components);
 const Expr *IE = std::get<1>(t: L).back().getAssociatedExpression();
 assert((!UDAAttachPtrExpr || UDAAttachPtrExpr == IE) &&
 "use_device_addr operand has an attach-ptr, but does not match "
 "last component's expr.");
 if (IsMapInfoExist(CGF, VD, IE,
 /*DesiredAttachPtrExpr=*/UDAAttachPtrExpr,
 /*IsDevAddr=*/true))
 continue;
 MapInfoGen(CGF, IE, VD, Components,
 /*IsDevAddr=*/true,
 /*IEIsAttachPtrForDevAddr=*/UDAAttachPtrExpr != nullptr);
 }
 }

 // Phase 3: emit the entries of every declaration, in the insertion order of
 // the Info map.
 for (const auto &Data : Info) {
 MapCombinedInfoTy CurInfo;
 const Decl *D = Data.first;
 // VD is null for the entry keyed by a null declaration.
 const ValueDecl *VD = cast_or_null<ValueDecl>(Val: D);
 // Group component lists by their AttachPtrExpr and process them in order
 // of increasing complexity (nullptr first, then simple expressions like
 // p, then more complex ones like p[0], etc.)
 //
 // This is similar to how generateInfoForCaptureFromClauseInfo handles
 // grouping for target constructs.
 SmallVector<std::pair<const Expr *, MapInfo>, 16> AttachPtrMapInfoPairs;

 // First, collect all MapData entries with their attach-ptr exprs.
 for (const auto &M : Data.second) {
 for (const MapInfo &L : M) {
 assert(!L.Components.empty() &&
 "Not expecting declaration with no component lists.");

 const Expr *AttachPtrExpr = getAttachPtrExpr(Components: L.Components);
 AttachPtrMapInfoPairs.emplace_back(Args&: AttachPtrExpr, Args: L);
 }
 }

 // Next, sort by increasing order of their complexity. The sort is stable,
 // so entries with equivalent attach-ptrs keep their bucket order.
 llvm::stable_sort(Range&: AttachPtrMapInfoPairs,
 C: [this](const auto &LHS, const auto &RHS) {
 return AttachPtrComparator(LHS.first, RHS.first);
 });

 // And finally, process them all in order, grouping those with
 // equivalent attach-ptr exprs together.
 auto *It = AttachPtrMapInfoPairs.begin();
 while (It != AttachPtrMapInfoPairs.end()) {
 const Expr *AttachPtrExpr = It->first;

 // Gather the run of consecutive entries whose attach-ptr is identical or
 // equivalent to the first one of the run.
 SmallVector<MapInfo, 8> GroupLists;
 while (It != AttachPtrMapInfoPairs.end() &&
 (It->first == AttachPtrExpr ||
 AttachPtrComparator.areEqual(LHS: It->first, RHS: AttachPtrExpr))) {
 GroupLists.push_back(Elt: It->second);
 ++It;
 }
 assert(!GroupLists.empty() && "GroupLists should not be empty");

 StructRangeInfoTy PartialStruct;
 AttachInfoTy AttachInfo;
 MapCombinedInfoTy GroupCurInfo;
 // Current group's struct base information:
 MapCombinedInfoTy GroupStructBaseCurInfo;
 for (const MapInfo &L : GroupLists) {
 // Remember the current base pointer index.
 unsigned CurrentBasePointersIdx = GroupCurInfo.BasePointers.size();
 unsigned StructBasePointersIdx =
 GroupStructBaseCurInfo.BasePointers.size();

 GroupCurInfo.NonContigInfo.IsNonContiguous =
 L.Components.back().isNonContiguous();
 generateInfoForComponentList(
 MapType: L.MapType, MapModifiers: L.MapModifiers, MotionModifiers: L.MotionModifiers, Components: L.Components,
 CombinedInfo&: GroupCurInfo, StructBaseCombinedInfo&: GroupStructBaseCurInfo, PartialStruct, AttachInfo,
 /*IsFirstComponentList=*/false, IsImplicit: L.IsImplicit,
 /*GenerateAllInfoForClauses*/ true, Mapper: L.Mapper, ForDeviceAddr: L.ForDeviceAddr, BaseDecl: VD,
 MapExpr: L.VarRef, /*OverlappedElements*/ {});

 // If this entry relates to a device pointer, set the relevant
 // declaration and add the 'return pointer' flag.
 if (L.ReturnDevicePointer) {
 // Check whether a value was added to either GroupCurInfo or
 // GroupStructBaseCurInfo and error if no value was added to either
 // of them:
 assert((CurrentBasePointersIdx < GroupCurInfo.BasePointers.size() ||
 StructBasePointersIdx <
 GroupStructBaseCurInfo.BasePointers.size()) &&
 "Unexpected number of mapped base pointers.");

 // Choose a base pointer index which is always valid:
 const ValueDecl *RelevantVD =
 L.Components.back().getAssociatedDeclaration();
 assert(RelevantVD &&
 "No relevant declaration related with device pointer??");

 // If GroupStructBaseCurInfo has been updated this iteration then
 // work on the first new entry added to it i.e. make sure that when
 // multiple values are added to any of the lists, the first value
 // added is being modified by the assignments below (not the last
 // value added).
 auto SetDevicePointerInfo = [&](MapCombinedInfoTy &Info,
 unsigned Idx) {
 Info.DevicePtrDecls[Idx] = RelevantVD;
 Info.DevicePointers[Idx] = L.ForDeviceAddr
 ? DeviceInfoTy::Address
 : DeviceInfoTy::Pointer;
 Info.Types[Idx] |=
 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
 if (L.HasUdpFbNullify)
 Info.Types[Idx] |=
 OpenMPOffloadMappingFlags::OMP_MAP_FB_NULLIFY;
 };

 if (StructBasePointersIdx <
 GroupStructBaseCurInfo.BasePointers.size())
 SetDevicePointerInfo(GroupStructBaseCurInfo,
 StructBasePointersIdx);
 else
 SetDevicePointerInfo(GroupCurInfo, CurrentBasePointersIdx);
 }
 }

 // Unify entries in one list making sure the struct mapping precedes the
 // individual fields:
 MapCombinedInfoTy GroupUnionCurInfo;
 GroupUnionCurInfo.append(CurInfo&: GroupStructBaseCurInfo);
 GroupUnionCurInfo.append(CurInfo&: GroupCurInfo);

 // If there is an entry in PartialStruct it means we have a struct with
 // individual members mapped. Emit an extra combined entry.
 if (PartialStruct.Base.isValid()) {
 // Prepend a synthetic dimension of length 1 to represent the
 // aggregated struct object. Using 1 rather than 0 is important: a
 // value of 0 produced an incorrect non-contiguous descriptor
 // (DimSize==1), causing the non-contiguous motion clause path to be
 // skipped. Using 1:
 // * preserves the correct rank so targetDataUpdate() computes
 // DimSize == 2 for cases like strided array sections originating
 // from user-defined mappers (e.g. test with s.data[0:8:2]).
 GroupUnionCurInfo.NonContigInfo.Dims.insert(
 I: GroupUnionCurInfo.NonContigInfo.Dims.begin(), Elt: 1);
 // The MEMBER_OF offset is the number of entries already present in
 // CombinedInfo, i.e. emitted for previously processed declarations.
 emitCombinedEntry(
 CombinedInfo&: CurInfo, CurTypes&: GroupUnionCurInfo.Types, PartialStruct, AttachInfo,
 /*IsMapThis=*/!VD, OMPBuilder, VD,
 /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size(),
 /*NotTargetParams=*/true);
 }

 // Append this group's results to the overall CurInfo in the correct
 // order: combined-entry -> original-field-entries -> attach-entry
 CurInfo.append(CurInfo&: GroupUnionCurInfo);
 if (AttachInfo.isValid())
 emitAttachEntry(CGF, CombinedInfo&: CurInfo, AttachInfo);
 }

 // We need to append the results of this capture to what we already have.
 CombinedInfo.append(CurInfo);
 }
 // Append data for use_device_ptr/addr clauses.
 CombinedInfo.append(CurInfo&: UseDeviceDataCombinedInfo);
 }
9325
9326public:
9327 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
9328 : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {
9329 // Extract firstprivate clause information.
9330 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
9331 for (const auto *D : C->varlist())
9332 FirstPrivateDecls.try_emplace(
9333 Key: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D)->getDecl()), Args: C->isImplicit());
9334 // Extract implicit firstprivates from uses_allocators clauses.
9335 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
9336 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
9337 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
9338 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(Val: D.AllocatorTraits))
9339 FirstPrivateDecls.try_emplace(Key: cast<VarDecl>(Val: DRE->getDecl()),
9340 /*Implicit=*/Args: true);
9341 else if (const auto *VD = dyn_cast<VarDecl>(
9342 Val: cast<DeclRefExpr>(Val: D.Allocator->IgnoreParenImpCasts())
9343 ->getDecl()))
9344 FirstPrivateDecls.try_emplace(Key: VD, /*Implicit=*/Args: true);
9345 }
9346 }
9347 // Extract defaultmap clause information.
9348 for (const auto *C : Dir.getClausesOfKind<OMPDefaultmapClause>())
9349 if (C->getDefaultmapModifier() == OMPC_DEFAULTMAP_MODIFIER_firstprivate)
9350 DefaultmapFirstprivateKinds.insert(V: C->getDefaultmapKind());
9351 // Extract device pointer clause information.
9352 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
9353 for (auto L : C->component_lists())
9354 DevPointersMap[std::get<0>(t&: L)].push_back(Elt: std::get<1>(t&: L));
9355 // Extract device addr clause information.
9356 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
9357 for (auto L : C->component_lists())
9358 HasDevAddrsMap[std::get<0>(t&: L)].push_back(Elt: std::get<1>(t&: L));
9359 // Extract map information.
9360 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
9361 if (C->getMapType() != OMPC_MAP_to)
9362 continue;
9363 for (auto L : C->component_lists()) {
9364 const ValueDecl *VD = std::get<0>(t&: L);
9365 const auto *RD = VD ? VD->getType()
9366 .getCanonicalType()
9367 .getNonReferenceType()
9368 ->getAsCXXRecordDecl()
9369 : nullptr;
9370 if (RD && RD->isLambda())
9371 LambdasMap.try_emplace(Key: std::get<0>(t&: L), Args&: C);
9372 }
9373 }
9374
9375 auto CollectAttachPtrExprsForClauseComponents = [this](const auto *C) {
9376 for (auto L : C->component_lists()) {
9377 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
9378 std::get<1>(L);
9379 if (!Components.empty())
9380 collectAttachPtrExprInfo(Components, CurDir);
9381 }
9382 };
9383
9384 // Populate the AttachPtrExprMap for all component lists from map-related
9385 // clauses.
9386 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>())
9387 CollectAttachPtrExprsForClauseComponents(C);
9388 for (const auto *C : Dir.getClausesOfKind<OMPToClause>())
9389 CollectAttachPtrExprsForClauseComponents(C);
9390 for (const auto *C : Dir.getClausesOfKind<OMPFromClause>())
9391 CollectAttachPtrExprsForClauseComponents(C);
9392 for (const auto *C : Dir.getClausesOfKind<OMPUseDevicePtrClause>())
9393 CollectAttachPtrExprsForClauseComponents(C);
9394 for (const auto *C : Dir.getClausesOfKind<OMPUseDeviceAddrClause>())
9395 CollectAttachPtrExprsForClauseComponents(C);
9396 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
9397 CollectAttachPtrExprsForClauseComponents(C);
9398 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
9399 CollectAttachPtrExprsForClauseComponents(C);
9400 }
9401
 /// Constructor for the declare mapper directive.
 ///
 /// Stores \p Dir as the current directive and binds \p CGF. The body is
 /// empty, so no clause information is collected at construction time.
 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
 : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {}
9405
 /// Generate code for the combined entry if we have a partially mapped struct
 /// and take care of the mapping flags of the arguments corresponding to
 /// individual struct members.
 /// If a valid \p AttachInfo exists, its pointee addr will be updated to point
 /// to the combined-entry's begin address, if emitted.
 ///
 /// Nothing is emitted or modified when \p CurTypes holds exactly one entry
 /// whose MEMBER_OF bits are not all set and \p PartialStruct is not an array
 /// section.
 ///
 /// \param CombinedInfo Receives the combined entry (one element is appended
 /// to each of its arrays).
 /// \param CurTypes Map flags of the member entries. Updated in place: the
 /// TARGET_PARAM flag is cleared on the first element, OMPX_HOLD is propagated
 /// to all elements if any has it, and the MEMBER_OF field is set through
 /// \p OMPBuilder.
 /// \param PartialStruct Supplies the struct base address and the addresses
 /// of the lowest and highest mapped elements.
 /// \param AttachInfo If valid, its AttachPteeAddr is redirected to the begin
 /// address of the combined entry.
 /// \param IsMapThis Together with the enclosing method's class, decides
 /// whether the whole object is mapped (see the base-class handling below).
 /// \param OMPBuilder Used to compute and apply the MEMBER_OF flag.
 /// \param VD Declaration recorded as the expression of the combined entry.
 /// \param OffsetForMemberOfFlag Added to the index of the combined entry
 /// within \p CombinedInfo to obtain the position encoded in MEMBER_OF.
 /// \param NotTargetParams If true, the combined entry starts with map type
 /// OMP_MAP_NONE instead of a target-parameter type.
 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
 MapFlagsArrayTy &CurTypes,
 const StructRangeInfoTy &PartialStruct,
 AttachInfoTy &AttachInfo, bool IsMapThis,
 llvm::OpenMPIRBuilder &OMPBuilder, const ValueDecl *VD,
 unsigned OffsetForMemberOfFlag,
 bool NotTargetParams) const {
 // A single entry that is not fully marked MEMBER_OF and does not stem from
 // an array section needs no combined entry.
 if (CurTypes.size() == 1 &&
 ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
 !PartialStruct.IsArraySection)
 return;
 Address LBAddr = PartialStruct.LowestElem.second;
 Address HBAddr = PartialStruct.HighestElem.second;
 // With a complete record, both bounds are PartialStruct.LB.
 if (PartialStruct.HasCompleteRecord) {
 LBAddr = PartialStruct.LB;
 HBAddr = PartialStruct.LB;
 }
 CombinedInfo.Exprs.push_back(Elt: VD);
 // Base is the base of the struct
 CombinedInfo.BasePointers.push_back(Elt: PartialStruct.Base.emitRawPointer(CGF));
 CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
 CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
 // Pointer is the address of the lowest element
 llvm::Value *LB = LBAddr.emitRawPointer(CGF);
 // HasBaseClass is true only when mapping 'this' inside a C++ method whose
 // class has at least one base class.
 const CXXMethodDecl *MD =
 CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(Val: CGF.CurFuncDecl) : nullptr;
 const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
 bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
 // There should not be a mapper for a combined entry.
 if (HasBaseClass) {
 // OpenMP 5.2 148:21:
 // If the target construct is within a class non-static member function,
 // and a variable is an accessible data member of the object for which the
 // non-static data member function is invoked, the variable is treated as
 // if the this[:1] expression had appeared in a map clause with a map-type
 // of tofrom.
 // Emit this[:1]
 CombinedInfo.Pointers.push_back(Elt: PartialStruct.Base.emitRawPointer(CGF));
 QualType Ty = MD->getFunctionObjectParameterType();
 llvm::Value *Size =
 CGF.Builder.CreateIntCast(V: CGF.getTypeSize(Ty), DestTy: CGF.Int64Ty,
 /*isSigned=*/true);
 CombinedInfo.Sizes.push_back(Elt: Size);
 } else {
 CombinedInfo.Pointers.push_back(Elt: LB);
 // Size is (addr of {highest+1} element) - (addr of lowest element)
 llvm::Value *HB = HBAddr.emitRawPointer(CGF);
 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
 Ty: HBAddr.getElementType(), Ptr: HB, /*Idx0=*/1);
 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(V: LB, DestTy: CGF.VoidPtrTy);
 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(V: HAddr, DestTy: CGF.VoidPtrTy);
 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(LHS: CHAddr, RHS: CLAddr);
 llvm::Value *Size = CGF.Builder.CreateIntCast(V: Diff, DestTy: CGF.Int64Ty,
 /*isSigned=*/false);
 CombinedInfo.Sizes.push_back(Elt: Size);
 }
 CombinedInfo.Mappers.push_back(Elt: nullptr);
 // Initial map type of the combined entry: NONE when NotTargetParams is set;
 // otherwise PTR_AND_OBJ if PartialStruct carries preliminary map data, and
 // TARGET_PARAM if it does not.
 CombinedInfo.Types.push_back(
 Elt: NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
 : !PartialStruct.PreliminaryMapData.BasePointers.empty()
 ? OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ
 : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
 // If any element has the present modifier, then make sure the runtime
 // doesn't attempt to allocate the struct.
 if (CurTypes.end() !=
 llvm::find_if(Range&: CurTypes, P: [](OpenMPOffloadMappingFlags Type) {
 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
 Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
 }))
 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
 // Remove TARGET_PARAM flag from the first element
 (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
 // If any element has the ompx_hold modifier, then make sure the runtime
 // uses the hold reference count for the struct as a whole so that it won't
 // be unmapped by an extra dynamic reference count decrement. Add it to all
 // elements as well so the runtime knows which reference count to check
 // when determining whether it's time for device-to-host transfers of
 // individual elements.
 if (CurTypes.end() !=
 llvm::find_if(Range&: CurTypes, P: [](OpenMPOffloadMappingFlags Type) {
 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
 Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
 })) {
 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
 for (auto &M : CurTypes)
 M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
 }

 // All other current entries will be MEMBER_OF the combined entry
 // (except for PTR_AND_OBJ entries which do not have a placeholder value
 // 0xFFFF in the MEMBER_OF field, or ATTACH entries since they are expected
 // to be handled by themselves, after all other maps).
 // The position is the index of the combined entry just pushed, shifted by
 // OffsetForMemberOfFlag.
 OpenMPOffloadMappingFlags MemberOfFlag = OMPBuilder.getMemberOfFlag(
 Position: OffsetForMemberOfFlag + CombinedInfo.BasePointers.size() - 1);
 for (auto &M : CurTypes)
 OMPBuilder.setCorrectMemberOfFlag(Flags&: M, MemberOfFlag);

 // When a combined entry is emitted and AttachInfo is valid, update the
 // pointee address to point to the begin address of the combined entry.
 // This means only one attachment per combined entry is done. So, for
 // instance, if we have:
 // S *ps;
 // ... map(ps->a, ps->b)
 // we still get a single ATTACH entry, like:
 //
 // &ps[0], &ps->a, sizeof(ps->a to ps->b), ALLOC // combined-entry
 // &ps[0], &ps->a, sizeof(ps->a), TO | FROM
 // &ps[0], &ps->b, sizeof(ps->b), TO | FROM
 // &ps, &ps->a, sizeof(void*), ATTACH // Use combined-entry's LB
 if (AttachInfo.isValid())
 AttachInfo.AttachPteeAddr = LBAddr;
 }
9531
9532 /// Generate all the base pointers, section pointers, sizes, map types, and
9533 /// mappers for the extracted mappable expressions (all included in \a
9534 /// CombinedInfo). Also, for each item that relates with a device pointer, a
9535 /// pair of the relevant declaration and index where it occurs is appended to
9536 /// the device pointers info array.
9537 void generateAllInfo(
9538 MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
9539 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9540 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9541 assert(isa<const OMPExecutableDirective *>(CurDir) &&
9542 "Expect a executable directive");
9543 const auto *CurExecDir = cast<const OMPExecutableDirective *>(Val: CurDir);
9544 generateAllInfoForClauses(Clauses: CurExecDir->clauses(), CombinedInfo, OMPBuilder,
9545 SkipVarSet);
9546 }
9547
9548 /// Generate all the base pointers, section pointers, sizes, map types, and
9549 /// mappers for the extracted map clauses of user-defined mapper (all included
9550 /// in \a CombinedInfo).
9551 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
9552 llvm::OpenMPIRBuilder &OMPBuilder) const {
9553 assert(isa<const OMPDeclareMapperDecl *>(CurDir) &&
9554 "Expect a declare mapper directive");
9555 const auto *CurMapperDir = cast<const OMPDeclareMapperDecl *>(Val: CurDir);
9556 generateAllInfoForClauses(Clauses: CurMapperDir->clauses(), CombinedInfo,
9557 OMPBuilder);
9558 }
9559
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// If the type of \a VD (with references stripped) is not a lambda closure
  /// type, nothing is emitted. Otherwise one map entry is appended to
  /// \a CombinedInfo for the captured `this` (if there is one) and for every
  /// captured variable that is either captured by reference or has pointer
  /// type. Every entry is tagged
  /// PTR_AND_OBJ | LITERAL | MEMBER_OF | IMPLICIT, which is exactly the flag
  /// combination adjustMemberOfForLambdaCaptures searches for.
  ///
  /// \param VD The captured declaration whose type is inspected.
  /// \param Arg Value used as the address of the lambda object when forming
  ///        the field lvalues.
  /// \param CombinedInfo Receives the generated entries.
  /// \param LambdaPointers Receives, per emitted entry, a mapping from the
  ///        entry's base pointer (the capture field) to the pointer of the
  ///        lambda object it belongs to.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
    const auto *RD = VDType->getAsCXXRecordDecl();
    // Only lambda closure objects are of interest here.
    if (!RD || !RD->isLambda())
      return;
    // Build an lvalue for the lambda object itself, located at Arg.
    Address VDAddr(Arg, CGF.ConvertTypeForMem(T: VDType),
                   CGF.getContext().getDeclAlign(D: VD));
    LValue VDLVal = CGF.MakeAddrLValue(Addr: VDAddr, T: VDType);
    llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      // Two lvalues are formed for the same field: the first one supplies the
      // base pointer of the entry, the second one supplies the pointer.
      // NOTE(review): presumably the first addresses the field's own storage
      // and the second the object the field refers to -- confirm.
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(Base: VDLVal, Field: ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(Base: VDLVal, Field: ThisCapture);
      // Remember which lambda object this field belongs to.
      LambdaPointers.try_emplace(Key: ThisLVal.getPointer(CGF),
                                 Args: VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(Elt: VD);
      CombinedInfo.BasePointers.push_back(Elt: ThisLVal.getPointer(CGF));
      CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
      CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(Elt: ThisLValVal.getPointer(CGF));
      // The recorded size is that of a void pointer.
      CombinedInfo.Sizes.push_back(
          Elt: CGF.Builder.CreateIntCast(V: CGF.getTypeSize(Ty: CGF.getContext().VoidPtrTy),
                                    DestTy: CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          Elt: OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(Elt: nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = cast<VarDecl>(Val: LC.getCapturedVar());
      // By-copy captures are only handled when the variable is a pointer.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(Val: VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(Base: VDLVal, Field: It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: the pointer comes from the field lvalue and
        // the size is that of the captured variable's non-reference type.
        LValue VarLValVal = CGF.EmitLValueForField(Base: VDLVal, Field: It->second);
        LambdaPointers.try_emplace(Key: VarLVal.getPointer(CGF),
                                   Args: VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(Elt: VD);
        CombinedInfo.BasePointers.push_back(Elt: VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
        CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(Elt: VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
            V: CGF.getTypeSize(
                Ty: VD->getType().getCanonicalType().getNonReferenceType()),
            DestTy: CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // By-copy pointer capture: the pointer value is loaded from the field
        // and the entry is recorded with a size of zero.
        RValue VarRVal = CGF.EmitLoadOfLValue(V: VarLVal, Loc: RD->getLocation());
        LambdaPointers.try_emplace(Key: VarLVal.getPointer(CGF),
                                   Args: VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(Elt: VD);
        CombinedInfo.BasePointers.push_back(Elt: VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
        CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(Elt: VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(Elt: llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: 0));
      }
      // Same flag combination as for the `this` capture above.
      CombinedInfo.Types.push_back(
          Elt: OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(Elt: nullptr);
    }
  }
9636
9637 /// Set correct indices for lambdas captures.
9638 void adjustMemberOfForLambdaCaptures(
9639 llvm::OpenMPIRBuilder &OMPBuilder,
9640 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9641 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9642 MapFlagsArrayTy &Types) const {
9643 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9644 // Set correct member_of idx for all implicit lambda captures.
9645 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9646 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9647 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9648 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
9649 continue;
9650 llvm::Value *BasePtr = LambdaPointers.lookup(Val: BasePointers[I]);
9651 assert(BasePtr && "Unable to find base lambda address.");
9652 int TgtIdx = -1;
9653 for (unsigned J = I; J > 0; --J) {
9654 unsigned Idx = J - 1;
9655 if (Pointers[Idx] != BasePtr)
9656 continue;
9657 TgtIdx = Idx;
9658 break;
9659 }
9660 assert(TgtIdx != -1 && "Unable to find parent lambda.");
9661 // All other current entries will be MEMBER_OF the combined entry
9662 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9663 // 0xFFFF in the MEMBER_OF field).
9664 OpenMPOffloadMappingFlags MemberOfFlag =
9665 OMPBuilder.getMemberOfFlag(Position: TgtIdx);
9666 OMPBuilder.setCorrectMemberOfFlag(Flags&: Types[I], MemberOfFlag);
9667 }
9668 }
9669
9670 /// Populate component lists for non-lambda captured variables from map,
9671 /// is_device_ptr and has_device_addr clause info.
9672 void populateComponentListsForNonLambdaCaptureFromClauses(
9673 const ValueDecl *VD, MapDataArrayTy &DeclComponentLists,
9674 SmallVectorImpl<
9675 SmallVector<OMPClauseMappableExprCommon::MappableComponent, 8>>
9676 &StorageForImplicitlyAddedComponentLists) const {
9677 if (VD && LambdasMap.count(Val: VD))
9678 return;
9679
9680 // For member fields list in is_device_ptr, store it in
9681 // DeclComponentLists for generating components info.
9682 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
9683 auto It = DevPointersMap.find(Val: VD);
9684 if (It != DevPointersMap.end())
9685 for (const auto &MCL : It->second)
9686 DeclComponentLists.emplace_back(Args: MCL, Args: OMPC_MAP_to, Args: Unknown,
9687 /*IsImpicit = */ Args: true, Args: nullptr,
9688 Args: nullptr);
9689 auto I = HasDevAddrsMap.find(Val: VD);
9690 if (I != HasDevAddrsMap.end())
9691 for (const auto &MCL : I->second)
9692 DeclComponentLists.emplace_back(Args: MCL, Args: OMPC_MAP_tofrom, Args: Unknown,
9693 /*IsImpicit = */ Args: true, Args: nullptr,
9694 Args: nullptr);
9695 assert(isa<const OMPExecutableDirective *>(CurDir) &&
9696 "Expect a executable directive");
9697 const auto *CurExecDir = cast<const OMPExecutableDirective *>(Val: CurDir);
9698 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9699 const auto *EI = C->getVarRefs().begin();
9700 for (const auto L : C->decl_component_lists(VD)) {
9701 const ValueDecl *VDecl, *Mapper;
9702 // The Expression is not correct if the mapping is implicit
9703 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9704 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9705 std::tie(args&: VDecl, args&: Components, args&: Mapper) = L;
9706 assert(VDecl == VD && "We got information for the wrong declaration??");
9707 assert(!Components.empty() &&
9708 "Not expecting declaration with no component lists.");
9709 DeclComponentLists.emplace_back(Args&: Components, Args: C->getMapType(),
9710 Args: C->getMapTypeModifiers(),
9711 Args: C->isImplicit(), Args&: Mapper, Args&: E);
9712 ++EI;
9713 }
9714 }
9715
9716 // For the target construct, if there's a map with a base-pointer that's
9717 // a member of an implicitly captured struct, of the current class,
9718 // we need to emit an implicit map on the pointer.
9719 if (isOpenMPTargetExecutionDirective(DKind: CurExecDir->getDirectiveKind()))
9720 addImplicitMapForAttachPtrBaseIfMemberOfCapturedVD(
9721 CapturedVD: VD, DeclComponentLists, ComponentVectorStorage&: StorageForImplicitlyAddedComponentLists);
9722
9723 llvm::stable_sort(Range&: DeclComponentLists, C: [](const MapData &LHS,
9724 const MapData &RHS) {
9725 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(t: LHS);
9726 OpenMPMapClauseKind MapType = std::get<1>(t: RHS);
9727 bool HasPresent =
9728 llvm::is_contained(Range&: MapModifiers, Element: clang::OMPC_MAP_MODIFIER_present);
9729 bool HasAllocs = MapType == OMPC_MAP_alloc;
9730 MapModifiers = std::get<2>(t: RHS);
9731 MapType = std::get<1>(t: LHS);
9732 bool HasPresentR =
9733 llvm::is_contained(Range&: MapModifiers, Element: clang::OMPC_MAP_MODIFIER_present);
9734 bool HasAllocsR = MapType == OMPC_MAP_alloc;
9735 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9736 });
9737 }
9738
9739 /// On a target construct, if there's an implicit map on a struct, or that of
9740 /// this[:], and an explicit map with a member of that struct/class as the
9741 /// base-pointer, we need to make sure that base-pointer is implicitly mapped,
9742 /// to make sure we don't map the full struct/class. For example:
9743 ///
9744 /// \code
9745 /// struct S {
9746 /// int dummy[10000];
9747 /// int *p;
9748 /// void f1() {
9749 /// #pragma omp target map(p[0:1])
9750 /// (void)this;
9751 /// }
9752 /// }; S s;
9753 ///
9754 /// void f2() {
9755 /// #pragma omp target map(s.p[0:10])
9756 /// (void)s;
9757 /// }
9758 /// \endcode
9759 ///
9760 /// Only `this-p` and `s.p` should be mapped in the two cases above.
9761 //
9762 // OpenMP 6.0: 7.9.6 map clause, pg 285
9763 // If a list item with an implicitly determined data-mapping attribute does
9764 // not have any corresponding storage in the device data environment prior to
9765 // a task encountering the construct associated with the map clause, and one
9766 // or more contiguous parts of the original storage are either list items or
9767 // base pointers to list items that are explicitly mapped on the construct,
9768 // only those parts of the original storage will have corresponding storage in
9769 // the device data environment as a result of the map clauses on the
9770 // construct.
9771 void addImplicitMapForAttachPtrBaseIfMemberOfCapturedVD(
9772 const ValueDecl *CapturedVD, MapDataArrayTy &DeclComponentLists,
9773 SmallVectorImpl<
9774 SmallVector<OMPClauseMappableExprCommon::MappableComponent, 8>>
9775 &ComponentVectorStorage) const {
9776 bool IsThisCapture = CapturedVD == nullptr;
9777
9778 for (const auto &ComponentsAndAttachPtr : AttachPtrExprMap) {
9779 OMPClauseMappableExprCommon::MappableExprComponentListRef
9780 ComponentsWithAttachPtr = ComponentsAndAttachPtr.first;
9781 const Expr *AttachPtrExpr = ComponentsAndAttachPtr.second;
9782 if (!AttachPtrExpr)
9783 continue;
9784
9785 const auto *ME = dyn_cast<MemberExpr>(Val: AttachPtrExpr);
9786 if (!ME)
9787 continue;
9788
9789 const Expr *Base = ME->getBase()->IgnoreParenImpCasts();
9790
9791 // If we are handling a "this" capture, then we are looking for
9792 // attach-ptrs of form `this->p`, either explicitly or implicitly.
9793 if (IsThisCapture && !ME->isImplicitCXXThis() && !isa<CXXThisExpr>(Val: Base))
9794 continue;
9795
9796 if (!IsThisCapture && (!isa<DeclRefExpr>(Val: Base) ||
9797 cast<DeclRefExpr>(Val: Base)->getDecl() != CapturedVD))
9798 continue;
9799
9800 // For non-this captures, we are looking for attach-ptrs of form
9801 // `s.p`.
9802 // For non-this captures, we are looking for attach-ptrs like `s.p`.
9803 if (!IsThisCapture && (ME->isArrow() || !isa<DeclRefExpr>(Val: Base) ||
9804 cast<DeclRefExpr>(Val: Base)->getDecl() != CapturedVD))
9805 continue;
9806
9807 // Check if we have an existing map on either:
9808 // this[:], s, this->p, or s.p, in which case, we don't need to add
9809 // an implicit one for the attach-ptr s.p/this->p.
9810 bool FoundExistingMap = false;
9811 for (const MapData &ExistingL : DeclComponentLists) {
9812 OMPClauseMappableExprCommon::MappableExprComponentListRef
9813 ExistingComponents = std::get<0>(t: ExistingL);
9814
9815 if (ExistingComponents.empty())
9816 continue;
9817
9818 // First check if we have a map like map(this->p) or map(s.p).
9819 const auto &FirstComponent = ExistingComponents.front();
9820 const Expr *FirstExpr = FirstComponent.getAssociatedExpression();
9821
9822 if (!FirstExpr)
9823 continue;
9824
9825 // First check if we have a map like map(this->p) or map(s.p).
9826 if (AttachPtrComparator.areEqual(LHS: FirstExpr, RHS: AttachPtrExpr)) {
9827 FoundExistingMap = true;
9828 break;
9829 }
9830
9831 // Check if we have a map like this[0:1]
9832 if (IsThisCapture) {
9833 if (const auto *OASE = dyn_cast<ArraySectionExpr>(Val: FirstExpr)) {
9834 if (isa<CXXThisExpr>(Val: OASE->getBase()->IgnoreParenImpCasts())) {
9835 FoundExistingMap = true;
9836 break;
9837 }
9838 }
9839 continue;
9840 }
9841
9842 // When the attach-ptr is something like `s.p`, check if
9843 // `s` itself is mapped explicitly.
9844 if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: FirstExpr)) {
9845 if (DRE->getDecl() == CapturedVD) {
9846 FoundExistingMap = true;
9847 break;
9848 }
9849 }
9850 }
9851
9852 if (FoundExistingMap)
9853 continue;
9854
9855 // If no base map is found, we need to create an implicit map for the
9856 // attach-pointer expr.
9857
9858 ComponentVectorStorage.emplace_back();
9859 auto &AttachPtrComponents = ComponentVectorStorage.back();
9860
9861 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
9862 bool SeenAttachPtrComponent = false;
9863 // For creating a map on the attach-ptr `s.p/this->p`, we copy all
9864 // components from the component-list which has `s.p/this->p`
9865 // as the attach-ptr, starting from the component which matches
9866 // `s.p/this->p`. This way, we'll have component-lists of
9867 // `s.p` -> `s`, and `this->p` -> `this`.
9868 for (size_t i = 0; i < ComponentsWithAttachPtr.size(); ++i) {
9869 const auto &Component = ComponentsWithAttachPtr[i];
9870 const Expr *ComponentExpr = Component.getAssociatedExpression();
9871
9872 if (!SeenAttachPtrComponent && ComponentExpr != AttachPtrExpr)
9873 continue;
9874 SeenAttachPtrComponent = true;
9875
9876 AttachPtrComponents.emplace_back(Args: Component.getAssociatedExpression(),
9877 Args: Component.getAssociatedDeclaration(),
9878 Args: Component.isNonContiguous());
9879 }
9880 assert(!AttachPtrComponents.empty() &&
9881 "Could not populate component-lists for mapping attach-ptr");
9882
9883 DeclComponentLists.emplace_back(
9884 Args&: AttachPtrComponents, Args: OMPC_MAP_tofrom, Args: Unknown,
9885 /*IsImplicit=*/Args: true, /*mapper=*/Args: nullptr, Args&: AttachPtrExpr);
9886 }
9887 }
9888
  /// For a capture that has an associated clause, generate the base pointers,
  /// section pointers, sizes, map types, and mappers (all included in
  /// \a CurCaptureVarInfo).
  ///
  /// \param DeclComponentListsFromClauses Component lists collected for the
  ///        capture.
  /// \param Cap The capture being processed; must not capture a variable
  ///        array type.
  /// \param Arg Value used as base pointer and pointer when the capture is
  ///        listed in is_device_ptr / has_device_addr.
  /// \param CurCaptureVarInfo Receives the generated entries.
  /// \param OMPBuilder Forwarded to emitCombinedEntry.
  /// \param OffsetForMemberOfFlag Forwarded to emitCombinedEntry.
  void generateInfoForCaptureFromClauseInfo(
      const MapDataArrayTy &DeclComponentListsFromClauses,
      const CapturedStmt::Capture *Cap, llvm::Value *Arg,
      MapCombinedInfoTy &CurCaptureVarInfo, llvm::OpenMPIRBuilder &OMPBuilder,
      unsigned OffsetForMemberOfFlag) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // The captured declaration; null when the capture is `this`.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // for map(to: lambda): skip here, processing it in
    // generateDefaultMapInfo
    if (LambdasMap.count(Val: VD))
      return;

    // If this declaration appears in an is_device_ptr (or has_device_addr)
    // clause we just have to pass the pointer by value. If it is a reference
    // to a declaration, we just pass its value. A single
    // LITERAL | TARGET_PARAM entry of pointer size is emitted.
    if (VD && (DevPointersMap.count(Val: VD) || HasDevAddrsMap.count(Val: VD))) {
      CurCaptureVarInfo.Exprs.push_back(Elt: VD);
      CurCaptureVarInfo.BasePointers.emplace_back(Args&: Arg);
      CurCaptureVarInfo.DevicePtrDecls.emplace_back(Args&: VD);
      CurCaptureVarInfo.DevicePointers.emplace_back(Args: DeviceInfoTy::Pointer);
      CurCaptureVarInfo.Pointers.push_back(Elt: Arg);
      CurCaptureVarInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
          V: CGF.getTypeSize(Ty: CGF.getContext().VoidPtrTy), DestTy: CGF.Int64Ty,
          /*isSigned=*/true));
      CurCaptureVarInfo.Types.push_back(
          Elt: OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
      CurCaptureVarInfo.Mappers.push_back(Elt: nullptr);
      return;
    }

    // Processes one group of component lists and appends the resulting
    // entries to CurCaptureVarInfo. Note that the first lambda parameter
    // shadows the enclosing function's parameter of the same name: inside the
    // lambda it denotes only the group being processed.
    auto GenerateInfoForComponentLists =
        [&](ArrayRef<MapData> DeclComponentListsFromClauses,
            bool IsEligibleForTargetParamFlag) {
          MapCombinedInfoTy CurInfoForComponentLists;
          StructRangeInfoTy PartialStruct;
          AttachInfoTy AttachInfo;

          if (DeclComponentListsFromClauses.empty())
            return;

          generateInfoForCaptureFromComponentLists(
              VD, DeclComponentLists: DeclComponentListsFromClauses, CurComponentListInfo&: CurInfoForComponentLists,
              PartialStruct, AttachInfo, IsListEligibleForTargetParamFlag: IsEligibleForTargetParamFlag);

          // If there is an entry in PartialStruct it means we have a
          // struct with individual members mapped. Emit an extra combined
          // entry.
          if (PartialStruct.Base.isValid()) {
            CurCaptureVarInfo.append(CurInfo&: PartialStruct.PreliminaryMapData);
            emitCombinedEntry(
                CombinedInfo&: CurCaptureVarInfo, CurTypes&: CurInfoForComponentLists.Types,
                PartialStruct, AttachInfo, IsMapThis: Cap->capturesThis(), OMPBuilder,
                /*VD=*/nullptr, OffsetForMemberOfFlag,
                /*NotTargetParams*/ !IsEligibleForTargetParamFlag);
          }

          // We do the appends to get the entries in the following order:
          // combined-entry -> individual-field-entries -> attach-entry.
          CurCaptureVarInfo.append(CurInfo&: CurInfoForComponentLists);
          if (AttachInfo.isValid())
            emitAttachEntry(CGF, CombinedInfo&: CurCaptureVarInfo, AttachInfo);
        };

    // Group component lists by their AttachPtrExpr and process them in order
    // of increasing complexity (nullptr first, then simple expressions like p,
    // then more complex ones like p[0], etc.)
    //
    // This ensures that we:
    // * handle maps that can contribute towards setting the kernel argument,
    //   (e.g. map(ps), or map(ps[0])), before any that cannot (e.g. ps->pt->d).
    // * allocate a single contiguous storage for all exprs with the same
    //   captured var and having the same attach-ptr.
    //
    // Example: The map clauses below should be handled grouped together based
    // on their attachable-base-pointers:
    //   map-clause                | attachable-base-pointer
    //   --------------------------+------------------------
    //   map(p, ps)                | nullptr
    //   map(p[0])                 | p
    //   map(p[0]->b, p[0]->c)     | p[0]
    //   map(ps->d, ps->e, ps->pt) | ps
    //   map(ps->pt->d, ps->pt->e) | ps->pt

    // First, collect all MapData entries with their attach-ptr exprs.
    SmallVector<std::pair<const Expr *, MapData>, 16> AttachPtrMapDataPairs;

    for (const MapData &L : DeclComponentListsFromClauses) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
          std::get<0>(t: L);
      const Expr *AttachPtrExpr = getAttachPtrExpr(Components);
      AttachPtrMapDataPairs.emplace_back(Args&: AttachPtrExpr, Args: L);
    }

    // Next, sort by increasing order of their complexity. The sort is stable,
    // so entries that compare equivalent keep their relative order.
    llvm::stable_sort(Range&: AttachPtrMapDataPairs,
                      C: [this](const auto &LHS, const auto &RHS) {
                        return AttachPtrComparator(LHS.first, RHS.first);
                      });

    // True when nothing has been added to CurCaptureVarInfo before this call.
    bool NoDefaultMappingDoneForVD = CurCaptureVarInfo.BasePointers.empty();
    bool IsFirstGroup = true;

    // And finally, process them all in order, grouping those with
    // equivalent attach-ptr exprs together.
    auto *It = AttachPtrMapDataPairs.begin();
    while (It != AttachPtrMapDataPairs.end()) {
      const Expr *AttachPtrExpr = It->first;

      // Consume the run of consecutive entries whose attach-ptr is the same
      // pointer or compares equal under AttachPtrComparator.
      MapDataArrayTy GroupLists;
      while (It != AttachPtrMapDataPairs.end() &&
             (It->first == AttachPtrExpr ||
              AttachPtrComparator.areEqual(LHS: It->first, RHS: AttachPtrExpr))) {
        GroupLists.push_back(Elt: It->second);
        ++It;
      }
      assert(!GroupLists.empty() && "GroupLists should not be empty");

      // Determine if this group of component-lists is eligible for TARGET_PARAM
      // flag. Only the first group processed should be eligible, and only if no
      // default mapping was done.
      bool IsEligibleForTargetParamFlag =
          IsFirstGroup && NoDefaultMappingDoneForVD;

      GenerateInfoForComponentLists(GroupLists, IsEligibleForTargetParamFlag);
      IsFirstGroup = false;
    }
  }
10026
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to \a DeclComponentLists for a given capture
  /// \a VD (all included in \a CurComponentListInfo).
  ///
  /// The function works in three steps: it first detects component lists that
  /// overlap with one another, then orders the overlapped lists of each base
  /// list, and finally calls generateInfoForComponentList once per component
  /// list (lists that have overlapped elements first, then the remaining
  /// ones).
  ///
  /// \param VD The captured declaration.
  /// \param DeclComponentLists The component lists to process.
  /// \param CurComponentListInfo Receives the generated entries.
  /// \param PartialStruct Forwarded to generateInfoForComponentList.
  /// \param AttachInfo Forwarded to generateInfoForComponentList.
  /// \param IsListEligibleForTargetParamFlag Initial value of the flag that is
  ///        passed as IsFirstComponentList to generateInfoForComponentList.
  void generateInfoForCaptureFromComponentLists(
      const ValueDecl *VD, ArrayRef<MapData> DeclComponentLists,
      MapCombinedInfoTy &CurComponentListInfo, StructRangeInfoTy &PartialStruct,
      AttachInfoTy &AttachInfo, bool IsListEligibleForTargetParamFlag) const {
    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(args&: Components, args&: MapType, args&: MapModifiers, args&: IsImplicit, args&: Mapper, args&: VarRef) =
          L;
      ++Count;
      // Compare L against every list that follows it. Only Components and
      // Components1 are used in this loop; the other unpacked fields are
      // overwritten by the inner std::tie and not read here.
      for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(N: Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(args&: Components1, args&: MapType, args&: MapModifiers, args&: IsImplicit, args&: Mapper,
                 args&: VarRef) = L1;
        // Walk both lists from their last component towards the first for as
        // long as they agree.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          // It points into the list that was not exhausted.
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(Val: It->getAssociatedExpression()) ||
              (std::prev(x: It)->getAssociatedDeclaration() &&
               std::prev(x: It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(x: It) != CE && std::next(x: It) != SE))
            continue;
          // The exhausted list is the base; the other one is recorded as a
          // sub-component overlapping with it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          OverlappedData[&BaseData].push_back(Elt: SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointer/array levels from VD's type until the innermost element
      // type is reached, then collect the fields of that record.
      // NOTE(review): VD is dereferenced here; confirm that overlapped data
      // cannot occur when the caller passes a null VD.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(RD: CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(in_start: RD->field_begin(), in_end: RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Range&: Pair.getSecond(),
          C: [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            // Skip the common trailing part of both lists.
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with fewer elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Both lists diverge at a field: order by field index when the
            // fields share a parent, otherwise by whichever field appears
            // first in Layout.
            const auto *FD1 = cast<FieldDecl>(Val: CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(Val: SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            // NOTE(review): the result of find_if is dereferenced without a
            // check; this assumes FD1 or FD2 is present in Layout -- confirm.
            const auto *It =
                llvm::find_if(Range&: Layout, P: [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    // AddTargetParamFlag is cleared after the first processed element so that
    // at most one element is treated as the first component list.
    bool AddTargetParamFlag = IsListEligibleForTargetParamFlag;
    MapCombinedInfoTy StructBaseCombinedInfo;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(args&: Components, args&: MapType, args&: MapModifiers, args&: IsImplicit, args&: Mapper, args&: VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, MotionModifiers: {}, Components, CombinedInfo&: CurComponentListInfo,
          StructBaseCombinedInfo, PartialStruct, AttachInfo, IsFirstComponentList: AddTargetParamFlag,
          IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
          /*ForDeviceAddr=*/false, BaseDecl: VD, MapExpr: VarRef, OverlappedElements: OverlappedComponents);
      AddTargetParamFlag = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(args&: Components, args&: MapType, args&: MapModifiers, args&: IsImplicit, args&: Mapper, args&: VarRef) =
          L;
      // Lists already handled in the loop above are skipped.
      auto It = OverlappedData.find(Val: &L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(
            MapType, MapModifiers, MotionModifiers: {}, Components, CombinedInfo&: CurComponentListInfo,
            StructBaseCombinedInfo, PartialStruct, AttachInfo,
            IsFirstComponentList: AddTargetParamFlag, IsImplicit, /*GenerateAllInfoForClauses*/ false,
            Mapper, /*ForDeviceAddr=*/false, BaseDecl: VD, MapExpr: VarRef,
            /*OverlappedElements*/ {});
      // Cleared on every iteration, including for skipped lists.
      AddTargetParamFlag = false;
    }
  }
10199
10200 /// Check if a variable should be treated as firstprivate due to explicit
10201 /// firstprivate clause or defaultmap(firstprivate:...).
10202 bool isEffectivelyFirstprivate(const VarDecl *VD, QualType Type) const {
10203 // Check explicit firstprivate clauses (not implicit from defaultmap)
10204 auto I = FirstPrivateDecls.find(Val: VD);
10205 if (I != FirstPrivateDecls.end() && !I->getSecond())
10206 return true; // Explicit firstprivate only
10207
10208 // Check defaultmap(firstprivate:scalar) for scalar types
10209 if (DefaultmapFirstprivateKinds.count(V: OMPC_DEFAULTMAP_scalar)) {
10210 if (Type->isScalarType())
10211 return true;
10212 }
10213
10214 // Check defaultmap(firstprivate:pointer) for pointer types
10215 if (DefaultmapFirstprivateKinds.count(V: OMPC_DEFAULTMAP_pointer)) {
10216 if (Type->isAnyPointerType())
10217 return true;
10218 }
10219
10220 // Check defaultmap(firstprivate:aggregate) for aggregate types
10221 if (DefaultmapFirstprivateKinds.count(V: OMPC_DEFAULTMAP_aggregate)) {
10222 if (Type->isAggregateType())
10223 return true;
10224 }
10225
10226 // Check defaultmap(firstprivate:all) for all types
10227 return DefaultmapFirstprivateKinds.count(V: OMPC_DEFAULTMAP_all);
10228 }
10229
10230 /// Generate the default map information for a given capture \a CI,
10231 /// record field declaration \a RI and captured value \a CV.
10232 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
10233 const FieldDecl &RI, llvm::Value *CV,
10234 MapCombinedInfoTy &CombinedInfo) const {
10235 bool IsImplicit = true;
10236 // Do the default mapping.
10237 if (CI.capturesThis()) {
10238 CombinedInfo.Exprs.push_back(Elt: nullptr);
10239 CombinedInfo.BasePointers.push_back(Elt: CV);
10240 CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
10241 CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
10242 CombinedInfo.Pointers.push_back(Elt: CV);
10243 const auto *PtrTy = cast<PointerType>(Val: RI.getType().getTypePtr());
10244 CombinedInfo.Sizes.push_back(
10245 Elt: CGF.Builder.CreateIntCast(V: CGF.getTypeSize(Ty: PtrTy->getPointeeType()),
10246 DestTy: CGF.Int64Ty, /*isSigned=*/true));
10247 // Default map type.
10248 CombinedInfo.Types.push_back(Elt: OpenMPOffloadMappingFlags::OMP_MAP_TO |
10249 OpenMPOffloadMappingFlags::OMP_MAP_FROM);
10250 } else if (CI.capturesVariableByCopy()) {
10251 const VarDecl *VD = CI.getCapturedVar();
10252 CombinedInfo.Exprs.push_back(Elt: VD->getCanonicalDecl());
10253 CombinedInfo.BasePointers.push_back(Elt: CV);
10254 CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
10255 CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
10256 CombinedInfo.Pointers.push_back(Elt: CV);
10257 bool IsFirstprivate =
10258 isEffectivelyFirstprivate(VD, Type: RI.getType().getNonReferenceType());
10259
10260 if (!RI.getType()->isAnyPointerType()) {
10261 // We have to signal to the runtime captures passed by value that are
10262 // not pointers.
10263 CombinedInfo.Types.push_back(
10264 Elt: OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
10265 CombinedInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
10266 V: CGF.getTypeSize(Ty: RI.getType()), DestTy: CGF.Int64Ty, /*isSigned=*/true));
10267 } else if (IsFirstprivate) {
10268 // Firstprivate pointers should be passed by value (as literals)
10269 // without performing a present table lookup at runtime.
10270 CombinedInfo.Types.push_back(
10271 Elt: OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
10272 // Use zero size for pointer literals (just passing the pointer value)
10273 CombinedInfo.Sizes.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.Int64Ty));
10274 } else {
10275 // Pointers are implicitly mapped with a zero size and no flags
10276 // (other than first map that is added for all implicit maps).
10277 CombinedInfo.Types.push_back(Elt: OpenMPOffloadMappingFlags::OMP_MAP_NONE);
10278 CombinedInfo.Sizes.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.Int64Ty));
10279 }
10280 auto I = FirstPrivateDecls.find(Val: VD);
10281 if (I != FirstPrivateDecls.end())
10282 IsImplicit = I->getSecond();
10283 } else {
10284 assert(CI.capturesVariable() && "Expected captured reference.");
10285 const auto *PtrTy = cast<ReferenceType>(Val: RI.getType().getTypePtr());
10286 QualType ElementType = PtrTy->getPointeeType();
10287 const VarDecl *VD = CI.getCapturedVar();
10288 bool IsFirstprivate = isEffectivelyFirstprivate(VD, Type: ElementType);
10289 CombinedInfo.Exprs.push_back(Elt: VD->getCanonicalDecl());
10290 CombinedInfo.BasePointers.push_back(Elt: CV);
10291 CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
10292 CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
10293
10294 // For firstprivate pointers, pass by value instead of dereferencing
10295 if (IsFirstprivate && ElementType->isAnyPointerType()) {
10296 // Treat as a literal value (pass the pointer value itself)
10297 CombinedInfo.Pointers.push_back(Elt: CV);
10298 // Use zero size for pointer literals
10299 CombinedInfo.Sizes.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.Int64Ty));
10300 CombinedInfo.Types.push_back(
10301 Elt: OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
10302 } else {
10303 CombinedInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
10304 V: CGF.getTypeSize(Ty: ElementType), DestTy: CGF.Int64Ty, /*isSigned=*/true));
10305 // The default map type for a scalar/complex type is 'to' because by
10306 // default the value doesn't have to be retrieved. For an aggregate
10307 // type, the default is 'tofrom'.
10308 CombinedInfo.Types.push_back(Elt: getMapModifiersForPrivateClauses(Cap: CI));
10309 CombinedInfo.Pointers.push_back(Elt: CV);
10310 }
10311 auto I = FirstPrivateDecls.find(Val: VD);
10312 if (I != FirstPrivateDecls.end())
10313 IsImplicit = I->getSecond();
10314 }
10315 // Every default map produces a single argument which is a target parameter.
10316 CombinedInfo.Types.back() |=
10317 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
10318
10319 // Add flag stating this is an implicit map.
10320 if (IsImplicit)
10321 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
10322
10323 // No user-defined mapper for default mapping.
10324 CombinedInfo.Mappers.push_back(Elt: nullptr);
10325 }
10326};
10327} // anonymous namespace
10328
10329// Try to extract the base declaration from a `this->x` expression if possible.
10330static ValueDecl *getDeclFromThisExpr(const Expr *E) {
10331 if (!E)
10332 return nullptr;
10333
10334 if (const auto *OASE = dyn_cast<ArraySectionExpr>(Val: E->IgnoreParenCasts()))
10335 if (const MemberExpr *ME =
10336 dyn_cast<MemberExpr>(Val: OASE->getBase()->IgnoreParenImpCasts()))
10337 return ME->getMemberDecl();
10338 return nullptr;
10339}
10340
10341/// Emit a string constant containing the names of the values mapped to the
10342/// offloading runtime library.
10343static llvm::Constant *
10344emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
10345 MappableExprsHandler::MappingExprInfo &MapExprs) {
10346
10347 uint32_t SrcLocStrSize;
10348 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
10349 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
10350
10351 SourceLocation Loc;
10352 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
10353 if (const ValueDecl *VD = getDeclFromThisExpr(E: MapExprs.getMapExpr()))
10354 Loc = VD->getLocation();
10355 else
10356 Loc = MapExprs.getMapExpr()->getExprLoc();
10357 } else {
10358 Loc = MapExprs.getMapDecl()->getLocation();
10359 }
10360
10361 std::string ExprName;
10362 if (MapExprs.getMapExpr()) {
10363 PrintingPolicy P(CGF.getContext().getLangOpts());
10364 llvm::raw_string_ostream OS(ExprName);
10365 MapExprs.getMapExpr()->printPretty(OS, Helper: nullptr, Policy: P);
10366 } else {
10367 ExprName = MapExprs.getMapDecl()->getNameAsString();
10368 }
10369
10370 std::string FileName;
10371 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
10372 if (auto *DbgInfo = CGF.getDebugInfo())
10373 FileName = DbgInfo->remapDIPath(PLoc.getFilename());
10374 else
10375 FileName = PLoc.getFilename();
10376 return OMPBuilder.getOrCreateSrcLocStr(FunctionName: FileName, FileName: ExprName, Line: PLoc.getLine(),
10377 Column: PLoc.getColumn(), SrcLocStrSize);
10378}
10379/// Emit the arrays used to pass the captures and map information to the
10380/// offloading runtime library. If there is no map or capture information,
10381/// return nullptr by reference.
10382static void emitOffloadingArraysAndArgs(
10383 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
10384 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
10385 bool IsNonContiguous = false, bool ForEndCall = false) {
10386 CodeGenModule &CGM = CGF.CGM;
10387
10388 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10389 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
10390 CGF.AllocaInsertPt->getIterator());
10391 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
10392 CGF.Builder.GetInsertPoint());
10393
10394 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
10395 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
10396 Info.CaptureDeviceAddrMap.try_emplace(Key: DevVD, Args&: NewDecl);
10397 }
10398 };
10399
10400 auto CustomMapperCB = [&](unsigned int I) {
10401 llvm::Function *MFunc = nullptr;
10402 if (CombinedInfo.Mappers[I]) {
10403 Info.HasMapper = true;
10404 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
10405 D: cast<OMPDeclareMapperDecl>(Val: CombinedInfo.Mappers[I]));
10406 }
10407 return MFunc;
10408 };
10409 cantFail(Err: OMPBuilder.emitOffloadingArraysAndArgs(
10410 AllocaIP, CodeGenIP, Info, RTArgs&: Info.RTArgs, CombinedInfo, CustomMapperCB,
10411 IsNonContiguous, ForEndCall, DeviceAddrCB));
10412}
10413
10414/// Check for inner distribute directive.
10415static const OMPExecutableDirective *
10416getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
10417 const auto *CS = D.getInnermostCapturedStmt();
10418 const auto *Body =
10419 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
10420 const Stmt *ChildStmt =
10421 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
10422
10423 if (const auto *NestedDir =
10424 dyn_cast_or_null<OMPExecutableDirective>(Val: ChildStmt)) {
10425 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
10426 switch (D.getDirectiveKind()) {
10427 case OMPD_target:
10428 // For now, treat 'target' with nested 'teams loop' as if it's
10429 // distributed (target teams distribute).
10430 if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
10431 return NestedDir;
10432 if (DKind == OMPD_teams) {
10433 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
10434 /*IgnoreCaptured=*/true);
10435 if (!Body)
10436 return nullptr;
10437 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
10438 if (const auto *NND =
10439 dyn_cast_or_null<OMPExecutableDirective>(Val: ChildStmt)) {
10440 DKind = NND->getDirectiveKind();
10441 if (isOpenMPDistributeDirective(DKind))
10442 return NND;
10443 }
10444 }
10445 return nullptr;
10446 case OMPD_target_teams:
10447 if (isOpenMPDistributeDirective(DKind))
10448 return NestedDir;
10449 return nullptr;
10450 case OMPD_target_parallel:
10451 case OMPD_target_simd:
10452 case OMPD_target_parallel_for:
10453 case OMPD_target_parallel_for_simd:
10454 return nullptr;
10455 case OMPD_target_teams_distribute:
10456 case OMPD_target_teams_distribute_simd:
10457 case OMPD_target_teams_distribute_parallel_for:
10458 case OMPD_target_teams_distribute_parallel_for_simd:
10459 case OMPD_parallel:
10460 case OMPD_for:
10461 case OMPD_parallel_for:
10462 case OMPD_parallel_master:
10463 case OMPD_parallel_sections:
10464 case OMPD_for_simd:
10465 case OMPD_parallel_for_simd:
10466 case OMPD_cancel:
10467 case OMPD_cancellation_point:
10468 case OMPD_ordered:
10469 case OMPD_threadprivate:
10470 case OMPD_allocate:
10471 case OMPD_task:
10472 case OMPD_simd:
10473 case OMPD_tile:
10474 case OMPD_unroll:
10475 case OMPD_sections:
10476 case OMPD_section:
10477 case OMPD_single:
10478 case OMPD_master:
10479 case OMPD_critical:
10480 case OMPD_taskyield:
10481 case OMPD_barrier:
10482 case OMPD_taskwait:
10483 case OMPD_taskgroup:
10484 case OMPD_atomic:
10485 case OMPD_flush:
10486 case OMPD_depobj:
10487 case OMPD_scan:
10488 case OMPD_teams:
10489 case OMPD_target_data:
10490 case OMPD_target_exit_data:
10491 case OMPD_target_enter_data:
10492 case OMPD_distribute:
10493 case OMPD_distribute_simd:
10494 case OMPD_distribute_parallel_for:
10495 case OMPD_distribute_parallel_for_simd:
10496 case OMPD_teams_distribute:
10497 case OMPD_teams_distribute_simd:
10498 case OMPD_teams_distribute_parallel_for:
10499 case OMPD_teams_distribute_parallel_for_simd:
10500 case OMPD_target_update:
10501 case OMPD_declare_simd:
10502 case OMPD_declare_variant:
10503 case OMPD_begin_declare_variant:
10504 case OMPD_end_declare_variant:
10505 case OMPD_declare_target:
10506 case OMPD_end_declare_target:
10507 case OMPD_declare_reduction:
10508 case OMPD_declare_mapper:
10509 case OMPD_taskloop:
10510 case OMPD_taskloop_simd:
10511 case OMPD_master_taskloop:
10512 case OMPD_master_taskloop_simd:
10513 case OMPD_parallel_master_taskloop:
10514 case OMPD_parallel_master_taskloop_simd:
10515 case OMPD_requires:
10516 case OMPD_metadirective:
10517 case OMPD_unknown:
10518 default:
10519 llvm_unreachable("Unexpected directive.");
10520 }
10521 }
10522
10523 return nullptr;
10524}
10525
10526/// Emit the user-defined mapper function. The code generation follows the
10527/// pattern in the example below.
10528/// \code
10529/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
10530/// void *base, void *begin,
10531/// int64_t size, int64_t type,
10532/// void *name = nullptr) {
10533/// // Allocate space for an array section first.
10534/// if ((size > 1 || (base != begin)) && !maptype.IsDelete)
10535/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
10536/// size*sizeof(Ty), clearToFromMember(type));
10537/// // Map members.
10538/// for (unsigned i = 0; i < size; i++) {
10539/// // For each component specified by this mapper:
10540/// for (auto c : begin[i]->all_components) {
10541/// if (c.hasMapper())
10542/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
10543/// c.arg_type, c.arg_name);
10544/// else
10545/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
10546/// c.arg_begin, c.arg_size, c.arg_type,
10547/// c.arg_name);
10548/// }
10549/// }
10550/// // Delete the array section.
10551/// if (size > 1 && maptype.IsDelete)
10552/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
10553/// size*sizeof(Ty), clearToFromMember(type));
10554/// }
10555/// \endcode
10556void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
10557 CodeGenFunction *CGF) {
10558 if (UDMMap.count(Val: D) > 0)
10559 return;
10560 ASTContext &C = CGM.getContext();
10561 QualType Ty = D->getType();
10562 auto *MapperVarDecl =
10563 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D->getMapperVarRef())->getDecl());
10564 CharUnits ElementSize = C.getTypeSizeInChars(T: Ty);
10565 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(T: Ty);
10566
10567 CodeGenFunction MapperCGF(CGM);
10568 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10569 auto PrivatizeAndGenMapInfoCB =
10570 [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, llvm::Value *PtrPHI,
10571 llvm::Value *BeginArg) -> llvm::OpenMPIRBuilder::MapInfosTy & {
10572 MapperCGF.Builder.restoreIP(IP: CodeGenIP);
10573
10574 // Privatize the declared variable of mapper to be the current array
10575 // element.
10576 Address PtrCurrent(
10577 PtrPHI, ElemTy,
10578 Address(BeginArg, MapperCGF.VoidPtrTy, CGM.getPointerAlign())
10579 .getAlignment()
10580 .alignmentOfArrayElement(elementSize: ElementSize));
10581 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
10582 Scope.addPrivate(LocalVD: MapperVarDecl, Addr: PtrCurrent);
10583 (void)Scope.Privatize();
10584
10585 // Get map clause information.
10586 MappableExprsHandler MEHandler(*D, MapperCGF);
10587 MEHandler.generateAllInfoForMapper(CombinedInfo, OMPBuilder);
10588
10589 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10590 return emitMappingInformation(CGF&: MapperCGF, OMPBuilder, MapExprs&: MapExpr);
10591 };
10592 if (CGM.getCodeGenOpts().getDebugInfo() !=
10593 llvm::codegenoptions::NoDebugInfo) {
10594 CombinedInfo.Names.resize(N: CombinedInfo.Exprs.size());
10595 llvm::transform(Range&: CombinedInfo.Exprs, d_first: CombinedInfo.Names.begin(),
10596 F: FillInfoMap);
10597 }
10598
10599 return CombinedInfo;
10600 };
10601
10602 auto CustomMapperCB = [&](unsigned I) {
10603 llvm::Function *MapperFunc = nullptr;
10604 if (CombinedInfo.Mappers[I]) {
10605 // Call the corresponding mapper function.
10606 MapperFunc = getOrCreateUserDefinedMapperFunc(
10607 D: cast<OMPDeclareMapperDecl>(Val: CombinedInfo.Mappers[I]));
10608 assert(MapperFunc && "Expect a valid mapper function is available.");
10609 }
10610 return MapperFunc;
10611 };
10612
10613 SmallString<64> TyStr;
10614 llvm::raw_svector_ostream Out(TyStr);
10615 CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(T: Ty, Out);
10616 std::string Name = getName(Parts: {"omp_mapper", TyStr, D->getName()});
10617
10618 llvm::Function *NewFn = cantFail(ValOrErr: OMPBuilder.emitUserDefinedMapper(
10619 PrivAndGenMapInfoCB: PrivatizeAndGenMapInfoCB, ElemTy, FuncName: Name, CustomMapperCB));
10620 UDMMap.try_emplace(Key: D, Args&: NewFn);
10621 if (CGF)
10622 FunctionUDMMap[CGF->CurFn].push_back(Elt: D);
10623}
10624
10625llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10626 const OMPDeclareMapperDecl *D) {
10627 auto I = UDMMap.find(Val: D);
10628 if (I != UDMMap.end())
10629 return I->second;
10630 emitUserDefinedMapper(D);
10631 return UDMMap.lookup(Val: D);
10632}
10633
10634llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
10635 CodeGenFunction &CGF, const OMPExecutableDirective &D,
10636 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10637 const OMPLoopDirective &D)>
10638 SizeEmitter) {
10639 OpenMPDirectiveKind Kind = D.getDirectiveKind();
10640 const OMPExecutableDirective *TD = &D;
10641 // Get nested teams distribute kind directive, if any. For now, treat
10642 // 'target_teams_loop' as if it's really a target_teams_distribute.
10643 if ((!isOpenMPDistributeDirective(DKind: Kind) || !isOpenMPTeamsDirective(DKind: Kind)) &&
10644 Kind != OMPD_target_teams_loop)
10645 TD = getNestedDistributeDirective(Ctx&: CGM.getContext(), D);
10646 if (!TD)
10647 return llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: 0);
10648
10649 const auto *LD = cast<OMPLoopDirective>(Val: TD);
10650 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
10651 return NumIterations;
10652 return llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: 0);
10653}
10654
10655static void
10656emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10657 const OMPExecutableDirective &D,
10658 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10659 bool RequiresOuterTask, const CapturedStmt &CS,
10660 bool OffloadingMandatory, CodeGenFunction &CGF) {
10661 if (OffloadingMandatory) {
10662 CGF.Builder.CreateUnreachable();
10663 } else {
10664 if (RequiresOuterTask) {
10665 CapturedVars.clear();
10666 CGF.GenerateOpenMPCapturedVars(S: CS, CapturedVars);
10667 }
10668 llvm::SmallVector<llvm::Value *, 16> Args(CapturedVars.begin(),
10669 CapturedVars.end());
10670 Args.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.Builder.getPtrTy()));
10671 OMPRuntime->emitOutlinedFunctionCall(CGF, Loc: D.getBeginLoc(), OutlinedFn,
10672 Args);
10673 }
10674}
10675
10676static llvm::Value *emitDeviceID(
10677 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10678 CodeGenFunction &CGF) {
10679 // Emit device ID if any.
10680 llvm::Value *DeviceID;
10681 if (Device.getPointer()) {
10682 assert((Device.getInt() == OMPC_DEVICE_unknown ||
10683 Device.getInt() == OMPC_DEVICE_device_num) &&
10684 "Expected device_num modifier.");
10685 llvm::Value *DevVal = CGF.EmitScalarExpr(E: Device.getPointer());
10686 DeviceID =
10687 CGF.Builder.CreateIntCast(V: DevVal, DestTy: CGF.Int64Ty, /*isSigned=*/true);
10688 } else {
10689 DeviceID = CGF.Builder.getInt64(C: OMP_DEVICEID_UNDEF);
10690 }
10691 return DeviceID;
10692}
10693
10694static std::pair<llvm::Value *, OMPDynGroupprivateFallbackType>
10695emitDynCGroupMem(const OMPExecutableDirective &D, CodeGenFunction &CGF) {
10696 llvm::Value *DynGP = CGF.Builder.getInt32(C: 0);
10697 auto DynGPFallback = OMPDynGroupprivateFallbackType::Abort;
10698
10699 if (auto *DynGPClause = D.getSingleClause<OMPDynGroupprivateClause>()) {
10700 CodeGenFunction::RunCleanupsScope DynGPScope(CGF);
10701 llvm::Value *DynGPVal =
10702 CGF.EmitScalarExpr(E: DynGPClause->getSize(), /*IgnoreResultAssign=*/true);
10703 DynGP = CGF.Builder.CreateIntCast(V: DynGPVal, DestTy: CGF.Int32Ty,
10704 /*isSigned=*/false);
10705 auto FallbackModifier = DynGPClause->getDynGroupprivateFallbackModifier();
10706 switch (FallbackModifier) {
10707 case OMPC_DYN_GROUPPRIVATE_FALLBACK_abort:
10708 DynGPFallback = OMPDynGroupprivateFallbackType::Abort;
10709 break;
10710 case OMPC_DYN_GROUPPRIVATE_FALLBACK_null:
10711 DynGPFallback = OMPDynGroupprivateFallbackType::Null;
10712 break;
10713 case OMPC_DYN_GROUPPRIVATE_FALLBACK_default_mem:
10714 case OMPC_DYN_GROUPPRIVATE_FALLBACK_unknown:
10715 // This is the default for dyn_groupprivate.
10716 DynGPFallback = OMPDynGroupprivateFallbackType::DefaultMem;
10717 break;
10718 default:
10719 llvm_unreachable("Unknown fallback modifier for OpenMP dyn_groupprivate");
10720 }
10721 } else if (auto *OMPXDynCGClause =
10722 D.getSingleClause<OMPXDynCGroupMemClause>()) {
10723 CodeGenFunction::RunCleanupsScope DynCGMemScope(CGF);
10724 llvm::Value *DynCGMemVal = CGF.EmitScalarExpr(E: OMPXDynCGClause->getSize(),
10725 /*IgnoreResultAssign=*/true);
10726 DynGP = CGF.Builder.CreateIntCast(V: DynCGMemVal, DestTy: CGF.Int32Ty,
10727 /*isSigned=*/false);
10728 }
10729 return {DynGP, DynGPFallback};
10730}
10731
10732static void genMapInfoForCaptures(
10733 MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
10734 const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10735 llvm::OpenMPIRBuilder &OMPBuilder,
10736 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
10737 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
10738
10739 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10740 auto RI = CS.getCapturedRecordDecl()->field_begin();
10741 auto *CV = CapturedVars.begin();
10742 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10743 CE = CS.capture_end();
10744 CI != CE; ++CI, ++RI, ++CV) {
10745 MappableExprsHandler::MapCombinedInfoTy CurInfo;
10746
10747 // VLA sizes are passed to the outlined region by copy and do not have map
10748 // information associated.
10749 if (CI->capturesVariableArrayType()) {
10750 CurInfo.Exprs.push_back(Elt: nullptr);
10751 CurInfo.BasePointers.push_back(Elt: *CV);
10752 CurInfo.DevicePtrDecls.push_back(Elt: nullptr);
10753 CurInfo.DevicePointers.push_back(
10754 Elt: MappableExprsHandler::DeviceInfoTy::None);
10755 CurInfo.Pointers.push_back(Elt: *CV);
10756 CurInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
10757 V: CGF.getTypeSize(Ty: RI->getType()), DestTy: CGF.Int64Ty, /*isSigned=*/true));
10758 // Copy to the device as an argument. No need to retrieve it.
10759 CurInfo.Types.push_back(Elt: OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
10760 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
10761 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
10762 CurInfo.Mappers.push_back(Elt: nullptr);
10763 } else {
10764 const ValueDecl *CapturedVD =
10765 CI->capturesThis() ? nullptr
10766 : CI->getCapturedVar()->getCanonicalDecl();
10767 bool HasEntryWithCVAsAttachPtr = false;
10768 if (CapturedVD)
10769 HasEntryWithCVAsAttachPtr =
10770 MEHandler.hasAttachEntryForCapturedVar(VD: CapturedVD);
10771
10772 // Populate component lists for the captured variable from clauses.
10773 MappableExprsHandler::MapDataArrayTy DeclComponentLists;
10774 SmallVector<
10775 SmallVector<OMPClauseMappableExprCommon::MappableComponent, 8>, 4>
10776 StorageForImplicitlyAddedComponentLists;
10777 MEHandler.populateComponentListsForNonLambdaCaptureFromClauses(
10778 VD: CapturedVD, DeclComponentLists,
10779 StorageForImplicitlyAddedComponentLists);
10780
10781 // OpenMP 6.0, 15.8, target construct, restrictions:
10782 // * A list item in a map clause that is specified on a target construct
10783 // must have a base variable or base pointer.
10784 //
10785 // Map clauses on a target construct must either have a base pointer, or a
10786 // base-variable. So, if we don't have a base-pointer, that means that it
10787 // must have a base-variable, i.e. we have a map like `map(s)`, `map(s.x)`
10788 // etc. In such cases, we do not need to handle default map generation
10789 // for `s`.
10790 bool HasEntryWithoutAttachPtr =
10791 llvm::any_of(Range&: DeclComponentLists, P: [&](const auto &MapData) {
10792 OMPClauseMappableExprCommon::MappableExprComponentListRef
10793 Components = std::get<0>(MapData);
10794 return !MEHandler.getAttachPtrExpr(Components);
10795 });
10796
10797 // Generate default map info first if there's no direct map with CV as
10798 // the base-variable, or attach pointer.
10799 if (DeclComponentLists.empty() ||
10800 (!HasEntryWithCVAsAttachPtr && !HasEntryWithoutAttachPtr))
10801 MEHandler.generateDefaultMapInfo(CI: *CI, RI: **RI, CV: *CV, CombinedInfo&: CurInfo);
10802
10803 // If we have any information in the map clause, we use it, otherwise we
10804 // just do a default mapping.
10805 MEHandler.generateInfoForCaptureFromClauseInfo(
10806 DeclComponentListsFromClauses: DeclComponentLists, Cap: CI, Arg: *CV, CurCaptureVarInfo&: CurInfo, OMPBuilder,
10807 /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size());
10808
10809 if (!CI->capturesThis())
10810 MappedVarSet.insert(V: CI->getCapturedVar());
10811 else
10812 MappedVarSet.insert(V: nullptr);
10813
10814 // Generate correct mapping for variables captured by reference in
10815 // lambdas.
10816 if (CI->capturesVariable())
10817 MEHandler.generateInfoForLambdaCaptures(VD: CI->getCapturedVar(), Arg: *CV,
10818 CombinedInfo&: CurInfo, LambdaPointers);
10819 }
10820 // We expect to have at least an element of information for this capture.
10821 assert(!CurInfo.BasePointers.empty() &&
10822 "Non-existing map pointer for capture!");
10823 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10824 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10825 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10826 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10827 "Inconsistent map information sizes!");
10828
10829 // We need to append the results of this capture to what we already have.
10830 CombinedInfo.append(CurInfo);
10831 }
10832 // Adjust MEMBER_OF flags for the lambdas captures.
10833 MEHandler.adjustMemberOfForLambdaCaptures(
10834 OMPBuilder, LambdaPointers, BasePointers&: CombinedInfo.BasePointers,
10835 Pointers&: CombinedInfo.Pointers, Types&: CombinedInfo.Types);
10836}
10837static void
10838genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
10839 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
10840 llvm::OpenMPIRBuilder &OMPBuilder,
10841 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
10842 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
10843
10844 CodeGenModule &CGM = CGF.CGM;
10845 // Map any list items in a map clause that were not captures because they
10846 // weren't referenced within the construct.
10847 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkipVarSet: SkippedVarSet);
10848
10849 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10850 return emitMappingInformation(CGF, OMPBuilder, MapExprs&: MapExpr);
10851 };
10852 if (CGM.getCodeGenOpts().getDebugInfo() !=
10853 llvm::codegenoptions::NoDebugInfo) {
10854 CombinedInfo.Names.resize(N: CombinedInfo.Exprs.size());
10855 llvm::transform(Range&: CombinedInfo.Exprs, d_first: CombinedInfo.Names.begin(),
10856 F: FillInfoMap);
10857 }
10858}
10859
10860static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
10861 const CapturedStmt &CS,
10862 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10863 llvm::OpenMPIRBuilder &OMPBuilder,
10864 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
10865 // Get mappable expression information.
10866 MappableExprsHandler MEHandler(D, CGF);
10867 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10868
10869 genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
10870 MappedVarSet, CombinedInfo);
10871 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, SkippedVarSet: MappedVarSet);
10872}
10873
10874template <typename ClauseTy>
10875static void
10876emitClauseForBareTargetDirective(CodeGenFunction &CGF,
10877 const OMPExecutableDirective &D,
10878 llvm::SmallVectorImpl<llvm::Value *> &Values) {
10879 const auto *C = D.getSingleClause<ClauseTy>();
10880 assert(!C->varlist_empty() &&
10881 "ompx_bare requires explicit num_teams and thread_limit");
10882 CodeGenFunction::RunCleanupsScope Scope(CGF);
10883 for (auto *E : C->varlist()) {
10884 llvm::Value *V = CGF.EmitScalarExpr(E);
10885 Values.push_back(
10886 Elt: CGF.Builder.CreateIntCast(V, DestTy: CGF.Int32Ty, /*isSigned=*/true));
10887 }
10888}
10889
10890static void emitTargetCallKernelLaunch(
10891 CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10892 const OMPExecutableDirective &D,
10893 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
10894 const CapturedStmt &CS, bool OffloadingMandatory,
10895 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10896 llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
10897 llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
10898 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10899 const OMPLoopDirective &D)>
10900 SizeEmitter,
10901 CodeGenFunction &CGF, CodeGenModule &CGM) {
10902 llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
10903
10904 // Fill up the arrays with all the captured variables.
10905 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10906 CGOpenMPRuntime::TargetDataInfo Info;
10907 genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
10908
10909 // Append a null entry for the implicit dyn_ptr argument.
10910 using OpenMPOffloadMappingFlags = llvm::omp::OpenMPOffloadMappingFlags;
10911 auto *NullPtr = llvm::Constant::getNullValue(Ty: CGF.Builder.getPtrTy());
10912 CombinedInfo.BasePointers.push_back(Elt: NullPtr);
10913 CombinedInfo.Pointers.push_back(Elt: NullPtr);
10914 CombinedInfo.DevicePointers.push_back(
10915 Elt: llvm::OpenMPIRBuilder::DeviceInfoTy::None);
10916 CombinedInfo.Sizes.push_back(Elt: CGF.Builder.getInt64(C: 0));
10917 CombinedInfo.Types.push_back(Elt: OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
10918 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
10919 if (!CombinedInfo.Names.empty())
10920 CombinedInfo.Names.push_back(Elt: NullPtr);
10921 CombinedInfo.Exprs.push_back(Elt: nullptr);
10922 CombinedInfo.Mappers.push_back(Elt: nullptr);
10923 CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
10924
10925 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
10926 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
10927
10928 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10929 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10930 CGF.VoidPtrTy, CGM.getPointerAlign());
10931 InputInfo.PointersArray =
10932 Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10933 InputInfo.SizesArray =
10934 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10935 InputInfo.MappersArray =
10936 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10937 MapTypesArray = Info.RTArgs.MapTypesArray;
10938 MapNamesArray = Info.RTArgs.MapNamesArray;
10939
10940 auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
10941 RequiresOuterTask, &CS, OffloadingMandatory, Device,
10942 OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
10943 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
10944 bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
10945
10946 if (IsReverseOffloading) {
10947 // Reverse offloading is not supported, so just execute on the host.
10948 // FIXME: This fallback solution is incorrect since it ignores the
10949 // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
10950 // assert here and ensure SEMA emits an error.
10951 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10952 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10953 return;
10954 }
10955
10956 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
10957 unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
10958
10959 llvm::Value *BasePointersArray =
10960 InputInfo.BasePointersArray.emitRawPointer(CGF);
10961 llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
10962 llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
10963 llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
10964
10965 auto &&EmitTargetCallFallbackCB =
10966 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
10967 OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
10968 -> llvm::OpenMPIRBuilder::InsertPointTy {
10969 CGF.Builder.restoreIP(IP);
10970 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10971 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10972 return CGF.Builder.saveIP();
10973 };
10974
10975 bool IsBare = D.hasClausesOfKind<OMPXBareClause>();
10976 SmallVector<llvm::Value *, 3> NumTeams;
10977 SmallVector<llvm::Value *, 3> NumThreads;
10978 if (IsBare) {
10979 emitClauseForBareTargetDirective<OMPNumTeamsClause>(CGF, D, Values&: NumTeams);
10980 emitClauseForBareTargetDirective<OMPThreadLimitClause>(CGF, D,
10981 Values&: NumThreads);
10982 } else {
10983 NumTeams.push_back(Elt: OMPRuntime->emitNumTeamsForTargetDirective(CGF, D));
10984 NumThreads.push_back(
10985 Elt: OMPRuntime->emitNumThreadsForTargetDirective(CGF, D));
10986 }
10987
10988 llvm::Value *DeviceID = emitDeviceID(Device, CGF);
10989 llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, Loc: D.getBeginLoc());
10990 llvm::Value *NumIterations =
10991 OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
10992 auto [DynCGroupMem, DynCGroupMemFallback] = emitDynCGroupMem(D, CGF);
10993 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
10994 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
10995
10996 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
10997 BasePointersArray, PointersArray, SizesArray, MapTypesArray,
10998 nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
10999
11000 llvm::OpenMPIRBuilder::TargetKernelArgs Args(
11001 NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
11002 DynCGroupMem, HasNoWait, /*StrictBlocksAndThreads=*/IsBare,
11003 DynCGroupMemFallback);
11004
11005 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
11006 cantFail(ValOrErr: OMPRuntime->getOMPBuilder().emitKernelLaunch(
11007 Loc: CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
11008 RTLoc, AllocaIP));
11009 CGF.Builder.restoreIP(IP: AfterIP);
11010 };
11011
11012 if (RequiresOuterTask)
11013 CGF.EmitOMPTargetTaskBasedDirective(S: D, BodyGen: ThenGen, InputInfo);
11014 else
11015 OMPRuntime->emitInlinedDirective(CGF, InnerKind: D.getDirectiveKind(), CodeGen: ThenGen);
11016}
11017
11018static void
11019emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
11020 const OMPExecutableDirective &D,
11021 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
11022 bool RequiresOuterTask, const CapturedStmt &CS,
11023 bool OffloadingMandatory, CodeGenFunction &CGF) {
11024
11025 // Notify that the host version must be executed.
11026 auto &&ElseGen =
11027 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
11028 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
11029 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
11030 RequiresOuterTask, CS, OffloadingMandatory, CGF);
11031 };
11032
11033 if (RequiresOuterTask) {
11034 CodeGenFunction::OMPTargetDataInfo InputInfo;
11035 CGF.EmitOMPTargetTaskBasedDirective(S: D, BodyGen: ElseGen, InputInfo);
11036 } else {
11037 OMPRuntime->emitInlinedDirective(CGF, InnerKind: D.getDirectiveKind(), CodeGen: ElseGen);
11038 }
11039}
11040
/// Emits the call sequence for the target directive \p D.
///
/// The variables captured by the 'target' captured statement are generated
/// first. Afterwards:
///  - if \p OutlinedFnID is non-null and \p IfCond is present, an if/else is
///    emitted with the kernel launch in the 'then' branch and the host
///    fallback in the 'else' branch;
///  - if \p OutlinedFnID is non-null and there is no \p IfCond, only the
///    kernel launch path is emitted;
///  - if \p OutlinedFnID is null, only the host fallback path is emitted.
///
/// \p Device and \p SizeEmitter are forwarded unchanged to
/// emitTargetCallKernelLaunch. Nothing is emitted when \p CGF has no insert
/// point.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  // Offloading is only treated as mandatory when compiling for the host with
  // the OpenMPOffloadMandatory language option set.
  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
                                   CGM.getLangOpts().OpenMPOffloadMandatory;

  // A null outlined function is only tolerated when offloading is mandatory.
  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");

  // An outer task is requested for depend, nowait and in_reduction clauses,
  // and, for OpenMP >= 5.1, for a thread_limit clause on directive kinds
  // accepted by needsTaskBasedThreadLimit.
  const bool RequiresOuterTask =
      D.hasClausesOfKind<OMPDependClause>() ||
      D.hasClausesOfKind<OMPNowaitClause>() ||
      D.hasClausesOfKind<OMPInReductionClause>() ||
      (CGM.getLangOpts().OpenMP >= 51 &&
       needsTaskBasedThreadLimit(DKind: D.getDirectiveKind()) &&
       D.hasClausesOfKind<OMPThreadLimitClause>());
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(RegionKind: OMPD_target);
  // Generate the captured variables inside an inlined region so that both
  // the launch and the fallback path below can share CapturedVars.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(S: CS, CapturedVars);
  };
  emitInlinedDirective(CGF, InnerKind: OMPD_unknown, CodeGen: ArgsCodegen);

  // State handed by reference to emitTargetCallKernelLaunch.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;

  // 'Then' path: launch the kernel on the device.
  auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
                          RequiresOuterTask, &CS, OffloadingMandatory, Device,
                          OutlinedFnID, &InputInfo, &MapTypesArray,
                          &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
                                                       PrePostActionTy &) {
    emitTargetCallKernelLaunch(OMPRuntime: this, OutlinedFn, D, CapturedVars,
                               RequiresOuterTask, CS, OffloadingMandatory,
                               Device, OutlinedFnID, InputInfo, MapTypesArray,
                               MapNamesArray, SizeEmitter, CGF, CGM);
  };

  // 'Else' path: execute the host version.
  auto &&TargetElseGen =
      [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
        emitTargetCallElse(OMPRuntime: this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
                           CS, OffloadingMandatory, CGF);
      };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, Cond: IfCond, ThenGen: TargetThenGen, ElseGen: TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
11109
/// Recursively walks the statement tree rooted at \p S looking for OpenMP
/// target regions and emits the device function for each one that has a
/// registered target region entry.
///
/// \param S          Statement to scan; a null statement is ignored.
/// \param ParentName Name used, together with the directive's begin location,
///                   to build the target region entry info.
///
/// For a target execution directive the walk stops at that directive (its
/// body is not scanned further by this function). For any other executable
/// directive the walk continues into its raw associated statement. Lambda
/// expressions are scanned through their body.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Register vtable from device for target data and target directives.
  // Add this block here since scanForTargetRegionsFunctions ignores
  // target data by checking if S is an executable directive (target).
  if (auto *E = dyn_cast<OMPExecutableDirective>(Val: S);
      E && isOpenMPTargetDataManagementDirective(DKind: E->getDirectiveKind())) {
    // Don't need to check if it's device compile
    // since scanForTargetRegionsFunctions currently only called
    // in device compilation.
    registerVTable(D: *E);
  }

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(Val: S) &&
      isOpenMPTargetExecutionDirective(
          DKind: cast<OMPExecutableDirective>(Val: S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(Val: S);

    llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
        CGM, OMPBuilder, BeginLoc: E.getBeginLoc(), ParentName);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
      return;

    // Dispatch to the device-function emitter matching the directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   S: cast<OMPTargetDirective>(Val: E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetParallelDirective>(Val: E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetTeamsDirective>(Val: E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetTeamsDistributeDirective>(Val: E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetTeamsDistributeSimdDirective>(Val: E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetParallelForDirective>(Val: E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetParallelForSimdDirective>(Val: E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetSimdDirective>(Val: E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          S: cast<OMPTargetTeamsDistributeParallelForDirective>(Val: E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              S: cast<OMPTargetTeamsDistributeParallelForSimdDirective>(Val: E));
      break;
    case OMPD_target_teams_loop:
      CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetTeamsGenericLoopDirective>(Val: E));
      break;
    case OMPD_target_parallel_loop:
      CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetParallelGenericLoopDirective>(Val: E));
      break;
    // Every remaining kind (and the default) is treated as unreachable: this
    // switch is only entered when isOpenMPTargetExecutionDirective accepted
    // the directive kind.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // Non-target executable directive: continue the scan in its associated
  // statement, if it has one.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(Val: S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(S: E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(Val: S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(S: II, ParentName);
}
11276
11277static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
11278 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
11279 OMPDeclareTargetDeclAttr::getDeviceType(VD);
11280 if (!DevTy)
11281 return false;
11282 // Do not emit device_type(nohost) functions for the host.
11283 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
11284 return true;
11285 // Do not emit device_type(host) functions for the device.
11286 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
11287 return true;
11288 return false;
11289}
11290
11291bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
11292 // If emitting code for the host, we do not process FD here. Instead we do
11293 // the normal code generation.
11294 if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
11295 if (const auto *FD = dyn_cast<FunctionDecl>(Val: GD.getDecl()))
11296 if (isAssumedToBeNotEmitted(VD: cast<ValueDecl>(Val: FD),
11297 IsDevice: CGM.getLangOpts().OpenMPIsTargetDevice))
11298 return true;
11299 return false;
11300 }
11301
11302 const ValueDecl *VD = cast<ValueDecl>(Val: GD.getDecl());
11303 // Try to detect target regions in the function.
11304 if (const auto *FD = dyn_cast<FunctionDecl>(Val: VD)) {
11305 StringRef Name = CGM.getMangledName(GD);
11306 scanForTargetRegionsFunctions(S: FD->getBody(), ParentName: Name);
11307 if (isAssumedToBeNotEmitted(VD: cast<ValueDecl>(Val: FD),
11308 IsDevice: CGM.getLangOpts().OpenMPIsTargetDevice))
11309 return true;
11310 }
11311
11312 // Do not emit function if it is not marked as declare target.
11313 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
11314 AlreadyEmittedTargetDecls.count(V: VD) == 0;
11315}
11316
11317bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
11318 if (isAssumedToBeNotEmitted(VD: cast<ValueDecl>(Val: GD.getDecl()),
11319 IsDevice: CGM.getLangOpts().OpenMPIsTargetDevice))
11320 return true;
11321
11322 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
11323 return false;
11324
11325 // Check if there are Ctors/Dtors in this declaration and look for target
11326 // regions in it. We use the complete variant to produce the kernel name
11327 // mangling.
11328 QualType RDTy = cast<VarDecl>(Val: GD.getDecl())->getType();
11329 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
11330 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
11331 StringRef ParentName =
11332 CGM.getMangledName(GD: GlobalDecl(Ctor, Ctor_Complete));
11333 scanForTargetRegionsFunctions(S: Ctor->getBody(), ParentName);
11334 }
11335 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
11336 StringRef ParentName =
11337 CGM.getMangledName(GD: GlobalDecl(Dtor, Dtor_Complete));
11338 scanForTargetRegionsFunctions(S: Dtor->getBody(), ParentName);
11339 }
11340 }
11341
11342 // Do not emit variable if it is not marked as declare target.
11343 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
11344 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
11345 VD: cast<VarDecl>(Val: GD.getDecl()));
11346 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
11347 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
11348 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
11349 HasRequiresUnifiedSharedMemory)) {
11350 DeferredGlobalVariables.insert(V: cast<VarDecl>(Val: GD.getDecl()));
11351 return true;
11352 }
11353 return false;
11354}
11355
11356void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
11357 llvm::Constant *Addr) {
11358 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
11359 !CGM.getLangOpts().OpenMPIsTargetDevice)
11360 return;
11361
11362 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
11363 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
11364
11365 // If this is an 'extern' declaration we defer to the canonical definition and
11366 // do not emit an offloading entry.
11367 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
11368 VD->hasExternalStorage())
11369 return;
11370
11371 // MT_Local variables use direct access with no host-device mapping.
11372 // No offload entry needed — the device global keeps its own initializer.
11373 if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Local)
11374 return;
11375
11376 if (!Res) {
11377 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
11378 // Register non-target variables being emitted in device code (debug info
11379 // may cause this).
11380 StringRef VarName = CGM.getMangledName(GD: VD);
11381 EmittedNonTargetVariables.try_emplace(Key: VarName, Args&: Addr);
11382 }
11383 return;
11384 }
11385
11386 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(GD: VD); };
11387 auto LinkageForVariable = [&VD, this]() {
11388 return CGM.getLLVMLinkageVarDefinition(VD);
11389 };
11390
11391 std::vector<llvm::GlobalVariable *> GeneratedRefs;
11392 OMPBuilder.registerTargetGlobalVariable(
11393 CaptureClause: convertCaptureClause(VD), DeviceClause: convertDeviceClause(VD),
11394 IsDeclaration: VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
11395 IsExternallyVisible: VD->isExternallyVisible(),
11396 EntryInfo: getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
11397 BeginLoc: VD->getCanonicalDecl()->getBeginLoc()),
11398 MangledName: CGM.getMangledName(GD: VD), GeneratedRefs, OpenMPSIMD: CGM.getLangOpts().OpenMPSimd,
11399 TargetTriple: CGM.getLangOpts().OMPTargetTriples, GlobalInitializer: AddrOfGlobal, VariableLinkage: LinkageForVariable,
11400 LlvmPtrTy: CGM.getTypes().ConvertTypeForMem(
11401 T: CGM.getContext().getPointerType(T: VD->getType())),
11402 Addr);
11403
11404 for (auto *ref : GeneratedRefs)
11405 CGM.addCompilerUsedGlobal(GV: ref);
11406}
11407
11408bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
11409 if (isa<FunctionDecl>(Val: GD.getDecl()) ||
11410 isa<OMPDeclareReductionDecl>(Val: GD.getDecl()))
11411 return emitTargetFunctions(GD);
11412
11413 return emitTargetGlobalVariable(GD);
11414}
11415
11416void CGOpenMPRuntime::emitDeferredTargetDecls() const {
11417 for (const VarDecl *VD : DeferredGlobalVariables) {
11418 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
11419 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
11420 if (!Res)
11421 continue;
11422 // MT_Local and MT_To/MT_Enter without USM are always emitted.
11423 if (*Res == OMPDeclareTargetDeclAttr::MT_Local ||
11424 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
11425 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
11426 !HasRequiresUnifiedSharedMemory)) {
11427 CGM.EmitGlobal(D: VD);
11428 } else {
11429 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
11430 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
11431 *Res == OMPDeclareTargetDeclAttr::MT_Enter ||
11432 *Res == OMPDeclareTargetDeclAttr::MT_Local) &&
11433 HasRequiresUnifiedSharedMemory)) &&
11434 "Expected link clause or to clause with unified memory.");
11435 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
11436 }
11437 }
11438}
11439
/// Base implementation of the lambda adjustment hook for target directives.
///
/// This version emits nothing: it only asserts that \p D is a target
/// execution directive. \p CGF is unused here.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
11445
11446void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
11447 for (const OMPClause *Clause : D->clauselists()) {
11448 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
11449 HasRequiresUnifiedSharedMemory = true;
11450 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
11451 } else if (const auto *AC =
11452 dyn_cast<OMPAtomicDefaultMemOrderClause>(Val: Clause)) {
11453 switch (AC->getAtomicDefaultMemOrderKind()) {
11454 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
11455 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
11456 break;
11457 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
11458 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
11459 break;
11460 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
11461 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
11462 break;
11463 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
11464 break;
11465 }
11466 }
11467 }
11468}
11469
/// Returns the current value of RequiresAtomicOrdering, i.e. the ordering
/// last recorded by processRequiresDirective for an atomic_default_mem_order
/// clause (or whatever value the member was initialized with).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
11473
11474bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
11475 LangAS &AS) {
11476 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
11477 return false;
11478 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
11479 switch(A->getAllocatorType()) {
11480 case OMPAllocateDeclAttr::OMPNullMemAlloc:
11481 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11482 // Not supported, fallback to the default mem space.
11483 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11484 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11485 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11486 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11487 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11488 case OMPAllocateDeclAttr::OMPConstMemAlloc:
11489 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11490 AS = LangAS::Default;
11491 return true;
11492 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11493 llvm_unreachable("Expected predefined allocator for the variables with the "
11494 "static storage.");
11495 }
11496 return false;
11497}
11498
/// Returns the HasRequiresUnifiedSharedMemory flag, which
/// processRequiresDirective sets when it sees a unified_shared_memory clause.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
11502
11503CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
11504 CodeGenModule &CGM)
11505 : CGM(CGM) {
11506 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
11507 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
11508 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
11509 }
11510}
11511
11512CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
11513 if (CGM.getLangOpts().OpenMPIsTargetDevice)
11514 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
11515}
11516
11517bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
11518 if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
11519 return true;
11520
11521 const auto *D = cast<FunctionDecl>(Val: GD.getDecl());
11522 // Do not emit function if it is marked as declare target as it was already
11523 // emitted.
11524 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD: D)) {
11525 if (D->hasBody() && AlreadyEmittedTargetDecls.count(V: D) == 0) {
11526 if (auto *F = dyn_cast_or_null<llvm::Function>(
11527 Val: CGM.GetGlobalValue(Ref: CGM.getMangledName(GD))))
11528 return !F->isDeclaration();
11529 return false;
11530 }
11531 return true;
11532 }
11533
11534 return !AlreadyEmittedTargetDecls.insert(V: D).second;
11535}
11536
11537void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11538 const OMPExecutableDirective &D,
11539 SourceLocation Loc,
11540 llvm::Function *OutlinedFn,
11541 ArrayRef<llvm::Value *> CapturedVars) {
11542 if (!CGF.HaveInsertPoint())
11543 return;
11544
11545 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11546 CodeGenFunction::RunCleanupsScope Scope(CGF);
11547
11548 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11549 llvm::Value *Args[] = {
11550 RTLoc,
11551 CGF.Builder.getInt32(C: CapturedVars.size()), // Number of captured vars
11552 OutlinedFn};
11553 llvm::SmallVector<llvm::Value *, 16> RealArgs;
11554 RealArgs.append(in_start: std::begin(arr&: Args), in_end: std::end(arr&: Args));
11555 RealArgs.append(in_start: CapturedVars.begin(), in_end: CapturedVars.end());
11556
11557 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11558 M&: CGM.getModule(), FnID: OMPRTL___kmpc_fork_teams);
11559 CGF.EmitRuntimeCall(callee: RTLFn, args: RealArgs);
11560}
11561
11562void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11563 const Expr *NumTeams,
11564 const Expr *ThreadLimit,
11565 SourceLocation Loc) {
11566 if (!CGF.HaveInsertPoint())
11567 return;
11568
11569 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11570
11571 llvm::Value *NumTeamsVal =
11572 NumTeams
11573 ? CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: NumTeams),
11574 DestTy: CGF.CGM.Int32Ty, /* isSigned = */ true)
11575 : CGF.Builder.getInt32(C: 0);
11576
11577 llvm::Value *ThreadLimitVal =
11578 ThreadLimit
11579 ? CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: ThreadLimit),
11580 DestTy: CGF.CGM.Int32Ty, /* isSigned = */ true)
11581 : CGF.Builder.getInt32(C: 0);
11582
11583 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
11584 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11585 ThreadLimitVal};
11586 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
11587 M&: CGM.getModule(), FnID: OMPRTL___kmpc_push_num_teams),
11588 args: PushNumTeamsArgs);
11589}
11590
11591void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
11592 const Expr *ThreadLimit,
11593 SourceLocation Loc) {
11594 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11595 llvm::Value *ThreadLimitVal =
11596 ThreadLimit
11597 ? CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: ThreadLimit),
11598 DestTy: CGF.CGM.Int32Ty, /* isSigned = */ true)
11599 : CGF.Builder.getInt32(C: 0);
11600
11601 // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
11602 llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
11603 ThreadLimitVal};
11604 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
11605 M&: CGM.getModule(), FnID: OMPRTL___kmpc_set_thread_limit),
11606 args: ThreadLimitArgs);
11607}
11608
/// Emits the 'target data' region for directive \p D through
/// OpenMPIRBuilder::createTargetData.
///
/// \param CGF     Function being emitted; nothing is emitted when it has no
///                insert point.
/// \param D       Directive whose map information is collected.
/// \param IfCond  Optional 'if' clause condition; evaluated as a bool and
///                passed to the builder (null when absent).
/// \param Device  Optional device expression; cast to a signed i64. When
///                absent, OMP_DEVICEID_UNDEF is passed.
/// \param CodeGen Generator for the region body, invoked from the body
///                callback below.
/// \param Info    Target data info passed to the builder; its
///                CaptureDeviceAddrMap and HasMapper fields are updated by
///                the callbacks below.
///
/// On return the builder of \p CGF is positioned at the insertion point
/// returned by createTargetData.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

  llvm::Value *IfCondVal = nullptr;
  if (IfCond)
    IfCondVal = CGF.EvaluateExprAsBool(E: IfCond);

  // Emit device ID if any.
  llvm::Value *DeviceID = nullptr;
  if (Device) {
    DeviceID = CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: Device),
                                         DestTy: CGF.Int64Ty, /*isSigned=*/true);
  } else {
    DeviceID = CGF.Builder.getInt64(C: OMP_DEVICEID_UNDEF);
  }

  // Fill up the arrays with all the mapped variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  // Callback: collects the map information for D at the given insertion
  // point and returns a reference to CombinedInfo. Names are only filled in
  // when debug info is enabled.
  auto GenMapInfoCB =
      [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
    CGF.Builder.restoreIP(IP: CodeGenIP);
    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);

    auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
      return emitMappingInformation(CGF, OMPBuilder, MapExprs&: MapExpr);
    };
    if (CGM.getCodeGenOpts().getDebugInfo() !=
        llvm::codegenoptions::NoDebugInfo) {
      CombinedInfo.Names.resize(N: CombinedInfo.Exprs.size());
      llvm::transform(Range&: CombinedInfo.Exprs, d_first: CombinedInfo.Names.begin(),
                      F: FillInfoMap);
    }

    return CombinedInfo;
  };
  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
  // Callback: emits the region body for the requested variant and returns
  // the insertion point reached afterwards.
  //  - Priv:      body as-is, only if device addresses were captured.
  //  - DupNoPriv: body with NoPrivAction, only if device addresses were
  //               captured.
  //  - NoPriv:    body with NoPrivAction, only if none were captured.
  auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
    CGF.Builder.restoreIP(IP: CodeGenIP);
    switch (BodyGenType) {
    case BodyGenTy::Priv:
      if (!Info.CaptureDeviceAddrMap.empty())
        CodeGen(CGF);
      break;
    case BodyGenTy::DupNoPriv:
      if (!Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    case BodyGenTy::NoPriv:
      if (Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    }
    return InsertPointTy(CGF.Builder.GetInsertBlock(),
                         CGF.Builder.GetInsertPoint());
  };

  // Callback: records NewDecl as the captured device address for the I-th
  // device pointer declaration, when that entry has a declaration.
  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(Key: DevVD, Args&: NewDecl);
    }
  };

  // Callback: returns the user-defined mapper function for the I-th entry,
  // or null when the entry has no mapper. Sets Info.HasMapper when one exists.
  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Function *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          D: cast<OMPDeclareMapperDecl>(Val: CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };

  // Source location for the ident struct
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc: D.getBeginLoc());

  // Allocas go at the function's alloca insertion point; code generation
  // continues at the builder's current position.
  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());
  llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
  // cantFail: an error result from createTargetData is not handled here.
  llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
      cantFail(ValOrErr: OMPBuilder.createTargetData(
          Loc: OmpLoc, AllocaIP, CodeGenIP, /*DeallocBlocks=*/{}, DeviceID,
          IfCond: IfCondVal, Info, GenMapInfoCB, CustomMapperCB,
          /*MapperFunc=*/nullptr, BodyGenCB: BodyCB, DeviceAddrCB, SrcLocInfo: RTLoc));
  CGF.Builder.restoreIP(IP: AfterIP);
}
11712
11713void CGOpenMPRuntime::emitTargetDataStandAloneCall(
11714 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11715 const Expr *Device) {
11716 if (!CGF.HaveInsertPoint())
11717 return;
11718
11719 assert((isa<OMPTargetEnterDataDirective>(D) ||
11720 isa<OMPTargetExitDataDirective>(D) ||
11721 isa<OMPTargetUpdateDirective>(D)) &&
11722 "Expecting either target enter, exit data, or update directives.");
11723
11724 CodeGenFunction::OMPTargetDataInfo InputInfo;
11725 llvm::Value *MapTypesArray = nullptr;
11726 llvm::Value *MapNamesArray = nullptr;
11727 // Generate the code for the opening of the data environment.
11728 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
11729 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
11730 // Emit device ID if any.
11731 llvm::Value *DeviceID = nullptr;
11732 if (Device) {
11733 DeviceID = CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: Device),
11734 DestTy: CGF.Int64Ty, /*isSigned=*/true);
11735 } else {
11736 DeviceID = CGF.Builder.getInt64(C: OMP_DEVICEID_UNDEF);
11737 }
11738
11739 // Emit the number of elements in the offloading arrays.
11740 llvm::Constant *PointerNum =
11741 CGF.Builder.getInt32(C: InputInfo.NumberOfTargetItems);
11742
11743 // Source location for the ident struct
11744 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc: D.getBeginLoc());
11745
11746 SmallVector<llvm::Value *, 13> OffloadingArgs(
11747 {RTLoc, DeviceID, PointerNum,
11748 InputInfo.BasePointersArray.emitRawPointer(CGF),
11749 InputInfo.PointersArray.emitRawPointer(CGF),
11750 InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
11751 InputInfo.MappersArray.emitRawPointer(CGF)});
11752
11753 // Select the right runtime function call for each standalone
11754 // directive.
11755 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
11756 RuntimeFunction RTLFn;
11757 switch (D.getDirectiveKind()) {
11758 case OMPD_target_enter_data:
11759 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
11760 : OMPRTL___tgt_target_data_begin_mapper;
11761 break;
11762 case OMPD_target_exit_data:
11763 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
11764 : OMPRTL___tgt_target_data_end_mapper;
11765 break;
11766 case OMPD_target_update:
11767 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
11768 : OMPRTL___tgt_target_data_update_mapper;
11769 break;
11770 case OMPD_parallel:
11771 case OMPD_for:
11772 case OMPD_parallel_for:
11773 case OMPD_parallel_master:
11774 case OMPD_parallel_sections:
11775 case OMPD_for_simd:
11776 case OMPD_parallel_for_simd:
11777 case OMPD_cancel:
11778 case OMPD_cancellation_point:
11779 case OMPD_ordered:
11780 case OMPD_threadprivate:
11781 case OMPD_allocate:
11782 case OMPD_task:
11783 case OMPD_simd:
11784 case OMPD_tile:
11785 case OMPD_unroll:
11786 case OMPD_sections:
11787 case OMPD_section:
11788 case OMPD_single:
11789 case OMPD_master:
11790 case OMPD_critical:
11791 case OMPD_taskyield:
11792 case OMPD_barrier:
11793 case OMPD_taskwait:
11794 case OMPD_taskgroup:
11795 case OMPD_atomic:
11796 case OMPD_flush:
11797 case OMPD_depobj:
11798 case OMPD_scan:
11799 case OMPD_teams:
11800 case OMPD_target_data:
11801 case OMPD_distribute:
11802 case OMPD_distribute_simd:
11803 case OMPD_distribute_parallel_for:
11804 case OMPD_distribute_parallel_for_simd:
11805 case OMPD_teams_distribute:
11806 case OMPD_teams_distribute_simd:
11807 case OMPD_teams_distribute_parallel_for:
11808 case OMPD_teams_distribute_parallel_for_simd:
11809 case OMPD_declare_simd:
11810 case OMPD_declare_variant:
11811 case OMPD_begin_declare_variant:
11812 case OMPD_end_declare_variant:
11813 case OMPD_declare_target:
11814 case OMPD_end_declare_target:
11815 case OMPD_declare_reduction:
11816 case OMPD_declare_mapper:
11817 case OMPD_taskloop:
11818 case OMPD_taskloop_simd:
11819 case OMPD_master_taskloop:
11820 case OMPD_master_taskloop_simd:
11821 case OMPD_parallel_master_taskloop:
11822 case OMPD_parallel_master_taskloop_simd:
11823 case OMPD_target:
11824 case OMPD_target_simd:
11825 case OMPD_target_teams_distribute:
11826 case OMPD_target_teams_distribute_simd:
11827 case OMPD_target_teams_distribute_parallel_for:
11828 case OMPD_target_teams_distribute_parallel_for_simd:
11829 case OMPD_target_teams:
11830 case OMPD_target_parallel:
11831 case OMPD_target_parallel_for:
11832 case OMPD_target_parallel_for_simd:
11833 case OMPD_requires:
11834 case OMPD_metadirective:
11835 case OMPD_unknown:
11836 default:
11837 llvm_unreachable("Unexpected standalone target data directive.");
11838 break;
11839 }
11840 if (HasNowait) {
11841 OffloadingArgs.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.Int32Ty));
11842 OffloadingArgs.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.VoidPtrTy));
11843 OffloadingArgs.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.Int32Ty));
11844 OffloadingArgs.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.VoidPtrTy));
11845 }
11846 CGF.EmitRuntimeCall(
11847 callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(), FnID: RTLFn),
11848 args: OffloadingArgs);
11849 };
11850
11851 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11852 &MapNamesArray](CodeGenFunction &CGF,
11853 PrePostActionTy &) {
11854 // Fill up the arrays with all the mapped variables.
11855 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11856 CGOpenMPRuntime::TargetDataInfo Info;
11857 MappableExprsHandler MEHandler(D, CGF);
11858 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
11859 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
11860 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
11861
11862 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11863 D.hasClausesOfKind<OMPNowaitClause>();
11864
11865 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11866 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
11867 CGF.VoidPtrTy, CGM.getPointerAlign());
11868 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
11869 CGM.getPointerAlign());
11870 InputInfo.SizesArray =
11871 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
11872 InputInfo.MappersArray =
11873 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
11874 MapTypesArray = Info.RTArgs.MapTypesArray;
11875 MapNamesArray = Info.RTArgs.MapNamesArray;
11876 if (RequiresOuterTask)
11877 CGF.EmitOMPTargetTaskBasedDirective(S: D, BodyGen: ThenGen, InputInfo);
11878 else
11879 emitInlinedDirective(CGF, InnerKind: D.getDirectiveKind(), CodeGen: ThenGen);
11880 };
11881
11882 if (IfCond) {
11883 emitIfClause(CGF, Cond: IfCond, ThenGen: TargetThenGen,
11884 ElseGen: [](CodeGenFunction &CGF, PrePostActionTy &) {});
11885 } else {
11886 RegionCodeGenTy ThenRCG(TargetThenGen);
11887 ThenRCG(CGF);
11888 }
11889}
11890
11891static unsigned
11892evaluateCDTSize(const FunctionDecl *FD,
11893 ArrayRef<llvm::OpenMPIRBuilder::DeclareSimdAttrTy> ParamAttrs) {
11894 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11895 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11896 // of that clause. The VLEN value must be power of 2.
11897 // In other case the notion of the function`s "characteristic data type" (CDT)
11898 // is used to compute the vector length.
11899 // CDT is defined in the following order:
11900 // a) For non-void function, the CDT is the return type.
11901 // b) If the function has any non-uniform, non-linear parameters, then the
11902 // CDT is the type of the first such parameter.
11903 // c) If the CDT determined by a) or b) above is struct, union, or class
11904 // type which is pass-by-value (except for the type that maps to the
11905 // built-in complex data type), the characteristic data type is int.
11906 // d) If none of the above three cases is applicable, the CDT is int.
11907 // The VLEN is then determined based on the CDT and the size of vector
11908 // register of that ISA for which current vector version is generated. The
11909 // VLEN is computed using the formula below:
11910 // VLEN = sizeof(vector_register) / sizeof(CDT),
11911 // where vector register size specified in section 3.2.1 Registers and the
11912 // Stack Frame of original AMD64 ABI document.
11913 QualType RetType = FD->getReturnType();
11914 if (RetType.isNull())
11915 return 0;
11916 ASTContext &C = FD->getASTContext();
11917 QualType CDT;
11918 if (!RetType.isNull() && !RetType->isVoidType()) {
11919 CDT = RetType;
11920 } else {
11921 unsigned Offset = 0;
11922 if (const auto *MD = dyn_cast<CXXMethodDecl>(Val: FD)) {
11923 if (ParamAttrs[Offset].Kind ==
11924 llvm::OpenMPIRBuilder::DeclareSimdKindTy::Vector)
11925 CDT = C.getPointerType(T: C.getCanonicalTagType(TD: MD->getParent()));
11926 ++Offset;
11927 }
11928 if (CDT.isNull()) {
11929 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11930 if (ParamAttrs[I + Offset].Kind ==
11931 llvm::OpenMPIRBuilder::DeclareSimdKindTy::Vector) {
11932 CDT = FD->getParamDecl(i: I)->getType();
11933 break;
11934 }
11935 }
11936 }
11937 }
11938 if (CDT.isNull())
11939 CDT = C.IntTy;
11940 CDT = CDT->getCanonicalTypeUnqualified();
11941 if (CDT->isRecordType() || CDT->isUnionType())
11942 CDT = C.IntTy;
11943 return C.getTypeSize(T: CDT);
11944}
11945
11946 // These are the functions that are needed to mangle the name of the
11947// vector functions generated by the compiler, according to the rules
11948// defined in the "Vector Function ABI specifications for AArch64",
11949// available at
11950// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11951
11952/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
11953static bool getAArch64MTV(QualType QT,
11954 llvm::OpenMPIRBuilder::DeclareSimdKindTy Kind) {
11955 QT = QT.getCanonicalType();
11956
11957 if (QT->isVoidType())
11958 return false;
11959
11960 if (Kind == llvm::OpenMPIRBuilder::DeclareSimdKindTy::Uniform)
11961 return false;
11962
11963 if (Kind == llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearUVal ||
11964 Kind == llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearRef)
11965 return false;
11966
11967 if ((Kind == llvm::OpenMPIRBuilder::DeclareSimdKindTy::Linear ||
11968 Kind == llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearVal) &&
11969 !QT->isReferenceType())
11970 return false;
11971
11972 return true;
11973}
11974
11975/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11976static bool getAArch64PBV(QualType QT, ASTContext &C) {
11977 QT = QT.getCanonicalType();
11978 unsigned Size = C.getTypeSize(T: QT);
11979
11980 // Only scalars and complex within 16 bytes wide set PVB to true.
11981 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11982 return false;
11983
11984 if (QT->isFloatingType())
11985 return true;
11986
11987 if (QT->isIntegerType())
11988 return true;
11989
11990 if (QT->isPointerType())
11991 return true;
11992
11993 // TODO: Add support for complex types (section 3.1.2, item 2).
11994
11995 return false;
11996}
11997
11998/// Computes the lane size (LS) of a return type or of an input parameter,
11999/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
12000/// TODO: Add support for references, section 3.2.1, item 1.
12001static unsigned getAArch64LS(QualType QT,
12002 llvm::OpenMPIRBuilder::DeclareSimdKindTy Kind,
12003 ASTContext &C) {
12004 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
12005 QualType PTy = QT.getCanonicalType()->getPointeeType();
12006 if (getAArch64PBV(QT: PTy, C))
12007 return C.getTypeSize(T: PTy);
12008 }
12009 if (getAArch64PBV(QT, C))
12010 return C.getTypeSize(T: QT);
12011
12012 return C.getTypeSize(T: C.getUIntPtrType());
12013}
12014
12015// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
12016// signature of the scalar function, as defined in 3.2.2 of the
12017// AAVFABI.
12018static std::tuple<unsigned, unsigned, bool>
12019getNDSWDS(const FunctionDecl *FD,
12020 ArrayRef<llvm::OpenMPIRBuilder::DeclareSimdAttrTy> ParamAttrs) {
12021 QualType RetType = FD->getReturnType().getCanonicalType();
12022
12023 ASTContext &C = FD->getASTContext();
12024
12025 bool OutputBecomesInput = false;
12026
12027 llvm::SmallVector<unsigned, 8> Sizes;
12028 if (!RetType->isVoidType()) {
12029 Sizes.push_back(Elt: getAArch64LS(
12030 QT: RetType, Kind: llvm::OpenMPIRBuilder::DeclareSimdKindTy::Vector, C));
12031 if (!getAArch64PBV(QT: RetType, C) && getAArch64MTV(QT: RetType, Kind: {}))
12032 OutputBecomesInput = true;
12033 }
12034 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
12035 QualType QT = FD->getParamDecl(i: I)->getType().getCanonicalType();
12036 Sizes.push_back(Elt: getAArch64LS(QT, Kind: ParamAttrs[I].Kind, C));
12037 }
12038
12039 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
12040 // The LS of a function parameter / return value can only be a power
12041 // of 2, starting from 8 bits, up to 128.
12042 assert(llvm::all_of(Sizes,
12043 [](unsigned Size) {
12044 return Size == 8 || Size == 16 || Size == 32 ||
12045 Size == 64 || Size == 128;
12046 }) &&
12047 "Invalid size");
12048
12049 return std::make_tuple(args&: *llvm::min_element(Range&: Sizes), args&: *llvm::max_element(Range&: Sizes),
12050 args&: OutputBecomesInput);
12051}
12052
12053static llvm::OpenMPIRBuilder::DeclareSimdBranch
12054convertDeclareSimdBranch(OMPDeclareSimdDeclAttr::BranchStateTy State) {
12055 switch (State) {
12056 case OMPDeclareSimdDeclAttr::BS_Undefined:
12057 return llvm::OpenMPIRBuilder::DeclareSimdBranch::Undefined;
12058 case OMPDeclareSimdDeclAttr::BS_Inbranch:
12059 return llvm::OpenMPIRBuilder::DeclareSimdBranch::Inbranch;
12060 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
12061 return llvm::OpenMPIRBuilder::DeclareSimdBranch::Notinbranch;
12062 }
12063 llvm_unreachable("unexpected declare simd branch state");
12064}
12065
12066// Check the values provided via `simdlen` by the user.
12067static bool validateAArch64Simdlen(CodeGenModule &CGM, SourceLocation SLoc,
12068 unsigned UserVLEN, unsigned WDS, char ISA) {
12069 // 1. A `simdlen(1)` doesn't produce vector signatures.
12070 if (UserVLEN == 1) {
12071 CGM.getDiags().Report(Loc: SLoc, DiagID: diag::warn_simdlen_1_no_effect);
12072 return false;
12073 }
12074
12075 // 2. Section 3.3.1, item 1: user input must be a power of 2 for Advanced
12076 // SIMD.
12077 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(Value: UserVLEN)) {
12078 CGM.getDiags().Report(Loc: SLoc, DiagID: diag::warn_simdlen_requires_power_of_2);
12079 return false;
12080 }
12081
12082 // 3. Section 3.4.1: SVE fixed length must obey the architectural limits.
12083 if (ISA == 's' && UserVLEN != 0 &&
12084 ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0))) {
12085 CGM.getDiags().Report(Loc: SLoc, DiagID: diag::warn_simdlen_must_fit_lanes) << WDS;
12086 return false;
12087 }
12088
12089 return true;
12090}
12091
12092void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
12093 llvm::Function *Fn) {
12094 ASTContext &C = CGM.getContext();
12095 FD = FD->getMostRecentDecl();
12096 while (FD) {
12097 // Map params to their positions in function decl.
12098 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
12099 if (isa<CXXMethodDecl>(Val: FD))
12100 ParamPositions.try_emplace(Key: FD, Args: 0);
12101 unsigned ParamPos = ParamPositions.size();
12102 for (const ParmVarDecl *P : FD->parameters()) {
12103 ParamPositions.try_emplace(Key: P->getCanonicalDecl(), Args&: ParamPos);
12104 ++ParamPos;
12105 }
12106 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
12107 llvm::SmallVector<llvm::OpenMPIRBuilder::DeclareSimdAttrTy, 8> ParamAttrs(
12108 ParamPositions.size());
12109 // Mark uniform parameters.
12110 for (const Expr *E : Attr->uniforms()) {
12111 E = E->IgnoreParenImpCasts();
12112 unsigned Pos;
12113 if (isa<CXXThisExpr>(Val: E)) {
12114 Pos = ParamPositions[FD];
12115 } else {
12116 const auto *PVD = cast<ParmVarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl())
12117 ->getCanonicalDecl();
12118 auto It = ParamPositions.find(Val: PVD);
12119 assert(It != ParamPositions.end() && "Function parameter not found");
12120 Pos = It->second;
12121 }
12122 ParamAttrs[Pos].Kind =
12123 llvm::OpenMPIRBuilder::DeclareSimdKindTy::Uniform;
12124 }
12125 // Get alignment info.
12126 auto *NI = Attr->alignments_begin();
12127 for (const Expr *E : Attr->aligneds()) {
12128 E = E->IgnoreParenImpCasts();
12129 unsigned Pos;
12130 QualType ParmTy;
12131 if (isa<CXXThisExpr>(Val: E)) {
12132 Pos = ParamPositions[FD];
12133 ParmTy = E->getType();
12134 } else {
12135 const auto *PVD = cast<ParmVarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl())
12136 ->getCanonicalDecl();
12137 auto It = ParamPositions.find(Val: PVD);
12138 assert(It != ParamPositions.end() && "Function parameter not found");
12139 Pos = It->second;
12140 ParmTy = PVD->getType();
12141 }
12142 ParamAttrs[Pos].Alignment =
12143 (*NI)
12144 ? (*NI)->EvaluateKnownConstInt(Ctx: C)
12145 : llvm::APSInt::getUnsigned(
12146 X: C.toCharUnitsFromBits(BitSize: C.getOpenMPDefaultSimdAlign(T: ParmTy))
12147 .getQuantity());
12148 ++NI;
12149 }
12150 // Mark linear parameters.
12151 auto *SI = Attr->steps_begin();
12152 auto *MI = Attr->modifiers_begin();
12153 for (const Expr *E : Attr->linears()) {
12154 E = E->IgnoreParenImpCasts();
12155 unsigned Pos;
12156 bool IsReferenceType = false;
12157 // Rescaling factor needed to compute the linear parameter
12158 // value in the mangled name.
12159 unsigned PtrRescalingFactor = 1;
12160 if (isa<CXXThisExpr>(Val: E)) {
12161 Pos = ParamPositions[FD];
12162 auto *P = cast<PointerType>(Val: E->getType());
12163 PtrRescalingFactor = CGM.getContext()
12164 .getTypeSizeInChars(T: P->getPointeeType())
12165 .getQuantity();
12166 } else {
12167 const auto *PVD = cast<ParmVarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl())
12168 ->getCanonicalDecl();
12169 auto It = ParamPositions.find(Val: PVD);
12170 assert(It != ParamPositions.end() && "Function parameter not found");
12171 Pos = It->second;
12172 if (auto *P = dyn_cast<PointerType>(Val: PVD->getType()))
12173 PtrRescalingFactor = CGM.getContext()
12174 .getTypeSizeInChars(T: P->getPointeeType())
12175 .getQuantity();
12176 else if (PVD->getType()->isReferenceType()) {
12177 IsReferenceType = true;
12178 PtrRescalingFactor =
12179 CGM.getContext()
12180 .getTypeSizeInChars(T: PVD->getType().getNonReferenceType())
12181 .getQuantity();
12182 }
12183 }
12184 llvm::OpenMPIRBuilder::DeclareSimdAttrTy &ParamAttr = ParamAttrs[Pos];
12185 if (*MI == OMPC_LINEAR_ref)
12186 ParamAttr.Kind = llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearRef;
12187 else if (*MI == OMPC_LINEAR_uval)
12188 ParamAttr.Kind = llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearUVal;
12189 else if (IsReferenceType)
12190 ParamAttr.Kind = llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearVal;
12191 else
12192 ParamAttr.Kind = llvm::OpenMPIRBuilder::DeclareSimdKindTy::Linear;
12193 // Assuming a stride of 1, for `linear` without modifiers.
12194 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(X: 1);
12195 if (*SI) {
12196 Expr::EvalResult Result;
12197 if (!(*SI)->EvaluateAsInt(Result, Ctx: C, AllowSideEffects: Expr::SE_AllowSideEffects)) {
12198 if (const auto *DRE =
12199 cast<DeclRefExpr>(Val: (*SI)->IgnoreParenImpCasts())) {
12200 if (const auto *StridePVD =
12201 dyn_cast<ParmVarDecl>(Val: DRE->getDecl())) {
12202 ParamAttr.HasVarStride = true;
12203 auto It = ParamPositions.find(Val: StridePVD->getCanonicalDecl());
12204 assert(It != ParamPositions.end() &&
12205 "Function parameter not found");
12206 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(X: It->second);
12207 }
12208 }
12209 } else {
12210 ParamAttr.StrideOrArg = Result.Val.getInt();
12211 }
12212 }
12213 // If we are using a linear clause on a pointer, we need to
12214 // rescale the value of linear_step with the byte size of the
12215 // pointee type.
12216 if (!ParamAttr.HasVarStride &&
12217 (ParamAttr.Kind ==
12218 llvm::OpenMPIRBuilder::DeclareSimdKindTy::Linear ||
12219 ParamAttr.Kind ==
12220 llvm::OpenMPIRBuilder::DeclareSimdKindTy::LinearRef))
12221 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
12222 ++SI;
12223 ++MI;
12224 }
12225 llvm::APSInt VLENVal;
12226 SourceLocation ExprLoc;
12227 const Expr *VLENExpr = Attr->getSimdlen();
12228 if (VLENExpr) {
12229 VLENVal = VLENExpr->EvaluateKnownConstInt(Ctx: C);
12230 ExprLoc = VLENExpr->getExprLoc();
12231 }
12232 llvm::OpenMPIRBuilder::DeclareSimdBranch State =
12233 convertDeclareSimdBranch(State: Attr->getBranchState());
12234 if (CGM.getTriple().isX86()) {
12235 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
12236 assert(NumElts && "Non-zero simdlen/cdtsize expected");
12237 OMPBuilder.emitX86DeclareSimdFunction(Fn, NumElements: NumElts, VLENVal, ParamAttrs,
12238 Branch: State);
12239 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
12240 unsigned VLEN = VLENVal.getExtValue();
12241 // Get basic data for building the vector signature.
12242 const auto Data = getNDSWDS(FD, ParamAttrs);
12243 const unsigned NDS = std::get<0>(t: Data);
12244 const unsigned WDS = std::get<1>(t: Data);
12245 const bool OutputBecomesInput = std::get<2>(t: Data);
12246 if (CGM.getTarget().hasFeature(Feature: "sve")) {
12247 if (validateAArch64Simdlen(CGM, SLoc: ExprLoc, UserVLEN: VLEN, WDS, ISA: 's'))
12248 OMPBuilder.emitAArch64DeclareSimdFunction(
12249 Fn, VLENVal: VLEN, ParamAttrs, Branch: State, ISA: 's', NarrowestDataSize: NDS, OutputBecomesInput);
12250 } else if (CGM.getTarget().hasFeature(Feature: "neon")) {
12251 if (validateAArch64Simdlen(CGM, SLoc: ExprLoc, UserVLEN: VLEN, WDS, ISA: 'n'))
12252 OMPBuilder.emitAArch64DeclareSimdFunction(
12253 Fn, VLENVal: VLEN, ParamAttrs, Branch: State, ISA: 'n', NarrowestDataSize: NDS, OutputBecomesInput);
12254 }
12255 }
12256 }
12257 FD = FD->getPreviousDecl();
12258 }
12259}
12260
12261namespace {
12262/// Cleanup action for doacross support.
12263class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12264public:
12265 static const int DoacrossFinArgs = 2;
12266
12267private:
12268 llvm::FunctionCallee RTLFn;
12269 llvm::Value *Args[DoacrossFinArgs];
12270
12271public:
12272 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12273 ArrayRef<llvm::Value *> CallArgs)
12274 : RTLFn(RTLFn) {
12275 assert(CallArgs.size() == DoacrossFinArgs);
12276 std::copy(first: CallArgs.begin(), last: CallArgs.end(), result: std::begin(arr&: Args));
12277 }
12278 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12279 if (!CGF.HaveInsertPoint())
12280 return;
12281 CGF.EmitRuntimeCall(callee: RTLFn, args: Args);
12282 }
12283};
12284} // namespace
12285
12286void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12287 const OMPLoopDirective &D,
12288 ArrayRef<Expr *> NumIterations) {
12289 if (!CGF.HaveInsertPoint())
12290 return;
12291
12292 ASTContext &C = CGM.getContext();
12293 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
12294 RecordDecl *RD;
12295 if (KmpDimTy.isNull()) {
12296 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
12297 // kmp_int64 lo; // lower
12298 // kmp_int64 up; // upper
12299 // kmp_int64 st; // stride
12300 // };
12301 RD = C.buildImplicitRecord(Name: "kmp_dim");
12302 RD->startDefinition();
12303 addFieldToRecordDecl(C, DC: RD, FieldTy: Int64Ty);
12304 addFieldToRecordDecl(C, DC: RD, FieldTy: Int64Ty);
12305 addFieldToRecordDecl(C, DC: RD, FieldTy: Int64Ty);
12306 RD->completeDefinition();
12307 KmpDimTy = C.getCanonicalTagType(TD: RD);
12308 } else {
12309 RD = KmpDimTy->castAsRecordDecl();
12310 }
12311 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
12312 QualType ArrayTy = C.getConstantArrayType(EltTy: KmpDimTy, ArySize: Size, SizeExpr: nullptr,
12313 ASM: ArraySizeModifier::Normal, IndexTypeQuals: 0);
12314
12315 Address DimsAddr = CGF.CreateMemTemp(T: ArrayTy, Name: "dims");
12316 CGF.EmitNullInitialization(DestPtr: DimsAddr, Ty: ArrayTy);
12317 enum { LowerFD = 0, UpperFD, StrideFD };
12318 // Fill dims with data.
12319 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
12320 LValue DimsLVal = CGF.MakeAddrLValue(
12321 Addr: CGF.Builder.CreateConstArrayGEP(Addr: DimsAddr, Index: I), T: KmpDimTy);
12322 // dims.upper = num_iterations;
12323 LValue UpperLVal = CGF.EmitLValueForField(
12324 Base: DimsLVal, Field: *std::next(x: RD->field_begin(), n: UpperFD));
12325 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
12326 Src: CGF.EmitScalarExpr(E: NumIterations[I]), SrcTy: NumIterations[I]->getType(),
12327 DstTy: Int64Ty, Loc: NumIterations[I]->getExprLoc());
12328 CGF.EmitStoreOfScalar(value: NumIterVal, lvalue: UpperLVal);
12329 // dims.stride = 1;
12330 LValue StrideLVal = CGF.EmitLValueForField(
12331 Base: DimsLVal, Field: *std::next(x: RD->field_begin(), n: StrideFD));
12332 CGF.EmitStoreOfScalar(value: llvm::ConstantInt::getSigned(Ty: CGM.Int64Ty, /*V=*/1),
12333 lvalue: StrideLVal);
12334 }
12335
12336 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
12337 // kmp_int32 num_dims, struct kmp_dim * dims);
12338 llvm::Value *Args[] = {
12339 emitUpdateLocation(CGF, Loc: D.getBeginLoc()),
12340 getThreadID(CGF, Loc: D.getBeginLoc()),
12341 llvm::ConstantInt::getSigned(Ty: CGM.Int32Ty, V: NumIterations.size()),
12342 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12343 V: CGF.Builder.CreateConstArrayGEP(Addr: DimsAddr, Index: 0).emitRawPointer(CGF),
12344 DestTy: CGM.VoidPtrTy)};
12345
12346 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12347 M&: CGM.getModule(), FnID: OMPRTL___kmpc_doacross_init);
12348 CGF.EmitRuntimeCall(callee: RTLFn, args: Args);
12349 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
12350 emitUpdateLocation(CGF, Loc: D.getEndLoc()), getThreadID(CGF, Loc: D.getEndLoc())};
12351 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12352 M&: CGM.getModule(), FnID: OMPRTL___kmpc_doacross_fini);
12353 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(Kind: NormalAndEHCleanup, A: FiniRTLFn,
12354 A: llvm::ArrayRef(FiniArgs));
12355}
12356
12357template <typename T>
12358static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
12359 const T *C, llvm::Value *ULoc,
12360 llvm::Value *ThreadID) {
12361 QualType Int64Ty =
12362 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12363 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12364 QualType ArrayTy = CGM.getContext().getConstantArrayType(
12365 EltTy: Int64Ty, ArySize: Size, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal, IndexTypeQuals: 0);
12366 Address CntAddr = CGF.CreateMemTemp(T: ArrayTy, Name: ".cnt.addr");
12367 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12368 const Expr *CounterVal = C->getLoopData(I);
12369 assert(CounterVal);
12370 llvm::Value *CntVal = CGF.EmitScalarConversion(
12371 Src: CGF.EmitScalarExpr(E: CounterVal), SrcTy: CounterVal->getType(), DstTy: Int64Ty,
12372 Loc: CounterVal->getExprLoc());
12373 CGF.EmitStoreOfScalar(Value: CntVal, Addr: CGF.Builder.CreateConstArrayGEP(Addr: CntAddr, Index: I),
12374 /*Volatile=*/false, Ty: Int64Ty);
12375 }
12376 llvm::Value *Args[] = {
12377 ULoc, ThreadID,
12378 CGF.Builder.CreateConstArrayGEP(Addr: CntAddr, Index: 0).emitRawPointer(CGF)};
12379 llvm::FunctionCallee RTLFn;
12380 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
12381 OMPDoacrossKind<T> ODK;
12382 if (ODK.isSource(C)) {
12383 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(),
12384 FnID: OMPRTL___kmpc_doacross_post);
12385 } else {
12386 assert(ODK.isSink(C) && "Expect sink modifier.");
12387 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(),
12388 FnID: OMPRTL___kmpc_doacross_wait);
12389 }
12390 CGF.EmitRuntimeCall(callee: RTLFn, args: Args);
12391}
12392
12393void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12394 const OMPDependClause *C) {
12395 return EmitDoacrossOrdered<OMPDependClause>(
12396 CGF, CGM, C, ULoc: emitUpdateLocation(CGF, Loc: C->getBeginLoc()),
12397 ThreadID: getThreadID(CGF, Loc: C->getBeginLoc()));
12398}
12399
12400void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12401 const OMPDoacrossClause *C) {
12402 return EmitDoacrossOrdered<OMPDoacrossClause>(
12403 CGF, CGM, C, ULoc: emitUpdateLocation(CGF, Loc: C->getBeginLoc()),
12404 ThreadID: getThreadID(CGF, Loc: C->getBeginLoc()));
12405}
12406
12407void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12408 llvm::FunctionCallee Callee,
12409 ArrayRef<llvm::Value *> Args) const {
12410 assert(Loc.isValid() && "Outlined function call location must be valid.");
12411 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: Loc);
12412
12413 if (auto *Fn = dyn_cast<llvm::Function>(Val: Callee.getCallee())) {
12414 if (Fn->doesNotThrow()) {
12415 CGF.EmitNounwindRuntimeCall(callee: Fn, args: Args);
12416 return;
12417 }
12418 }
12419 CGF.EmitRuntimeCall(callee: Callee, args: Args);
12420}
12421
12422void CGOpenMPRuntime::emitOutlinedFunctionCall(
12423 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
12424 ArrayRef<llvm::Value *> Args) const {
12425 emitCall(CGF, Loc, Callee: OutlinedFn, Args);
12426}
12427
12428void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12429 if (const auto *FD = dyn_cast<FunctionDecl>(Val: D))
12430 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD: FD))
12431 HasEmittedDeclareTargetRegion = true;
12432}
12433
12434Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
12435 const VarDecl *NativeParam,
12436 const VarDecl *TargetParam) const {
12437 return CGF.GetAddrOfLocalVar(VD: NativeParam);
12438}
12439
12440/// Return allocator value from expression, or return a null allocator (default
12441/// when no allocator specified).
12442static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
12443 const Expr *Allocator) {
12444 llvm::Value *AllocVal;
12445 if (Allocator) {
12446 AllocVal = CGF.EmitScalarExpr(E: Allocator);
12447 // According to the standard, the original allocator type is a enum
12448 // (integer). Convert to pointer type, if required.
12449 AllocVal = CGF.EmitScalarConversion(Src: AllocVal, SrcTy: Allocator->getType(),
12450 DstTy: CGF.getContext().VoidPtrTy,
12451 Loc: Allocator->getExprLoc());
12452 } else {
12453 // If no allocator specified, it defaults to the null allocator.
12454 AllocVal = llvm::Constant::getNullValue(
12455 Ty: CGF.CGM.getTypes().ConvertType(T: CGF.getContext().VoidPtrTy));
12456 }
12457 return AllocVal;
12458}
12459
12460/// Return the alignment from an allocate directive if present.
12461static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
12462 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
12463
12464 if (!AllocateAlignment)
12465 return nullptr;
12466
12467 return llvm::ConstantInt::get(Ty: CGM.SizeTy, V: AllocateAlignment->getQuantity());
12468}
12469
12470Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
12471 const VarDecl *VD) {
12472 if (!VD)
12473 return Address::invalid();
12474 Address UntiedAddr = Address::invalid();
12475 Address UntiedRealAddr = Address::invalid();
12476 auto It = FunctionToUntiedTaskStackMap.find(Val: CGF.CurFn);
12477 if (It != FunctionToUntiedTaskStackMap.end()) {
12478 const UntiedLocalVarsAddressesMap &UntiedData =
12479 UntiedLocalVarsStack[It->second];
12480 auto I = UntiedData.find(Key: VD);
12481 if (I != UntiedData.end()) {
12482 UntiedAddr = I->second.first;
12483 UntiedRealAddr = I->second.second;
12484 }
12485 }
12486 const VarDecl *CVD = VD->getCanonicalDecl();
12487 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
12488 // Use the default allocation.
12489 if (!isAllocatableDecl(VD))
12490 return UntiedAddr;
12491 llvm::Value *Size;
12492 CharUnits Align = CGM.getContext().getDeclAlign(D: CVD);
12493 if (CVD->getType()->isVariablyModifiedType()) {
12494 Size = CGF.getTypeSize(Ty: CVD->getType());
12495 // Align the size: ((size + align - 1) / align) * align
12496 Size = CGF.Builder.CreateNUWAdd(
12497 LHS: Size, RHS: CGM.getSize(numChars: Align - CharUnits::fromQuantity(Quantity: 1)));
12498 Size = CGF.Builder.CreateUDiv(LHS: Size, RHS: CGM.getSize(numChars: Align));
12499 Size = CGF.Builder.CreateNUWMul(LHS: Size, RHS: CGM.getSize(numChars: Align));
12500 } else {
12501 CharUnits Sz = CGM.getContext().getTypeSizeInChars(T: CVD->getType());
12502 Size = CGM.getSize(numChars: Sz.alignTo(Align));
12503 }
12504 llvm::Value *ThreadID = getThreadID(CGF, Loc: CVD->getBeginLoc());
12505 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
12506 const Expr *Allocator = AA->getAllocator();
12507 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
12508 llvm::Value *Alignment = getAlignmentValue(CGM, VD: CVD);
12509 SmallVector<llvm::Value *, 4> Args;
12510 Args.push_back(Elt: ThreadID);
12511 if (Alignment)
12512 Args.push_back(Elt: Alignment);
12513 Args.push_back(Elt: Size);
12514 Args.push_back(Elt: AllocVal);
12515 llvm::omp::RuntimeFunction FnID =
12516 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
12517 llvm::Value *Addr = CGF.EmitRuntimeCall(
12518 callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(), FnID), args: Args,
12519 name: getName(Parts: {CVD->getName(), ".void.addr"}));
12520 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12521 M&: CGM.getModule(), FnID: OMPRTL___kmpc_free);
12522 QualType Ty = CGM.getContext().getPointerType(T: CVD->getType());
12523 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12524 V: Addr, DestTy: CGF.ConvertTypeForMem(T: Ty), Name: getName(Parts: {CVD->getName(), ".addr"}));
12525 if (UntiedAddr.isValid())
12526 CGF.EmitStoreOfScalar(Value: Addr, Addr: UntiedAddr, /*Volatile=*/false, Ty);
12527
12528 // Cleanup action for allocate support.
12529 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
12530 llvm::FunctionCallee RTLFn;
12531 SourceLocation::UIntTy LocEncoding;
12532 Address Addr;
12533 const Expr *AllocExpr;
12534
12535 public:
12536 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
12537 SourceLocation::UIntTy LocEncoding, Address Addr,
12538 const Expr *AllocExpr)
12539 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
12540 AllocExpr(AllocExpr) {}
12541 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12542 if (!CGF.HaveInsertPoint())
12543 return;
12544 llvm::Value *Args[3];
12545 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
12546 CGF, Loc: SourceLocation::getFromRawEncoding(Encoding: LocEncoding));
12547 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12548 V: Addr.emitRawPointer(CGF), DestTy: CGF.VoidPtrTy);
12549 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator: AllocExpr);
12550 Args[2] = AllocVal;
12551 CGF.EmitRuntimeCall(callee: RTLFn, args: Args);
12552 }
12553 };
12554 Address VDAddr =
12555 UntiedRealAddr.isValid()
12556 ? UntiedRealAddr
12557 : Address(Addr, CGF.ConvertTypeForMem(T: CVD->getType()), Align);
12558 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
12559 Kind: NormalAndEHCleanup, A: FiniRTLFn, A: CVD->getLocation().getRawEncoding(),
12560 A: VDAddr, A: Allocator);
12561 if (UntiedRealAddr.isValid())
12562 if (auto *Region =
12563 dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo))
12564 Region->emitUntiedSwitch(CGF);
12565 return VDAddr;
12566 }
12567 return UntiedAddr;
12568}
12569
12570bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12571 const VarDecl *VD) const {
12572 auto It = FunctionToUntiedTaskStackMap.find(Val: CGF.CurFn);
12573 if (It == FunctionToUntiedTaskStackMap.end())
12574 return false;
12575 return UntiedLocalVarsStack[It->second].count(Key: VD) > 0;
12576}
12577
12578CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12579 CodeGenModule &CGM, const OMPLoopDirective &S)
12580 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12581 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12582 if (!NeedToPush)
12583 return;
12584 NontemporalDeclsSet &DS =
12585 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12586 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12587 for (const Stmt *Ref : C->private_refs()) {
12588 const auto *SimpleRefExpr = cast<Expr>(Val: Ref)->IgnoreParenImpCasts();
12589 const ValueDecl *VD;
12590 if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: SimpleRefExpr)) {
12591 VD = DRE->getDecl();
12592 } else {
12593 const auto *ME = cast<MemberExpr>(Val: SimpleRefExpr);
12594 assert((ME->isImplicitCXXThis() ||
12595 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12596 "Expected member of current class.");
12597 VD = ME->getMemberDecl();
12598 }
12599 DS.insert(V: VD);
12600 }
12601 }
12602}
12603
12604CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12605 if (!NeedToPush)
12606 return;
12607 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12608}
12609
12610CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12611 CodeGenFunction &CGF,
12612 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12613 std::pair<Address, Address>> &LocalVars)
12614 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12615 if (!NeedToPush)
12616 return;
12617 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12618 Key: CGF.CurFn, Args: CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12619 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(Elt: LocalVars);
12620}
12621
12622CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12623 if (!NeedToPush)
12624 return;
12625 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12626}
12627
12628bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12629 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12630
12631 return llvm::any_of(
12632 Range&: CGM.getOpenMPRuntime().NontemporalDeclsStack,
12633 P: [VD](const NontemporalDeclsSet &Set) { return Set.contains(V: VD); });
12634}
12635
12636void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12637 const OMPExecutableDirective &S,
12638 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12639 const {
12640 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12641 // Vars in target/task regions must be excluded completely.
12642 if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()) ||
12643 isOpenMPTaskingDirective(Kind: S.getDirectiveKind())) {
12644 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12645 getOpenMPCaptureRegions(CaptureRegions, DKind: S.getDirectiveKind());
12646 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: CaptureRegions.front());
12647 for (const CapturedStmt::Capture &Cap : CS->captures()) {
12648 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12649 NeedToCheckForLPCs.insert(V: Cap.getCapturedVar());
12650 }
12651 }
12652 // Exclude vars in private clauses.
12653 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12654 for (const Expr *Ref : C->varlist()) {
12655 if (!Ref->getType()->isScalarType())
12656 continue;
12657 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
12658 if (!DRE)
12659 continue;
12660 NeedToCheckForLPCs.insert(V: DRE->getDecl());
12661 }
12662 }
12663 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12664 for (const Expr *Ref : C->varlist()) {
12665 if (!Ref->getType()->isScalarType())
12666 continue;
12667 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
12668 if (!DRE)
12669 continue;
12670 NeedToCheckForLPCs.insert(V: DRE->getDecl());
12671 }
12672 }
12673 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12674 for (const Expr *Ref : C->varlist()) {
12675 if (!Ref->getType()->isScalarType())
12676 continue;
12677 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
12678 if (!DRE)
12679 continue;
12680 NeedToCheckForLPCs.insert(V: DRE->getDecl());
12681 }
12682 }
12683 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12684 for (const Expr *Ref : C->varlist()) {
12685 if (!Ref->getType()->isScalarType())
12686 continue;
12687 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
12688 if (!DRE)
12689 continue;
12690 NeedToCheckForLPCs.insert(V: DRE->getDecl());
12691 }
12692 }
12693 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12694 for (const Expr *Ref : C->varlist()) {
12695 if (!Ref->getType()->isScalarType())
12696 continue;
12697 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
12698 if (!DRE)
12699 continue;
12700 NeedToCheckForLPCs.insert(V: DRE->getDecl());
12701 }
12702 }
12703 for (const Decl *VD : NeedToCheckForLPCs) {
12704 for (const LastprivateConditionalData &Data :
12705 llvm::reverse(C&: CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12706 if (Data.DeclToUniqueName.count(Key: VD) > 0) {
12707 if (!Data.Disabled)
12708 NeedToAddForLPCsAsDisabled.insert(V: VD);
12709 break;
12710 }
12711 }
12712 }
12713}
12714
12715CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12716 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12717 : CGM(CGF.CGM),
12718 Action((CGM.getLangOpts().OpenMP >= 50 &&
12719 llvm::any_of(Range: S.getClausesOfKind<OMPLastprivateClause>(),
12720 P: [](const OMPLastprivateClause *C) {
12721 return C->getKind() ==
12722 OMPC_LASTPRIVATE_conditional;
12723 }))
12724 ? ActionToDo::PushAsLastprivateConditional
12725 : ActionToDo::DoNotPush) {
12726 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12727 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12728 return;
12729 assert(Action == ActionToDo::PushAsLastprivateConditional &&
12730 "Expected a push action.");
12731 LastprivateConditionalData &Data =
12732 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12733 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12734 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12735 continue;
12736
12737 for (const Expr *Ref : C->varlist()) {
12738 Data.DeclToUniqueName.insert(KV: std::make_pair(
12739 x: cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts())->getDecl(),
12740 y: SmallString<16>(generateUniqueName(CGM, Prefix: "pl_cond", Ref))));
12741 }
12742 }
12743 Data.IVLVal = IVLVal;
12744 Data.Fn = CGF.CurFn;
12745}
12746
12747CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12748 CodeGenFunction &CGF, const OMPExecutableDirective &S)
12749 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12750 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12751 if (CGM.getLangOpts().OpenMP < 50)
12752 return;
12753 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12754 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12755 if (!NeedToAddForLPCsAsDisabled.empty()) {
12756 Action = ActionToDo::DisableLastprivateConditional;
12757 LastprivateConditionalData &Data =
12758 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12759 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12760 Data.DeclToUniqueName.try_emplace(Key: VD);
12761 Data.Fn = CGF.CurFn;
12762 Data.Disabled = true;
12763 }
12764}
12765
12766CGOpenMPRuntime::LastprivateConditionalRAII
12767CGOpenMPRuntime::LastprivateConditionalRAII::disable(
12768 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
12769 return LastprivateConditionalRAII(CGF, S);
12770}
12771
12772CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12773 if (CGM.getLangOpts().OpenMP < 50)
12774 return;
12775 if (Action == ActionToDo::DisableLastprivateConditional) {
12776 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12777 "Expected list of disabled private vars.");
12778 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12779 }
12780 if (Action == ActionToDo::PushAsLastprivateConditional) {
12781 assert(
12782 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12783 "Expected list of lastprivate conditional vars.");
12784 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12785 }
12786}
12787
12788Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12789 const VarDecl *VD) {
12790 ASTContext &C = CGM.getContext();
12791 auto I = LastprivateConditionalToTypes.try_emplace(Key: CGF.CurFn).first;
12792 QualType NewType;
12793 const FieldDecl *VDField;
12794 const FieldDecl *FiredField;
12795 LValue BaseLVal;
12796 auto VI = I->getSecond().find(Val: VD);
12797 if (VI == I->getSecond().end()) {
12798 RecordDecl *RD = C.buildImplicitRecord(Name: "lasprivate.conditional");
12799 RD->startDefinition();
12800 VDField = addFieldToRecordDecl(C, DC: RD, FieldTy: VD->getType().getNonReferenceType());
12801 FiredField = addFieldToRecordDecl(C, DC: RD, FieldTy: C.CharTy);
12802 RD->completeDefinition();
12803 NewType = C.getCanonicalTagType(TD: RD);
12804 Address Addr = CGF.CreateMemTemp(T: NewType, Align: C.getDeclAlign(D: VD), Name: VD->getName());
12805 BaseLVal = CGF.MakeAddrLValue(Addr, T: NewType, Source: AlignmentSource::Decl);
12806 I->getSecond().try_emplace(Key: VD, Args&: NewType, Args&: VDField, Args&: FiredField, Args&: BaseLVal);
12807 } else {
12808 NewType = std::get<0>(t&: VI->getSecond());
12809 VDField = std::get<1>(t&: VI->getSecond());
12810 FiredField = std::get<2>(t&: VI->getSecond());
12811 BaseLVal = std::get<3>(t&: VI->getSecond());
12812 }
12813 LValue FiredLVal =
12814 CGF.EmitLValueForField(Base: BaseLVal, Field: FiredField);
12815 CGF.EmitStoreOfScalar(
12816 value: llvm::ConstantInt::getNullValue(Ty: CGF.ConvertTypeForMem(T: C.CharTy)),
12817 lvalue: FiredLVal);
12818 return CGF.EmitLValueForField(Base: BaseLVal, Field: VDField).getAddress();
12819}
12820
12821namespace {
12822/// Checks if the lastprivate conditional variable is referenced in LHS.
12823class LastprivateConditionalRefChecker final
12824 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12825 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12826 const Expr *FoundE = nullptr;
12827 const Decl *FoundD = nullptr;
12828 StringRef UniqueDeclName;
12829 LValue IVLVal;
12830 llvm::Function *FoundFn = nullptr;
12831 SourceLocation Loc;
12832
12833public:
12834 bool VisitDeclRefExpr(const DeclRefExpr *E) {
12835 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12836 llvm::reverse(C&: LPM)) {
12837 auto It = D.DeclToUniqueName.find(Key: E->getDecl());
12838 if (It == D.DeclToUniqueName.end())
12839 continue;
12840 if (D.Disabled)
12841 return false;
12842 FoundE = E;
12843 FoundD = E->getDecl()->getCanonicalDecl();
12844 UniqueDeclName = It->second;
12845 IVLVal = D.IVLVal;
12846 FoundFn = D.Fn;
12847 break;
12848 }
12849 return FoundE == E;
12850 }
12851 bool VisitMemberExpr(const MemberExpr *E) {
12852 if (!CodeGenFunction::IsWrappedCXXThis(E: E->getBase()))
12853 return false;
12854 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12855 llvm::reverse(C&: LPM)) {
12856 auto It = D.DeclToUniqueName.find(Key: E->getMemberDecl());
12857 if (It == D.DeclToUniqueName.end())
12858 continue;
12859 if (D.Disabled)
12860 return false;
12861 FoundE = E;
12862 FoundD = E->getMemberDecl()->getCanonicalDecl();
12863 UniqueDeclName = It->second;
12864 IVLVal = D.IVLVal;
12865 FoundFn = D.Fn;
12866 break;
12867 }
12868 return FoundE == E;
12869 }
12870 bool VisitStmt(const Stmt *S) {
12871 for (const Stmt *Child : S->children()) {
12872 if (!Child)
12873 continue;
12874 if (const auto *E = dyn_cast<Expr>(Val: Child))
12875 if (!E->isGLValue())
12876 continue;
12877 if (Visit(S: Child))
12878 return true;
12879 }
12880 return false;
12881 }
12882 explicit LastprivateConditionalRefChecker(
12883 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12884 : LPM(LPM) {}
12885 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12886 getFoundData() const {
12887 return std::make_tuple(args: FoundE, args: FoundD, args: UniqueDeclName, args: IVLVal, args: FoundFn);
12888 }
12889};
12890} // namespace
12891
12892void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
12893 LValue IVLVal,
12894 StringRef UniqueDeclName,
12895 LValue LVal,
12896 SourceLocation Loc) {
12897 // Last updated loop counter for the lastprivate conditional var.
12898 // int<xx> last_iv = 0;
12899 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(T: IVLVal.getType());
12900 llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
12901 Ty: LLIVTy, Name: getName(Parts: {UniqueDeclName, "iv"}));
12902 cast<llvm::GlobalVariable>(Val: LastIV)->setAlignment(
12903 IVLVal.getAlignment().getAsAlign());
12904 LValue LastIVLVal =
12905 CGF.MakeNaturalAlignRawAddrLValue(V: LastIV, T: IVLVal.getType());
12906
12907 // Last value of the lastprivate conditional.
12908 // decltype(priv_a) last_a;
12909 llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
12910 Ty: CGF.ConvertTypeForMem(T: LVal.getType()), Name: UniqueDeclName);
12911 cast<llvm::GlobalVariable>(Val: Last)->setAlignment(
12912 LVal.getAlignment().getAsAlign());
12913 LValue LastLVal =
12914 CGF.MakeRawAddrLValue(V: Last, T: LVal.getType(), Alignment: LVal.getAlignment());
12915
12916 // Global loop counter. Required to handle inner parallel-for regions.
12917 // iv
12918 llvm::Value *IVVal = CGF.EmitLoadOfScalar(lvalue: IVLVal, Loc);
12919
12920 // #pragma omp critical(a)
12921 // if (last_iv <= iv) {
12922 // last_iv = iv;
12923 // last_a = priv_a;
12924 // }
12925 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12926 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12927 Action.Enter(CGF);
12928 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(lvalue: LastIVLVal, Loc);
12929 // (last_iv <= iv) ? Check if the variable is updated and store new
12930 // value in global var.
12931 llvm::Value *CmpRes;
12932 if (IVLVal.getType()->isSignedIntegerType()) {
12933 CmpRes = CGF.Builder.CreateICmpSLE(LHS: LastIVVal, RHS: IVVal);
12934 } else {
12935 assert(IVLVal.getType()->isUnsignedIntegerType() &&
12936 "Loop iteration variable must be integer.");
12937 CmpRes = CGF.Builder.CreateICmpULE(LHS: LastIVVal, RHS: IVVal);
12938 }
12939 llvm::BasicBlock *ThenBB = CGF.createBasicBlock(name: "lp_cond_then");
12940 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: "lp_cond_exit");
12941 CGF.Builder.CreateCondBr(Cond: CmpRes, True: ThenBB, False: ExitBB);
12942 // {
12943 CGF.EmitBlock(BB: ThenBB);
12944
12945 // last_iv = iv;
12946 CGF.EmitStoreOfScalar(value: IVVal, lvalue: LastIVLVal);
12947
12948 // last_a = priv_a;
12949 switch (CGF.getEvaluationKind(T: LVal.getType())) {
12950 case TEK_Scalar: {
12951 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(lvalue: LVal, Loc);
12952 CGF.EmitStoreOfScalar(value: PrivVal, lvalue: LastLVal);
12953 break;
12954 }
12955 case TEK_Complex: {
12956 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(src: LVal, loc: Loc);
12957 CGF.EmitStoreOfComplex(V: PrivVal, dest: LastLVal, /*isInit=*/false);
12958 break;
12959 }
12960 case TEK_Aggregate:
12961 llvm_unreachable(
12962 "Aggregates are not supported in lastprivate conditional.");
12963 }
12964 // }
12965 CGF.EmitBranch(Block: ExitBB);
12966 // There is no need to emit line number for unconditional branch.
12967 (void)ApplyDebugLocation::CreateEmpty(CGF);
12968 CGF.EmitBlock(BB: ExitBB, /*IsFinished=*/true);
12969 };
12970
12971 if (CGM.getLangOpts().OpenMPSimd) {
12972 // Do not emit as a critical region as no parallel region could be emitted.
12973 RegionCodeGenTy ThenRCG(CodeGen);
12974 ThenRCG(CGF);
12975 } else {
12976 emitCriticalRegion(CGF, CriticalName: UniqueDeclName, CriticalOpGen: CodeGen, Loc);
12977 }
12978}
12979
12980void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12981 const Expr *LHS) {
12982 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12983 return;
12984 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12985 if (!Checker.Visit(S: LHS))
12986 return;
12987 const Expr *FoundE;
12988 const Decl *FoundD;
12989 StringRef UniqueDeclName;
12990 LValue IVLVal;
12991 llvm::Function *FoundFn;
12992 std::tie(args&: FoundE, args&: FoundD, args&: UniqueDeclName, args&: IVLVal, args&: FoundFn) =
12993 Checker.getFoundData();
12994 if (FoundFn != CGF.CurFn) {
12995 // Special codegen for inner parallel regions.
12996 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12997 auto It = LastprivateConditionalToTypes[FoundFn].find(Val: FoundD);
12998 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12999 "Lastprivate conditional is not found in outer region.");
13000 QualType StructTy = std::get<0>(t&: It->getSecond());
13001 const FieldDecl* FiredDecl = std::get<2>(t&: It->getSecond());
13002 LValue PrivLVal = CGF.EmitLValue(E: FoundE);
13003 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
13004 Addr: PrivLVal.getAddress(),
13005 Ty: CGF.ConvertTypeForMem(T: CGF.getContext().getPointerType(T: StructTy)),
13006 ElementTy: CGF.ConvertTypeForMem(T: StructTy));
13007 LValue BaseLVal =
13008 CGF.MakeAddrLValue(Addr: StructAddr, T: StructTy, Source: AlignmentSource::Decl);
13009 LValue FiredLVal = CGF.EmitLValueForField(Base: BaseLVal, Field: FiredDecl);
13010 CGF.EmitAtomicStore(rvalue: RValue::get(V: llvm::ConstantInt::get(
13011 Ty: CGF.ConvertTypeForMem(T: FiredDecl->getType()), V: 1)),
13012 lvalue: FiredLVal, AO: llvm::AtomicOrdering::Unordered,
13013 /*IsVolatile=*/true, /*isInit=*/false);
13014 return;
13015 }
13016
13017 // Private address of the lastprivate conditional in the current context.
13018 // priv_a
13019 LValue LVal = CGF.EmitLValue(E: FoundE);
13020 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
13021 Loc: FoundE->getExprLoc());
13022}
13023
13024void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
13025 CodeGenFunction &CGF, const OMPExecutableDirective &D,
13026 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
13027 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
13028 return;
13029 auto Range = llvm::reverse(C&: LastprivateConditionalStack);
13030 auto It = llvm::find_if(
13031 Range, P: [](const LastprivateConditionalData &D) { return !D.Disabled; });
13032 if (It == Range.end() || It->Fn != CGF.CurFn)
13033 return;
13034 auto LPCI = LastprivateConditionalToTypes.find(Val: It->Fn);
13035 assert(LPCI != LastprivateConditionalToTypes.end() &&
13036 "Lastprivates must be registered already.");
13037 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
13038 getOpenMPCaptureRegions(CaptureRegions, DKind: D.getDirectiveKind());
13039 const CapturedStmt *CS = D.getCapturedStmt(RegionKind: CaptureRegions.back());
13040 for (const auto &Pair : It->DeclToUniqueName) {
13041 const auto *VD = cast<VarDecl>(Val: Pair.first->getCanonicalDecl());
13042 if (!CS->capturesVariable(Var: VD) || IgnoredDecls.contains(V: VD))
13043 continue;
13044 auto I = LPCI->getSecond().find(Val: Pair.first);
13045 assert(I != LPCI->getSecond().end() &&
13046 "Lastprivate must be rehistered already.");
13047 // bool Cmp = priv_a.Fired != 0;
13048 LValue BaseLVal = std::get<3>(t&: I->getSecond());
13049 LValue FiredLVal =
13050 CGF.EmitLValueForField(Base: BaseLVal, Field: std::get<2>(t&: I->getSecond()));
13051 llvm::Value *Res = CGF.EmitLoadOfScalar(lvalue: FiredLVal, Loc: D.getBeginLoc());
13052 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Arg: Res);
13053 llvm::BasicBlock *ThenBB = CGF.createBasicBlock(name: "lpc.then");
13054 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(name: "lpc.done");
13055 // if (Cmp) {
13056 CGF.Builder.CreateCondBr(Cond: Cmp, True: ThenBB, False: DoneBB);
13057 CGF.EmitBlock(BB: ThenBB);
13058 Address Addr = CGF.GetAddrOfLocalVar(VD);
13059 LValue LVal;
13060 if (VD->getType()->isReferenceType())
13061 LVal = CGF.EmitLoadOfReferenceLValue(RefAddr: Addr, RefTy: VD->getType(),
13062 Source: AlignmentSource::Decl);
13063 else
13064 LVal = CGF.MakeAddrLValue(Addr, T: VD->getType().getNonReferenceType(),
13065 Source: AlignmentSource::Decl);
13066 emitLastprivateConditionalUpdate(CGF, IVLVal: It->IVLVal, UniqueDeclName: Pair.second, LVal,
13067 Loc: D.getBeginLoc());
13068 auto AL = ApplyDebugLocation::CreateArtificial(CGF);
13069 CGF.EmitBlock(BB: DoneBB, /*IsFinal=*/IsFinished: true);
13070 // }
13071 }
13072}
13073
13074void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
13075 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
13076 SourceLocation Loc) {
13077 if (CGF.getLangOpts().OpenMP < 50)
13078 return;
13079 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(Key: VD);
13080 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
13081 "Unknown lastprivate conditional variable.");
13082 StringRef UniqueName = It->second;
13083 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(Name: UniqueName);
13084 // The variable was not updated in the region - exit.
13085 if (!GV)
13086 return;
13087 LValue LPLVal = CGF.MakeRawAddrLValue(
13088 V: GV, T: PrivLVal.getType().getNonReferenceType(), Alignment: PrivLVal.getAlignment());
13089 llvm::Value *Res = CGF.EmitLoadOfScalar(lvalue: LPLVal, Loc);
13090 CGF.EmitStoreOfScalar(value: Res, lvalue: PrivLVal);
13091}
13092
13093llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
13094 CodeGenFunction &CGF, const OMPExecutableDirective &D,
13095 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
13096 const RegionCodeGenTy &CodeGen) {
13097 llvm_unreachable("Not supported in SIMD-only mode");
13098}
13099
13100llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
13101 CodeGenFunction &CGF, const OMPExecutableDirective &D,
13102 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
13103 const RegionCodeGenTy &CodeGen) {
13104 llvm_unreachable("Not supported in SIMD-only mode");
13105}
13106
13107llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
13108 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
13109 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
13110 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
13111 bool Tied, unsigned &NumberOfParts) {
13112 llvm_unreachable("Not supported in SIMD-only mode");
13113}
13114
13115void CGOpenMPSIMDRuntime::emitParallelCall(
13116 CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
13117 ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
13118 llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
13119 OpenMPSeverityClauseKind Severity, const Expr *Message) {
13120 llvm_unreachable("Not supported in SIMD-only mode");
13121}
13122
13123void CGOpenMPSIMDRuntime::emitCriticalRegion(
13124 CodeGenFunction &CGF, StringRef CriticalName,
13125 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
13126 const Expr *Hint) {
13127 llvm_unreachable("Not supported in SIMD-only mode");
13128}
13129
13130void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
13131 const RegionCodeGenTy &MasterOpGen,
13132 SourceLocation Loc) {
13133 llvm_unreachable("Not supported in SIMD-only mode");
13134}
13135
13136void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
13137 const RegionCodeGenTy &MasterOpGen,
13138 SourceLocation Loc,
13139 const Expr *Filter) {
13140 llvm_unreachable("Not supported in SIMD-only mode");
13141}
13142
13143void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
13144 SourceLocation Loc) {
13145 llvm_unreachable("Not supported in SIMD-only mode");
13146}
13147
13148void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
13149 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
13150 SourceLocation Loc) {
13151 llvm_unreachable("Not supported in SIMD-only mode");
13152}
13153
13154void CGOpenMPSIMDRuntime::emitSingleRegion(
13155 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
13156 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
13157 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
13158 ArrayRef<const Expr *> AssignmentOps) {
13159 llvm_unreachable("Not supported in SIMD-only mode");
13160}
13161
13162void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
13163 const RegionCodeGenTy &OrderedOpGen,
13164 SourceLocation Loc,
13165 bool IsThreads) {
13166 llvm_unreachable("Not supported in SIMD-only mode");
13167}
13168
13169void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
13170 SourceLocation Loc,
13171 OpenMPDirectiveKind Kind,
13172 bool EmitChecks,
13173 bool ForceSimpleCall) {
13174 llvm_unreachable("Not supported in SIMD-only mode");
13175}
13176
13177void CGOpenMPSIMDRuntime::emitForDispatchInit(
13178 CodeGenFunction &CGF, SourceLocation Loc,
13179 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
13180 bool Ordered, const DispatchRTInput &DispatchValues) {
13181 llvm_unreachable("Not supported in SIMD-only mode");
13182}
13183
13184void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
13185 SourceLocation Loc) {
13186 llvm_unreachable("Not supported in SIMD-only mode");
13187}
13188
13189void CGOpenMPSIMDRuntime::emitForStaticInit(
13190 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
13191 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
13192 llvm_unreachable("Not supported in SIMD-only mode");
13193}
13194
13195void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
13196 CodeGenFunction &CGF, SourceLocation Loc,
13197 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
13198 llvm_unreachable("Not supported in SIMD-only mode");
13199}
13200
13201void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
13202 SourceLocation Loc,
13203 unsigned IVSize,
13204 bool IVSigned) {
13205 llvm_unreachable("Not supported in SIMD-only mode");
13206}
13207
13208void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
13209 SourceLocation Loc,
13210 OpenMPDirectiveKind DKind) {
13211 llvm_unreachable("Not supported in SIMD-only mode");
13212}
13213
13214llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
13215 SourceLocation Loc,
13216 unsigned IVSize, bool IVSigned,
13217 Address IL, Address LB,
13218 Address UB, Address ST) {
13219 llvm_unreachable("Not supported in SIMD-only mode");
13220}
13221
13222void CGOpenMPSIMDRuntime::emitNumThreadsClause(
13223 CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
13224 OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity,
13225 SourceLocation SeverityLoc, const Expr *Message,
13226 SourceLocation MessageLoc) {
13227 llvm_unreachable("Not supported in SIMD-only mode");
13228}
13229
13230void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
13231 ProcBindKind ProcBind,
13232 SourceLocation Loc) {
13233 llvm_unreachable("Not supported in SIMD-only mode");
13234}
13235
13236Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
13237 const VarDecl *VD,
13238 Address VDAddr,
13239 SourceLocation Loc) {
13240 llvm_unreachable("Not supported in SIMD-only mode");
13241}
13242
13243llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
13244 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
13245 CodeGenFunction *CGF) {
13246 llvm_unreachable("Not supported in SIMD-only mode");
13247}
13248
13249Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
13250 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
13251 llvm_unreachable("Not supported in SIMD-only mode");
13252}
13253
13254void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
13255 ArrayRef<const Expr *> Vars,
13256 SourceLocation Loc,
13257 llvm::AtomicOrdering AO) {
13258 llvm_unreachable("Not supported in SIMD-only mode");
13259}
13260
13261void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
13262 const OMPExecutableDirective &D,
13263 llvm::Function *TaskFunction,
13264 QualType SharedsTy, Address Shareds,
13265 const Expr *IfCond,
13266 const OMPTaskDataTy &Data) {
13267 llvm_unreachable("Not supported in SIMD-only mode");
13268}
13269
13270void CGOpenMPSIMDRuntime::emitTaskLoopCall(
13271 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
13272 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
13273 const Expr *IfCond, const OMPTaskDataTy &Data) {
13274 llvm_unreachable("Not supported in SIMD-only mode");
13275}
13276
13277void CGOpenMPSIMDRuntime::emitReduction(
13278 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
13279 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
13280 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
13281 assert(Options.SimpleReduction && "Only simple reduction is expected.");
13282 CGOpenMPRuntime::emitReduction(CGF, Loc, OrgPrivates: Privates, OrgLHSExprs: LHSExprs, OrgRHSExprs: RHSExprs,
13283 OrgReductionOps: ReductionOps, Options);
13284}
13285
13286llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
13287 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
13288 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
13289 llvm_unreachable("Not supported in SIMD-only mode");
13290}
13291
13292void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
13293 SourceLocation Loc,
13294 bool IsWorksharingReduction) {
13295 llvm_unreachable("Not supported in SIMD-only mode");
13296}
13297
13298void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
13299 SourceLocation Loc,
13300 ReductionCodeGen &RCG,
13301 unsigned N) {
13302 llvm_unreachable("Not supported in SIMD-only mode");
13303}
13304
13305Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
13306 SourceLocation Loc,
13307 llvm::Value *ReductionsPtr,
13308 LValue SharedLVal) {
13309 llvm_unreachable("Not supported in SIMD-only mode");
13310}
13311
13312void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
13313 SourceLocation Loc,
13314 const OMPTaskDataTy &Data) {
13315 llvm_unreachable("Not supported in SIMD-only mode");
13316}
13317
13318void CGOpenMPSIMDRuntime::emitCancellationPointCall(
13319 CodeGenFunction &CGF, SourceLocation Loc,
13320 OpenMPDirectiveKind CancelRegion) {
13321 llvm_unreachable("Not supported in SIMD-only mode");
13322}
13323
13324void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
13325 SourceLocation Loc, const Expr *IfCond,
13326 OpenMPDirectiveKind CancelRegion) {
13327 llvm_unreachable("Not supported in SIMD-only mode");
13328}
13329
13330void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
13331 const OMPExecutableDirective &D, StringRef ParentName,
13332 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
13333 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
13334 llvm_unreachable("Not supported in SIMD-only mode");
13335}
13336
13337void CGOpenMPSIMDRuntime::emitTargetCall(
13338 CodeGenFunction &CGF, const OMPExecutableDirective &D,
13339 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
13340 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
13341 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
13342 const OMPLoopDirective &D)>
13343 SizeEmitter) {
13344 llvm_unreachable("Not supported in SIMD-only mode");
13345}
13346
13347bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
13348 llvm_unreachable("Not supported in SIMD-only mode");
13349}
13350
13351bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
13352 llvm_unreachable("Not supported in SIMD-only mode");
13353}
13354
13355bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
13356 return false;
13357}
13358
13359void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
13360 const OMPExecutableDirective &D,
13361 SourceLocation Loc,
13362 llvm::Function *OutlinedFn,
13363 ArrayRef<llvm::Value *> CapturedVars) {
13364 llvm_unreachable("Not supported in SIMD-only mode");
13365}
13366
13367void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
13368 const Expr *NumTeams,
13369 const Expr *ThreadLimit,
13370 SourceLocation Loc) {
13371 llvm_unreachable("Not supported in SIMD-only mode");
13372}
13373
13374void CGOpenMPSIMDRuntime::emitTargetDataCalls(
13375 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13376 const Expr *Device, const RegionCodeGenTy &CodeGen,
13377 CGOpenMPRuntime::TargetDataInfo &Info) {
13378 llvm_unreachable("Not supported in SIMD-only mode");
13379}
13380
13381void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
13382 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13383 const Expr *Device) {
13384 llvm_unreachable("Not supported in SIMD-only mode");
13385}
13386
13387void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
13388 const OMPLoopDirective &D,
13389 ArrayRef<Expr *> NumIterations) {
13390 llvm_unreachable("Not supported in SIMD-only mode");
13391}
13392
13393void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
13394 const OMPDependClause *C) {
13395 llvm_unreachable("Not supported in SIMD-only mode");
13396}
13397
13398void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
13399 const OMPDoacrossClause *C) {
13400 llvm_unreachable("Not supported in SIMD-only mode");
13401}
13402
13403const VarDecl *
13404CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
13405 const VarDecl *NativeParam) const {
13406 llvm_unreachable("Not supported in SIMD-only mode");
13407}
13408
13409Address
13410CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
13411 const VarDecl *NativeParam,
13412 const VarDecl *TargetParam) const {
13413 llvm_unreachable("Not supported in SIMD-only mode");
13414}
13415