//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Debug.h"
#include <optional>
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

#define TTL_CODEGEN_TYPE "target-teams-loop-codegen"
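// Debug type string used with LLVM's debug facilities when tracing how
// 'target teams loop' directives are lowered.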

static const VarDecl *getBaseDecl(const Expr *Ref);

namespace {
/// Lexical scope for OpenMP executable constructs that handles correct
/// codegen for captured expressions.
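///
/// For example, given
/// \code
///   #pragma omp parallel num_threads(x + y)
/// \endcode
/// Sema attaches an OMPCapturedExprDecl holding 'x + y' as a clause
/// pre-init; emitPreInitStmt() emits that declaration before the region so
/// the evaluated value is available for the runtime call.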
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const std::optional<OpenMPDirectiveKind> CapturedRegion = std::nullopt,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion)
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
      }
    }
    (void)InlinedShareds.Privatize();
  }
};

/// Lexical scope for the OpenMP parallel construct that handles correct
/// codegen for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
                        EmitPreInitStmt(S)) {}
};

/// Lexical scope for the OpenMP teams construct that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
                        EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives that supports capturing
/// of used expressions from the loop statement.
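///
/// Loop transformation directives (tile, unroll, reverse, interchange) also
/// go through this scope: the helper declarations they introduce are
/// attached as pre-inits and emitted below before the transformed loop, as
/// in, e.g.,
/// \code
///   #pragma omp tile sizes(4)
/// \endcode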
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
    const Stmt *PreInits;
    CodeGenFunction::OMPMapVars PreCondVars;
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
      for (const auto *E : LD->counters()) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        EmittedAsPrivate.insert(VD->getCanonicalDecl());
        (void)PreCondVars.setVarAddr(
            CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
      }
      // Mark private vars as undefs.
      for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
        for (const Expr *IRef : C->varlists()) {
          const auto *OrigVD =
              cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
          if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
            QualType OrigVDTy = OrigVD->getType().getNonReferenceType();
            (void)PreCondVars.setVarAddr(
                CGF, OrigVD,
                Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
                            CGF.getContext().getPointerType(OrigVDTy))),
                        CGF.ConvertTypeForMem(OrigVDTy),
                        CGF.getContext().getDeclAlign(OrigVD)));
          }
        }
      }
      (void)PreCondVars.apply(CGF);
      // Emit init, __range and __end variables for C++ range loops.
      (void)OMPLoopBasedDirective::doForAllLoops(
          LD->getInnermostCapturedStmt()->getCapturedStmt(),
          /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
          [&CGF](unsigned Cnt, const Stmt *CurStmt) {
            if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
              if (const Stmt *Init = CXXFor->getInit())
                CGF.EmitStmt(Init);
              CGF.EmitStmt(CXXFor->getRangeStmt());
              CGF.EmitStmt(CXXFor->getEndStmt());
            }
            return false;
          });
      PreInits = LD->getPreInits();
    } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
      PreInits = Tile->getPreInits();
    } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) {
      PreInits = Unroll->getPreInits();
    } else if (const auto *Reverse = dyn_cast<OMPReverseDirective>(&S)) {
      PreInits = Reverse->getPreInits();
    } else if (const auto *Interchange =
                   dyn_cast<OMPInterchangeDirective>(&S)) {
      PreInits = Interchange->getPreInits();
    } else {
      llvm_unreachable("Unknown loop-based directive kind.");
    }
    if (PreInits) {
      // CompoundStmts and DeclStmts are used as lists of PreInit statements
      // and declarations. Since declarations must be visible in the following
      // statements that they initialize, unpack the CompoundStmt they are
      // nested in.
      SmallVector<const Stmt *> PreInitStmts;
      if (auto *PreInitCompound = dyn_cast<CompoundStmt>(PreInits))
        llvm::append_range(PreInitStmts, PreInitCompound->body());
      else
        PreInitStmts.push_back(PreInits);

      for (const Stmt *S : PreInitStmts) {
        // EmitStmt skips any OMPCapturedExprDecls, but they need to be
        // emitted here.
        if (auto *PreInitDecl = dyn_cast<DeclStmt>(S)) {
          for (Decl *I : PreInitDecl->decls())
            CGF.EmitVarDecl(cast<VarDecl>(*I));
          continue;
        }
        CGF.EmitStmt(S);
      }
    }
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = getBaseDecl(E);
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    // Temp copy arrays for inscan reductions should not be emitted as they
    // are not used in simd-only mode.
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
    for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
      if (C->getModifier() != OMPC_REDUCTION_inscan)
        continue;
      for (const Expr *E : C->copy_array_temps())
        CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
    }
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          if (CopyArrayTemps.contains(VD))
            continue;
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};

} // namespace

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);

LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (isa_and_nonnull<BlockDecl>(CurCodeDecl));
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}

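// Compute the dynamic size in chars of type Ty, folding any VLA dimensions
// with NUW multiplies; e.g., for 'double a[n][m]' this yields
// n * m * sizeof(double).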
llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  ASTContext &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
      VlaSizePair VlaSize = getVLASize(VAT);
      Ty = VlaSize.Type;
      Size =
          Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts) : VlaSize.NumElts;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  }
  return CGM.getSize(SizeInChars);
}

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.emitRawPointer(*this),
            Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(
          EmitLValue(*I).getAddress().emitRawPointer(*this));
    }
  }
}

static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                    QualType DstType, StringRef Name,
                                    LValue AddrLV) {
  ASTContext &Ctx = CGF.getContext();

  llvm::Value *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress().emitRawPointer(CGF), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), Loc);
  // FIXME: should the pointee type (DstType) be passed?
  Address TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, DstType).getAddress();
  return TmpAddr;
}

static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType())
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
    if (const auto *VLA = dyn_cast<VariableArrayType>(A))
      return getCanonicalParamType(C, VLA->getElementType());
    if (!A->isVariablyModifiedType())
      return C.getCanonicalType(T);
  }
  return C.getCanonicalParamType(T);
}

namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  /// Location of the non-debug version of the outlined function.
  SourceLocation Loc;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
                           SourceLocation Loc)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName), Loc(Loc) {}
};
} // namespace

static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, std::nullopt, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), FunctionTy,
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
        /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
        /*hasWrittenPrototype=*/false);
  }
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the
    // outlined function argument type should be uintptr and the value
    // properly casted to uintptr. This is necessary given that the runtime
    // library is only able to deal with pointers. The VLA type sizes are
    // passed to the outlined function in the same way.
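    // E.g., a captured 'int X' is passed as a uintptr_t-typed argument and
    // reloaded in the callee through a pointer casted back to 'int *' (see
    // castValueFromUintptr()).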
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (CapVar && (CapVar->getTLSKind() != clang::VarDecl::TLS_None)) {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType,
                                      ImplicitParamKind::ThreadPrivateVar);
    } else if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamKind::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
              CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Always inline the outlined function if optimizations are enabled.
  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
    F->removeFnAttr(llvm::Attribute::NoInline);
    F->addFnAttr(llvm::Attribute::AlwaysInline);
  }

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
                    FO.UIntPtrCastRequired ? FO.Loc
                                           : CD->getBody()->getBeginLoc());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything,
    // just use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (ArgLVal.getType()->isLValueReferenceType()) {
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
        assert(ArgLVal.getType()->isPointerType());
        ArgAddr = CGF.EmitLoadOfPointer(
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt], {Var, ArgAddr.withAlignment(Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      LocalAddrs.insert({Args[Cnt],
                         {Var, FO.UIntPtrCastRequired
                                   ? castValueFromUintptr(
                                         CGF, I->getLocation(), FD->getType(),
                                         Args[Cnt]->getName(), ArgLVal)
                                   : ArgLVal.getAddress()}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}

llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                    SourceLocation Loc) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str(), Loc);
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      LocalScope.addPrivate(LocalAddrPair.second.first,
                            LocalAddrPair.second.second);
    }
  }
  (void)LocalScope.Privatize();
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  (void)LocalScope.ForceCleanup();
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

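  // The function emitted above ("<helper>_debug__") keeps the original
  // parameter types for the debugger's benefit; now emit a wrapper with the
  // uintptr-based signature expected by the OpenMP runtime that forwards its
  // arguments to the debug version.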
  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName(), Loc);
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  auto *PI = F->arg_begin();
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      if (LV.getType()->isAnyComplexType())
        LV.setAddress(LV.getAddress().withElementType(PI->getType()));
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        CallArg = EI->second.second;
      } else {
        LValue LV =
            WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                      Arg->getType(), AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
    ++PI;
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}

//===----------------------------------------------------------------------===//
// OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.emitRawPointer(*this);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(*this);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = Builder.CreateInBoundsGEP(DestAddr.getElementType(),
                                                   DestBegin, NumElements);

  // The basic structure here is a while-do loop.
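  // Roughly:
  //   if (dest.begin == dest.end) goto done;
  //   body: copy one element, advance src and dest;
  //         if (dest != dest.end) goto body;
  //   done: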
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI, SrcAddr.getElementType(),
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext =
      Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext =
      Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    const auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
      EmitAggregateAssign(Dest, Src, OriginalType);
    } else {
      // For arrays with complex element types perform element-by-element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, DestElement);
            Remap.addPrivate(SrcVD, SrcElement);
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, SrcAddr);
    Remap.addPrivate(DestVD, DestAddr);
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool DeviceConstTarget =
      getLangOpts().OpenMPIsTargetDevice &&
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  bool FirstprivateIsLastprivate = false;
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
          C->getKind());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit
  // an outlined function, e.g., omp for, omp simd, omp distribute.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
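  // Such directives report a single OMPD_unknown capture region, so their
  // firstprivate copies are materialized right here rather than in an
  // outlined function.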
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    const auto *IRef = C->varlist_begin();
    const auto *InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit a copy for firstprivate constant variables in target
      // regions that are captured by reference.
      if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
          ConstantEmission CE = tryEmitAsConstant(&DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(&DRE);
          }
        } else {
          OriginalLVal = EmitLValue(&DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in the current
          // captured region.
          AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
          const Expr *Init = VD->getInit();
          if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            LValue Dest = MakeAddrLValue(Emission.getAllocatedAddress(), Type);
            EmitAggregateAssign(Dest, OriginalLVal, Type);
          } else {
            EmitOMPAggregateAssign(
                Emission.getAllocatedAddress(), OriginalLVal.getAddress(),
                Type,
                [this, VDInit, Init](Address DestElement, Address SrcElement) {
                  // Clean up any temporaries needed by the
                  // initialization.
                  RunCleanupsScope InitScope(*this);
                  // Emit initialization for single element.
                  setAddrOfLocalVar(VDInit, SrcElement);
                  EmitAnyExprToMem(Init, DestElement,
                                   Init->getType().getQualifiers(),
                                   /*IsInitializer*/ false);
                  LocalDeclMap.erase(VDInit);
                });
          }
          EmitAutoVarCleanups(Emission);
          IsRegistered =
              PrivateScope.addPrivate(OrigVD, Emission.getAllocatedAddress());
        } else {
          Address OriginalAddr = OriginalLVal.getAddress();
          // Emit private VarDecl with copy init.
          // Remap temp VDInit variable to the address of the original
          // variable (for proper handling of captured global variables).
          setAddrOfLocalVar(VDInit, OriginalAddr);
          EmitDecl(*VD);
          LocalDeclMap.erase(VDInit);
          Address VDAddr = GetAddrOfLocalVar(VD);
          if (ThisFirstprivateIsLastprivate &&
              Lastprivates[OrigVD->getCanonicalDecl()] ==
                  OMPC_LASTPRIVATE_conditional) {
            // Create/init special variable for lastprivate conditionals.
            llvm::Value *V =
                EmitLoadOfScalar(MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                AlignmentSource::Decl),
                                 (*IRef)->getExprLoc());
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                *this, OrigVD);
            EmitStoreOfScalar(V, MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                AlignmentSource::Decl));
            LocalDeclMap.erase(VD);
            setAddrOfLocalVar(VD, VDAddr);
          }
          IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

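// For example, given
//   #pragma omp parallel private(a)
// this emits a fresh automatic 'a' (default-initialized according to its
// type) and remaps references to the original 'a' inside the region to that
// private copy via the given OMPPrivateScope.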
void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        EmitDecl(*VD);
        // Emit private VarDecl with copy init.
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, GetAddrOfLocalVar(VD));
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code
        // with TLS support, the address is passed from the master as a field
        // in the captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      CGM.getTypes().ConvertTypeForMem(VD->getType()),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread; if
          // it is, there is no need to copy data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          // TODO: Avoid ptrtoint conversion.
          auto *MasterAddrInt = Builder.CreatePtrToInt(
              MasterAddr.emitRawPointer(*this), CGM.IntPtrTy);
          auto *PrivateAddrInt = Builder.CreatePtrToInt(
              PrivateAddr.emitRawPointer(*this), CGM.IntPtrTy);
          Builder.CreateCondBr(
              Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
              CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

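// For example, given
//   #pragma omp for lastprivate(x)
// the following sets up a private 'x' for the region;
// EmitOMPLastprivateClauseFinal() later copies the value from the
// sequentially last iteration back into the original 'x'.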
bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    const auto *IRef = C->varlist_begin();
    const auto *IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the
      // end of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization; it is done in
      // the runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        PrivateScope.addPrivate(DestVD, EmitLValue(&DRE).getAddress());
        // Check if the variable is also a firstprivate: in this case IInit
        // is not generated. Initialization of this variable will happen in
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          Address VDAddr = Address::invalid();
          if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                *this, OrigVD);
            setAddrOfLocalVar(VD, VDAddr);
          } else {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            VDAddr = GetAddrOfLocalVar(VD);
          }
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit the following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    // Emit an implicit barrier if at least one lastprivate conditional is
    // found and this is not simd mode.
    if (!getLangOpts().OpenMPSimd &&
        llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
                     [](const OMPLastprivateClause *C) {
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
                     })) {
      CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
                                             OMPD_unknown,
                                             /*EmitChecks=*/false,
                                             /*ForceSimpleCall=*/true);
    }
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable of a
        // loop-based directive, update its value before copying back to the
        // original variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr = Address(
              Builder.CreateLoad(PrivateAddr),
              CGM.getTypes().ConvertTypeForMem(RefTy->getPointeeType()),
              CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
        // Store the last value to the private copy in the last iteration.
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
              *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()),
              PrivateVD, (*IRef)->getExprLoc());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

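// For example, given
//   #pragma omp parallel for reduction(+ : sum)
// this emits a private 'sum' initialized with the reduction identity (0 for
// '+'); combining the private copies back into the original variable is
// handled by EmitOMPReductionClauseFinal().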
1219void CodeGenFunction::EmitOMPReductionClauseInit(
1220 const OMPExecutableDirective &D,
1221 CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
1222 if (!HaveInsertPoint())
1223 return;
1224 SmallVector<const Expr *, 4> Shareds;
1225 SmallVector<const Expr *, 4> Privates;
1226 SmallVector<const Expr *, 4> ReductionOps;
1227 SmallVector<const Expr *, 4> LHSs;
1228 SmallVector<const Expr *, 4> RHSs;
1229 OMPTaskDataTy Data;
1230 SmallVector<const Expr *, 4> TaskLHSs;
1231 SmallVector<const Expr *, 4> TaskRHSs;
1232 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1233 if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
1234 continue;
1235 Shareds.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
1236 Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
1237 ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
1238 LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
1239 RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
1240 if (C->getModifier() == OMPC_REDUCTION_task) {
1241 Data.ReductionVars.append(in_start: C->privates().begin(), in_end: C->privates().end());
1242 Data.ReductionOrigs.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
1243 Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end());
1244 Data.ReductionOps.append(in_start: C->reduction_ops().begin(),
1245 in_end: C->reduction_ops().end());
1246 TaskLHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
1247 TaskRHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
1248 }
1249 }
1250 ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
1251 unsigned Count = 0;
1252 auto *ILHS = LHSs.begin();
1253 auto *IRHS = RHSs.begin();
1254 auto *IPriv = Privates.begin();
1255 for (const Expr *IRef : Shareds) {
1256 const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IPriv)->getDecl());
1257 // Emit private VarDecl with reduction init.
1258 RedCG.emitSharedOrigLValue(CGF&: *this, N: Count);
1259 RedCG.emitAggregateType(CGF&: *this, N: Count);
1260 AutoVarEmission Emission = EmitAutoVarAlloca(var: *PrivateVD);
1261 RedCG.emitInitialization(CGF&: *this, N: Count, PrivateAddr: Emission.getAllocatedAddress(),
1262 SharedAddr: RedCG.getSharedLValue(N: Count).getAddress(),
1263 DefaultInit: [&Emission](CodeGenFunction &CGF) {
1264 CGF.EmitAutoVarInit(emission: Emission);
1265 return true;
1266 });
1267 EmitAutoVarCleanups(emission: Emission);
1268 Address BaseAddr = RedCG.adjustPrivateAddress(
1269 CGF&: *this, N: Count, PrivateAddr: Emission.getAllocatedAddress());
1270 bool IsRegistered =
1271 PrivateScope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Count), Addr: BaseAddr);
1272 assert(IsRegistered && "private var already registered as private");
1273 // Silence the warning about unused variable.
1274 (void)IsRegistered;
1275
1276 const auto *LHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl());
1277 const auto *RHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl());
1278 QualType Type = PrivateVD->getType();
1279 bool isaOMPArraySectionExpr = isa<ArraySectionExpr>(Val: IRef);
1280 if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
1281 // Store the address of the original variable associated with the LHS
1282 // implicit variable.
1283 PrivateScope.addPrivate(LocalVD: LHSVD, Addr: RedCG.getSharedLValue(N: Count).getAddress());
1284 PrivateScope.addPrivate(LocalVD: RHSVD, Addr: GetAddrOfLocalVar(VD: PrivateVD));
1285 } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
1286 isa<ArraySubscriptExpr>(Val: IRef)) {
1287 // Store the address of the original variable associated with the LHS
1288 // implicit variable.
1289 PrivateScope.addPrivate(LocalVD: LHSVD, Addr: RedCG.getSharedLValue(N: Count).getAddress());
1290 PrivateScope.addPrivate(LocalVD: RHSVD,
1291 Addr: GetAddrOfLocalVar(VD: PrivateVD).withElementType(
1292 ElemTy: ConvertTypeForMem(T: RHSVD->getType())));
1293 } else {
1294 QualType Type = PrivateVD->getType();
1295 bool IsArray = getContext().getAsArrayType(T: Type) != nullptr;
1296 Address OriginalAddr = RedCG.getSharedLValue(N: Count).getAddress();
1297 // Store the address of the original variable associated with the LHS
1298 // implicit variable.
1299 if (IsArray) {
1300 OriginalAddr =
1301 OriginalAddr.withElementType(ElemTy: ConvertTypeForMem(T: LHSVD->getType()));
1302 }
1303 PrivateScope.addPrivate(LocalVD: LHSVD, Addr: OriginalAddr);
1304 PrivateScope.addPrivate(
1305 LocalVD: RHSVD, Addr: IsArray ? GetAddrOfLocalVar(VD: PrivateVD).withElementType(
1306 ElemTy: ConvertTypeForMem(T: RHSVD->getType()))
1307 : GetAddrOfLocalVar(VD: PrivateVD));
1308 }
1309 ++ILHS;
1310 ++IRHS;
1311 ++IPriv;
1312 ++Count;
1313 }
1314 if (!Data.ReductionVars.empty()) {
1315 Data.IsReductionWithTaskMod = true;
1316 Data.IsWorksharingReduction =
1317 isOpenMPWorksharingDirective(DKind: D.getDirectiveKind());
1318 llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
1319 CGF&: *this, Loc: D.getBeginLoc(), LHSExprs: TaskLHSs, RHSExprs: TaskRHSs, Data);
1320 const Expr *TaskRedRef = nullptr;
1321 switch (D.getDirectiveKind()) {
1322 case OMPD_parallel:
1323 TaskRedRef = cast<OMPParallelDirective>(Val: D).getTaskReductionRefExpr();
1324 break;
1325 case OMPD_for:
1326 TaskRedRef = cast<OMPForDirective>(Val: D).getTaskReductionRefExpr();
1327 break;
1328 case OMPD_sections:
1329 TaskRedRef = cast<OMPSectionsDirective>(Val: D).getTaskReductionRefExpr();
1330 break;
1331 case OMPD_parallel_for:
1332 TaskRedRef = cast<OMPParallelForDirective>(Val: D).getTaskReductionRefExpr();
1333 break;
1334 case OMPD_parallel_master:
1335 TaskRedRef =
1336 cast<OMPParallelMasterDirective>(Val: D).getTaskReductionRefExpr();
1337 break;
1338 case OMPD_parallel_sections:
1339 TaskRedRef =
1340 cast<OMPParallelSectionsDirective>(Val: D).getTaskReductionRefExpr();
1341 break;
1342 case OMPD_target_parallel:
1343 TaskRedRef =
1344 cast<OMPTargetParallelDirective>(Val: D).getTaskReductionRefExpr();
1345 break;
1346 case OMPD_target_parallel_for:
1347 TaskRedRef =
1348 cast<OMPTargetParallelForDirective>(Val: D).getTaskReductionRefExpr();
1349 break;
1350 case OMPD_distribute_parallel_for:
1351 TaskRedRef =
1352 cast<OMPDistributeParallelForDirective>(Val: D).getTaskReductionRefExpr();
1353 break;
1354 case OMPD_teams_distribute_parallel_for:
1355 TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(Val: D)
1356 .getTaskReductionRefExpr();
1357 break;
1358 case OMPD_target_teams_distribute_parallel_for:
1359 TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(Val: D)
1360 .getTaskReductionRefExpr();
1361 break;
1362 case OMPD_simd:
1363 case OMPD_for_simd:
1364 case OMPD_section:
1365 case OMPD_single:
1366 case OMPD_master:
1367 case OMPD_critical:
1368 case OMPD_parallel_for_simd:
1369 case OMPD_task:
1370 case OMPD_taskyield:
1371 case OMPD_error:
1372 case OMPD_barrier:
1373 case OMPD_taskwait:
1374 case OMPD_taskgroup:
1375 case OMPD_flush:
1376 case OMPD_depobj:
1377 case OMPD_scan:
1378 case OMPD_ordered:
1379 case OMPD_atomic:
1380 case OMPD_teams:
1381 case OMPD_target:
1382 case OMPD_cancellation_point:
1383 case OMPD_cancel:
1384 case OMPD_target_data:
1385 case OMPD_target_enter_data:
1386 case OMPD_target_exit_data:
1387 case OMPD_taskloop:
1388 case OMPD_taskloop_simd:
1389 case OMPD_master_taskloop:
1390 case OMPD_master_taskloop_simd:
1391 case OMPD_parallel_master_taskloop:
1392 case OMPD_parallel_master_taskloop_simd:
1393 case OMPD_distribute:
1394 case OMPD_target_update:
1395 case OMPD_distribute_parallel_for_simd:
1396 case OMPD_distribute_simd:
1397 case OMPD_target_parallel_for_simd:
1398 case OMPD_target_simd:
1399 case OMPD_teams_distribute:
1400 case OMPD_teams_distribute_simd:
1401 case OMPD_teams_distribute_parallel_for_simd:
1402 case OMPD_target_teams:
1403 case OMPD_target_teams_distribute:
1404 case OMPD_target_teams_distribute_parallel_for_simd:
1405 case OMPD_target_teams_distribute_simd:
1406 case OMPD_declare_target:
1407 case OMPD_end_declare_target:
1408 case OMPD_threadprivate:
1409 case OMPD_allocate:
1410 case OMPD_declare_reduction:
1411 case OMPD_declare_mapper:
1412 case OMPD_declare_simd:
1413 case OMPD_requires:
1414 case OMPD_declare_variant:
1415 case OMPD_begin_declare_variant:
1416 case OMPD_end_declare_variant:
1417 case OMPD_unknown:
1418 default:
1419 llvm_unreachable("Unexpected directive with task reductions.");
1420 }
1421
1422 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: TaskRedRef)->getDecl());
1423 EmitVarDecl(D: *VD);
1424 EmitStoreOfScalar(Value: ReductionDesc, Addr: GetAddrOfLocalVar(VD),
1425 /*Volatile=*/false, Ty: TaskRedRef->getType());
1426 }
1427}
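
// Illustrative example (hypothetical user code, not part of this file): a
// task-modified reduction initialized above:
//
//   #pragma omp parallel for reduction(task, + : Sum)
//   for (int I = 0; I < N; ++I)
//     Sum += Compute(I);
//
// The descriptor returned by emitTaskReductionInit is stored into the
// directive's task-reduction reference variable for use by nested tasks.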
1428
1429void CodeGenFunction::EmitOMPReductionClauseFinal(
1430 const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1431 if (!HaveInsertPoint())
1432 return;
1433 llvm::SmallVector<const Expr *, 8> Privates;
1434 llvm::SmallVector<const Expr *, 8> LHSExprs;
1435 llvm::SmallVector<const Expr *, 8> RHSExprs;
1436 llvm::SmallVector<const Expr *, 8> ReductionOps;
1437 bool HasAtLeastOneReduction = false;
1438 bool IsReductionWithTaskMod = false;
1439 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1440 // Do not emit for inscan reductions.
1441 if (C->getModifier() == OMPC_REDUCTION_inscan)
1442 continue;
1443 HasAtLeastOneReduction = true;
1444 Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
1445 LHSExprs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
1446 RHSExprs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
1447 ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
1448 IsReductionWithTaskMod =
1449 IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
1450 }
1451 if (HasAtLeastOneReduction) {
1452 if (IsReductionWithTaskMod) {
1453 CGM.getOpenMPRuntime().emitTaskReductionFini(
1454 CGF&: *this, Loc: D.getBeginLoc(),
1455 IsWorksharingReduction: isOpenMPWorksharingDirective(DKind: D.getDirectiveKind()));
1456 }
1457 bool TeamsLoopCanBeParallel = false;
1458 if (auto *TTLD = dyn_cast<OMPTargetTeamsGenericLoopDirective>(Val: &D))
1459 TeamsLoopCanBeParallel = TTLD->canBeParallelFor();
1460 bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1461 isOpenMPParallelDirective(DKind: D.getDirectiveKind()) ||
1462 TeamsLoopCanBeParallel || ReductionKind == OMPD_simd;
1463 bool SimpleReduction = ReductionKind == OMPD_simd;
1464 // Emit a nowait reduction if the nowait clause is present or the
1465 // directive is a parallel directive (it always has an implicit barrier).
1466 CGM.getOpenMPRuntime().emitReduction(
1467 CGF&: *this, Loc: D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
1468 Options: {.WithNowait: WithNowait, .SimpleReduction: SimpleReduction, .ReductionKind: ReductionKind});
1469 }
1470}
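
// Illustrative example (hypothetical user code): a construct whose reduction
// this function finalizes:
//
//   int Sum = 0;
//   #pragma omp parallel for reduction(+ : Sum)
//   for (int I = 0; I < N; ++I)
//     Sum += A[I];
//
// Each thread accumulates into a private copy of Sum; the emitReduction call
// above combines the partial results into the original variable.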
1471
1472static void emitPostUpdateForReductionClause(
1473 CodeGenFunction &CGF, const OMPExecutableDirective &D,
1474 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1475 if (!CGF.HaveInsertPoint())
1476 return;
1477 llvm::BasicBlock *DoneBB = nullptr;
1478 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1479 if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
1480 if (!DoneBB) {
1481 if (llvm::Value *Cond = CondGen(CGF)) {
1482 // When the first post-update expression is found, emit the conditional
1483 // block if one was requested.
1484 llvm::BasicBlock *ThenBB = CGF.createBasicBlock(name: ".omp.reduction.pu");
1485 DoneBB = CGF.createBasicBlock(name: ".omp.reduction.pu.done");
1486 CGF.Builder.CreateCondBr(Cond, True: ThenBB, False: DoneBB);
1487 CGF.EmitBlock(BB: ThenBB);
1488 }
1489 }
1490 CGF.EmitIgnoredExpr(E: PostUpdate);
1491 }
1492 }
1493 if (DoneBB)
1494 CGF.EmitBlock(BB: DoneBB, /*IsFinished=*/true);
1495}
1496
1497namespace {
1498/// Codegen lambda for appending the distribute lower and upper bounds to the
1499/// outlined parallel function. This is necessary for combined constructs such
1500/// as 'distribute parallel for'.
1501typedef llvm::function_ref<void(CodeGenFunction &,
1502 const OMPExecutableDirective &,
1503 llvm::SmallVectorImpl<llvm::Value *> &)>
1504 CodeGenBoundParametersTy;
1505} // anonymous namespace
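
// Illustrative example (hypothetical user code): a combined construct that
// needs such bound forwarding:
//
//   #pragma omp distribute parallel for
//   for (int I = 0; I < N; ++I)
//     Body(I);
//
// Each 'distribute' chunk's lower and upper bounds are appended to the
// outlined parallel function's arguments so that the inner 'for' schedules
// its iterations within that chunk.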
1506
1507static void
1508checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
1509 const OMPExecutableDirective &S) {
1510 if (CGF.getLangOpts().OpenMP < 50)
1511 return;
1512 llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
1513 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
1514 for (const Expr *Ref : C->varlists()) {
1515 if (!Ref->getType()->isScalarType())
1516 continue;
1517 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
1518 if (!DRE)
1519 continue;
1520 PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl()));
1521 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: Ref);
1522 }
1523 }
1524 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
1525 for (const Expr *Ref : C->varlists()) {
1526 if (!Ref->getType()->isScalarType())
1527 continue;
1528 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
1529 if (!DRE)
1530 continue;
1531 PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl()));
1532 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: Ref);
1533 }
1534 }
1535 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
1536 for (const Expr *Ref : C->varlists()) {
1537 if (!Ref->getType()->isScalarType())
1538 continue;
1539 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
1540 if (!DRE)
1541 continue;
1542 PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl()));
1543 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: Ref);
1544 }
1545 }
1546 // Privates should not be analyzed since they are not captured at all.
1547 // Task reductions may be skipped - tasks are ignored.
1548 // Firstprivates do not return a value but may be passed by reference - no
1549 // need to check for an updated lastprivate conditional.
1550 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
1551 for (const Expr *Ref : C->varlists()) {
1552 if (!Ref->getType()->isScalarType())
1553 continue;
1554 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
1555 if (!DRE)
1556 continue;
1557 PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl()));
1558 }
1559 }
1560 CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
1561 CGF, D: S, IgnoredDecls: PrivateDecls);
1562}
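
// Illustrative example (hypothetical user code): a construct requiring the
// analysis above:
//
//   #pragma omp parallel for lastprivate(conditional : X)
//   for (int I = 0; I < N; ++I)
//     if (A[I] > 0)
//       X = A[I];
//
// Stores to X must be tracked so that the value from the logically last
// conditional update is copied back to the original variable.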
1563
1564static void emitCommonOMPParallelDirective(
1565 CodeGenFunction &CGF, const OMPExecutableDirective &S,
1566 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1567 const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1568 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_parallel);
1569 llvm::Value *NumThreads = nullptr;
1570 llvm::Function *OutlinedFn =
1571 CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1572 CGF, D: S, ThreadIDVar: *CS->getCapturedDecl()->param_begin(), InnermostKind,
1573 CodeGen);
1574 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1575 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1576 NumThreads = CGF.EmitScalarExpr(E: NumThreadsClause->getNumThreads(),
1577 /*IgnoreResultAssign=*/true);
1578 CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1579 CGF, NumThreads, Loc: NumThreadsClause->getBeginLoc());
1580 }
1581 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1582 CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1583 CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1584 CGF, ProcBind: ProcBindClause->getProcBindKind(), Loc: ProcBindClause->getBeginLoc());
1585 }
1586 const Expr *IfCond = nullptr;
1587 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1588 if (C->getNameModifier() == OMPD_unknown ||
1589 C->getNameModifier() == OMPD_parallel) {
1590 IfCond = C->getCondition();
1591 break;
1592 }
1593 }
1594
1595 OMPParallelScope Scope(CGF, S);
1596 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1597 // Combining 'distribute' with 'for' requires sharing each 'distribute'
1598 // chunk's lower and upper bounds with the 'for' chunking mechanism.
1599 // The following lambda takes care of appending the lower and upper bound
1600 // parameters when necessary.
1601 CodeGenBoundParameters(CGF, S, CapturedVars);
1602 CGF.GenerateOpenMPCapturedVars(S: *CS, CapturedVars);
1603 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, Loc: S.getBeginLoc(), OutlinedFn,
1604 CapturedVars, IfCond, NumThreads);
1605}
1606
1607static bool isAllocatableDecl(const VarDecl *VD) {
1608 const VarDecl *CVD = VD->getCanonicalDecl();
1609 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
1610 return false;
1611 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1612 // With the default (or null) allocator and no allocator expression, the default allocation is used.
1613 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
1614 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
1615 !AA->getAllocator());
1616}
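
// Illustrative example (hypothetical user code): a declaration this predicate
// accepts, because it names a non-default allocator:
//
//   int Buf[64];
//   #pragma omp allocate(Buf) allocator(omp_large_cap_mem_alloc)
//
// Declarations using the (implicit) default allocator keep the regular
// alloca-based emission path.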
1617
1618static void emitEmptyBoundParameters(CodeGenFunction &,
1619 const OMPExecutableDirective &,
1620 llvm::SmallVectorImpl<llvm::Value *> &) {}
1621
1622static void emitOMPCopyinClause(CodeGenFunction &CGF,
1623 const OMPExecutableDirective &S) {
1624 bool Copyins = CGF.EmitOMPCopyinClause(D: S);
1625 if (Copyins) {
1626 // Emit an implicit barrier to synchronize threads and avoid data races when
1627 // propagating the master thread's values of threadprivate variables to the
1628 // local instances of those variables in all other implicit threads.
1629 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1630 CGF, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false,
1631 /*ForceSimpleCall=*/true);
1632 }
1633}
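
// Illustrative example (hypothetical user code): the copyin propagation that
// the barrier above orders:
//
//   int TP;
//   #pragma omp threadprivate(TP)
//   ...
//   #pragma omp parallel copyin(TP)
//   Work(TP);
//
// The master thread's value of TP is copied into every other thread's
// instance before the region body runs.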
1634
1635Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
1636 CodeGenFunction &CGF, const VarDecl *VD) {
1637 CodeGenModule &CGM = CGF.CGM;
1638 auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1639
1640 if (!VD)
1641 return Address::invalid();
1642 const VarDecl *CVD = VD->getCanonicalDecl();
1643 if (!isAllocatableDecl(VD: CVD))
1644 return Address::invalid();
1645 llvm::Value *Size;
1646 CharUnits Align = CGM.getContext().getDeclAlign(D: CVD);
1647 if (CVD->getType()->isVariablyModifiedType()) {
1648 Size = CGF.getTypeSize(Ty: CVD->getType());
1649 // Align the size: ((size + align - 1) / align) * align
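// For illustration: a size of 10 bytes with 8-byte alignment gives
// ((10 + 7) / 8) * 8 = 16.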
1650 Size = CGF.Builder.CreateNUWAdd(
1651 LHS: Size, RHS: CGM.getSize(numChars: Align - CharUnits::fromQuantity(Quantity: 1)));
1652 Size = CGF.Builder.CreateUDiv(LHS: Size, RHS: CGM.getSize(numChars: Align));
1653 Size = CGF.Builder.CreateNUWMul(LHS: Size, RHS: CGM.getSize(numChars: Align));
1654 } else {
1655 CharUnits Sz = CGM.getContext().getTypeSizeInChars(T: CVD->getType());
1656 Size = CGM.getSize(numChars: Sz.alignTo(Align));
1657 }
1658
1659 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1660 assert(AA->getAllocator() &&
1661 "Expected allocator expression for non-default allocator.");
1662 llvm::Value *Allocator = CGF.EmitScalarExpr(E: AA->getAllocator());
1663 // According to the standard, the original allocator type is an enum
1664 // (integer). Convert it to a pointer type, if required.
1665 if (Allocator->getType()->isIntegerTy())
1666 Allocator = CGF.Builder.CreateIntToPtr(V: Allocator, DestTy: CGM.VoidPtrTy);
1667 else if (Allocator->getType()->isPointerTy())
1668 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(V: Allocator,
1669 DestTy: CGM.VoidPtrTy);
1670
1671 llvm::Value *Addr = OMPBuilder.createOMPAlloc(
1672 Loc: CGF.Builder, Size, Allocator,
1673 Name: getNameWithSeparators(Parts: {CVD->getName(), ".void.addr"}, FirstSeparator: ".", Separator: "."));
1674 llvm::CallInst *FreeCI =
1675 OMPBuilder.createOMPFree(Loc: CGF.Builder, Addr, Allocator);
1676
1677 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(Kind: NormalAndEHCleanup, A: FreeCI);
1678 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1679 V: Addr,
1680 DestTy: CGF.ConvertTypeForMem(T: CGM.getContext().getPointerType(T: CVD->getType())),
1681 Name: getNameWithSeparators(Parts: {CVD->getName(), ".addr"}, FirstSeparator: ".", Separator: "."));
1682 return Address(Addr, CGF.ConvertTypeForMem(T: CVD->getType()), Align);
1683}
1684
1685Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
1686 CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
1687 SourceLocation Loc) {
1688 CodeGenModule &CGM = CGF.CGM;
1689 if (CGM.getLangOpts().OpenMPUseTLS &&
1690 CGM.getContext().getTargetInfo().isTLSSupported())
1691 return VDAddr;
1692
1693 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1694
1695 llvm::Type *VarTy = VDAddr.getElementType();
1696 llvm::Value *Data =
1697 CGF.Builder.CreatePointerCast(V: VDAddr.emitRawPointer(CGF), DestTy: CGM.Int8PtrTy);
1698 llvm::ConstantInt *Size = CGM.getSize(numChars: CGM.GetTargetTypeStoreSize(Ty: VarTy));
1699 std::string Suffix = getNameWithSeparators(Parts: {"cache", ""});
1700 llvm::Twine CacheName = Twine(CGM.getMangledName(GD: VD)).concat(Suffix);
1701
1702 llvm::CallInst *ThreadPrivateCacheCall =
1703 OMPBuilder.createCachedThreadPrivate(Loc: CGF.Builder, Pointer: Data, Size, Name: CacheName);
1704
1705 return Address(ThreadPrivateCacheCall, CGM.Int8Ty, VDAddr.getAlignment());
1706}
1707
1708std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
1709 ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
1710 SmallString<128> Buffer;
1711 llvm::raw_svector_ostream OS(Buffer);
1712 StringRef Sep = FirstSeparator;
1713 for (StringRef Part : Parts) {
1714 OS << Sep << Part;
1715 Sep = Separator;
1716 }
1717 return OS.str().str();
1718}
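
// For illustration: getNameWithSeparators({"gtid", "addr"}, ".", ".") returns
// ".gtid.addr", while FirstSeparator "_" would give "_gtid.addr".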
1719
1720void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
1721 CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
1722 InsertPointTy CodeGenIP, Twine RegionName) {
1723 CGBuilderTy &Builder = CGF.Builder;
1724 Builder.restoreIP(IP: CodeGenIP);
1725 llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
1726 Suffix: "." + RegionName + ".after");
1727
1728 {
1729 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
1730 CGF.EmitStmt(S: RegionBodyStmt);
1731 }
1732
1733 if (Builder.saveIP().isSet())
1734 Builder.CreateBr(Dest: FiniBB);
1735}
1736
1737void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
1738 CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
1739 InsertPointTy CodeGenIP, Twine RegionName) {
1740 CGBuilderTy &Builder = CGF.Builder;
1741 Builder.restoreIP(IP: CodeGenIP);
1742 llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
1743 Suffix: "." + RegionName + ".after");
1744
1745 {
1746 OMPBuilderCBHelpers::OutlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
1747 CGF.EmitStmt(S: RegionBodyStmt);
1748 }
1749
1750 if (Builder.saveIP().isSet())
1751 Builder.CreateBr(Dest: FiniBB);
1752}
1753
1754void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1755 if (CGM.getLangOpts().OpenMPIRBuilder) {
1756 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1757 // Check if we have any if clause associated with the directive.
1758 llvm::Value *IfCond = nullptr;
1759 if (const auto *C = S.getSingleClause<OMPIfClause>())
1760 IfCond = EmitScalarExpr(E: C->getCondition(),
1761 /*IgnoreResultAssign=*/true);
1762
1763 llvm::Value *NumThreads = nullptr;
1764 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
1765 NumThreads = EmitScalarExpr(E: NumThreadsClause->getNumThreads(),
1766 /*IgnoreResultAssign=*/true);
1767
1768 ProcBindKind ProcBind = OMP_PROC_BIND_default;
1769 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
1770 ProcBind = ProcBindClause->getProcBindKind();
1771
1772 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1773
1774 // The cleanup callback that finalizes all variables at the given location;
1775 // it thus calls destructors etc.
1776 auto FiniCB = [this](InsertPointTy IP) {
1777 OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
1778 };
1779
1780 // Privatization callback that performs appropriate action for
1781 // shared/private/firstprivate/lastprivate/copyin/... variables.
1782 //
1783 // TODO: This defaults to shared right now.
1784 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1785 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
1786 // The next line is appropriate only for variables (Val) with the
1787 // data-sharing attribute "shared".
1788 ReplVal = &Val;
1789
1790 return CodeGenIP;
1791 };
1792
1793 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_parallel);
1794 const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();
1795
1796 auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
1797 InsertPointTy CodeGenIP) {
1798 OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
1799 CGF&: *this, RegionBodyStmt: ParallelRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "parallel");
1800 };
1801
1802 CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
1803 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
1804 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
1805 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
1806 Builder.restoreIP(
1807 IP: OMPBuilder.createParallel(Loc: Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
1808 IfCondition: IfCond, NumThreads, ProcBind, IsCancellable: S.hasCancel()));
1809 return;
1810 }
1811
1812 // Emit parallel region as a standalone region.
1813 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
1814 Action.Enter(CGF);
1815 OMPPrivateScope PrivateScope(CGF);
1816 emitOMPCopyinClause(CGF, S);
1817 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
1818 CGF.EmitOMPPrivateClause(D: S, PrivateScope);
1819 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
1820 (void)PrivateScope.Privatize();
1821 CGF.EmitStmt(S: S.getCapturedStmt(RegionKind: OMPD_parallel)->getCapturedStmt());
1822 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
1823 };
1824 {
1825 auto LPCRegion =
1826 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
1827 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_parallel, CodeGen,
1828 CodeGenBoundParameters: emitEmptyBoundParameters);
1829 emitPostUpdateForReductionClause(CGF&: *this, D: S,
1830 CondGen: [](CodeGenFunction &) { return nullptr; });
1831 }
1832 // Check for outer lastprivate conditional update.
1833 checkForLastprivateConditionalUpdate(CGF&: *this, S);
1834}
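
// Illustrative example (hypothetical user code): a directive exercising the
// clauses handled above:
//
//   #pragma omp parallel if(Cond) num_threads(4) proc_bind(close)
//   Work();
//
// IfCond, NumThreads and ProcBind are evaluated up front and passed either to
// the runtime call or, on the OpenMPIRBuilder path, to createParallel.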
1835
1836void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) {
1837 EmitStmt(S: S.getIfStmt());
1838}
1839
1840namespace {
1841/// RAII to handle scopes for loop transformation directives.
1842class OMPTransformDirectiveScopeRAII {
1843 OMPLoopScope *Scope = nullptr;
1844 CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr;
1845 CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;
1846
1847 OMPTransformDirectiveScopeRAII(const OMPTransformDirectiveScopeRAII &) =
1848 delete;
1849 OMPTransformDirectiveScopeRAII &
1850 operator=(const OMPTransformDirectiveScopeRAII &) = delete;
1851
1852public:
1853 OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
1854 if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(Val: S)) {
1855 Scope = new OMPLoopScope(CGF, *Dir);
1856 CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
1857 CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
1858 }
1859 }
1860 ~OMPTransformDirectiveScopeRAII() {
1861 if (!Scope)
1862 return;
1863 delete CapInfoRAII;
1864 delete CGSI;
1865 delete Scope;
1866 }
1867};
1868} // namespace
1869
1870static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
1871 int MaxLevel, int Level = 0) {
1872 assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
1873 const Stmt *SimplifiedS = S->IgnoreContainers();
1874 if (const auto *CS = dyn_cast<CompoundStmt>(Val: SimplifiedS)) {
1875 PrettyStackTraceLoc CrashInfo(
1876 CGF.getContext().getSourceManager(), CS->getLBracLoc(),
1877 "LLVM IR generation of compound statement ('{}')");
1878
1879 // Keep track of the current cleanup stack depth, including debug scopes.
1880 CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
1881 for (const Stmt *CurStmt : CS->body())
1882 emitBody(CGF, S: CurStmt, NextLoop, MaxLevel, Level);
1883 return;
1884 }
1885 if (SimplifiedS == NextLoop) {
1886 if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(Val: SimplifiedS))
1887 SimplifiedS = Dir->getTransformedStmt();
1888 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(Val: SimplifiedS))
1889 SimplifiedS = CanonLoop->getLoopStmt();
1890 if (const auto *For = dyn_cast<ForStmt>(Val: SimplifiedS)) {
1891 S = For->getBody();
1892 } else {
1893 assert(isa<CXXForRangeStmt>(SimplifiedS) &&
1894 "Expected canonical for loop or range-based for loop.");
1895 const auto *CXXFor = cast<CXXForRangeStmt>(Val: SimplifiedS);
1896 CGF.EmitStmt(S: CXXFor->getLoopVarStmt());
1897 S = CXXFor->getBody();
1898 }
1899 if (Level + 1 < MaxLevel) {
1900 NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
1901 CurStmt: S, /*TryImperfectlyNestedLoops=*/true);
1902 emitBody(CGF, S, NextLoop, MaxLevel, Level: Level + 1);
1903 return;
1904 }
1905 }
1906 CGF.EmitStmt(S);
1907}
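
// Illustrative example (hypothetical user code): with
//
//   #pragma omp for collapse(2)
//   for (int I = 0; I < N; ++I)
//     for (int J = 0; J < M; ++J)
//       Body(I, J);
//
// emitBody is entered with MaxLevel == 2; it descends past the outer loop into
// the inner one and ultimately emits only the innermost body statement.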
1908
1909void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1910 JumpDest LoopExit) {
1911 RunCleanupsScope BodyScope(*this);
1912 // Update the counters' values for the current iteration.
1913 for (const Expr *UE : D.updates())
1914 EmitIgnoredExpr(E: UE);
1915 // Update the linear variables.
1916 // In distribute directives only loop counters may be marked as linear; no
1917 // need to generate code for them.
1918 if (!isOpenMPDistributeDirective(DKind: D.getDirectiveKind())) {
1919 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1920 for (const Expr *UE : C->updates())
1921 EmitIgnoredExpr(E: UE);
1922 }
1923 }
1924
1925 // On a continue in the body, jump to the end.
1926 JumpDest Continue = getJumpDestInCurrentScope(Name: "omp.body.continue");
1927 BreakContinueStack.push_back(Elt: BreakContinue(LoopExit, Continue));
1928 for (const Expr *E : D.finals_conditions()) {
1929 if (!E)
1930 continue;
1931 // Check that the loop counter in a non-rectangular nest fits into the
1932 // iteration space.
1933 llvm::BasicBlock *NextBB = createBasicBlock(name: "omp.body.next");
1934 EmitBranchOnBoolExpr(Cond: E, TrueBlock: NextBB, FalseBlock: Continue.getBlock(),
1935 TrueCount: getProfileCount(S: D.getBody()));
1936 EmitBlock(BB: NextBB);
1937 }
1938
1939 OMPPrivateScope InscanScope(*this);
1940 EmitOMPReductionClauseInit(D, PrivateScope&: InscanScope, /*ForInscan=*/true);
1941 bool IsInscanRegion = InscanScope.Privatize();
1942 if (IsInscanRegion) {
1943 // Remember the blocks before and after the scan directive so they can be
1944 // dispatched correctly depending on the clause used in this directive,
1945 // inclusive or exclusive. For an inclusive scan the natural order of the
1946 // blocks is used; for an exclusive clause the blocks must be executed in
1947 // reverse order.
1948 OMPBeforeScanBlock = createBasicBlock(name: "omp.before.scan.bb");
1949 OMPAfterScanBlock = createBasicBlock(name: "omp.after.scan.bb");
1950 // No need to allocate the inscan exit block; in simd mode it is selected in
1951 // the codegen for the scan directive.
1952 if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd)
1953 OMPScanExitBlock = createBasicBlock(name: "omp.exit.inscan.bb");
1954 OMPScanDispatch = createBasicBlock(name: "omp.inscan.dispatch");
1955 EmitBranch(Block: OMPScanDispatch);
1956 EmitBlock(BB: OMPBeforeScanBlock);
1957 }
1958
1959 // Emit loop variables for C++ range loops.
1960 const Stmt *Body =
1961 D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
1962 // Emit loop body.
1963 emitBody(CGF&: *this, S: Body,
1964 NextLoop: OMPLoopBasedDirective::tryToFindNextInnerLoop(
1965 CurStmt: Body, /*TryImperfectlyNestedLoops=*/true),
1966 MaxLevel: D.getLoopsNumber());
1967
1968 // Jump to the dispatcher at the end of the loop body.
1969 if (IsInscanRegion)
1970 EmitBranch(Block: OMPScanExitBlock);
1971
1972 // The end (updates/cleanups).
1973 EmitBlock(BB: Continue.getBlock());
1974 BreakContinueStack.pop_back();
1975}
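
// Illustrative example (hypothetical user code): an inscan region dispatched
// through the blocks created above:
//
//   #pragma omp simd reduction(inscan, + : Sum)
//   for (int I = 0; I < N; ++I) {
//     Sum += A[I];
//     #pragma omp scan inclusive(Sum)
//     B[I] = Sum;
//   }
//
// With 'inclusive' the before-scan block runs first; with 'exclusive' the
// blocks around the scan directive are executed in reverse order.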
1976
1977using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;
1978
1979/// Emit a captured statement and return the function as well as its captured
1980/// closure context.
1981static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF,
1982 const CapturedStmt *S) {
1983 LValue CapStruct = ParentCGF.InitCapturedStruct(S: *S);
1984 CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true);
1985 std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI =
1986 std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(args: *S);
1987 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get());
1988 llvm::Function *F = CGF.GenerateCapturedStmtFunction(S: *S);
1989
1990 return {F, CapStruct.getPointer(CGF&: ParentCGF)};
1991}
1992
1993/// Emit a call to a previously captured closure.
1994static llvm::CallInst *
1995emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap,
1996 llvm::ArrayRef<llvm::Value *> Args) {
1997 // Append the closure context to the argument list.
1998 SmallVector<llvm::Value *> EffectiveArgs;
1999 EffectiveArgs.reserve(N: Args.size() + 1);
2000 llvm::append_range(C&: EffectiveArgs, R&: Args);
2001 EffectiveArgs.push_back(Elt: Cap.second);
2002
2003 return ParentCGF.Builder.CreateCall(Callee: Cap.first, Args: EffectiveArgs);
2004}
2005
2006llvm::CanonicalLoopInfo *
2007CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
2008 assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");
2009
2010 // The caller is processing the loop-associated directive containing the \p
2011 // Depth loops nested in \p S. Put the previous pending loop-associated
2012 // directive onto the stack. If the current loop-associated directive is a
2013 // loop transformation directive, it will push its generated loops onto the
2014 // stack such that, together with the loops left here, they form the combined
2015 // loop nest for the parent loop-associated directive.
2016 int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth;
2017 ExpectedOMPLoopDepth = Depth;
2018
2019 EmitStmt(S);
2020 assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");
2021
2022 // The last added loop is the outermost one.
2023 llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back();
2024
2025 // Pop the \p Depth loops requested by the call from that stack and restore
2026 // the previous context.
2027 OMPLoopNestStack.pop_back_n(NumItems: Depth);
2028 ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth;
2029
2030 return Result;
2031}
2032
2033void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
2034 const Stmt *SyntacticalLoop = S->getLoopStmt();
2035 if (!getLangOpts().OpenMPIRBuilder) {
2036 // If the OpenMPIRBuilder is not enabled, just emit the loop statement.
2037 EmitStmt(S: SyntacticalLoop);
2038 return;
2039 }
2040
2041 LexicalScope ForScope(*this, S->getSourceRange());
2042
2043 // Emit init statements. The Distance/LoopVar funcs may reference variable
2044 // declarations they contain.
2045 const Stmt *BodyStmt;
2046 if (const auto *For = dyn_cast<ForStmt>(Val: SyntacticalLoop)) {
2047 if (const Stmt *InitStmt = For->getInit())
2048 EmitStmt(S: InitStmt);
2049 BodyStmt = For->getBody();
2050 } else if (const auto *RangeFor =
2051 dyn_cast<CXXForRangeStmt>(Val: SyntacticalLoop)) {
2052 if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt())
2053 EmitStmt(S: RangeStmt);
2054 if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt())
2055 EmitStmt(S: BeginStmt);
2056 if (const DeclStmt *EndStmt = RangeFor->getEndStmt())
2057 EmitStmt(S: EndStmt);
2058 if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt())
2059 EmitStmt(S: LoopVarStmt);
2060 BodyStmt = RangeFor->getBody();
2061 } else
2062 llvm_unreachable("Expected for-stmt or range-based for-stmt");
2063
2064 // Emit closure for later use. By-value captures will be captured here.
2065 const CapturedStmt *DistanceFunc = S->getDistanceFunc();
2066 EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(ParentCGF&: *this, S: DistanceFunc);
2067 const CapturedStmt *LoopVarFunc = S->getLoopVarFunc();
2068 EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(ParentCGF&: *this, S: LoopVarFunc);
2069
2070 // Call the distance function to get the number of iterations of the loop to
2071 // come.
2072 QualType LogicalTy = DistanceFunc->getCapturedDecl()
2073 ->getParam(i: 0)
2074 ->getType()
2075 .getNonReferenceType();
2076 RawAddress CountAddr = CreateMemTemp(T: LogicalTy, Name: ".count.addr");
2077 emitCapturedStmtCall(ParentCGF&: *this, Cap: DistanceClosure, Args: {CountAddr.getPointer()});
2078 llvm::Value *DistVal = Builder.CreateLoad(Addr: CountAddr, Name: ".count");
2079
2080 // Emit the loop structure.
2081 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
2082 auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
2083 llvm::Value *IndVar) {
2084 Builder.restoreIP(IP: CodeGenIP);
2085
2086 // Emit the loop body: Convert the logical iteration number to the loop
2087 // variable and emit the body.
2088 const DeclRefExpr *LoopVarRef = S->getLoopVarRef();
2089 LValue LCVal = EmitLValue(E: LoopVarRef);
2090 Address LoopVarAddress = LCVal.getAddress();
2091 emitCapturedStmtCall(ParentCGF&: *this, Cap: LoopVarClosure,
2092 Args: {LoopVarAddress.emitRawPointer(CGF&: *this), IndVar});
2093
2094 RunCleanupsScope BodyScope(*this);
2095 EmitStmt(S: BodyStmt);
2096 };
2097 llvm::CanonicalLoopInfo *CL =
2098 OMPBuilder.createCanonicalLoop(Loc: Builder, BodyGenCB: BodyGen, TripCount: DistVal);
2099
2100 // Finish up the loop.
2101 Builder.restoreIP(IP: CL->getAfterIP());
2102 ForScope.ForceCleanup();
2103
2104 // Remember the CanonicalLoopInfo for parent AST nodes consuming it.
2105 OMPLoopNestStack.push_back(Elt: CL);
2106}
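
// Illustrative example (hypothetical input): for
//
//   for (int I = 7; I < N; I += 3)
//     Body(I);
//
// DistanceFunc stores the trip count ((N - 7 + 2) / 3 when N > 7, else 0)
// into CountAddr, and LoopVarFunc maps a logical iteration number V back to
// the loop variable I = 7 + 3 * V.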
2107
2108void CodeGenFunction::EmitOMPInnerLoop(
2109 const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
2110 const Expr *IncExpr,
2111 const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
2112 const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
2113 auto LoopExit = getJumpDestInCurrentScope(Name: "omp.inner.for.end");
2114
2115 // Start the loop with a block that tests the condition.
2116 auto CondBlock = createBasicBlock(name: "omp.inner.for.cond");
2117 EmitBlock(BB: CondBlock);
2118 const SourceRange R = S.getSourceRange();
2119
2120 // If attributes are attached, push the loop onto the stack together with them.
2121 const auto &OMPED = cast<OMPExecutableDirective>(Val: S);
2122 const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
2123 const Stmt *SS = ICS->getCapturedStmt();
2124 const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(Val: SS);
2125 OMPLoopNestStack.clear();
2126 if (AS)
2127 LoopStack.push(Header: CondBlock, Ctx&: CGM.getContext(), CGOpts: CGM.getCodeGenOpts(),
2128 Attrs: AS->getAttrs(), StartLoc: SourceLocToDebugLoc(Location: R.getBegin()),
2129 EndLoc: SourceLocToDebugLoc(Location: R.getEnd()));
2130 else
2131 LoopStack.push(Header: CondBlock, StartLoc: SourceLocToDebugLoc(Location: R.getBegin()),
2132 EndLoc: SourceLocToDebugLoc(Location: R.getEnd()));
2133
2134 // If there are any cleanups between here and the loop-exit scope, create a
2135 // block to stage the loop exit.
2136 llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2137 if (RequiresCleanup)
2138 ExitBlock = createBasicBlock(name: "omp.inner.for.cond.cleanup");
2139
2140 llvm::BasicBlock *LoopBody = createBasicBlock(name: "omp.inner.for.body");
2141
2142 // Emit condition.
2143 EmitBranchOnBoolExpr(Cond: LoopCond, TrueBlock: LoopBody, FalseBlock: ExitBlock, TrueCount: getProfileCount(S: &S));
2144 if (ExitBlock != LoopExit.getBlock()) {
2145 EmitBlock(BB: ExitBlock);
2146 EmitBranchThroughCleanup(Dest: LoopExit);
2147 }
2148
2149 EmitBlock(BB: LoopBody);
2150 incrementProfileCounter(S: &S);
2151
2152 // Create a block for the increment.
2153 JumpDest Continue = getJumpDestInCurrentScope(Name: "omp.inner.for.inc");
2154 BreakContinueStack.push_back(Elt: BreakContinue(LoopExit, Continue));
2155
2156 BodyGen(*this);
2157
2158 // Emit "IV = IV + 1" and a back-edge to the condition block.
2159 EmitBlock(BB: Continue.getBlock());
2160 EmitIgnoredExpr(E: IncExpr);
2161 PostIncGen(*this);
2162 BreakContinueStack.pop_back();
2163 EmitBranch(Block: CondBlock);
2164 LoopStack.pop();
2165 // Emit the fall-through block.
2166 EmitBlock(BB: LoopExit.getBlock());
2167}
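
// Schematically, the control flow emitted above is (block names as created):
//
//   omp.inner.for.cond --(LoopCond)--> omp.inner.for.body
//   omp.inner.for.body --> omp.inner.for.inc --> omp.inner.for.cond
//   omp.inner.for.cond --(!LoopCond)--> omp.inner.for.end
//
// with an optional omp.inner.for.cond.cleanup block staged on the exit edge
// when cleanups are required.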
2168
2169bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
2170 if (!HaveInsertPoint())
2171 return false;
2172 // Emit inits for the linear variables.
2173 bool HasLinears = false;
2174 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2175 for (const Expr *Init : C->inits()) {
2176 HasLinears = true;
2177 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: Init)->getDecl());
2178 if (const auto *Ref =
2179 dyn_cast<DeclRefExpr>(Val: VD->getInit()->IgnoreImpCasts())) {
2180 AutoVarEmission Emission = EmitAutoVarAlloca(var: *VD);
2181 const auto *OrigVD = cast<VarDecl>(Val: Ref->getDecl());
2182 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
2183 CapturedStmtInfo->lookup(VD: OrigVD) != nullptr,
2184 VD->getInit()->getType(), VK_LValue,
2185 VD->getInit()->getExprLoc());
2186 EmitExprAsInit(
2187 init: &DRE, D: VD,
2188 lvalue: MakeAddrLValue(Addr: Emission.getAllocatedAddress(), T: VD->getType()),
2189 /*capturedByInit=*/false);
2190 EmitAutoVarCleanups(emission: Emission);
2191 } else {
2192 EmitVarDecl(D: *VD);
2193 }
2194 }
2195 // Emit the linear steps for the linear clauses.
2196 // If a step is not constant, it is pre-calculated before the loop.
2197 if (const auto *CS = cast_or_null<BinaryOperator>(Val: C->getCalcStep()))
2198 if (const auto *SaveRef = cast<DeclRefExpr>(Val: CS->getLHS())) {
2199 EmitVarDecl(D: *cast<VarDecl>(Val: SaveRef->getDecl()));
2200 // Emit calculation of the linear step.
2201 EmitIgnoredExpr(E: CS);
2202 }
2203 }
2204 return HasLinears;
2205}
2206
2207void CodeGenFunction::EmitOMPLinearClauseFinal(
2208 const OMPLoopDirective &D,
2209 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2210 if (!HaveInsertPoint())
2211 return;
2212 llvm::BasicBlock *DoneBB = nullptr;
2213 // Emit the final values of the linear variables.
2214 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2215 auto IC = C->varlist_begin();
2216 for (const Expr *F : C->finals()) {
2217 if (!DoneBB) {
2218 if (llvm::Value *Cond = CondGen(*this)) {
2219 // When the first post-update expression is found, emit the conditional
2220 // block if one was requested.
2221 llvm::BasicBlock *ThenBB = createBasicBlock(name: ".omp.linear.pu");
2222 DoneBB = createBasicBlock(name: ".omp.linear.pu.done");
2223 Builder.CreateCondBr(Cond, True: ThenBB, False: DoneBB);
2224 EmitBlock(BB: ThenBB);
2225 }
2226 }
2227 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IC)->getDecl());
2228 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
2229 CapturedStmtInfo->lookup(VD: OrigVD) != nullptr,
2230 (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
2231 Address OrigAddr = EmitLValue(E: &DRE).getAddress();
2232 CodeGenFunction::OMPPrivateScope VarScope(*this);
2233 VarScope.addPrivate(LocalVD: OrigVD, Addr: OrigAddr);
2234 (void)VarScope.Privatize();
2235 EmitIgnoredExpr(E: F);
2236 ++IC;
2237 }
2238 if (const Expr *PostUpdate = C->getPostUpdateExpr())
2239 EmitIgnoredExpr(E: PostUpdate);
2240 }
2241 if (DoneBB)
2242 EmitBlock(BB: DoneBB, /*IsFinished=*/true);
2243}
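
// Illustrative example (hypothetical user code): for
//
//   #pragma omp simd linear(L : 2)
//   for (int I = 0; I < N; ++I)
//     A[L] = I;
//
// the finals emitted above store L's original value plus NumIterations * 2
// back into L on exit, as if the sequential loop had advanced it by 2 on each
// iteration.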
2244
2245static void emitAlignedClause(CodeGenFunction &CGF,
2246 const OMPExecutableDirective &D) {
2247 if (!CGF.HaveInsertPoint())
2248 return;
2249 for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
2250 llvm::APInt ClauseAlignment(64, 0);
2251 if (const Expr *AlignmentExpr = Clause->getAlignment()) {
2252 auto *AlignmentCI =
2253 cast<llvm::ConstantInt>(Val: CGF.EmitScalarExpr(E: AlignmentExpr));
2254 ClauseAlignment = AlignmentCI->getValue();
2255 }
2256 for (const Expr *E : Clause->varlists()) {
2257 llvm::APInt Alignment(ClauseAlignment);
2258 if (Alignment == 0) {
2259 // OpenMP [2.8.1, Description]
2260 // If no optional parameter is specified, implementation-defined default
2261 // alignments for SIMD instructions on the target platforms are assumed.
2262 Alignment =
2263 CGF.getContext()
2264 .toCharUnitsFromBits(BitSize: CGF.getContext().getOpenMPDefaultSimdAlign(
2265 T: E->getType()->getPointeeType()))
2266 .getQuantity();
2267 }
2268 assert((Alignment == 0 || Alignment.isPowerOf2()) &&
2269 "alignment is not power of 2");
2270 if (Alignment != 0) {
2271 llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
2272 CGF.emitAlignmentAssumption(
2273 PtrValue, E, /*No second loc needed*/ AssumptionLoc: SourceLocation(),
2274 Alignment: llvm::ConstantInt::get(Context&: CGF.getLLVMContext(), V: Alignment));
2275 }
2276 }
2277 }
2278}
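
// Illustrative example (hypothetical user code):
//
//   #pragma omp simd aligned(Ptr : 32)
//   for (int I = 0; I < N; ++I)
//     Ptr[I] *= 2.0f;
//
// emits an assumption that Ptr is 32-byte aligned, allowing the vectorizer to
// use aligned memory accesses.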
2279
2280void CodeGenFunction::EmitOMPPrivateLoopCounters(
2281 const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
2282 if (!HaveInsertPoint())
2283 return;
2284 auto I = S.private_counters().begin();
2285 for (const Expr *E : S.counters()) {
2286 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
2287 const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *I)->getDecl());
2288 // Emit the variable without initialization.
2289 AutoVarEmission VarEmission = EmitAutoVarAlloca(var: *PrivateVD);
2290 EmitAutoVarCleanups(emission: VarEmission);
2291 LocalDeclMap.erase(Val: PrivateVD);
2292 (void)LoopScope.addPrivate(LocalVD: VD, Addr: VarEmission.getAllocatedAddress());
2293 if (LocalDeclMap.count(Val: VD) || CapturedStmtInfo->lookup(VD) ||
2294 VD->hasGlobalStorage()) {
2295 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
2296 LocalDeclMap.count(Val: VD) || CapturedStmtInfo->lookup(VD),
2297 E->getType(), VK_LValue, E->getExprLoc());
2298 (void)LoopScope.addPrivate(LocalVD: PrivateVD, Addr: EmitLValue(E: &DRE).getAddress());
2299 } else {
2300 (void)LoopScope.addPrivate(LocalVD: PrivateVD, Addr: VarEmission.getAllocatedAddress());
2301 }
2302 ++I;
2303 }
2304 // Privatize extra loop counters used in loops for ordered(n) clauses.
2305 for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
2306 if (!C->getNumForLoops())
2307 continue;
2308 for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size();
2309 I < E; ++I) {
2310 const auto *DRE = cast<DeclRefExpr>(Val: C->getLoopCounter(NumLoop: I));
2311 const auto *VD = cast<VarDecl>(Val: DRE->getDecl());
2312 // Override only those variables that can be captured to avoid re-emission
2313 // of the variables declared within the loops.
2314 if (DRE->refersToEnclosingVariableOrCapture()) {
2315 (void)LoopScope.addPrivate(
2316 LocalVD: VD, Addr: CreateMemTemp(T: DRE->getType(), Name: VD->getName()));
2317 }
2318 }
2319 }
2320}
2321
2322static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
2323 const Expr *Cond, llvm::BasicBlock *TrueBlock,
2324 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
2325 if (!CGF.HaveInsertPoint())
2326 return;
2327 {
2328 CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
2329 CGF.EmitOMPPrivateLoopCounters(S, LoopScope&: PreCondScope);
2330 (void)PreCondScope.Privatize();
2331 // Get initial values of real counters.
2332 for (const Expr *I : S.inits()) {
2333 CGF.EmitIgnoredExpr(E: I);
2334 }
2335 }
2336 // Create temp loop control variables with their init values to support
2337 // non-rectangular loops.
2338 CodeGenFunction::OMPMapVars PreCondVars;
2339 for (const Expr *E : S.dependent_counters()) {
2340 if (!E)
2341 continue;
2342 assert(!E->getType().getNonReferenceType()->isRecordType() &&
2343 "dependent counter must not be an iterator.");
2344 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
2345 Address CounterAddr =
2346 CGF.CreateMemTemp(T: VD->getType().getNonReferenceType());
2347 (void)PreCondVars.setVarAddr(CGF, LocalVD: VD, TempAddr: CounterAddr);
2348 }
2349 (void)PreCondVars.apply(CGF);
2350 for (const Expr *E : S.dependent_inits()) {
2351 if (!E)
2352 continue;
2353 CGF.EmitIgnoredExpr(E);
2354 }
2355 // Check that the loop is executed at least once.
2356 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
2357 PreCondVars.restore(CGF);
2358}
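
// Illustrative example (hypothetical user code): a non-rectangular nest that
// needs the temporary counters created above:
//
//   #pragma omp for collapse(2)
//   for (int I = 0; I < N; ++I)
//     for (int J = I; J < N; ++J) // J's bounds depend on I.
//       Body(I, J);
//
// dependent_counters()/dependent_inits() materialize a temporary for I so the
// precondition can be evaluated before any loop is emitted.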
2359
2360void CodeGenFunction::EmitOMPLinearClause(
2361 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
2362 if (!HaveInsertPoint())
2363 return;
2364 llvm::DenseSet<const VarDecl *> SIMDLCVs;
2365 if (isOpenMPSimdDirective(DKind: D.getDirectiveKind())) {
2366 const auto *LoopDirective = cast<OMPLoopDirective>(Val: &D);
2367 for (const Expr *C : LoopDirective->counters()) {
2368 SIMDLCVs.insert(
2369 V: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: C)->getDecl())->getCanonicalDecl());
2370 }
2371 }
2372 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2373 auto CurPrivate = C->privates().begin();
2374 for (const Expr *E : C->varlists()) {
2375 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
2376 const auto *PrivateVD =
2377 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *CurPrivate)->getDecl());
2378 if (!SIMDLCVs.count(V: VD->getCanonicalDecl())) {
2379 // Emit private VarDecl with copy init.
2380 EmitVarDecl(D: *PrivateVD);
2381 bool IsRegistered =
2382 PrivateScope.addPrivate(LocalVD: VD, Addr: GetAddrOfLocalVar(VD: PrivateVD));
2383 assert(IsRegistered && "linear var already registered as private");
2384 // Silence the warning about unused variable.
2385 (void)IsRegistered;
2386 } else {
2387 EmitVarDecl(D: *PrivateVD);
2388 }
2389 ++CurPrivate;
2390 }
2391 }
2392}
2393
2394static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
2395 const OMPExecutableDirective &D) {
2396 if (!CGF.HaveInsertPoint())
2397 return;
2398 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
2399 RValue Len = CGF.EmitAnyExpr(E: C->getSimdlen(), aggSlot: AggValueSlot::ignored(),
2400 /*ignoreResult=*/true);
2401 auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal());
2402 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2403 // In the presence of a finite 'safelen', it may be unsafe to mark all the
2404 // memory instructions parallel, because loop-carried dependences at a
2405 // distance of 'safelen' iterations or more are possible.
2406 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
2407 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
2408 RValue Len = CGF.EmitAnyExpr(E: C->getSafelen(), aggSlot: AggValueSlot::ignored(),
2409 /*ignoreResult=*/true);
2410 auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal());
2411 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2412 // In the presence of a finite 'safelen', it may be unsafe to mark all the
2413 // memory instructions parallel, because loop-carried dependences at a
2414 // distance of 'safelen' iterations or more are possible.
2415 CGF.LoopStack.setParallel(/*Enable=*/false);
2416 }
2417}
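
// For illustration: '#pragma omp simd safelen(8)' only promises that
// iterations fewer than 8 apart are independent; a loop-carried dependence at
// a distance of 8 or more is still permitted, so the loop is not marked fully
// parallel and only the vectorize width hint (8) is recorded.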
2418
2419void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
2420 // Walk the clauses and process safelen/simdlen/order and inscan reductions.
2421 LoopStack.setParallel(/*Enable=*/true);
2422 LoopStack.setVectorizeEnable();
2423 emitSimdlenSafelenClause(CGF&: *this, D);
2424 if (const auto *C = D.getSingleClause<OMPOrderClause>())
2425 if (C->getKind() == OMPC_ORDER_concurrent)
2426 LoopStack.setParallel(/*Enable=*/true);
2427 if ((D.getDirectiveKind() == OMPD_simd ||
2428 (getLangOpts().OpenMPSimd &&
2429 isOpenMPSimdDirective(DKind: D.getDirectiveKind()))) &&
2430 llvm::any_of(Range: D.getClausesOfKind<OMPReductionClause>(),
2431 P: [](const OMPReductionClause *C) {
2432 return C->getModifier() == OMPC_REDUCTION_inscan;
2433 }))
2434 // Disable parallel access in the case of a prefix sum.
2435 LoopStack.setParallel(/*Enable=*/false);
2436}
2437
2438void CodeGenFunction::EmitOMPSimdFinal(
2439 const OMPLoopDirective &D,
2440 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2441 if (!HaveInsertPoint())
2442 return;
2443 llvm::BasicBlock *DoneBB = nullptr;
2444 auto IC = D.counters().begin();
2445 auto IPC = D.private_counters().begin();
2446 for (const Expr *F : D.finals()) {
2447 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: (*IC))->getDecl());
2448 const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: (*IPC))->getDecl());
2449 const auto *CED = dyn_cast<OMPCapturedExprDecl>(Val: OrigVD);
2450 if (LocalDeclMap.count(Val: OrigVD) || CapturedStmtInfo->lookup(VD: OrigVD) ||
2451 OrigVD->hasGlobalStorage() || CED) {
2452 if (!DoneBB) {
2453 if (llvm::Value *Cond = CondGen(*this)) {
2454 // When the first post-update expression is found, emit the conditional
2455 // block if one was requested.
2456 llvm::BasicBlock *ThenBB = createBasicBlock(name: ".omp.final.then");
2457 DoneBB = createBasicBlock(name: ".omp.final.done");
2458 Builder.CreateCondBr(Cond, True: ThenBB, False: DoneBB);
2459 EmitBlock(BB: ThenBB);
2460 }
2461 }
2462 Address OrigAddr = Address::invalid();
2463 if (CED) {
2464 OrigAddr = EmitLValue(E: CED->getInit()->IgnoreImpCasts()).getAddress();
2465 } else {
2466 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
2467 /*RefersToEnclosingVariableOrCapture=*/false,
2468 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
2469 OrigAddr = EmitLValue(E: &DRE).getAddress();
2470 }
2471 OMPPrivateScope VarScope(*this);
2472 VarScope.addPrivate(LocalVD: OrigVD, Addr: OrigAddr);
2473 (void)VarScope.Privatize();
2474 EmitIgnoredExpr(E: F);
2475 }
2476 ++IC;
2477 ++IPC;
2478 }
2479 if (DoneBB)
2480 EmitBlock(BB: DoneBB, /*IsFinished=*/true);
2481}
2482
2483static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
2484 const OMPLoopDirective &S,
2485 CodeGenFunction::JumpDest LoopExit) {
2486 CGF.EmitOMPLoopBody(D: S, LoopExit);
2487 CGF.EmitStopPoint(S: &S);
2488}
2489
2490/// Emit a helper variable and return the corresponding lvalue.
2491static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
2492 const DeclRefExpr *Helper) {
2493 auto VDecl = cast<VarDecl>(Val: Helper->getDecl());
2494 CGF.EmitVarDecl(D: *VDecl);
2495 return CGF.EmitLValue(E: Helper);
2496}
2497
2498static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
2499 const RegionCodeGenTy &SimdInitGen,
2500 const RegionCodeGenTy &BodyCodeGen) {
2501 auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
2502 PrePostActionTy &) {
2503 CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
2504 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2505 SimdInitGen(CGF);
2506
2507 BodyCodeGen(CGF);
2508 };
2509 auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
2510 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2511 CGF.LoopStack.setVectorizeEnable(/*Enable=*/false);
2512
2513 BodyCodeGen(CGF);
2514 };
2515 const Expr *IfCond = nullptr;
2516 if (isOpenMPSimdDirective(DKind: S.getDirectiveKind())) {
2517 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2518 if (CGF.getLangOpts().OpenMP >= 50 &&
2519 (C->getNameModifier() == OMPD_unknown ||
2520 C->getNameModifier() == OMPD_simd)) {
2521 IfCond = C->getCondition();
2522 break;
2523 }
2524 }
2525 }
2526 if (IfCond) {
2527 CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, Cond: IfCond, ThenGen, ElseGen);
2528 } else {
2529 RegionCodeGenTy ThenRCG(ThenGen);
2530 ThenRCG(CGF);
2531 }
2532}
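
// For illustration: with '#pragma omp simd if(simd : Cond)' (OpenMP >= 5.0,
// hypothetical user code), ThenGen emits the vector-enabled loop body and
// ElseGen emits the same body with vectorization disabled; the emitted 'if'
// machinery selects between them on Cond.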
2533
2534static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
2535 PrePostActionTy &Action) {
2536 Action.Enter(CGF);
2537 assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
2538 "Expected simd directive");
2539 OMPLoopScope PreInitScope(CGF, S);
2540 // if (PreCond) {
2541 // for (IV in 0..LastIteration) BODY;
2542 // <Final counter/linear vars updates>;
2543 // }
2544 //
2545 if (isOpenMPDistributeDirective(DKind: S.getDirectiveKind()) ||
2546 isOpenMPWorksharingDirective(DKind: S.getDirectiveKind()) ||
2547 isOpenMPTaskLoopDirective(DKind: S.getDirectiveKind())) {
2548 (void)EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: S.getLowerBoundVariable()));
2549 (void)EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: S.getUpperBoundVariable()));
2550 }
2551
2552 // Emit: if (PreCond) - begin.
2553 // If the condition constant folds and can be elided, avoid emitting the
2554 // whole loop.
2555 bool CondConstant;
2556 llvm::BasicBlock *ContBlock = nullptr;
2557 if (CGF.ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) {
2558 if (!CondConstant)
2559 return;
2560 } else {
2561 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock(name: "simd.if.then");
2562 ContBlock = CGF.createBasicBlock(name: "simd.if.end");
2563 emitPreCond(CGF, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock,
2564 TrueCount: CGF.getProfileCount(S: &S));
2565 CGF.EmitBlock(BB: ThenBlock);
2566 CGF.incrementProfileCounter(S: &S);
2567 }
2568
2569 // Emit the loop iteration variable.
2570 const Expr *IVExpr = S.getIterationVariable();
2571 const auto *IVDecl = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IVExpr)->getDecl());
2572 CGF.EmitVarDecl(D: *IVDecl);
2573 CGF.EmitIgnoredExpr(E: S.getInit());
2574
2575 // Emit the iterations count variable.
2576 // If it is not a variable, Sema decided to calculate the iteration count on
2577 // each iteration (e.g., it is foldable into a constant).
2578 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) {
2579 CGF.EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl()));
2580 // Emit calculation of the iterations count.
2581 CGF.EmitIgnoredExpr(E: S.getCalcLastIteration());
2582 }
2583
2584 emitAlignedClause(CGF, D: S);
2585 (void)CGF.EmitOMPLinearClauseInit(D: S);
2586 {
2587 CodeGenFunction::OMPPrivateScope LoopScope(CGF);
2588 CGF.EmitOMPPrivateClause(D: S, PrivateScope&: LoopScope);
2589 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
2590 CGF.EmitOMPLinearClause(D: S, PrivateScope&: LoopScope);
2591 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope&: LoopScope);
2592 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
2593 CGF, S, CGF.EmitLValue(E: S.getIterationVariable()));
2594 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope);
2595 (void)LoopScope.Privatize();
2596 if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()))
2597 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S);
2598
2599 emitCommonSimdLoop(
2600 CGF, S,
2601 SimdInitGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2602 CGF.EmitOMPSimdInit(D: S);
2603 },
2604 BodyCodeGen: [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2605 CGF.EmitOMPInnerLoop(
2606 S, RequiresCleanup: LoopScope.requiresCleanups(), LoopCond: S.getCond(), IncExpr: S.getInc(),
2607 BodyGen: [&S](CodeGenFunction &CGF) {
2608 emitOMPLoopBodyWithStopPoint(CGF, S,
2609 LoopExit: CodeGenFunction::JumpDest());
2610 },
2611 PostIncGen: [](CodeGenFunction &) {});
2612 });
2613 CGF.EmitOMPSimdFinal(D: S, CondGen: [](CodeGenFunction &) { return nullptr; });
2614 // Emit final copy of the lastprivate variables at the end of loops.
2615 if (HasLastprivateClause)
2616 CGF.EmitOMPLastprivateClauseFinal(D: S, /*NoFinals=*/true);
2617 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_simd);
2618 emitPostUpdateForReductionClause(CGF, D: S,
2619 CondGen: [](CodeGenFunction &) { return nullptr; });
2620 LoopScope.restoreMap();
2621 CGF.EmitOMPLinearClauseFinal(D: S, CondGen: [](CodeGenFunction &) { return nullptr; });
2622 }
2623 // Emit: if (PreCond) - end.
2624 if (ContBlock) {
2625 CGF.EmitBranch(Block: ContBlock);
2626 CGF.EmitBlock(BB: ContBlock, IsFinished: true);
2627 }
2628}
2629
2630static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective &S) {
2631 // Check for unsupported clauses.
2632 for (OMPClause *C : S.clauses()) {
2633 // Currently only the order, simdlen, safelen and aligned clauses are supported.
2634 if (!(isa<OMPSimdlenClause>(Val: C) || isa<OMPSafelenClause>(Val: C) ||
2635 isa<OMPOrderClause>(Val: C) || isa<OMPAlignedClause>(Val: C)))
2636 return false;
2637 }
2638
2639 // Check if we have a statement with the ordered directive.
2640 // Visit the statement hierarchy to find a compound statement
2641 // with an ordered directive in it.
2642 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(Val: S.getRawStmt())) {
2643 if (const Stmt *SyntacticalLoop = CanonLoop->getLoopStmt()) {
2644 for (const Stmt *SubStmt : SyntacticalLoop->children()) {
2645 if (!SubStmt)
2646 continue;
2647 if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(Val: SubStmt)) {
2648 for (const Stmt *CSSubStmt : CS->children()) {
2649 if (!CSSubStmt)
2650 continue;
2651 if (isa<OMPOrderedDirective>(Val: CSSubStmt)) {
2652 return false;
2653 }
2654 }
2655 }
2656 }
2657 }
2658 }
2659 return true;
2660}
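
// For illustration: a plain '#pragma omp simd simdlen(4)' loop is handled by
// the OpenMPIRBuilder path, while a simd loop whose body contains
// '#pragma omp ordered simd' makes this predicate return false and falls back
// to the classic codegen below.
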
2661static llvm::MapVector<llvm::Value *, llvm::Value *>
2662GetAlignedMapping(const OMPSimdDirective &S, CodeGenFunction &CGF) {
2663 llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars;
2664 for (const auto *Clause : S.getClausesOfKind<OMPAlignedClause>()) {
2665 llvm::APInt ClauseAlignment(64, 0);
2666 if (const Expr *AlignmentExpr = Clause->getAlignment()) {
2667 auto *AlignmentCI =
2668 cast<llvm::ConstantInt>(Val: CGF.EmitScalarExpr(E: AlignmentExpr));
2669 ClauseAlignment = AlignmentCI->getValue();
2670 }
2671 for (const Expr *E : Clause->varlists()) {
2672 llvm::APInt Alignment(ClauseAlignment);
2673 if (Alignment == 0) {
2674 // OpenMP [2.8.1, Description]
2675 // If no optional parameter is specified, implementation-defined default
2676 // alignments for SIMD instructions on the target platforms are assumed.
2677 Alignment =
2678 CGF.getContext()
2679 .toCharUnitsFromBits(BitSize: CGF.getContext().getOpenMPDefaultSimdAlign(
2680 T: E->getType()->getPointeeType()))
2681 .getQuantity();
2682 }
2683 assert((Alignment == 0 || Alignment.isPowerOf2()) &&
2684 "alignment is not power of 2");
2685 llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
2686 AlignedVars[PtrValue] = CGF.Builder.getInt64(C: Alignment.getSExtValue());
2687 }
2688 }
2689 return AlignedVars;
2690}
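// For illustration, the mapping built above: given hypothetical user code
// \code
// #pragma omp simd aligned(p : 32) aligned(q)
// for (int i = 0; i < n; ++i) p[i] += q[i];
// \endcode
// AlignedVars binds the emitted value of 'p' to an i64 constant 32, and 'q'
// to the implementation-defined default SIMD alignment for its pointee type.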
2691
2692void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
2693 bool UseOMPIRBuilder =
2694 CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
2695 if (UseOMPIRBuilder) {
2696 auto &&CodeGenIRBuilder = [this, &S, UseOMPIRBuilder](CodeGenFunction &CGF,
2697 PrePostActionTy &) {
2698 // Use the OpenMPIRBuilder if enabled.
2699 if (UseOMPIRBuilder) {
2700 llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars =
2701 GetAlignedMapping(S, CGF);
2702 // Emit the associated statement and get its loop representation.
2703 const Stmt *Inner = S.getRawStmt();
2704 llvm::CanonicalLoopInfo *CLI =
2705 EmitOMPCollapsedCanonicalLoopNest(S: Inner, Depth: 1);
2706
2707 llvm::OpenMPIRBuilder &OMPBuilder =
2708 CGM.getOpenMPRuntime().getOMPBuilder();
2709 // Add SIMD specific metadata
2710 llvm::ConstantInt *Simdlen = nullptr;
2711 if (const auto *C = S.getSingleClause<OMPSimdlenClause>()) {
2712 RValue Len =
2713 this->EmitAnyExpr(E: C->getSimdlen(), aggSlot: AggValueSlot::ignored(),
2714 /*ignoreResult=*/true);
2715 auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal());
2716 Simdlen = Val;
2717 }
2718 llvm::ConstantInt *Safelen = nullptr;
2719 if (const auto *C = S.getSingleClause<OMPSafelenClause>()) {
2720 RValue Len =
2721 this->EmitAnyExpr(E: C->getSafelen(), aggSlot: AggValueSlot::ignored(),
2722 /*ignoreResult=*/true);
2723 auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal());
2724 Safelen = Val;
2725 }
2726 llvm::omp::OrderKind Order = llvm::omp::OrderKind::OMP_ORDER_unknown;
2727 if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
2728 if (C->getKind() == OpenMPOrderClauseKind::OMPC_ORDER_concurrent) {
2729 Order = llvm::omp::OrderKind::OMP_ORDER_concurrent;
2730 }
2731 }
2732 // Add simd metadata to the collapsed loop. Do not generate
2733 // another loop for the if clause; it was handled earlier.
2734 OMPBuilder.applySimd(Loop: CLI, AlignedVars,
2735 /*IfCond*/ nullptr, Order, Simdlen, Safelen);
2736 return;
2737 }
2738 };
2739 {
2740 auto LPCRegion =
2741 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
2742 OMPLexicalScope Scope(*this, S, OMPD_unknown);
2743 CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_simd,
2744 CodeGen: CodeGenIRBuilder);
2745 }
2746 return;
2747 }
2748
2749 ParentLoopDirectiveForScanRegion ScanRegion(*this, S);
2750 OMPFirstScanLoop = true;
2751 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2752 emitOMPSimdRegion(CGF, S, Action);
2753 };
2754 {
2755 auto LPCRegion =
2756 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
2757 OMPLexicalScope Scope(*this, S, OMPD_unknown);
2758 CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_simd, CodeGen);
2759 }
2760 // Check for outer lastprivate conditional update.
2761 checkForLastprivateConditionalUpdate(CGF&: *this, S);
2762}
2763
2764void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
2765 // Emit the de-sugared statement.
2766 OMPTransformDirectiveScopeRAII TileScope(*this, &S);
2767 EmitStmt(S: S.getTransformedStmt());
2768}
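// For illustration (the exact desugaring is produced by Sema, not here):
// hypothetical user code such as
// \code
// #pragma omp tile sizes(4)
// for (int i = 0; i < n; ++i) body(i);
// \endcode
// reaches this function as a transformed AST roughly equivalent to a loop
// over tiles enclosing a loop over the elements of each tile, and is simply
// emitted as-is.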
2769
2770void CodeGenFunction::EmitOMPReverseDirective(const OMPReverseDirective &S) {
2771 // Emit the de-sugared statement.
2772 OMPTransformDirectiveScopeRAII ReverseScope(*this, &S);
2773 EmitStmt(S: S.getTransformedStmt());
2774}
2775
2776void CodeGenFunction::EmitOMPInterchangeDirective(
2777 const OMPInterchangeDirective &S) {
2778 // Emit the de-sugared statement.
2779 OMPTransformDirectiveScopeRAII InterchangeScope(*this, &S);
2780 EmitStmt(S: S.getTransformedStmt());
2781}
2782
2783void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
2784 bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder;
2785
2786 if (UseOMPIRBuilder) {
2787 auto DL = SourceLocToDebugLoc(Location: S.getBeginLoc());
2788 const Stmt *Inner = S.getRawStmt();
2789
2790 // Consume nested loop. Clear the entire remaining loop stack because a
2791 // fully unrolled loop is non-transformable. For partial unrolling the
2792 // generated outer loop is pushed back to the stack.
2793 llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(S: Inner, Depth: 1);
2794 OMPLoopNestStack.clear();
2795
2796 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
2797
2798 bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1;
2799 llvm::CanonicalLoopInfo *UnrolledCLI = nullptr;
2800
2801 if (S.hasClausesOfKind<OMPFullClause>()) {
2802 assert(ExpectedOMPLoopDepth == 0);
2803 OMPBuilder.unrollLoopFull(DL, Loop: CLI);
2804 } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
2805 uint64_t Factor = 0;
2806 if (Expr *FactorExpr = PartialClause->getFactor()) {
2807 Factor = FactorExpr->EvaluateKnownConstInt(Ctx: getContext()).getZExtValue();
2808 assert(Factor >= 1 && "Only positive factors are valid");
2809 }
2810 OMPBuilder.unrollLoopPartial(DL, Loop: CLI, Factor,
2811 UnrolledCLI: NeedsUnrolledCLI ? &UnrolledCLI : nullptr);
2812 } else {
2813 OMPBuilder.unrollLoopHeuristic(DL, Loop: CLI);
2814 }
2815
2816 assert((!NeedsUnrolledCLI || UnrolledCLI) &&
2817 "NeedsUnrolledCLI implies UnrolledCLI to be set");
2818 if (UnrolledCLI)
2819 OMPLoopNestStack.push_back(Elt: UnrolledCLI);
2820
2821 return;
2822 }
2823
2824 // This function is only called if the unrolled loop is not consumed by any
2825 // other loop-associated construct. Such a loop-associated construct will have
2826 // used the transformed AST.
2827
2828 // Set the unroll metadata for the next emitted loop.
2829 LoopStack.setUnrollState(LoopAttributes::Enable);
2830
2831 if (S.hasClausesOfKind<OMPFullClause>()) {
2832 LoopStack.setUnrollState(LoopAttributes::Full);
2833 } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
2834 if (Expr *FactorExpr = PartialClause->getFactor()) {
2835 uint64_t Factor =
2836 FactorExpr->EvaluateKnownConstInt(Ctx: getContext()).getZExtValue();
2837 assert(Factor >= 1 && "Only positive factors are valid");
2838 LoopStack.setUnrollCount(Factor);
2839 }
2840 }
2841
2842 EmitStmt(S: S.getAssociatedStmt());
2843}
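// For illustration: given hypothetical user code
// \code
// #pragma omp unroll partial(4)
// for (int i = 0; i < n; ++i) body(i);
// \endcode
// the OpenMPIRBuilder path above calls unrollLoopPartial() with Factor = 4,
// while the classic path attaches unroll-count metadata of 4 to the next
// emitted loop via LoopStack.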
2844
2845void CodeGenFunction::EmitOMPOuterLoop(
2846 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
2847 CodeGenFunction::OMPPrivateScope &LoopScope,
2848 const CodeGenFunction::OMPLoopArguments &LoopArgs,
2849 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
2850 const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
2851 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2852
2853 const Expr *IVExpr = S.getIterationVariable();
2854 const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType());
2855 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2856
2857 JumpDest LoopExit = getJumpDestInCurrentScope(Name: "omp.dispatch.end");
2858
2859 // Start the loop with a block that tests the condition.
2860 llvm::BasicBlock *CondBlock = createBasicBlock(name: "omp.dispatch.cond");
2861 EmitBlock(BB: CondBlock);
2862 const SourceRange R = S.getSourceRange();
2863 OMPLoopNestStack.clear();
2864 LoopStack.push(Header: CondBlock, StartLoc: SourceLocToDebugLoc(Location: R.getBegin()),
2865 EndLoc: SourceLocToDebugLoc(Location: R.getEnd()));
2866
2867 llvm::Value *BoolCondVal = nullptr;
2868 if (!DynamicOrOrdered) {
2869 // UB = min(UB, GlobalUB) or
2870 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
2871 // 'distribute parallel for')
2872 EmitIgnoredExpr(E: LoopArgs.EUB);
2873 // IV = LB
2874 EmitIgnoredExpr(E: LoopArgs.Init);
2875 // IV < UB
2876 BoolCondVal = EvaluateExprAsBool(E: LoopArgs.Cond);
2877 } else {
2878 BoolCondVal =
2879 RT.emitForNext(CGF&: *this, Loc: S.getBeginLoc(), IVSize, IVSigned, IL: LoopArgs.IL,
2880 LB: LoopArgs.LB, UB: LoopArgs.UB, ST: LoopArgs.ST);
2881 }
2882
2883 // If there are any cleanups between here and the loop-exit scope,
2884 // create a block to stage a loop exit along.
2885 llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2886 if (LoopScope.requiresCleanups())
2887 ExitBlock = createBasicBlock(name: "omp.dispatch.cleanup");
2888
2889 llvm::BasicBlock *LoopBody = createBasicBlock(name: "omp.dispatch.body");
2890 Builder.CreateCondBr(Cond: BoolCondVal, True: LoopBody, False: ExitBlock);
2891 if (ExitBlock != LoopExit.getBlock()) {
2892 EmitBlock(BB: ExitBlock);
2893 EmitBranchThroughCleanup(Dest: LoopExit);
2894 }
2895 EmitBlock(BB: LoopBody);
2896
2897 // Emit "IV = LB" (in case of static schedule, we have already calculated the
2898 // new LB for the loop condition and emitted it above).
2899 if (DynamicOrOrdered)
2900 EmitIgnoredExpr(E: LoopArgs.Init);
2901
2902 // Create a block for the increment.
2903 JumpDest Continue = getJumpDestInCurrentScope(Name: "omp.dispatch.inc");
2904 BreakContinueStack.push_back(Elt: BreakContinue(LoopExit, Continue));
2905
2906 emitCommonSimdLoop(
2907 CGF&: *this, S,
2908 SimdInitGen: [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
2909 // Generate !llvm.loop.parallel metadata for loads and stores for loops
2910 // with dynamic/guided scheduling and without ordered clause.
2911 if (!isOpenMPSimdDirective(DKind: S.getDirectiveKind())) {
2912 CGF.LoopStack.setParallel(!IsMonotonic);
2913 if (const auto *C = S.getSingleClause<OMPOrderClause>())
2914 if (C->getKind() == OMPC_ORDER_concurrent)
2915 CGF.LoopStack.setParallel(/*Enable=*/true);
2916 } else {
2917 CGF.EmitOMPSimdInit(D: S);
2918 }
2919 },
2920 BodyCodeGen: [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
2921 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2922 SourceLocation Loc = S.getBeginLoc();
2923 // When 'distribute' is not combined with a 'for':
2924 //   while (idx <= UB) { BODY; ++idx; }
2925 // When 'distribute' is combined with a 'for'
2926 // (e.g. 'distribute parallel for'):
2927 //   while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
2928 CGF.EmitOMPInnerLoop(
2929 S, RequiresCleanup: LoopScope.requiresCleanups(), LoopCond: LoopArgs.Cond, IncExpr: LoopArgs.IncExpr,
2930 BodyGen: [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
2931 CodeGenLoop(CGF, S, LoopExit);
2932 },
2933 PostIncGen: [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
2934 CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
2935 });
2936 });
2937
2938 EmitBlock(BB: Continue.getBlock());
2939 BreakContinueStack.pop_back();
2940 if (!DynamicOrOrdered) {
2941 // Emit "LB = LB + Stride", "UB = UB + Stride".
2942 EmitIgnoredExpr(E: LoopArgs.NextLB);
2943 EmitIgnoredExpr(E: LoopArgs.NextUB);
2944 }
2945
2946 EmitBranch(Block: CondBlock);
2947 OMPLoopNestStack.clear();
2948 LoopStack.pop();
2949 // Emit the fall-through block.
2950 EmitBlock(BB: LoopExit.getBlock());
2951
2952 // Tell the runtime we are done.
2953 auto &&CodeGen = [DynamicOrOrdered, &S, &LoopArgs](CodeGenFunction &CGF) {
2954 if (!DynamicOrOrdered)
2955 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, Loc: S.getEndLoc(),
2956 DKind: LoopArgs.DKind);
2957 };
2958 OMPCancelStack.emitExit(CGF&: *this, Kind: S.getDirectiveKind(), CodeGen);
2959}
2960
2961void CodeGenFunction::EmitOMPForOuterLoop(
2962 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
2963 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
2964 const OMPLoopArguments &LoopArgs,
2965 const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2966 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2967
2968 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
2969 const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind: ScheduleKind.Schedule);
2970
2971 assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule,
2972 LoopArgs.Chunk != nullptr)) &&
2973 "static non-chunked schedule does not need outer loop");
2974
2975 // Emit outer loop.
2976 //
2977 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2978 // When schedule(dynamic,chunk_size) is specified, the iterations are
2979 // distributed to threads in the team in chunks as the threads request them.
2980 // Each thread executes a chunk of iterations, then requests another chunk,
2981 // until no chunks remain to be distributed. Each chunk contains chunk_size
2982 // iterations, except for the last chunk to be distributed, which may have
2983 // fewer iterations. When no chunk_size is specified, it defaults to 1.
2984 //
2985 // When schedule(guided,chunk_size) is specified, the iterations are assigned
2986 // to threads in the team in chunks as the executing threads request them.
2987 // Each thread executes a chunk of iterations, then requests another chunk,
2988 // until no chunks remain to be assigned. For a chunk_size of 1, the size of
2989 // each chunk is proportional to the number of unassigned iterations divided
2990 // by the number of threads in the team, decreasing to 1. For a chunk_size
2991 // with value k (greater than 1), the size of each chunk is determined in the
2992 // same way, with the restriction that the chunks do not contain fewer than k
2993 // iterations (except for the last chunk to be assigned, which may have fewer
2994 // than k iterations).
2995 //
2996 // When schedule(auto) is specified, the decision regarding scheduling is
2997 // delegated to the compiler and/or runtime system. The programmer gives the
2998 // implementation the freedom to choose any possible mapping of iterations to
2999 // threads in the team.
3000 //
3001 // When schedule(runtime) is specified, the decision regarding scheduling is
3002 // deferred until run time, and the schedule and chunk size are taken from the
3003 // run-sched-var ICV. If the ICV is set to auto, the schedule is
3004 // implementation defined.
3005 //
3006 // __kmpc_dispatch_init();
3007 // while(__kmpc_dispatch_next(&LB, &UB)) {
3008 // idx = LB;
3009 // while (idx <= UB) { BODY; ++idx;
3010 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
3011 // } // inner loop
3012 // }
3013 // __kmpc_dispatch_deinit();
3014 //
3015 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
3016 // When schedule(static, chunk_size) is specified, iterations are divided into
3017 // chunks of size chunk_size, and the chunks are assigned to the threads in
3018 // the team in a round-robin fashion in the order of the thread number.
3019 //
3020 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
3021 // while (idx <= UB) { BODY; ++idx; } // inner loop
3022 // LB = LB + ST;
3023 // UB = UB + ST;
3024 // }
3025 //
3026
3027 const Expr *IVExpr = S.getIterationVariable();
3028 const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType());
3029 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3030
3031 if (DynamicOrOrdered) {
3032 const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
3033 CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
3034 llvm::Value *LBVal = DispatchBounds.first;
3035 llvm::Value *UBVal = DispatchBounds.second;
3036 CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
3037 LoopArgs.Chunk};
3038 RT.emitForDispatchInit(CGF&: *this, Loc: S.getBeginLoc(), ScheduleKind, IVSize,
3039 IVSigned, Ordered, DispatchValues: DispatchRTInputValues);
3040 } else {
3041 CGOpenMPRuntime::StaticRTInput StaticInit(
3042 IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
3043 LoopArgs.ST, LoopArgs.Chunk);
3044 RT.emitForStaticInit(CGF&: *this, Loc: S.getBeginLoc(), DKind: S.getDirectiveKind(),
3045 ScheduleKind, Values: StaticInit);
3046 }
3047
3048 auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
3049 const unsigned IVSize,
3050 const bool IVSigned) {
3051 if (Ordered) {
3052 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
3053 IVSigned);
3054 }
3055 };
3056
3057 OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
3058 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
3059 OuterLoopArgs.IncExpr = S.getInc();
3060 OuterLoopArgs.Init = S.getInit();
3061 OuterLoopArgs.Cond = S.getCond();
3062 OuterLoopArgs.NextLB = S.getNextLowerBound();
3063 OuterLoopArgs.NextUB = S.getNextUpperBound();
3064 OuterLoopArgs.DKind = LoopArgs.DKind;
3065 EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, LoopArgs: OuterLoopArgs,
3066 CodeGenLoop: emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
3067 if (DynamicOrOrdered) {
3068 RT.emitForDispatchDeinit(CGF&: *this, Loc: S.getBeginLoc());
3069 }
3070}
3071
3072static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
3073 const unsigned IVSize, const bool IVSigned) {}
3074
3075void CodeGenFunction::EmitOMPDistributeOuterLoop(
3076 OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
3077 OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
3078 const CodeGenLoopTy &CodeGenLoopContent) {
3079
3080 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3081
3082 // Emit outer loop.
3083 // Same behavior as an OMPForOuterLoop, except that the schedule cannot be
3084 // dynamic.
3086
3087 const Expr *IVExpr = S.getIterationVariable();
3088 const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType());
3089 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3090
3091 CGOpenMPRuntime::StaticRTInput StaticInit(
3092 IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
3093 LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
3094 RT.emitDistributeStaticInit(CGF&: *this, Loc: S.getBeginLoc(), SchedKind: ScheduleKind, Values: StaticInit);
3095
3096 // For combined 'distribute' and 'for' the increment expression of distribute
3097 // is stored in DistInc; for 'distribute' alone, it is in Inc.
3098 Expr *IncExpr;
3099 if (isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind()))
3100 IncExpr = S.getDistInc();
3101 else
3102 IncExpr = S.getInc();
3103
3104 // This routine is shared by 'omp distribute parallel for' and
3105 // 'omp distribute': select the right EUB expression depending on the
3106 // directive.
3107 OMPLoopArguments OuterLoopArgs;
3108 OuterLoopArgs.LB = LoopArgs.LB;
3109 OuterLoopArgs.UB = LoopArgs.UB;
3110 OuterLoopArgs.ST = LoopArgs.ST;
3111 OuterLoopArgs.IL = LoopArgs.IL;
3112 OuterLoopArgs.Chunk = LoopArgs.Chunk;
3113 OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
3114 ? S.getCombinedEnsureUpperBound()
3115 : S.getEnsureUpperBound();
3116 OuterLoopArgs.IncExpr = IncExpr;
3117 OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
3118 ? S.getCombinedInit()
3119 : S.getInit();
3120 OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
3121 ? S.getCombinedCond()
3122 : S.getCond();
3123 OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
3124 ? S.getCombinedNextLowerBound()
3125 : S.getNextLowerBound();
3126 OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
3127 ? S.getCombinedNextUpperBound()
3128 : S.getNextUpperBound();
3129 OuterLoopArgs.DKind = OMPD_distribute;
3130
3131 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
3132 LoopScope, LoopArgs: OuterLoopArgs, CodeGenLoop: CodeGenLoopContent,
3133 CodeGenOrdered: emitEmptyOrdered);
3134}
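// For illustration: in a combined construct such as
// \code
// #pragma omp distribute parallel for
// for (int i = 0; i < n; ++i) body(i);
// \endcode
// isOpenMPLoopBoundSharingDirective() is true, so the Combined* expressions
// (CombinedInit, CombinedCond, CombinedNextLowerBound, ...) drive the
// distribute loop; a standalone 'omp distribute' uses the plain forms.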
3135
3136static std::pair<LValue, LValue>
3137emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
3138 const OMPExecutableDirective &S) {
3139 const OMPLoopDirective &LS = cast<OMPLoopDirective>(Val: S);
3140 LValue LB =
3141 EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getLowerBoundVariable()));
3142 LValue UB =
3143 EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getUpperBoundVariable()));
3144
3145 // When composing 'distribute' with 'for' (e.g. as in 'distribute
3146 // parallel for') we need to use the 'distribute' chunk lower and upper
3147 // bounds rather than the whole loop iteration space. These are parameters
3148 // to the outlined function for 'parallel', and we copy the bounds of the
3149 // previous schedule into the current ones.
3151 LValue PrevLB = CGF.EmitLValue(E: LS.getPrevLowerBoundVariable());
3152 LValue PrevUB = CGF.EmitLValue(E: LS.getPrevUpperBoundVariable());
3153 llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
3154 lvalue: PrevLB, Loc: LS.getPrevLowerBoundVariable()->getExprLoc());
3155 PrevLBVal = CGF.EmitScalarConversion(
3156 Src: PrevLBVal, SrcTy: LS.getPrevLowerBoundVariable()->getType(),
3157 DstTy: LS.getIterationVariable()->getType(),
3158 Loc: LS.getPrevLowerBoundVariable()->getExprLoc());
3159 llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
3160 lvalue: PrevUB, Loc: LS.getPrevUpperBoundVariable()->getExprLoc());
3161 PrevUBVal = CGF.EmitScalarConversion(
3162 Src: PrevUBVal, SrcTy: LS.getPrevUpperBoundVariable()->getType(),
3163 DstTy: LS.getIterationVariable()->getType(),
3164 Loc: LS.getPrevUpperBoundVariable()->getExprLoc());
3165
3166 CGF.EmitStoreOfScalar(value: PrevLBVal, lvalue: LB);
3167 CGF.EmitStoreOfScalar(value: PrevUBVal, lvalue: UB);
3168
3169 return {LB, UB};
3170}
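// For illustration (a rough sketch, not emitted verbatim): each team
// executing the outlined 'parallel' region of a combined 'distribute parallel
// for' receives its distribute chunk as PrevLB/PrevUB, and the stores above
// seed the worksharing bounds from them:
// \code
// LB = PrevLB; // start of this team's distribute chunk
// UB = PrevUB; // end of this team's distribute chunk
// // the 'for' schedule then partitions only [LB, UB] among the threads
// \endcode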
3171
3172/// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
3173/// we need to use the LB and UB expressions generated by the worksharing
3174/// code generation support, whereas in non-combined situations we would
3175/// just emit 0 and the LastIteration expression.
3176/// This function is necessary due to the difference of the LB and UB
3177/// types for the RT emission routines for 'for_static_init' and
3178/// 'for_dispatch_init'.
3179static std::pair<llvm::Value *, llvm::Value *>
3180emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
3181 const OMPExecutableDirective &S,
3182 Address LB, Address UB) {
3183 const OMPLoopDirective &LS = cast<OMPLoopDirective>(Val: S);
3184 const Expr *IVExpr = LS.getIterationVariable();
3185 // When implementing a dynamic schedule for a 'for' combined with a
3186 // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
3187 // is not normalized as each team only executes its own assigned
3188 // distribute chunk.
3189 QualType IteratorTy = IVExpr->getType();
3190 llvm::Value *LBVal =
3191 CGF.EmitLoadOfScalar(Addr: LB, /*Volatile=*/false, Ty: IteratorTy, Loc: S.getBeginLoc());
3192 llvm::Value *UBVal =
3193 CGF.EmitLoadOfScalar(Addr: UB, /*Volatile=*/false, Ty: IteratorTy, Loc: S.getBeginLoc());
3194 return {LBVal, UBVal};
3195}
3196
3197static void emitDistributeParallelForDistributeInnerBoundParams(
3198 CodeGenFunction &CGF, const OMPExecutableDirective &S,
3199 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
3200 const auto &Dir = cast<OMPLoopDirective>(Val: S);
3201 LValue LB =
3202 CGF.EmitLValue(E: cast<DeclRefExpr>(Val: Dir.getCombinedLowerBoundVariable()));
3203 llvm::Value *LBCast = CGF.Builder.CreateIntCast(
3204 V: CGF.Builder.CreateLoad(Addr: LB.getAddress()), DestTy: CGF.SizeTy, /*isSigned=*/false);
3205 CapturedVars.push_back(Elt: LBCast);
3206 LValue UB =
3207 CGF.EmitLValue(E: cast<DeclRefExpr>(Val: Dir.getCombinedUpperBoundVariable()));
3208
3209 llvm::Value *UBCast = CGF.Builder.CreateIntCast(
3210 V: CGF.Builder.CreateLoad(Addr: UB.getAddress()), DestTy: CGF.SizeTy, /*isSigned=*/false);
3211 CapturedVars.push_back(Elt: UBCast);
3212}
3213
3214static void
3215emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
3216 const OMPLoopDirective &S,
3217 CodeGenFunction::JumpDest LoopExit) {
3218 auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
3219 PrePostActionTy &Action) {
3220 Action.Enter(CGF);
3221 bool HasCancel = false;
3222 if (!isOpenMPSimdDirective(DKind: S.getDirectiveKind())) {
3223 if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(Val: &S))
3224 HasCancel = D->hasCancel();
3225 else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(Val: &S))
3226 HasCancel = D->hasCancel();
3227 else if (const auto *D =
3228 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(Val: &S))
3229 HasCancel = D->hasCancel();
3230 }
3231 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
3232 HasCancel);
3233 CGF.EmitOMPWorksharingLoop(S, EUB: S.getPrevEnsureUpperBound(),
3234 CodeGenLoopBounds: emitDistributeParallelForInnerBounds,
3235 CGDispatchBounds: emitDistributeParallelForDispatchBounds);
3236 };
3237
3238 emitCommonOMPParallelDirective(
3239 CGF, S,
3240 InnermostKind: isOpenMPSimdDirective(DKind: S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
3241 CodeGen: CGInlinedWorksharingLoop,
3242 CodeGenBoundParameters: emitDistributeParallelForDistributeInnerBoundParams);
3243}
3244
3245void CodeGenFunction::EmitOMPDistributeParallelForDirective(
3246 const OMPDistributeParallelForDirective &S) {
3247 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3248 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
3249 IncExpr: S.getDistInc());
3250 };
3251 OMPLexicalScope Scope(*this, S, OMPD_parallel);
3252 CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_distribute, CodeGen);
3253}
3254
3255void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
3256 const OMPDistributeParallelForSimdDirective &S) {
3257 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3258 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
3259 IncExpr: S.getDistInc());
3260 };
3261 OMPLexicalScope Scope(*this, S, OMPD_parallel);
3262 CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_distribute, CodeGen);
3263}
3264
3265void CodeGenFunction::EmitOMPDistributeSimdDirective(
3266 const OMPDistributeSimdDirective &S) {
3267 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3268 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
3269 };
3270 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3271 CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_simd, CodeGen);
3272}
3273
3274void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
3275 CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
3276 // Emit SPMD target parallel for region as a standalone region.
3277 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3278 emitOMPSimdRegion(CGF, S, Action);
3279 };
3280 llvm::Function *Fn;
3281 llvm::Constant *Addr;
3282 // Emit target region as a standalone region.
3283 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3284 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
3285 assert(Fn && Addr && "Target device function emission failed.");
3286}
3287
3288void CodeGenFunction::EmitOMPTargetSimdDirective(
3289 const OMPTargetSimdDirective &S) {
3290 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3291 emitOMPSimdRegion(CGF, S, Action);
3292 };
3293 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
3294}
3295
3296namespace {
3297struct ScheduleKindModifiersTy {
3298 OpenMPScheduleClauseKind Kind;
3299 OpenMPScheduleClauseModifier M1;
3300 OpenMPScheduleClauseModifier M2;
3301 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
3302 OpenMPScheduleClauseModifier M1,
3303 OpenMPScheduleClauseModifier M2)
3304 : Kind(Kind), M1(M1), M2(M2) {}
3305};
3306} // namespace
3307
3308bool CodeGenFunction::EmitOMPWorksharingLoop(
3309 const OMPLoopDirective &S, Expr *EUB,
3310 const CodeGenLoopBoundsTy &CodeGenLoopBounds,
3311 const CodeGenDispatchBoundsTy &CGDispatchBounds) {
3312 // Emit the loop iteration variable.
3313 const auto *IVExpr = cast<DeclRefExpr>(Val: S.getIterationVariable());
3314 const auto *IVDecl = cast<VarDecl>(Val: IVExpr->getDecl());
3315 EmitVarDecl(D: *IVDecl);
3316
3317 // Emit the iterations count variable.
3318 // If it is not a variable, Sema decided to calculate the iterations count
3319 // on each iteration (e.g., it is foldable into a constant).
3320 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) {
3321 EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl()));
3322 // Emit calculation of the iterations count.
3323 EmitIgnoredExpr(E: S.getCalcLastIteration());
3324 }
3325
3326 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3327
3328 bool HasLastprivateClause;
3329 // Check pre-condition.
3330 {
3331 OMPLoopScope PreInitScope(*this, S);
3332 // Skip the entire loop if we don't meet the precondition.
3333 // If the condition constant folds and can be elided, avoid emitting the
3334 // whole loop.
3335 bool CondConstant;
3336 llvm::BasicBlock *ContBlock = nullptr;
3337 if (ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) {
3338 if (!CondConstant)
3339 return false;
3340 } else {
3341 llvm::BasicBlock *ThenBlock = createBasicBlock(name: "omp.precond.then");
3342 ContBlock = createBasicBlock(name: "omp.precond.end");
3343 emitPreCond(CGF&: *this, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock,
3344 TrueCount: getProfileCount(S: &S));
3345 EmitBlock(BB: ThenBlock);
3346 incrementProfileCounter(S: &S);
3347 }
3348
3349 RunCleanupsScope DoacrossCleanupScope(*this);
3350 bool Ordered = false;
3351 if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
3352 if (OrderedClause->getNumForLoops())
3353 RT.emitDoacrossInit(CGF&: *this, D: S, NumIterations: OrderedClause->getLoopNumIterations());
3354 else
3355 Ordered = true;
3356 }
3357
3358 llvm::DenseSet<const Expr *> EmittedFinals;
3359 emitAlignedClause(CGF&: *this, D: S);
3360 bool HasLinears = EmitOMPLinearClauseInit(D: S);
3361 // Emit helper vars inits.
3362
3363 std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
3364 LValue LB = Bounds.first;
3365 LValue UB = Bounds.second;
3366 LValue ST =
3367 EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getStrideVariable()));
3368 LValue IL =
3369 EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getIsLastIterVariable()));
3370
3371 // Emit 'then' code.
3372 {
3373 OMPPrivateScope LoopScope(*this);
3374 if (EmitOMPFirstprivateClause(D: S, PrivateScope&: LoopScope) || HasLinears) {
3375 // Emit implicit barrier to synchronize threads and avoid data races on
3376 // initialization of firstprivate variables and post-update of
3377 // lastprivate variables.
3378 CGM.getOpenMPRuntime().emitBarrierCall(
3379 CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false,
3380 /*ForceSimpleCall=*/true);
3381 }
3382 EmitOMPPrivateClause(D: S, PrivateScope&: LoopScope);
3383 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
3384 *this, S, EmitLValue(E: S.getIterationVariable()));
3385 HasLastprivateClause = EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope);
3386 EmitOMPReductionClauseInit(D: S, PrivateScope&: LoopScope);
3387 EmitOMPPrivateLoopCounters(S, LoopScope);
3388 EmitOMPLinearClause(D: S, PrivateScope&: LoopScope);
3389 (void)LoopScope.Privatize();
3390 if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()))
3391 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF&: *this, D: S);
3392
3393 // Detect the loop schedule kind and chunk.
3394 const Expr *ChunkExpr = nullptr;
3395 OpenMPScheduleTy ScheduleKind;
3396 if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
3397 ScheduleKind.Schedule = C->getScheduleKind();
3398 ScheduleKind.M1 = C->getFirstScheduleModifier();
3399 ScheduleKind.M2 = C->getSecondScheduleModifier();
3400 ChunkExpr = C->getChunkSize();
3401 } else {
3402 // Default behavior for the schedule clause.
3403 CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
3404 CGF&: *this, S, ScheduleKind&: ScheduleKind.Schedule, ChunkExpr);
3405 }
3406 bool HasChunkSizeOne = false;
3407 llvm::Value *Chunk = nullptr;
3408 if (ChunkExpr) {
3409 Chunk = EmitScalarExpr(E: ChunkExpr);
3410 Chunk = EmitScalarConversion(Src: Chunk, SrcTy: ChunkExpr->getType(),
3411 DstTy: S.getIterationVariable()->getType(),
3412 Loc: S.getBeginLoc());
3413 Expr::EvalResult Result;
3414 if (ChunkExpr->EvaluateAsInt(Result, Ctx: getContext())) {
3415 llvm::APSInt EvaluatedChunk = Result.Val.getInt();
3416 HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
3417 }
3418 }
3419 const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType());
3420 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3421 // OpenMP 4.5, 2.7.1 Loop Construct, Description.
3422 // If the static schedule kind is specified or if the ordered clause is
3423 // specified, and if no monotonic modifier is specified, the effect will
3424 // be as if the monotonic modifier was specified.
3425 bool StaticChunkedOne =
3426 RT.isStaticChunked(ScheduleKind: ScheduleKind.Schedule,
3427 /* Chunked */ Chunk != nullptr) &&
3428 HasChunkSizeOne &&
3429 isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind());
3430 bool IsMonotonic =
3431 Ordered ||
3432 (ScheduleKind.Schedule == OMPC_SCHEDULE_static &&
3433 !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
3434 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
3435 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
3436 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
3437 if ((RT.isStaticNonchunked(ScheduleKind: ScheduleKind.Schedule,
3438 /* Chunked */ Chunk != nullptr) ||
3439 StaticChunkedOne) &&
3440 !Ordered) {
3441 JumpDest LoopExit =
3442 getJumpDestInCurrentScope(Target: createBasicBlock(name: "omp.loop.exit"));
3443 emitCommonSimdLoop(
3444 CGF&: *this, S,
3445 SimdInitGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3446 if (isOpenMPSimdDirective(DKind: S.getDirectiveKind())) {
3447 CGF.EmitOMPSimdInit(D: S);
3448 } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
3449 if (C->getKind() == OMPC_ORDER_concurrent)
3450 CGF.LoopStack.setParallel(/*Enable=*/true);
3451 }
3452 },
3453 BodyCodeGen: [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
3454 &S, ScheduleKind, LoopExit,
3455 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
3456 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
3457 // When no chunk_size is specified, the iteration space is divided
3458 // into chunks that are approximately equal in size, and at most
3459 // one chunk is distributed to each thread. Note that the size of
3460 // the chunks is unspecified in this case.
3461 CGOpenMPRuntime::StaticRTInput StaticInit(
3462 IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
3463 UB.getAddress(), ST.getAddress(),
3464 StaticChunkedOne ? Chunk : nullptr);
3465 CGF.CGM.getOpenMPRuntime().emitForStaticInit(
3466 CGF, Loc: S.getBeginLoc(), DKind: S.getDirectiveKind(), ScheduleKind,
3467 Values: StaticInit);
3468 // UB = min(UB, GlobalUB);
3469 if (!StaticChunkedOne)
3470 CGF.EmitIgnoredExpr(E: S.getEnsureUpperBound());
3471 // IV = LB;
3472 CGF.EmitIgnoredExpr(E: S.getInit());
3473 // For unchunked static schedule generate:
3474 //
3475 // while (idx <= UB) {
3476 // BODY;
3477 // ++idx;
3478 // }
3479 //
3480 // For static schedule with chunk one:
3481 //
3482 // while (IV <= PrevUB) {
3483 // BODY;
3484 // IV += ST;
3485 // }
3486 CGF.EmitOMPInnerLoop(
3487 S, RequiresCleanup: LoopScope.requiresCleanups(),
3488 LoopCond: StaticChunkedOne ? S.getCombinedParForInDistCond()
3489 : S.getCond(),
3490 IncExpr: StaticChunkedOne ? S.getDistInc() : S.getInc(),
3491 BodyGen: [&S, LoopExit](CodeGenFunction &CGF) {
3492 emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
3493 },
3494 PostIncGen: [](CodeGenFunction &) {});
3495 });
3496 EmitBlock(BB: LoopExit.getBlock());
3497 // Tell the runtime we are done.
3498 auto &&CodeGen = [&S](CodeGenFunction &CGF) {
3499 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, Loc: S.getEndLoc(),
3500 DKind: OMPD_for);
3501 };
3502 OMPCancelStack.emitExit(CGF&: *this, Kind: S.getDirectiveKind(), CodeGen);
3503 } else {
3504 // Emit the outer loop, which requests its work chunk [LB..UB] from
3505 // runtime and runs the inner loop to process it.
3506 OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
3507 ST.getAddress(), IL.getAddress(), Chunk,
3508 EUB);
3509 LoopArguments.DKind = OMPD_for;
3510 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
3511 LoopArgs: LoopArguments, CGDispatchBounds);
3512 }
3513 if (isOpenMPSimdDirective(DKind: S.getDirectiveKind())) {
3514 EmitOMPSimdFinal(D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
3515 return CGF.Builder.CreateIsNotNull(
3516 Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
3517 });
3518 }
3519 EmitOMPReductionClauseFinal(
3520 D: S, /*ReductionKind=*/isOpenMPSimdDirective(DKind: S.getDirectiveKind())
3521 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
3522 : /*Parallel only*/ OMPD_parallel);
3523 // Emit post-update of the reduction variables if IsLastIter != 0.
3524 emitPostUpdateForReductionClause(
3525 CGF&: *this, D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
3526 return CGF.Builder.CreateIsNotNull(
3527 Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
3528 });
3529 // Emit final copy of the lastprivate variables if IsLastIter != 0.
3530 if (HasLastprivateClause)
3531 EmitOMPLastprivateClauseFinal(
3532 D: S, NoFinals: isOpenMPSimdDirective(DKind: S.getDirectiveKind()),
3533 IsLastIterCond: Builder.CreateIsNotNull(Arg: EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc())));
3534 LoopScope.restoreMap();
3535 EmitOMPLinearClauseFinal(D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
3536 return CGF.Builder.CreateIsNotNull(
3537 Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
3538 });
3539 }
3540 DoacrossCleanupScope.ForceCleanup();
3541 // We're now done with the loop, so jump to the continuation block.
3542 if (ContBlock) {
3543 EmitBranch(Block: ContBlock);
3544 EmitBlock(BB: ContBlock, /*IsFinished=*/true);
3545 }
3546 }
3547 return HasLastprivateClause;
3548}
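// For illustration, the two paths above correspond to hypothetical user code
// as follows:
// \code
// #pragma omp for schedule(static)     // static non-chunked, not ordered:
// for (...) ...                        //   for_static_init + one inner loop
//
// #pragma omp for schedule(dynamic, 4) // dispatch schedule:
// for (...) ...                        //   outer loop requesting chunks
// \endcode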
3549
3550/// The following two functions generate expressions for the loop lower
3551/// and upper bounds in case of static and dynamic (dispatch) schedule
3552/// of the associated 'for' or 'distribute' loop.
3553static std::pair<LValue, LValue>
3554emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
3555 const auto &LS = cast<OMPLoopDirective>(Val: S);
3556 LValue LB =
3557 EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getLowerBoundVariable()));
3558 LValue UB =
3559 EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getUpperBoundVariable()));
3560 return {LB, UB};
3561}
3562
3563/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
3564/// consider the lower and upper bound expressions generated by the
3565/// worksharing loop support, but we use 0 and the iteration space size as
3566/// constants.
3567static std::pair<llvm::Value *, llvm::Value *>
3568emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
3569 Address LB, Address UB) {
3570 const auto &LS = cast<OMPLoopDirective>(Val: S);
3571 const Expr *IVExpr = LS.getIterationVariable();
3572 const unsigned IVSize = CGF.getContext().getTypeSize(T: IVExpr->getType());
3573 llvm::Value *LBVal = CGF.Builder.getIntN(N: IVSize, C: 0);
3574 llvm::Value *UBVal = CGF.EmitScalarExpr(E: LS.getLastIteration());
3575 return {LBVal, UBVal};
3576}
3577
3578/// Emits internal temp array declarations for the directive with inscan
3579/// reductions.
3580/// The code is the following:
3581/// \code
3582/// size num_iters = <num_iters>;
3583/// <type> buffer[num_iters];
3584/// \endcode
3585static void emitScanBasedDirectiveDecls(
3586 CodeGenFunction &CGF, const OMPLoopDirective &S,
3587 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
3588 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3589 V: NumIteratorsGen(CGF), DestTy: CGF.SizeTy, /*isSigned=*/false);
3590 SmallVector<const Expr *, 4> Shareds;
3591 SmallVector<const Expr *, 4> Privates;
3592 SmallVector<const Expr *, 4> ReductionOps;
3593 SmallVector<const Expr *, 4> CopyArrayTemps;
3594 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3595 assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3596 "Only inscan reductions are expected.");
3597 Shareds.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
3598 Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
3599 ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
3600 CopyArrayTemps.append(in_start: C->copy_array_temps().begin(),
3601 in_end: C->copy_array_temps().end());
3602 }
3603 {
3604 // Emit buffers for each reduction variable.
3605 // ReductionCodeGen is required to correctly emit the code for array
3606 // reductions.
3607 ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
3608 unsigned Count = 0;
3609 auto *ITA = CopyArrayTemps.begin();
3610 for (const Expr *IRef : Privates) {
3611 const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IRef)->getDecl());
3612 // Emit variably modified arrays, used for arrays/array sections
3613 // reductions.
3614 if (PrivateVD->getType()->isVariablyModifiedType()) {
3615 RedCG.emitSharedOrigLValue(CGF, N: Count);
3616 RedCG.emitAggregateType(CGF, N: Count);
3617 }
3618 CodeGenFunction::OpaqueValueMapping DimMapping(
3619 CGF,
3620 cast<OpaqueValueExpr>(
3621 Val: cast<VariableArrayType>(Val: (*ITA)->getType()->getAsArrayTypeUnsafe())
3622 ->getSizeExpr()),
3623 RValue::get(V: OMPScanNumIterations));
3624 // Emit temp buffer.
3625 CGF.EmitVarDecl(D: *cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ITA)->getDecl()));
3626 ++ITA;
3627 ++Count;
3628 }
3629 }
3630}
3631
3632/// Copies the final inscan reduction values to the original variables.
3633/// The code is the following:
3634/// \code
3635/// <orig_var> = buffer[num_iters-1];
3636/// \endcode
3637static void emitScanBasedDirectiveFinals(
3638 CodeGenFunction &CGF, const OMPLoopDirective &S,
3639 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
3640 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3641 V: NumIteratorsGen(CGF), DestTy: CGF.SizeTy, /*isSigned=*/false);
3642 SmallVector<const Expr *, 4> Shareds;
3643 SmallVector<const Expr *, 4> LHSs;
3644 SmallVector<const Expr *, 4> RHSs;
3645 SmallVector<const Expr *, 4> Privates;
3646 SmallVector<const Expr *, 4> CopyOps;
3647 SmallVector<const Expr *, 4> CopyArrayElems;
3648 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3649 assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3650 "Only inscan reductions are expected.");
3651 Shareds.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
3652 LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
3653 RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
3654 Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
3655 CopyOps.append(in_start: C->copy_ops().begin(), in_end: C->copy_ops().end());
3656 CopyArrayElems.append(in_start: C->copy_array_elems().begin(),
3657 in_end: C->copy_array_elems().end());
3658 }
3659 // Copy the final value from the reduction buffer back to the original var:
3660 // <orig_var> = buffer[num_iters - 1];
3661 llvm::Value *OMPLast = CGF.Builder.CreateNSWSub(
3662 LHS: OMPScanNumIterations,
3663 RHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1, /*isSigned=*/IsSigned: false));
3664 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
3665 const Expr *PrivateExpr = Privates[I];
3666 const Expr *OrigExpr = Shareds[I];
3667 const Expr *CopyArrayElem = CopyArrayElems[I];
3668 CodeGenFunction::OpaqueValueMapping IdxMapping(
3669 CGF,
3670 cast<OpaqueValueExpr>(
3671 Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
3672 RValue::get(V: OMPLast));
3673 LValue DestLVal = CGF.EmitLValue(E: OrigExpr);
3674 LValue SrcLVal = CGF.EmitLValue(E: CopyArrayElem);
3675 CGF.EmitOMPCopy(
3676 OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), SrcAddr: SrcLVal.getAddress(),
3677 DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
3678 SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), Copy: CopyOps[I]);
3679 }
3680}
3681
3682/// Emits the code for the directive with inscan reductions.
3683/// The code is the following:
3684/// \code
3685/// #pragma omp ...
3686/// for (i: 0..<num_iters>) {
3687/// <input phase>;
3688/// buffer[i] = red;
3689/// }
3690/// #pragma omp master // in parallel region
3691/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
3692/// for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
3693/// buffer[cnt] op= buffer[cnt-pow(2,k)];
3694/// #pragma omp barrier // in parallel region
3695/// #pragma omp ...
3696/// for (0..<num_iters>) {
3697/// red = InclusiveScan ? buffer[i] : buffer[i-1];
3698/// <scan phase>;
3699/// }
3700/// \endcode
3701static void emitScanBasedDirective(
3702 CodeGenFunction &CGF, const OMPLoopDirective &S,
3703 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
3704 llvm::function_ref<void(CodeGenFunction &)> FirstGen,
3705 llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
3706 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3707 V: NumIteratorsGen(CGF), DestTy: CGF.SizeTy, /*isSigned=*/false);
3708 SmallVector<const Expr *, 4> Privates;
3709 SmallVector<const Expr *, 4> ReductionOps;
3710 SmallVector<const Expr *, 4> LHSs;
3711 SmallVector<const Expr *, 4> RHSs;
3712 SmallVector<const Expr *, 4> CopyArrayElems;
3713 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3714 assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3715 "Only inscan reductions are expected.");
3716 Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
3717 ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
3718 LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
3719 RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
3720 CopyArrayElems.append(in_start: C->copy_array_elems().begin(),
3721 in_end: C->copy_array_elems().end());
3722 }
3723 CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
3724 {
3725 // Emit loop with input phase:
3726 // #pragma omp ...
3727 // for (i: 0..<num_iters>) {
3728 // <input phase>;
3729 // buffer[i] = red;
3730 // }
3731 CGF.OMPFirstScanLoop = true;
3732 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3733 FirstGen(CGF);
3734 }
3735 // #pragma omp barrier // in parallel region
3736 auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems,
3737 &ReductionOps,
3738 &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) {
3739 Action.Enter(CGF);
3740 // Emit prefix reduction:
3741 // #pragma omp master // in parallel region
3742 // for (int k = 0; k != ceil(log2(n)); ++k)
3743 llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
3744 llvm::BasicBlock *LoopBB = CGF.createBasicBlock(name: "omp.outer.log.scan.body");
3745 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: "omp.outer.log.scan.exit");
3746 llvm::Function *F =
3747 CGF.CGM.getIntrinsic(IID: llvm::Intrinsic::log2, Tys: CGF.DoubleTy);
3748 llvm::Value *Arg =
3749 CGF.Builder.CreateUIToFP(V: OMPScanNumIterations, DestTy: CGF.DoubleTy);
3750 llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(callee: F, args: Arg);
3751 F = CGF.CGM.getIntrinsic(IID: llvm::Intrinsic::ceil, Tys: CGF.DoubleTy);
3752 LogVal = CGF.EmitNounwindRuntimeCall(callee: F, args: LogVal);
3753 LogVal = CGF.Builder.CreateFPToUI(V: LogVal, DestTy: CGF.IntTy);
3754 llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
3755 LHS: OMPScanNumIterations, RHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1));
3756 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: S.getBeginLoc());
3757 CGF.EmitBlock(BB: LoopBB);
3758 auto *Counter = CGF.Builder.CreatePHI(Ty: CGF.IntTy, NumReservedValues: 2);
3759 // size pow2k = 1;
3760 auto *Pow2K = CGF.Builder.CreatePHI(Ty: CGF.SizeTy, NumReservedValues: 2);
3761 Counter->addIncoming(V: llvm::ConstantInt::get(Ty: CGF.IntTy, V: 0), BB: InputBB);
3762 Pow2K->addIncoming(V: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1), BB: InputBB);
3763 // for (size i = n - 1; i >= pow2k; --i)
3764 //   tmp[i] op= tmp[i-pow2k];
3765 llvm::BasicBlock *InnerLoopBB =
3766 CGF.createBasicBlock(name: "omp.inner.log.scan.body");
3767 llvm::BasicBlock *InnerExitBB =
3768 CGF.createBasicBlock(name: "omp.inner.log.scan.exit");
3769 llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(LHS: NMin1, RHS: Pow2K);
3770 CGF.Builder.CreateCondBr(Cond: CmpI, True: InnerLoopBB, False: InnerExitBB);
3771 CGF.EmitBlock(BB: InnerLoopBB);
3772 auto *IVal = CGF.Builder.CreatePHI(Ty: CGF.SizeTy, NumReservedValues: 2);
3773 IVal->addIncoming(V: NMin1, BB: LoopBB);
3774 {
3775 CodeGenFunction::OMPPrivateScope PrivScope(CGF);
3776 auto *ILHS = LHSs.begin();
3777 auto *IRHS = RHSs.begin();
3778 for (const Expr *CopyArrayElem : CopyArrayElems) {
3779 const auto *LHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl());
3780 const auto *RHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl());
3781 Address LHSAddr = Address::invalid();
3782 {
3783 CodeGenFunction::OpaqueValueMapping IdxMapping(
3784 CGF,
3785 cast<OpaqueValueExpr>(
3786 Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
3787 RValue::get(V: IVal));
3788 LHSAddr = CGF.EmitLValue(E: CopyArrayElem).getAddress();
3789 }
3790 PrivScope.addPrivate(LocalVD: LHSVD, Addr: LHSAddr);
3791 Address RHSAddr = Address::invalid();
3792 {
3793 llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(LHS: IVal, RHS: Pow2K);
3794 CodeGenFunction::OpaqueValueMapping IdxMapping(
3795 CGF,
3796 cast<OpaqueValueExpr>(
3797 Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
3798 RValue::get(V: OffsetIVal));
3799 RHSAddr = CGF.EmitLValue(E: CopyArrayElem).getAddress();
3800 }
3801 PrivScope.addPrivate(LocalVD: RHSVD, Addr: RHSAddr);
3802 ++ILHS;
3803 ++IRHS;
3804 }
3805 PrivScope.Privatize();
3806 CGF.CGM.getOpenMPRuntime().emitReduction(
3807 CGF, Loc: S.getEndLoc(), Privates, LHSExprs: LHSs, RHSExprs: RHSs, ReductionOps,
3808 Options: {/*WithNowait=*/true, /*SimpleReduction=*/true, .ReductionKind: OMPD_unknown});
3809 }
3810 llvm::Value *NextIVal =
3811 CGF.Builder.CreateNUWSub(LHS: IVal, RHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1));
3812 IVal->addIncoming(V: NextIVal, BB: CGF.Builder.GetInsertBlock());
3813 CmpI = CGF.Builder.CreateICmpUGE(LHS: NextIVal, RHS: Pow2K);
3814 CGF.Builder.CreateCondBr(Cond: CmpI, True: InnerLoopBB, False: InnerExitBB);
3815 CGF.EmitBlock(BB: InnerExitBB);
3816 llvm::Value *Next =
3817 CGF.Builder.CreateNUWAdd(LHS: Counter, RHS: llvm::ConstantInt::get(Ty: CGF.IntTy, V: 1));
3818 Counter->addIncoming(V: Next, BB: CGF.Builder.GetInsertBlock());
3819 // pow2k <<= 1;
3820 llvm::Value *NextPow2K =
3821 CGF.Builder.CreateShl(LHS: Pow2K, RHS: 1, Name: "", /*HasNUW=*/true);
3822 Pow2K->addIncoming(V: NextPow2K, BB: CGF.Builder.GetInsertBlock());
3823 llvm::Value *Cmp = CGF.Builder.CreateICmpNE(LHS: Next, RHS: LogVal);
3824 CGF.Builder.CreateCondBr(Cond: Cmp, True: LoopBB, False: ExitBB);
3825 auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: S.getEndLoc());
3826 CGF.EmitBlock(BB: ExitBB);
3827 };
3828 if (isOpenMPParallelDirective(DKind: S.getDirectiveKind())) {
3829 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: CodeGen, Loc: S.getBeginLoc());
3830 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
3831 CGF, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false,
3832 /*ForceSimpleCall=*/true);
3833 } else {
3834 RegionCodeGenTy RCG(CodeGen);
3835 RCG(CGF);
3836 }
3837
3838 CGF.OMPFirstScanLoop = false;
3839 SecondGen(CGF);
3840}
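// For illustration, hypothetical user code that drives this scan-based
// lowering:
// \code
// int red = 0;
// #pragma omp parallel for reduction(inscan, +: red)
// for (int i = 0; i < n; ++i) {
//   red += in[i];                   // <input phase>
//   #pragma omp scan inclusive(red)
//   out[i] = red;                   // <scan phase>
// }
// \endcode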
3841
3842static bool emitWorksharingDirective(CodeGenFunction &CGF,
3843 const OMPLoopDirective &S,
3844 bool HasCancel) {
3845 bool HasLastprivates;
3846 if (llvm::any_of(Range: S.getClausesOfKind<OMPReductionClause>(),
3847 P: [](const OMPReductionClause *C) {
3848 return C->getModifier() == OMPC_REDUCTION_inscan;
3849 })) {
3850 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
3851 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3852 OMPLoopScope LoopScope(CGF, S);
3853 return CGF.EmitScalarExpr(E: S.getNumIterations());
3854 };
3855 const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) {
3856 CodeGenFunction::OMPCancelStackRAII CancelRegion(
3857 CGF, S.getDirectiveKind(), HasCancel);
3858 (void)CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(),
3859 CodeGenLoopBounds: emitForLoopBounds,
3860 CGDispatchBounds: emitDispatchForLoopBounds);
3861 // Emit an implicit barrier at the end.
3862 CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc: S.getBeginLoc(),
3863 Kind: OMPD_for);
3864 };
3865 const auto &&SecondGen = [&S, HasCancel,
3866 &HasLastprivates](CodeGenFunction &CGF) {
3867 CodeGenFunction::OMPCancelStackRAII CancelRegion(
3868 CGF, S.getDirectiveKind(), HasCancel);
3869 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(),
3870 CodeGenLoopBounds: emitForLoopBounds,
3871 CGDispatchBounds: emitDispatchForLoopBounds);
3872 };
3873 if (!isOpenMPParallelDirective(DKind: S.getDirectiveKind()))
3874 emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
3875 emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
3876 if (!isOpenMPParallelDirective(DKind: S.getDirectiveKind()))
3877 emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen);
3878 } else {
3879 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
3880 HasCancel);
3881 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(),
3882 CodeGenLoopBounds: emitForLoopBounds,
3883 CGDispatchBounds: emitDispatchForLoopBounds);
3884 }
3885 return HasLastprivates;
3886}
3887
3888static bool isSupportedByOpenMPIRBuilder(const OMPForDirective &S) {
3889 if (S.hasCancel())
3890 return false;
3891 for (OMPClause *C : S.clauses()) {
3892 if (isa<OMPNowaitClause>(Val: C))
3893 continue;
3894
3895 if (auto *SC = dyn_cast<OMPScheduleClause>(Val: C)) {
3896 if (SC->getFirstScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
3897 return false;
3898 if (SC->getSecondScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
3899 return false;
3900 switch (SC->getScheduleKind()) {
3901 case OMPC_SCHEDULE_auto:
3902 case OMPC_SCHEDULE_dynamic:
3903 case OMPC_SCHEDULE_runtime:
3904 case OMPC_SCHEDULE_guided:
3905 case OMPC_SCHEDULE_static:
3906 continue;
3907 case OMPC_SCHEDULE_unknown:
3908 return false;
3909 }
3910 }
3911
3912 return false;
3913 }
3914
3915 return true;
3916}
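// For illustration:
// \code
// #pragma omp for nowait schedule(dynamic, 4) // passes the checks above
// for (...) ...
// #pragma omp for schedule(monotonic: static) // schedule modifier: falls
// for (...) ...                               // back to classic codegen
// \endcode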
3917
3918static llvm::omp::ScheduleKind
3919convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind) {
3920 switch (ScheduleClauseKind) {
3921 case OMPC_SCHEDULE_unknown:
3922 return llvm::omp::OMP_SCHEDULE_Default;
3923 case OMPC_SCHEDULE_auto:
3924 return llvm::omp::OMP_SCHEDULE_Auto;
3925 case OMPC_SCHEDULE_dynamic:
3926 return llvm::omp::OMP_SCHEDULE_Dynamic;
3927 case OMPC_SCHEDULE_guided:
3928 return llvm::omp::OMP_SCHEDULE_Guided;
3929 case OMPC_SCHEDULE_runtime:
3930 return llvm::omp::OMP_SCHEDULE_Runtime;
3931 case OMPC_SCHEDULE_static:
3932 return llvm::omp::OMP_SCHEDULE_Static;
3933 }
3934 llvm_unreachable("Unhandled schedule kind");
3935}
3936
void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
  bool HasLastprivates = false;
  bool UseOMPIRBuilder =
      CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
  auto &&CodeGen = [this, &S, &HasLastprivates,
                    UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) {
    // Use the OpenMPIRBuilder if enabled.
    if (UseOMPIRBuilder) {
      bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>();

      llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default;
      llvm::Value *ChunkSize = nullptr;
      if (auto *SchedClause = S.getSingleClause<OMPScheduleClause>()) {
        SchedKind =
            convertClauseKindToSchedKind(SchedClause->getScheduleKind());
        if (const Expr *ChunkSizeExpr = SchedClause->getChunkSize())
          ChunkSize = EmitScalarExpr(ChunkSizeExpr);
      }

      // Emit the associated statement and get its loop representation.
      const Stmt *Inner = S.getRawStmt();
      llvm::CanonicalLoopInfo *CLI =
          EmitOMPCollapsedCanonicalLoopNest(Inner, 1);

      llvm::OpenMPIRBuilder &OMPBuilder =
          CGM.getOpenMPRuntime().getOMPBuilder();
      llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
          AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
      OMPBuilder.applyWorkshareLoop(
          Builder.getCurrentDebugLocation(), CLI, AllocaIP, NeedsBarrier,
          SchedKind, ChunkSize, /*HasSimdModifier=*/false,
          /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false,
          /*HasOrderedClause=*/false);
      return;
    }

    HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel());
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
                                                S.hasCancel());
  }

  if (!UseOMPIRBuilder) {
    // Emit an implicit barrier at the end.
    if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
      CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
  }

  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

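/// Creates a function-local temporary of type \p Ty named \p Name and wraps
/// it in an LValue; if \p Init is given, it is stored as the initial value.
/// Used below for the '.omp.sections.*' helper variables.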
static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
                                const Twine &Name,
                                llvm::Value *Init = nullptr) {
  LValue LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
  if (Init)
    CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
  return LVal;
}

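// Lowers a 'sections' region as a statically scheduled loop over section
// indices, e.g. (illustrative example, not from the source):
//   #pragma omp sections
//   {
//     #pragma omp section
//     foo();
//     #pragma omp section
//     bar();
//   }
// Each thread receives a sub-range of [0, NumSections) and dispatches to the
// matching section body through the switch emitted in BodyGen below.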
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
  const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, CapturedStmt, CS,
                    &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
    const ASTContext &C = CGF.getContext();
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    llvm::ConstantInt *GlobalUBVal = CS != nullptr
                                         ? CGF.Builder.getInt32(CS->size() - 1)
                                         : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator *Cond = BinaryOperator::Create(
        C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_PRValue, OK_Ordinary,
        S.getBeginLoc(), FPOptionsOverride());
    // Increment for loop counter.
    UnaryOperator *Inc = UnaryOperator::Create(
        C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_PRValue, OK_Ordinary,
        S.getBeginLoc(), /*CanOverflow=*/true, FPOptionsOverride());
    auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      //   case 0:
      //     <SectionStmt[0]>;
      //     break;
      // ...
      //   case <NumSection> - 1:
      //     <SectionStmt[<NumSection> - 1]>;
      //     break;
      // }
      // .omp.sections.exit:
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      llvm::SwitchInst *SwitchStmt =
          CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
                                   ExitBB, CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (const Stmt *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(CapturedStmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of
      // lastprivate variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
        LB.getAddress(), UB.getAddress(), ST.getAddress());
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
    // UB = min(UB, GlobalUB);
    llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
    llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                     OMPD_sections);
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
    });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
  };

  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
                                           OMPD_unknown);
  }
}

void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
    using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    const CapturedStmt *ICS = S.getInnermostCapturedStmt();
    const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
    const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
    llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
    if (CS) {
      for (const Stmt *SubStmt : CS->children()) {
        auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP,
                                         InsertPointTy CodeGenIP) {
          OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
              *this, SubStmt, AllocaIP, CodeGenIP, "section");
        };
        SectionCBVector.push_back(SectionCB);
      }
    } else {
      auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP,
                                            InsertPointTy CodeGenIP) {
        OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
            *this, CapturedStmt, AllocaIP, CodeGenIP, "section");
      };
      SectionCBVector.push_back(SectionCB);
    }

    // Privatization callback that performs appropriate action for
    // shared/private/firstprivate/lastprivate/copyin/... variables.
    //
    // TODO: This defaults to shared right now.
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                     llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
      // The next line is appropriate only for variables (Val) with the
      // data-sharing attribute "shared".
      ReplVal = &Val;

      return CodeGenIP;
    };

    CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
    Builder.restoreIP(OMPBuilder.createSections(
        Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(),
        S.getSingleClause<OMPNowaitClause>()));
    return;
  }
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    EmitSections(S);
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>()) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
                                           OMPD_sections);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                   InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
          *this, SectionRegionBodyStmt, AllocaIP, CodeGenIP, "section");
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(&S);
    Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB));

    return;
  }
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(&S);
  EmitStmt(S.getAssociatedStmt());
}

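// Lowers '#pragma omp single'. With 'copyprivate', the single thread's values
// are broadcast to the other threads via the helper expressions collected
// below, e.g. (illustrative example, not from the source):
//   #pragma omp single copyprivate(x)
//   x = compute();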
void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
  llvm::SmallVector<const Expr *, 8> DestExprs;
  llvm::SmallVector<const Expr *, 8> SrcExprs;
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
  // Check if there are any 'copyprivate' clauses associated with this
  // 'single' construct.
  // Build a list of copyprivate variables along with helper expressions
  // (<source>, <destination>, <destination> = <source> expressions).
  for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
    DestExprs.append(C->destination_exprs().begin(),
                     C->destination_exprs().end());
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
    AssignmentOps.append(C->assignment_ops().begin(),
                         C->assignment_ops().end());
  }
  // Emit code for the 'single' region along with the 'copyprivate' clauses.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope SingleScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
    CGF.EmitOMPPrivateClause(S, SingleScope);
    (void)SingleScope.Privatize();
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
                                            CopyprivateVars, DestExprs,
                                            SrcExprs, AssignmentOps);
  }
  // Emit an implicit barrier at the end (to avoid data races on the
  // firstprivate init, or when neither a 'nowait' nor a 'copyprivate' clause
  // was specified).
  if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
    CGM.getOpenMPRuntime().emitBarrierCall(
        *this, S.getBeginLoc(),
        S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

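/// Emits the body of a 'master'-like region; the runtime call generated by
/// emitMasterRegion guards it so that only the team's primary thread runs it.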
static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getRawStmt());
  };
  CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt();

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                  InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
          *this, MasterRegionBodyStmt, AllocaIP, CodeGenIP, "master");
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(&S);
    Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB));

    return;
  }
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(&S);
  emitMaster(*this, S);
}

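/// Emits the body of a 'masked' region; like 'master', but the executing
/// thread is selected by the 'filter' clause (thread 0 when absent), e.g.
/// (illustrative example, not from the source):
///   #pragma omp masked filter(2)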
static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getRawStmt());
  };
  Expr *Filter = nullptr;
  if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
    Filter = FilterClause->getThreadID();
  CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, CodeGen, S.getBeginLoc(),
                                              Filter);
}

void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt();
    const Expr *Filter = nullptr;
    if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
      Filter = FilterClause->getThreadID();
    llvm::Value *FilterVal = Filter
                                 ? EmitScalarExpr(Filter, CGM.Int32Ty)
                                 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                  InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
          *this, MaskedRegionBodyStmt, AllocaIP, CodeGenIP, "masked");
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(&S);
    Builder.restoreIP(
        OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal));

    return;
  }
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(&S);
  emitMasked(*this, S);
}

void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt();
    const Expr *Hint = nullptr;
    if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
      Hint = HintClause->getHint();

    // TODO: This is slightly different from what's currently being done in
    // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
    // about typing is final.
    llvm::Value *HintInst = nullptr;
    if (Hint)
      HintInst =
          Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false);

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                    InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
          *this, CriticalRegionBodyStmt, AllocaIP, CodeGenIP, "critical");
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(&S);
    Builder.restoreIP(OMPBuilder.createCritical(
        Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(),
        HintInst));

    return;
  }

  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getAssociatedStmt());
  };
  const Expr *Hint = nullptr;
  if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
    Hint = HintClause->getHint();
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(&S);
  CGM.getOpenMPRuntime().emitCriticalRegion(*this,
                                            S.getDirectiveName().getAsString(),
                                            CodeGen, S.getBeginLoc(), Hint);
}

void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)emitWorksharingDirective(CGF, S, S.hasCancel());
  };
  {
    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
      CGCapturedStmtInfo CGSI(CR_OpenMP);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
      OMPLoopScope LoopScope(CGF, S);
      return CGF.EmitScalarExpr(S.getNumIterations());
    };
    bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                                 [](const OMPReductionClause *C) {
                                   return C->getModifier() ==
                                          OMPC_REDUCTION_inscan;
                                 });
    if (IsInscan)
      emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
                                   emitEmptyBoundParameters);
    if (IsInscan)
      emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
  };
  {
    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
      CGCapturedStmtInfo CGSI(CR_OpenMP);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
      OMPLoopScope LoopScope(CGF, S);
      return CGF.EmitScalarExpr(S.getNumIterations());
    };
    bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                                 [](const OMPReductionClause *C) {
                                   return C->getModifier() ==
                                          OMPC_REDUCTION_inscan;
                                 });
    if (IsInscan)
      emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
                                   emitEmptyBoundParameters);
    if (IsInscan)
      emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPParallelMasterDirective(
    const OMPParallelMasterDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'master' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    emitMaster(CGF, S);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
                                   emitEmptyBoundParameters);
    emitPostUpdateForReductionClause(*this, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPParallelMaskedDirective(
    const OMPParallelMaskedDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'masked' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    emitMasked(CGF, S);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_masked, CodeGen,
                                   emitEmptyBoundParameters);
    emitPostUpdateForReductionClause(*this, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'sections' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitOMPCopyinClause(CGF, S);
    CGF.EmitSections(S);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
                                   emitEmptyBoundParameters);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

namespace {
/// Get the list of variables declared in the context of the untied tasks.
class CheckVarsEscapingUntiedTaskDeclContext final
    : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> {
  llvm::SmallVector<const VarDecl *, 4> PrivateDecls;

public:
  explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
  virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default;
  void VisitDeclStmt(const DeclStmt *S) {
    if (!S)
      return;
    // Need to privatize only local vars; static locals can be processed as is.
    for (const Decl *D : S->decls()) {
      if (const auto *VD = dyn_cast_or_null<VarDecl>(D))
        if (VD->hasLocalStorage())
          PrivateDecls.push_back(VD);
    }
  }
  void VisitOMPExecutableDirective(const OMPExecutableDirective *) {}
  void VisitCapturedStmt(const CapturedStmt *) {}
  void VisitLambdaExpr(const LambdaExpr *) {}
  void VisitBlockExpr(const BlockExpr *) {}
  void VisitStmt(const Stmt *S) {
    if (!S)
      return;
    for (const Stmt *Child : S->children())
      if (Child)
        Visit(Child);
  }

  /// Returns the list of privatized variables collected so far.
  ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
};
} // anonymous namespace

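/// Collects the 'depend' clauses of \p S into Data.Dependences. An
/// 'omp_all_memory' dependence is recorded first and subsumes any plain
/// 'out' or 'inout' dependences, which are therefore skipped.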
static void buildDependences(const OMPExecutableDirective &S,
                             OMPTaskDataTy &Data) {

  // First look for 'omp_all_memory' and add this first.
  bool OmpAllMemory = false;
  if (llvm::any_of(
          S.getClausesOfKind<OMPDependClause>(), [](const OMPDependClause *C) {
            return C->getDependencyKind() == OMPC_DEPEND_outallmemory ||
                   C->getDependencyKind() == OMPC_DEPEND_inoutallmemory;
          })) {
    OmpAllMemory = true;
    // Since both OMPC_DEPEND_outallmemory and OMPC_DEPEND_inoutallmemory are
    // equivalent to the runtime, always use OMPC_DEPEND_outallmemory to
    // simplify.
    OMPTaskDataTy::DependData &DD =
        Data.Dependences.emplace_back(OMPC_DEPEND_outallmemory,
                                      /*IteratorExpr=*/nullptr);
    // Add a nullptr Expr to simplify the codegen in emitDependData.
    DD.DepExprs.push_back(nullptr);
  }
  // Add remaining dependences skipping any 'out' or 'inout' if they are
  // overridden by 'omp_all_memory'.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
    OpenMPDependClauseKind Kind = C->getDependencyKind();
    if (Kind == OMPC_DEPEND_outallmemory || Kind == OMPC_DEPEND_inoutallmemory)
      continue;
    if (OmpAllMemory && (Kind == OMPC_DEPEND_out || Kind == OMPC_DEPEND_inout))
      continue;
    OMPTaskDataTy::DependData &DD =
        Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
    DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
  }
}

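// Emits a task-based directive: the clauses are gathered into the
// OMPTaskDataTy descriptor, the task body is outlined into a separate
// function, and the TaskGen callback finally emits the runtime call that
// enqueues the outlined task.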
void CodeGenFunction::EmitOMPTaskBasedDirective(
    const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
    const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
    OMPTaskDataTy &Data) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
  auto I = CS->getCapturedDecl()->param_begin();
  auto PartId = std::next(I);
  auto TaskT = std::next(I, 4);
  // Check if the task is final.
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    const Expr *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Data.Final.setInt(CondConstant);
    else
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Data.Final.setInt(/*IntVal=*/false);
  }
  // Check if the task has a 'priority' clause.
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
    const Expr *Prio = Clause->getPriority();
    Data.Priority.setInt(/*IntVal=*/true);
    Data.Priority.setPointer(EmitScalarConversion(
        EmitScalarExpr(Prio), Prio->getType(),
        getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
        Prio->getExprLoc()));
  }
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >= 0 for untied tasks).
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get list of private variables.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.PrivateVars.push_back(*IRef);
        Data.PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  EmittedAsPrivate.clear();
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.FirstprivateVars.push_back(*IRef);
        Data.FirstprivateCopies.push_back(IInit);
        Data.FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Get list of lastprivate variables (for taskloops).
  llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ID = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.LastprivateVars.push_back(*IRef);
        Data.LastprivateCopies.push_back(IInit);
      }
      LastprivateDstsOrigs.insert(
          std::make_pair(cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
                         cast<DeclRefExpr>(*IRef)));
      ++IRef;
      ++ID;
    }
  }
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
    Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
    Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
    Data.ReductionOps.append(C->reduction_ops().begin(),
                             C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
  }
  Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
      *this, S.getBeginLoc(), LHSs, RHSs, Data);
  // Build list of dependences.
  buildDependences(S, Data);
  // Get list of local vars for untied tasks.
  if (!Data.Tied) {
    CheckVarsEscapingUntiedTaskDeclContext Checker;
    Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt());
    Data.PrivateLocals.append(Checker.getPrivateDecls().begin(),
                              Checker.getPrivateDecls().end());
  }
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
                    CapturedRegion](CodeGenFunction &CGF,
                                    PrePostActionTy &Action) {
    llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                    std::pair<Address, Address>>
        UntiedLocalVars;
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    // Generate debug info for variables present in shared clause.
    if (auto *DI = CGF.getDebugInfo()) {
      llvm::SmallDenseMap<const VarDecl *, FieldDecl *> CaptureFields =
          CGF.CapturedStmtInfo->getCaptureFields();
      llvm::Value *ContextValue = CGF.CapturedStmtInfo->getContextValue();
      if (CaptureFields.size() && ContextValue) {
        unsigned CharWidth = CGF.getContext().getCharWidth();
        // The shared variables are packed together as members of a structure,
        // so the address of each shared variable can be computed by adding the
        // offset of the variable (within the record) to the base address of
        // the record. For each shared variable, the debug intrinsic
        // llvm.dbg.declare is generated with an appropriate expression
        // (DIExpression).
        // Ex:
        //   %12 = load %struct.anon*, %struct.anon** %__context.addr.i
        //   call void @llvm.dbg.declare(metadata %struct.anon* %12,
        //             metadata !svar1,
        //             metadata !DIExpression(DW_OP_deref))
        //   call void @llvm.dbg.declare(metadata %struct.anon* %12,
        //             metadata !svar2,
        //             metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref))
        for (auto It = CaptureFields.begin(); It != CaptureFields.end(); ++It) {
          const VarDecl *SharedVar = It->first;
          RecordDecl *CaptureRecord = It->second->getParent();
          const ASTRecordLayout &Layout =
              CGF.getContext().getASTRecordLayout(CaptureRecord);
          unsigned Offset =
              Layout.getFieldOffset(It->second->getFieldIndex()) / CharWidth;
          if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
            (void)DI->EmitDeclareOfAutoVariable(SharedVar, ContextValue,
                                                CGF.Builder, false);
          // Get the call dbg.declare instruction we just created and update
          // its DIExpression to add offset to base address.
          auto UpdateExpr = [](llvm::LLVMContext &Ctx, auto *Declare,
                               unsigned Offset) {
            SmallVector<uint64_t, 8> Ops;
            // Add offset to the base address if non-zero.
            if (Offset) {
              Ops.push_back(llvm::dwarf::DW_OP_plus_uconst);
              Ops.push_back(Offset);
            }
            Ops.push_back(llvm::dwarf::DW_OP_deref);
            Declare->setExpression(llvm::DIExpression::get(Ctx, Ops));
          };
          llvm::Instruction &Last = CGF.Builder.GetInsertBlock()->back();
          if (auto DDI = dyn_cast<llvm::DbgVariableIntrinsic>(&Last))
            UpdateExpr(DDI->getContext(), DDI, Offset);
          // If we're emitting using the new debug info format into a block
          // without a terminator, the record will be "trailing".
          assert(!Last.isTerminator() && "unexpected terminator");
          if (auto *Marker =
                  CGF.Builder.GetInsertBlock()->getTrailingDbgRecords()) {
            for (llvm::DbgVariableRecord &DVR : llvm::reverse(
                     llvm::filterDbgVars(Marker->getDbgRecordRange()))) {
              UpdateExpr(Last.getContext(), &DVR, Offset);
              break;
            }
          }
        }
      }
    }
    llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) {
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      llvm::SmallVector<llvm::Type *, 4> ParamTypes;
      CallArgs.push_back(PrivatesPtr);
      ParamTypes.push_back(PrivatesPtr->getType());
      for (const Expr *E : Data.PrivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        RawAddress PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        RawAddress PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        FirstprivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      for (const Expr *E : Data.LastprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        RawAddress PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      for (const VarDecl *VD : Data.PrivateLocals) {
        QualType Ty = VD->getType().getNonReferenceType();
        if (VD->getType()->isLValueReferenceType())
          Ty = CGF.getContext().getPointerType(Ty);
        if (isAllocatableDecl(VD))
          Ty = CGF.getContext().getPointerType(Ty);
        RawAddress PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(Ty), ".local.ptr.addr");
        auto Result = UntiedLocalVars.insert(
            std::make_pair(VD, std::make_pair(PrivatePtr, Address::invalid())));
        // If the key already exists, update the entry in place.
        if (!Result.second)
          *Result.first = std::make_pair(
              VD, std::make_pair(PrivatePtr, Address::invalid()));
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
                                               ParamTypes, /*isVarArg=*/false);
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
          CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
      for (const auto &Pair : LastprivateDstsOrigs) {
        const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        Pair.second->getType(), VK_LValue,
                        Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, CGF.EmitLValue(&DRE).getAddress());
      }
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement = Address(
            CGF.Builder.CreateLoad(Pair.second),
            CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, Replacement);
        if (auto *DI = CGF.getDebugInfo())
          if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
            (void)DI->EmitDeclareOfAutoVariable(
                Pair.first, Pair.second.getBasePointer(), CGF.Builder,
                /*UsePointerValue*/ true);
      }
      // Adjust mapping for internal locals by mapping actual memory instead of
      // a pointer to this memory.
      for (auto &Pair : UntiedLocalVars) {
        QualType VDType = Pair.first->getType().getNonReferenceType();
        if (Pair.first->getType()->isLValueReferenceType())
          VDType = CGF.getContext().getPointerType(VDType);
        if (isAllocatableDecl(Pair.first)) {
          llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
          Address Replacement(
              Ptr,
              CGF.ConvertTypeForMem(CGF.getContext().getPointerType(VDType)),
              CGF.getPointerAlign());
          Pair.second.first = Replacement;
          Ptr = CGF.Builder.CreateLoad(Replacement);
          Replacement = Address(Ptr, CGF.ConvertTypeForMem(VDType),
                                CGF.getContext().getDeclAlign(Pair.first));
          Pair.second.second = Replacement;
        } else {
          llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
          Address Replacement(Ptr, CGF.ConvertTypeForMem(VDType),
                              CGF.getContext().getDeclAlign(Pair.first));
          Pair.second.first = Replacement;
        }
      }
    }
    if (Data.Reductions) {
      OMPPrivateScope FirstprivateScope(CGF);
      for (const auto &Pair : FirstprivatePtrs) {
        Address Replacement(
            CGF.Builder.CreateLoad(Pair.second),
            CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
            CGF.getContext().getDeclAlign(Pair.first));
        FirstprivateScope.addPrivate(Pair.first, Replacement);
      }
      (void)FirstprivateScope.Privatize();
      OMPLexicalScope LexScope(CGF, S, CapturedRegion);
      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
                             Data.ReductionCopies, Data.ReductionOps);
      llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
      for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedOrigLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement = Address(
            CGF.EmitScalarConversion(Replacement.emitRawPointer(CGF),
                                     CGF.getContext().VoidPtrTy,
                                     CGF.getContext().getPointerType(
                                         Data.ReductionCopies[Cnt]->getType()),
                                     Data.ReductionCopies[Cnt]->getExprLoc()),
            CGF.ConvertTypeForMem(Data.ReductionCopies[Cnt]->getType()),
            Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        Scope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    SmallVector<const Expr *, 4> InRedVars;
    SmallVector<const Expr *, 4> InRedPrivs;
    SmallVector<const Expr *, 4> InRedOps;
    SmallVector<const Expr *, 4> TaskgroupDescriptors;
    for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
      auto IPriv = C->privates().begin();
      auto IRed = C->reduction_ops().begin();
      auto ITD = C->taskgroup_descriptors().begin();
      for (const Expr *Ref : C->varlists()) {
        InRedVars.emplace_back(Ref);
        InRedPrivs.emplace_back(*IPriv);
        InRedOps.emplace_back(*IRed);
        TaskgroupDescriptors.emplace_back(*ITD);
        std::advance(IPriv, 1);
        std::advance(IRed, 1);
        std::advance(ITD, 1);
      }
    }
    // Privatize in_reduction items here, because taskgroup descriptors must be
    // privatized earlier.
    OMPPrivateScope InRedScope(CGF);
    if (!InRedVars.empty()) {
      ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
      for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedOrigLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // The taskgroup descriptor variable is always implicit firstprivate
        // and privatized already during processing of the firstprivates.
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        llvm::Value *ReductionsPtr;
        if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
          ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr),
                                               TRExpr->getExprLoc());
        } else {
          ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
        }
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement = Address(
            CGF.EmitScalarConversion(
                Replacement.emitRawPointer(CGF), CGF.getContext().VoidPtrTy,
                CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
                InRedPrivs[Cnt]->getExprLoc()),
            CGF.ConvertTypeForMem(InRedPrivs[Cnt]->getType()),
            Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
      }
    }
    (void)InRedScope.Privatize();

    CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF,
                                                             UntiedLocalVars);
    Action.Enter(CGF);
    BodyGen(CGF);
  };
  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  OMPLexicalScope Scope(*this, S, std::nullopt,
                        !isOpenMPParallelDirective(S.getDirectiveKind()) &&
                            !isOpenMPSimdDirective(S.getDirectiveKind()));
  TaskGen(*this, OutlinedFn, Data);
}

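/// Creates an implicit firstprivate variable of type \p Ty for the captured
/// declaration \p CD and registers the original/private/init references in
/// \p Data, so that helper arrays can be passed to a task as implicit
/// firstprivates.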
static ImplicitParamDecl *
createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
                                  QualType Ty, CapturedDecl *CD,
                                  SourceLocation Loc) {
  auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                           ImplicitParamKind::Other);
  auto *OrigRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                              ImplicitParamKind::Other);
  auto *PrivateRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  QualType ElemType = C.getBaseElementType(Ty);
  auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
                                           ImplicitParamKind::Other);
  auto *InitRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
  PrivateVD->setInitStyle(VarDecl::CInit);
  PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
                                              InitRef, /*BasePath=*/nullptr,
                                              VK_PRValue, FPOptionsOverride()));
  Data.FirstprivateVars.emplace_back(OrigRef);
  Data.FirstprivateCopies.emplace_back(PrivateRef);
  Data.FirstprivateInits.emplace_back(InitRef);
  return OrigVD;
}

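// Emits the task wrapped around a target-style directive. The offloading
// arrays in InputInfo (base pointers, pointers, sizes, and, if present,
// mappers) are captured as implicit firstprivates so the deferred task still
// sees valid copies when it eventually runs.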
5070void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
5071 const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
5072 OMPTargetDataInfo &InputInfo) {
5073 // Emit outlined function for task construct.
5074 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_task);
5075 Address CapturedStruct = GenerateCapturedStmtArgument(S: *CS);
5076 QualType SharedsTy = getContext().getRecordType(Decl: CS->getCapturedRecordDecl());
5077 auto I = CS->getCapturedDecl()->param_begin();
5078 auto PartId = std::next(x: I);
5079 auto TaskT = std::next(x: I, n: 4);
5080 OMPTaskDataTy Data;
5081 // The task is not final.
5082 Data.Final.setInt(/*IntVal=*/false);
5083 // Get list of firstprivate variables.
5084 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
5085 auto IRef = C->varlist_begin();
5086 auto IElemInitRef = C->inits().begin();
5087 for (auto *IInit : C->private_copies()) {
5088 Data.FirstprivateVars.push_back(Elt: *IRef);
5089 Data.FirstprivateCopies.push_back(Elt: IInit);
5090 Data.FirstprivateInits.push_back(Elt: *IElemInitRef);
5091 ++IRef;
5092 ++IElemInitRef;
5093 }
5094 }
5095 SmallVector<const Expr *, 4> LHSs;
5096 SmallVector<const Expr *, 4> RHSs;
5097 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
5098 Data.ReductionVars.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
5099 Data.ReductionOrigs.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
5100 Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end());
5101 Data.ReductionOps.append(in_start: C->reduction_ops().begin(),
5102 in_end: C->reduction_ops().end());
5103 LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
5104 RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
5105 }
5106 OMPPrivateScope TargetScope(*this);
5107 VarDecl *BPVD = nullptr;
5108 VarDecl *PVD = nullptr;
5109 VarDecl *SVD = nullptr;
5110 VarDecl *MVD = nullptr;
5111 if (InputInfo.NumberOfTargetItems > 0) {
5112 auto *CD = CapturedDecl::Create(
5113 C&: getContext(), DC: getContext().getTranslationUnitDecl(), /*NumParams=*/0);
5114 llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
5115 QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
5116 EltTy: getContext().VoidPtrTy, ArySize: ArrSize, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal,
5117 /*IndexTypeQuals=*/0);
5118 BPVD = createImplicitFirstprivateForType(
5119 C&: getContext(), Data, Ty: BaseAndPointerAndMapperType, CD, Loc: S.getBeginLoc());
5120 PVD = createImplicitFirstprivateForType(
5121 C&: getContext(), Data, Ty: BaseAndPointerAndMapperType, CD, Loc: S.getBeginLoc());
5122 QualType SizesType = getContext().getConstantArrayType(
5123 EltTy: getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
5124 ArySize: ArrSize, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal,
5125 /*IndexTypeQuals=*/0);
5126 SVD = createImplicitFirstprivateForType(C&: getContext(), Data, Ty: SizesType, CD,
5127 Loc: S.getBeginLoc());
5128 TargetScope.addPrivate(LocalVD: BPVD, Addr: InputInfo.BasePointersArray);
5129 TargetScope.addPrivate(LocalVD: PVD, Addr: InputInfo.PointersArray);
5130 TargetScope.addPrivate(LocalVD: SVD, Addr: InputInfo.SizesArray);
5131 // If there is no user-defined mapper, the mapper array will be nullptr. In
5132 // this case, we don't need to privatize it.
5133 if (!isa_and_nonnull<llvm::ConstantPointerNull>(
5134 Val: InputInfo.MappersArray.emitRawPointer(CGF&: *this))) {
5135 MVD = createImplicitFirstprivateForType(
5136 C&: getContext(), Data, Ty: BaseAndPointerAndMapperType, CD, Loc: S.getBeginLoc());
5137 TargetScope.addPrivate(LocalVD: MVD, Addr: InputInfo.MappersArray);
5138 }
5139 }
5140 (void)TargetScope.Privatize();
5141 buildDependences(S, Data);
5142 auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD,
5143 &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
5144 // Set proper addresses for generated private copies.
5145 OMPPrivateScope Scope(CGF);
5146 if (!Data.FirstprivateVars.empty()) {
5147 enum { PrivatesParam = 2, CopyFnParam = 3 };
5148 llvm::Value *CopyFn = CGF.Builder.CreateLoad(
5149 Addr: CGF.GetAddrOfLocalVar(VD: CS->getCapturedDecl()->getParam(i: CopyFnParam)));
5150 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(
5151 VD: CS->getCapturedDecl()->getParam(i: PrivatesParam)));
5152 // Map privates.
5153 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
5154 llvm::SmallVector<llvm::Value *, 16> CallArgs;
5155 llvm::SmallVector<llvm::Type *, 4> ParamTypes;
5156 CallArgs.push_back(Elt: PrivatesPtr);
5157 ParamTypes.push_back(Elt: PrivatesPtr->getType());
5158 for (const Expr *E : Data.FirstprivateVars) {
5159 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
5160 RawAddress PrivatePtr =
5161 CGF.CreateMemTemp(T: CGF.getContext().getPointerType(T: E->getType()),
5162 Name: ".firstpriv.ptr.addr");
5163 PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr);
5164 CallArgs.push_back(Elt: PrivatePtr.getPointer());
5165 ParamTypes.push_back(Elt: PrivatePtr.getType());
5166 }
5167 auto *CopyFnTy = llvm::FunctionType::get(Result: CGF.Builder.getVoidTy(),
5168 Params: ParamTypes, /*isVarArg=*/false);
5169 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
5170 CGF, Loc: S.getBeginLoc(), OutlinedFn: {CopyFnTy, CopyFn}, Args: CallArgs);
5171 for (const auto &Pair : PrivatePtrs) {
5172 Address Replacement(
5173 CGF.Builder.CreateLoad(Addr: Pair.second),
5174 CGF.ConvertTypeForMem(T: Pair.first->getType().getNonReferenceType()),
5175 CGF.getContext().getDeclAlign(D: Pair.first));
5176 Scope.addPrivate(LocalVD: Pair.first, Addr: Replacement);
5177 }
5178 }
5179 CGF.processInReduction(S, Data, CGF, CS, Scope);
5180 if (InputInfo.NumberOfTargetItems > 0) {
5181 InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
5182 Addr: CGF.GetAddrOfLocalVar(VD: BPVD), /*Index=*/0);
5183 InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
5184 Addr: CGF.GetAddrOfLocalVar(VD: PVD), /*Index=*/0);
5185 InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
5186 Addr: CGF.GetAddrOfLocalVar(VD: SVD), /*Index=*/0);
// If MVD is nullptr, the mapper array was not privatized.
5188 if (MVD)
5189 InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP(
5190 Addr: CGF.GetAddrOfLocalVar(VD: MVD), /*Index=*/0);
5191 }
5192
5193 Action.Enter(CGF);
5194 OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
5195 auto *TL = S.getSingleClause<OMPThreadLimitClause>();
5196 if (CGF.CGM.getLangOpts().OpenMP >= 51 &&
5197 needsTaskBasedThreadLimit(DKind: S.getDirectiveKind()) && TL) {
5198 // Emit __kmpc_set_thread_limit() to set the thread_limit for the task
5199 // enclosing this target region. This will indirectly set the thread_limit
// for every applicable construct within the target region.
5201 CGF.CGM.getOpenMPRuntime().emitThreadLimitClause(
5202 CGF, ThreadLimit: TL->getThreadLimit(), Loc: S.getBeginLoc());
5203 }
5204 BodyGen(CGF);
5205 };
5206 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
5207 D: S, ThreadIDVar: *I, PartIDVar: *PartId, TaskTVar: *TaskT, InnermostKind: S.getDirectiveKind(), CodeGen, /*Tied=*/true,
5208 NumberOfParts&: Data.NumberOfParts);
5209 llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
5210 IntegerLiteral IfCond(getContext(), TrueOrFalse,
5211 getContext().getIntTypeForBitwidth(DestWidth: 32, /*Signed=*/0),
5212 SourceLocation());
5213 CGM.getOpenMPRuntime().emitTaskCall(CGF&: *this, Loc: S.getBeginLoc(), D: S, TaskFunction: OutlinedFn,
5214 SharedsTy, Shareds: CapturedStruct, IfCond: &IfCond, Data);
5215}
5216
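// For reference, a minimal sketch of the kind of source processInReduction
// handles (variable and function names are illustrative only): a task with an
// 'in_reduction' clause participating in an enclosing taskgroup reduction.
//   #pragma omp taskgroup task_reduction(+: x)
//   {
//     #pragma omp task in_reduction(+: x)
//     x += compute();
//   }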
5217void CodeGenFunction::processInReduction(const OMPExecutableDirective &S,
5218 OMPTaskDataTy &Data,
5219 CodeGenFunction &CGF,
5220 const CapturedStmt *CS,
5221 OMPPrivateScope &Scope) {
5222 if (Data.Reductions) {
5223 OpenMPDirectiveKind CapturedRegion = S.getDirectiveKind();
5224 OMPLexicalScope LexScope(CGF, S, CapturedRegion);
5225 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
5226 Data.ReductionCopies, Data.ReductionOps);
5227 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
5228 Addr: CGF.GetAddrOfLocalVar(VD: CS->getCapturedDecl()->getParam(i: 4)));
5229 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
5230 RedCG.emitSharedOrigLValue(CGF, N: Cnt);
5231 RedCG.emitAggregateType(CGF, N: Cnt);
// FIXME: This must be removed once the runtime library is fixed.
5233 // Emit required threadprivate variables for
5234 // initializer/combiner/finalizer.
5235 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(),
5236 RCG&: RedCG, N: Cnt);
5237 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5238 CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt));
5239 Replacement = Address(
5240 CGF.EmitScalarConversion(Src: Replacement.emitRawPointer(CGF),
5241 SrcTy: CGF.getContext().VoidPtrTy,
5242 DstTy: CGF.getContext().getPointerType(
5243 T: Data.ReductionCopies[Cnt]->getType()),
5244 Loc: Data.ReductionCopies[Cnt]->getExprLoc()),
5245 CGF.ConvertTypeForMem(T: Data.ReductionCopies[Cnt]->getType()),
5246 Replacement.getAlignment());
5247 Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement);
5248 Scope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement);
5249 }
5250 }
5251 (void)Scope.Privatize();
5252 SmallVector<const Expr *, 4> InRedVars;
5253 SmallVector<const Expr *, 4> InRedPrivs;
5254 SmallVector<const Expr *, 4> InRedOps;
5255 SmallVector<const Expr *, 4> TaskgroupDescriptors;
5256 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
5257 auto IPriv = C->privates().begin();
5258 auto IRed = C->reduction_ops().begin();
5259 auto ITD = C->taskgroup_descriptors().begin();
5260 for (const Expr *Ref : C->varlists()) {
5261 InRedVars.emplace_back(Args&: Ref);
5262 InRedPrivs.emplace_back(Args: *IPriv);
5263 InRedOps.emplace_back(Args: *IRed);
5264 TaskgroupDescriptors.emplace_back(Args: *ITD);
5265 std::advance(i&: IPriv, n: 1);
5266 std::advance(i&: IRed, n: 1);
5267 std::advance(i&: ITD, n: 1);
5268 }
5269 }
5270 OMPPrivateScope InRedScope(CGF);
5271 if (!InRedVars.empty()) {
5272 ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
5273 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
5274 RedCG.emitSharedOrigLValue(CGF, N: Cnt);
5275 RedCG.emitAggregateType(CGF, N: Cnt);
// FIXME: This must be removed once the runtime library is fixed.
5277 // Emit required threadprivate variables for
5278 // initializer/combiner/finalizer.
5279 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(),
5280 RCG&: RedCG, N: Cnt);
5281 llvm::Value *ReductionsPtr;
5282 if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
5283 ReductionsPtr =
5284 CGF.EmitLoadOfScalar(lvalue: CGF.EmitLValue(E: TRExpr), Loc: TRExpr->getExprLoc());
5285 } else {
5286 ReductionsPtr = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
5287 }
5288 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5289 CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt));
5290 Replacement = Address(
5291 CGF.EmitScalarConversion(
5292 Src: Replacement.emitRawPointer(CGF), SrcTy: CGF.getContext().VoidPtrTy,
5293 DstTy: CGF.getContext().getPointerType(T: InRedPrivs[Cnt]->getType()),
5294 Loc: InRedPrivs[Cnt]->getExprLoc()),
5295 CGF.ConvertTypeForMem(T: InRedPrivs[Cnt]->getType()),
5296 Replacement.getAlignment());
5297 Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement);
5298 InRedScope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement);
5299 }
5300 }
5301 (void)InRedScope.Privatize();
5302}
5303
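// For reference, a typical construct lowered here (illustrative sketch only;
// 'use' is a placeholder):
//   #pragma omp task if(cond) firstprivate(a) untied
//   { use(a); }
// The associated statement is outlined into a task entry function, and a call
// into the OpenMP runtime is emitted to allocate and enqueue the task.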
5304void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
5305 // Emit outlined function for task construct.
5306 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_task);
5307 Address CapturedStruct = GenerateCapturedStmtArgument(S: *CS);
5308 QualType SharedsTy = getContext().getRecordType(Decl: CS->getCapturedRecordDecl());
5309 const Expr *IfCond = nullptr;
5310 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
5311 if (C->getNameModifier() == OMPD_unknown ||
5312 C->getNameModifier() == OMPD_task) {
5313 IfCond = C->getCondition();
5314 break;
5315 }
5316 }
5317
5318 OMPTaskDataTy Data;
// Check if we should emit a tied or an untied task.
5320 Data.Tied = !S.getSingleClause<OMPUntiedClause>();
5321 auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
5322 CGF.EmitStmt(S: CS->getCapturedStmt());
5323 };
5324 auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
5325 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
5326 const OMPTaskDataTy &Data) {
5327 CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, Loc: S.getBeginLoc(), D: S, TaskFunction: OutlinedFn,
5328 SharedsTy, Shareds: CapturedStruct, IfCond,
5329 Data);
5330 };
5331 auto LPCRegion =
5332 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
5333 EmitOMPTaskBasedDirective(S, CapturedRegion: OMPD_task, BodyGen, TaskGen, Data);
5334}
5335
5336void CodeGenFunction::EmitOMPTaskyieldDirective(
5337 const OMPTaskyieldDirective &S) {
5338 CGM.getOpenMPRuntime().emitTaskyieldCall(CGF&: *this, Loc: S.getBeginLoc());
5339}
5340
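// For reference, the forms handled here look like (illustrative sketch):
//   #pragma omp error severity(fatal) message("not supported")
//   #pragma omp error severity(warning)
// With no severity clause, or with severity(fatal), a fatal runtime error
// call is emitted; severity(warning) emits a non-fatal one.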
5341void CodeGenFunction::EmitOMPErrorDirective(const OMPErrorDirective &S) {
5342 const OMPMessageClause *MC = S.getSingleClause<OMPMessageClause>();
5343 Expr *ME = MC ? MC->getMessageString() : nullptr;
5344 const OMPSeverityClause *SC = S.getSingleClause<OMPSeverityClause>();
5345 bool IsFatal = false;
5346 if (!SC || SC->getSeverityKind() == OMPC_SEVERITY_fatal)
5347 IsFatal = true;
5348 CGM.getOpenMPRuntime().emitErrorCall(CGF&: *this, Loc: S.getBeginLoc(), ME, IsFatal);
5349}
5350
5351void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
5352 CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_barrier);
5353}
5354
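// For reference (illustrative sketch): 'taskwait' may optionally carry
// dependence information and, in newer OpenMP versions, a nowait clause, e.g.
//   #pragma omp taskwait depend(in: x)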
5355void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
5356 OMPTaskDataTy Data;
// Build the list of dependences.
5358 buildDependences(S, Data);
5359 Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();
5360 CGM.getOpenMPRuntime().emitTaskwaitCall(CGF&: *this, Loc: S.getBeginLoc(), Data);
5361}
5362
static bool isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective &T) {
5364 return T.clauses().empty();
5365}
5366
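// For reference (illustrative sketch): a clause-free taskgroup such as
//   #pragma omp taskgroup
//   { ... }
// can be lowered through the OpenMPIRBuilder; a taskgroup with a
// task_reduction clause additionally needs a reduction descriptor, so it
// takes the regular codegen path below.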
5367void CodeGenFunction::EmitOMPTaskgroupDirective(
5368 const OMPTaskgroupDirective &S) {
5369 OMPLexicalScope Scope(*this, S, OMPD_unknown);
5370 if (CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(T: S)) {
5371 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
5372 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
5373 InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
5374 AllocaInsertPt->getIterator());
5375
5376 auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
5377 InsertPointTy CodeGenIP) {
5378 Builder.restoreIP(IP: CodeGenIP);
5379 EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
5380 };
5381 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
5382 if (!CapturedStmtInfo)
5383 CapturedStmtInfo = &CapStmtInfo;
5384 Builder.restoreIP(IP: OMPBuilder.createTaskgroup(Loc: Builder, AllocaIP, BodyGenCB));
5385 return;
5386 }
5387 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5388 Action.Enter(CGF);
5389 if (const Expr *E = S.getReductionRef()) {
5390 SmallVector<const Expr *, 4> LHSs;
5391 SmallVector<const Expr *, 4> RHSs;
5392 OMPTaskDataTy Data;
5393 for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
5394 Data.ReductionVars.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
5395 Data.ReductionOrigs.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
5396 Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end());
5397 Data.ReductionOps.append(in_start: C->reduction_ops().begin(),
5398 in_end: C->reduction_ops().end());
5399 LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
5400 RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
5401 }
5402 llvm::Value *ReductionDesc =
5403 CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, Loc: S.getBeginLoc(),
5404 LHSExprs: LHSs, RHSExprs: RHSs, Data);
5405 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
5406 CGF.EmitVarDecl(D: *VD);
5407 CGF.EmitStoreOfScalar(Value: ReductionDesc, Addr: CGF.GetAddrOfLocalVar(VD),
5408 /*Volatile=*/false, Ty: E->getType());
5409 }
5410 CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
5411 };
5412 CGM.getOpenMPRuntime().emitTaskgroupRegion(CGF&: *this, TaskgroupOpGen: CodeGen, Loc: S.getBeginLoc());
5413}
5414
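// For reference (illustrative sketch): both flush forms are handled here.
//   #pragma omp flush          // flush everything, acquire-release ordering
//   #pragma omp flush(a, b)    // flush only the listed variables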
5415void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
5416 llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
5417 ? llvm::AtomicOrdering::NotAtomic
5418 : llvm::AtomicOrdering::AcquireRelease;
5419 CGM.getOpenMPRuntime().emitFlush(
5420 CGF&: *this,
5421 Vars: [&S]() -> ArrayRef<const Expr *> {
5422 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
5423 return llvm::ArrayRef(FlushClause->varlist_begin(),
5424 FlushClause->varlist_end());
5425 return std::nullopt;
5426 }(),
5427 Loc: S.getBeginLoc(), AO);
5428}
5429
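// For reference (illustrative sketch): the three depobj forms handled below.
//   omp_depend_t obj;
//   #pragma omp depobj(obj) depend(inout: x)   // initialize
//   #pragma omp depobj(obj) update(in)         // change the dependence kind
//   #pragma omp depobj(obj) destroy            // release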
5430void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
5431 const auto *DO = S.getSingleClause<OMPDepobjClause>();
5432 LValue DOLVal = EmitLValue(E: DO->getDepobj());
5433 if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
5434 OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(),
5435 DC->getModifier());
5436 Dependencies.DepExprs.append(in_start: DC->varlist_begin(), in_end: DC->varlist_end());
5437 Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
5438 CGF&: *this, Dependencies, Loc: DC->getBeginLoc());
5439 EmitStoreOfScalar(value: DepAddr.emitRawPointer(CGF&: *this), lvalue: DOLVal);
5440 return;
5441 }
5442 if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
5443 CGM.getOpenMPRuntime().emitDestroyClause(CGF&: *this, DepobjLVal: DOLVal, Loc: DC->getBeginLoc());
5444 return;
5445 }
5446 if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
5447 CGM.getOpenMPRuntime().emitUpdateClause(
5448 CGF&: *this, DepobjLVal: DOLVal, NewDepKind: UC->getDependencyKind(), Loc: UC->getBeginLoc());
5449 return;
5450 }
5451}
5452
5453void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
5454 if (!OMPParentLoopDirectiveForScan)
5455 return;
5456 const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
5457 bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
5458 SmallVector<const Expr *, 4> Shareds;
5459 SmallVector<const Expr *, 4> Privates;
5460 SmallVector<const Expr *, 4> LHSs;
5461 SmallVector<const Expr *, 4> RHSs;
5462 SmallVector<const Expr *, 4> ReductionOps;
5463 SmallVector<const Expr *, 4> CopyOps;
5464 SmallVector<const Expr *, 4> CopyArrayTemps;
5465 SmallVector<const Expr *, 4> CopyArrayElems;
5466 for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
5467 if (C->getModifier() != OMPC_REDUCTION_inscan)
5468 continue;
5469 Shareds.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
5470 Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
5471 LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
5472 RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
5473 ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
5474 CopyOps.append(in_start: C->copy_ops().begin(), in_end: C->copy_ops().end());
5475 CopyArrayTemps.append(in_start: C->copy_array_temps().begin(),
5476 in_end: C->copy_array_temps().end());
5477 CopyArrayElems.append(in_start: C->copy_array_elems().begin(),
5478 in_end: C->copy_array_elems().end());
5479 }
5480 if (ParentDir.getDirectiveKind() == OMPD_simd ||
5481 (getLangOpts().OpenMPSimd &&
5482 isOpenMPSimdDirective(DKind: ParentDir.getDirectiveKind()))) {
// For the simd directive, and for simd-based directives in simd-only mode, use
// the following codegen:
5485 // int x = 0;
5486 // #pragma omp simd reduction(inscan, +: x)
5487 // for (..) {
5488 // <first part>
5489 // #pragma omp scan inclusive(x)
5490 // <second part>
5491 // }
5492 // is transformed to:
5493 // int x = 0;
5494 // for (..) {
5495 // int x_priv = 0;
5496 // <first part>
5497 // x = x_priv + x;
5498 // x_priv = x;
5499 // <second part>
5500 // }
5501 // and
5502 // int x = 0;
5503 // #pragma omp simd reduction(inscan, +: x)
5504 // for (..) {
5505 // <first part>
5506 // #pragma omp scan exclusive(x)
5507 // <second part>
5508 // }
5509 // to
5510 // int x = 0;
5511 // for (..) {
5512 // int x_priv = 0;
5513 // <second part>
5514 // int temp = x;
5515 // x = x_priv + x;
5516 // x_priv = temp;
5517 // <first part>
5518 // }
5519 llvm::BasicBlock *OMPScanReduce = createBasicBlock(name: "omp.inscan.reduce");
5520 EmitBranch(Block: IsInclusive
5521 ? OMPScanReduce
5522 : BreakContinueStack.back().ContinueBlock.getBlock());
5523 EmitBlock(BB: OMPScanDispatch);
5524 {
5525 // New scope for correct construction/destruction of temp variables for
5526 // exclusive scan.
5527 LexicalScope Scope(*this, S.getSourceRange());
5528 EmitBranch(Block: IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
5529 EmitBlock(BB: OMPScanReduce);
5530 if (!IsInclusive) {
5531 // Create temp var and copy LHS value to this temp value.
5532 // TMP = LHS;
5533 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5534 const Expr *PrivateExpr = Privates[I];
5535 const Expr *TempExpr = CopyArrayTemps[I];
5536 EmitAutoVarDecl(
5537 D: *cast<VarDecl>(Val: cast<DeclRefExpr>(Val: TempExpr)->getDecl()));
5538 LValue DestLVal = EmitLValue(E: TempExpr);
5539 LValue SrcLVal = EmitLValue(E: LHSs[I]);
5540 EmitOMPCopy(OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(),
5541 SrcAddr: SrcLVal.getAddress(),
5542 DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
5543 SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()),
5544 Copy: CopyOps[I]);
5545 }
5546 }
5547 CGM.getOpenMPRuntime().emitReduction(
5548 CGF&: *this, Loc: ParentDir.getEndLoc(), Privates, LHSExprs: LHSs, RHSExprs: RHSs, ReductionOps,
5549 Options: {/*WithNowait=*/true, /*SimpleReduction=*/true, .ReductionKind: OMPD_simd});
5550 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5551 const Expr *PrivateExpr = Privates[I];
5552 LValue DestLVal;
5553 LValue SrcLVal;
5554 if (IsInclusive) {
5555 DestLVal = EmitLValue(E: RHSs[I]);
5556 SrcLVal = EmitLValue(E: LHSs[I]);
5557 } else {
5558 const Expr *TempExpr = CopyArrayTemps[I];
5559 DestLVal = EmitLValue(E: RHSs[I]);
5560 SrcLVal = EmitLValue(E: TempExpr);
5561 }
5562 EmitOMPCopy(
5563 OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), SrcAddr: SrcLVal.getAddress(),
5564 DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
5565 SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), Copy: CopyOps[I]);
5566 }
5567 }
5568 EmitBranch(Block: IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
5569 OMPScanExitBlock = IsInclusive
5570 ? BreakContinueStack.back().ContinueBlock.getBlock()
5571 : OMPScanReduce;
5572 EmitBlock(BB: OMPAfterScanBlock);
5573 return;
5574 }
5575 if (!IsInclusive) {
5576 EmitBranch(Block: BreakContinueStack.back().ContinueBlock.getBlock());
5577 EmitBlock(BB: OMPScanExitBlock);
5578 }
5579 if (OMPFirstScanLoop) {
5580 // Emit buffer[i] = red; at the end of the input phase.
5581 const auto *IVExpr = cast<OMPLoopDirective>(Val: ParentDir)
5582 .getIterationVariable()
5583 ->IgnoreParenImpCasts();
5584 LValue IdxLVal = EmitLValue(E: IVExpr);
5585 llvm::Value *IdxVal = EmitLoadOfScalar(lvalue: IdxLVal, Loc: IVExpr->getExprLoc());
5586 IdxVal = Builder.CreateIntCast(V: IdxVal, DestTy: SizeTy, /*isSigned=*/false);
5587 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5588 const Expr *PrivateExpr = Privates[I];
5589 const Expr *OrigExpr = Shareds[I];
5590 const Expr *CopyArrayElem = CopyArrayElems[I];
5591 OpaqueValueMapping IdxMapping(
5592 *this,
5593 cast<OpaqueValueExpr>(
5594 Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
5595 RValue::get(V: IdxVal));
5596 LValue DestLVal = EmitLValue(E: CopyArrayElem);
5597 LValue SrcLVal = EmitLValue(E: OrigExpr);
5598 EmitOMPCopy(
5599 OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), SrcAddr: SrcLVal.getAddress(),
5600 DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
5601 SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), Copy: CopyOps[I]);
5602 }
5603 }
5604 EmitBranch(Block: BreakContinueStack.back().ContinueBlock.getBlock());
5605 if (IsInclusive) {
5606 EmitBlock(BB: OMPScanExitBlock);
5607 EmitBranch(Block: BreakContinueStack.back().ContinueBlock.getBlock());
5608 }
5609 EmitBlock(BB: OMPScanDispatch);
5610 if (!OMPFirstScanLoop) {
5611 // Emit red = buffer[i]; at the entrance to the scan phase.
5612 const auto *IVExpr = cast<OMPLoopDirective>(Val: ParentDir)
5613 .getIterationVariable()
5614 ->IgnoreParenImpCasts();
5615 LValue IdxLVal = EmitLValue(E: IVExpr);
5616 llvm::Value *IdxVal = EmitLoadOfScalar(lvalue: IdxLVal, Loc: IVExpr->getExprLoc());
5617 IdxVal = Builder.CreateIntCast(V: IdxVal, DestTy: SizeTy, /*isSigned=*/false);
5618 llvm::BasicBlock *ExclusiveExitBB = nullptr;
5619 if (!IsInclusive) {
5620 llvm::BasicBlock *ContBB = createBasicBlock(name: "omp.exclusive.dec");
5621 ExclusiveExitBB = createBasicBlock(name: "omp.exclusive.copy.exit");
5622 llvm::Value *Cmp = Builder.CreateIsNull(Arg: IdxVal);
5623 Builder.CreateCondBr(Cond: Cmp, True: ExclusiveExitBB, False: ContBB);
5624 EmitBlock(BB: ContBB);
5625 // Use idx - 1 iteration for exclusive scan.
5626 IdxVal = Builder.CreateNUWSub(LHS: IdxVal, RHS: llvm::ConstantInt::get(Ty: SizeTy, V: 1));
5627 }
5628 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5629 const Expr *PrivateExpr = Privates[I];
5630 const Expr *OrigExpr = Shareds[I];
5631 const Expr *CopyArrayElem = CopyArrayElems[I];
5632 OpaqueValueMapping IdxMapping(
5633 *this,
5634 cast<OpaqueValueExpr>(
5635 Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
5636 RValue::get(V: IdxVal));
5637 LValue SrcLVal = EmitLValue(E: CopyArrayElem);
5638 LValue DestLVal = EmitLValue(E: OrigExpr);
5639 EmitOMPCopy(
5640 OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), SrcAddr: SrcLVal.getAddress(),
5641 DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
5642 SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), Copy: CopyOps[I]);
5643 }
5644 if (!IsInclusive) {
5645 EmitBlock(BB: ExclusiveExitBB);
5646 }
5647 }
5648 EmitBranch(Block: (OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
5649 : OMPAfterScanBlock);
5650 EmitBlock(BB: OMPAfterScanBlock);
5651}
5652
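// For reference (illustrative sketch; 'body' is a placeholder): a distribute
// loop such as
//   #pragma omp distribute dist_schedule(static, chunk)
//   for (int i = 0; i < n; ++i) body(i);
// is lowered by obtaining this team's [LB, UB] bounds from the runtime and
// running the inner loop over that range, as detailed in the comments below.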
5653void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
5654 const CodeGenLoopTy &CodeGenLoop,
5655 Expr *IncExpr) {
5656 // Emit the loop iteration variable.
5657 const auto *IVExpr = cast<DeclRefExpr>(Val: S.getIterationVariable());
5658 const auto *IVDecl = cast<VarDecl>(Val: IVExpr->getDecl());
5659 EmitVarDecl(D: *IVDecl);
5660
// Emit the iteration count variable.
// If it is not a variable, Sema decided to calculate the iteration count on
// each iteration (e.g., because it is foldable into a constant).
5664 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) {
5665 EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl()));
5666 // Emit calculation of the iterations count.
5667 EmitIgnoredExpr(E: S.getCalcLastIteration());
5668 }
5669
5670 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
5671
5672 bool HasLastprivateClause = false;
5673 // Check pre-condition.
5674 {
5675 OMPLoopScope PreInitScope(*this, S);
5676 // Skip the entire loop if we don't meet the precondition.
5677 // If the condition constant folds and can be elided, avoid emitting the
5678 // whole loop.
5679 bool CondConstant;
5680 llvm::BasicBlock *ContBlock = nullptr;
5681 if (ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) {
5682 if (!CondConstant)
5683 return;
5684 } else {
5685 llvm::BasicBlock *ThenBlock = createBasicBlock(name: "omp.precond.then");
5686 ContBlock = createBasicBlock(name: "omp.precond.end");
5687 emitPreCond(CGF&: *this, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock,
5688 TrueCount: getProfileCount(S: &S));
5689 EmitBlock(BB: ThenBlock);
5690 incrementProfileCounter(S: &S);
5691 }
5692
5693 emitAlignedClause(CGF&: *this, D: S);
5694 // Emit 'then' code.
5695 {
5696 // Emit helper vars inits.
5697
5698 LValue LB = EmitOMPHelperVar(
5699 CGF&: *this, Helper: cast<DeclRefExpr>(
5700 Val: (isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
5701 ? S.getCombinedLowerBoundVariable()
5702 : S.getLowerBoundVariable())));
5703 LValue UB = EmitOMPHelperVar(
5704 CGF&: *this, Helper: cast<DeclRefExpr>(
5705 Val: (isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
5706 ? S.getCombinedUpperBoundVariable()
5707 : S.getUpperBoundVariable())));
5708 LValue ST =
5709 EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getStrideVariable()));
5710 LValue IL =
5711 EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getIsLastIterVariable()));
5712
5713 OMPPrivateScope LoopScope(*this);
5714 if (EmitOMPFirstprivateClause(D: S, PrivateScope&: LoopScope)) {
5715 // Emit implicit barrier to synchronize threads and avoid data races
5716 // on initialization of firstprivate variables and post-update of
5717 // lastprivate variables.
5718 CGM.getOpenMPRuntime().emitBarrierCall(
5719 CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false,
5720 /*ForceSimpleCall=*/true);
5721 }
5722 EmitOMPPrivateClause(D: S, PrivateScope&: LoopScope);
5723 if (isOpenMPSimdDirective(DKind: S.getDirectiveKind()) &&
5724 !isOpenMPParallelDirective(DKind: S.getDirectiveKind()) &&
5725 !isOpenMPTeamsDirective(DKind: S.getDirectiveKind()))
5726 EmitOMPReductionClauseInit(D: S, PrivateScope&: LoopScope);
5727 HasLastprivateClause = EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope);
5728 EmitOMPPrivateLoopCounters(S, LoopScope);
5729 (void)LoopScope.Privatize();
5730 if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()))
5731 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF&: *this, D: S);
5732
5733 // Detect the distribute schedule kind and chunk.
5734 llvm::Value *Chunk = nullptr;
5735 OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
5736 if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
5737 ScheduleKind = C->getDistScheduleKind();
5738 if (const Expr *Ch = C->getChunkSize()) {
5739 Chunk = EmitScalarExpr(E: Ch);
5740 Chunk = EmitScalarConversion(Src: Chunk, SrcTy: Ch->getType(),
5741 DstTy: S.getIterationVariable()->getType(),
5742 Loc: S.getBeginLoc());
5743 }
5744 } else {
// Default behaviour when no dist_schedule clause is specified.
5746 CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
5747 CGF&: *this, S, ScheduleKind, Chunk);
5748 }
5749 const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType());
5750 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
5751
5752 // OpenMP [2.10.8, distribute Construct, Description]
5753 // If dist_schedule is specified, kind must be static. If specified,
5754 // iterations are divided into chunks of size chunk_size, chunks are
5755 // assigned to the teams of the league in a round-robin fashion in the
5756 // order of the team number. When no chunk_size is specified, the
5757 // iteration space is divided into chunks that are approximately equal
5758 // in size, and at most one chunk is distributed to each team of the
5759 // league. The size of the chunks is unspecified in this case.
5760 bool StaticChunked =
5761 RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
5762 isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind());
5763 if (RT.isStaticNonchunked(ScheduleKind,
5764 /* Chunked */ Chunk != nullptr) ||
5765 StaticChunked) {
5766 CGOpenMPRuntime::StaticRTInput StaticInit(
5767 IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
5768 LB.getAddress(), UB.getAddress(), ST.getAddress(),
5769 StaticChunked ? Chunk : nullptr);
5770 RT.emitDistributeStaticInit(CGF&: *this, Loc: S.getBeginLoc(), SchedKind: ScheduleKind,
5771 Values: StaticInit);
5772 JumpDest LoopExit =
5773 getJumpDestInCurrentScope(Target: createBasicBlock(name: "omp.loop.exit"));
5774 // UB = min(UB, GlobalUB);
5775 EmitIgnoredExpr(E: isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
5776 ? S.getCombinedEnsureUpperBound()
5777 : S.getEnsureUpperBound());
5778 // IV = LB;
5779 EmitIgnoredExpr(E: isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
5780 ? S.getCombinedInit()
5781 : S.getInit());
5782
5783 const Expr *Cond =
5784 isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
5785 ? S.getCombinedCond()
5786 : S.getCond();
5787
5788 if (StaticChunked)
5789 Cond = S.getCombinedDistCond();
5790
5791 // For static unchunked schedules generate:
5792 //
5793 // 1. For distribute alone, codegen
5794 // while (idx <= UB) {
5795 // BODY;
5796 // ++idx;
5797 // }
5798 //
5799 // 2. When combined with 'for' (e.g. as in 'distribute parallel for')
5800 // while (idx <= UB) {
5801 // <CodeGen rest of pragma>(LB, UB);
5802 // idx += ST;
5803 // }
5804 //
// For a static schedule with a chunk size of one, generate:
5806 //
5807 // while (IV <= GlobalUB) {
5808 // <CodeGen rest of pragma>(LB, UB);
5809 // LB += ST;
5810 // UB += ST;
5811 // UB = min(UB, GlobalUB);
5812 // IV = LB;
5813 // }
5814 //
5815 emitCommonSimdLoop(
5816 CGF&: *this, S,
5817 SimdInitGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5818 if (isOpenMPSimdDirective(DKind: S.getDirectiveKind()))
5819 CGF.EmitOMPSimdInit(D: S);
5820 },
5821 BodyCodeGen: [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
5822 StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
5823 CGF.EmitOMPInnerLoop(
5824 S, RequiresCleanup: LoopScope.requiresCleanups(), LoopCond: Cond, IncExpr,
5825 BodyGen: [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
5826 CodeGenLoop(CGF, S, LoopExit);
5827 },
5828 PostIncGen: [&S, StaticChunked](CodeGenFunction &CGF) {
5829 if (StaticChunked) {
5830 CGF.EmitIgnoredExpr(E: S.getCombinedNextLowerBound());
5831 CGF.EmitIgnoredExpr(E: S.getCombinedNextUpperBound());
5832 CGF.EmitIgnoredExpr(E: S.getCombinedEnsureUpperBound());
5833 CGF.EmitIgnoredExpr(E: S.getCombinedInit());
5834 }
5835 });
5836 });
5837 EmitBlock(BB: LoopExit.getBlock());
5838 // Tell the runtime we are done.
5839 RT.emitForStaticFinish(CGF&: *this, Loc: S.getEndLoc(), DKind: OMPD_distribute);
5840 } else {
// Emit the outer loop, which requests its work chunk [LB..UB] from the
// runtime and runs the inner loop to process it.
5843 const OMPLoopArguments LoopArguments = {
5844 LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
5845 Chunk};
5846 EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArgs: LoopArguments,
5847 CodeGenLoopContent: CodeGenLoop);
5848 }
5849 if (isOpenMPSimdDirective(DKind: S.getDirectiveKind())) {
5850 EmitOMPSimdFinal(D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
5851 return CGF.Builder.CreateIsNotNull(
5852 Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
5853 });
5854 }
5855 if (isOpenMPSimdDirective(DKind: S.getDirectiveKind()) &&
5856 !isOpenMPParallelDirective(DKind: S.getDirectiveKind()) &&
5857 !isOpenMPTeamsDirective(DKind: S.getDirectiveKind())) {
5858 EmitOMPReductionClauseFinal(D: S, ReductionKind: OMPD_simd);
5859 // Emit post-update of the reduction variables if IsLastIter != 0.
5860 emitPostUpdateForReductionClause(
5861 CGF&: *this, D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
5862 return CGF.Builder.CreateIsNotNull(
5863 Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
5864 });
5865 }
5866 // Emit final copy of the lastprivate variables if IsLastIter != 0.
5867 if (HasLastprivateClause) {
5868 EmitOMPLastprivateClauseFinal(
5869 D: S, /*NoFinals=*/false,
5870 IsLastIterCond: Builder.CreateIsNotNull(Arg: EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc())));
5871 }
5872 }
5873
5874 // We're now done with the loop, so jump to the continuation block.
5875 if (ContBlock) {
5876 EmitBranch(Block: ContBlock);
5877 EmitBlock(BB: ContBlock, IsFinished: true);
5878 }
5879 }
5880}
5881
5882void CodeGenFunction::EmitOMPDistributeDirective(
5883 const OMPDistributeDirective &S) {
5884 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5885 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
5886 };
5887 OMPLexicalScope Scope(*this, S, OMPD_unknown);
5888 CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_distribute, CodeGen);
5889}
5890
5891static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
5892 const CapturedStmt *S,
5893 SourceLocation Loc) {
5894 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
5895 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
5896 CGF.CapturedStmtInfo = &CapStmtInfo;
5897 llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(S: *S, Loc);
5898 Fn->setDoesNotRecurse();
5899 return Fn;
5900}
5901
5902template <typename T>
5903static void emitRestoreIP(CodeGenFunction &CGF, const T *C,
5904 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
5905 llvm::OpenMPIRBuilder &OMPBuilder) {
5906
5907 unsigned NumLoops = C->getNumLoops();
5908 QualType Int64Ty = CGF.CGM.getContext().getIntTypeForBitwidth(
5909 /*DestWidth=*/64, /*Signed=*/1);
5910 llvm::SmallVector<llvm::Value *> StoreValues;
5911 for (unsigned I = 0; I < NumLoops; I++) {
5912 const Expr *CounterVal = C->getLoopData(I);
5913 assert(CounterVal);
5914 llvm::Value *StoreValue = CGF.EmitScalarConversion(
5915 Src: CGF.EmitScalarExpr(E: CounterVal), SrcTy: CounterVal->getType(), DstTy: Int64Ty,
5916 Loc: CounterVal->getExprLoc());
5917 StoreValues.emplace_back(Args&: StoreValue);
5918 }
5919 OMPDoacrossKind<T> ODK;
5920 bool IsDependSource = ODK.isSource(C);
5921 CGF.Builder.restoreIP(
5922 IP: OMPBuilder.createOrderedDepend(Loc: CGF.Builder, AllocaIP, NumLoops,
5923 StoreValues, Name: ".cnt.addr", IsDependSource));
5924}
5925
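// For reference (illustrative sketch): forms of the ordered construct handled
// below.
//   #pragma omp ordered                   // block, threads semantics (default)
//   #pragma omp ordered simd              // block, simd semantics
//   #pragma omp ordered depend(source)    // stand-alone doacross source
//   #pragma omp ordered depend(sink: i-1) // stand-alone doacross sink
// The stand-alone doacross forms appear inside a loop declared with an
// 'ordered(n)' clause.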
5926void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
5927 if (CGM.getLangOpts().OpenMPIRBuilder) {
5928 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
5929 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
5930
5931 if (S.hasClausesOfKind<OMPDependClause>() ||
5932 S.hasClausesOfKind<OMPDoacrossClause>()) {
// The ordered directive with a depend clause.
5934 assert(!S.hasAssociatedStmt() && "No associated statement must be in "
5935 "ordered depend|doacross construct.");
5936 InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
5937 AllocaInsertPt->getIterator());
5938 for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
5939 emitRestoreIP(CGF&: *this, C: DC, AllocaIP, OMPBuilder);
5940 for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
5941 emitRestoreIP(CGF&: *this, C: DC, AllocaIP, OMPBuilder);
5942 } else {
// The ordered directive with a threads or simd clause, or without any clause.
// Without a clause, it behaves as if the threads clause were specified.
5945 const auto *C = S.getSingleClause<OMPSIMDClause>();
5946
5947 auto FiniCB = [this](InsertPointTy IP) {
5948 OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
5949 };
5950
5951 auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP,
5952 InsertPointTy CodeGenIP) {
5953 Builder.restoreIP(IP: CodeGenIP);
5954
5955 const CapturedStmt *CS = S.getInnermostCapturedStmt();
5956 if (C) {
5957 llvm::BasicBlock *FiniBB = splitBBWithSuffix(
5958 Builder, /*CreateBranch=*/false, Suffix: ".ordered.after");
5959 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
5960 GenerateOpenMPCapturedVars(S: *CS, CapturedVars);
5961 llvm::Function *OutlinedFn =
5962 emitOutlinedOrderedFunction(CGM, S: CS, Loc: S.getBeginLoc());
5963 assert(S.getBeginLoc().isValid() &&
5964 "Outlined function call location must be valid.");
5965 ApplyDebugLocation::CreateDefaultArtificial(CGF&: *this, TemporaryLocation: S.getBeginLoc());
5966 OMPBuilderCBHelpers::EmitCaptureStmt(CGF&: *this, CodeGenIP, FiniBB&: *FiniBB,
5967 Fn: OutlinedFn, Args: CapturedVars);
5968 } else {
5969 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
5970 CGF&: *this, RegionBodyStmt: CS->getCapturedStmt(), AllocaIP, CodeGenIP, RegionName: "ordered");
5971 }
5972 };
5973
5974 OMPLexicalScope Scope(*this, S, OMPD_unknown);
5975 Builder.restoreIP(
5976 IP: OMPBuilder.createOrderedThreadsSimd(Loc: Builder, BodyGenCB, FiniCB, IsThreads: !C));
5977 }
5978 return;
5979 }
5980
5981 if (S.hasClausesOfKind<OMPDependClause>()) {
5982 assert(!S.hasAssociatedStmt() &&
5983 "No associated statement must be in ordered depend construct.");
5984 for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
5985 CGM.getOpenMPRuntime().emitDoacrossOrdered(CGF&: *this, C: DC);
5986 return;
5987 }
5988 if (S.hasClausesOfKind<OMPDoacrossClause>()) {
5989 assert(!S.hasAssociatedStmt() &&
5990 "No associated statement must be in ordered doacross construct.");
5991 for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
5992 CGM.getOpenMPRuntime().emitDoacrossOrdered(CGF&: *this, C: DC);
5993 return;
5994 }
5995 const auto *C = S.getSingleClause<OMPSIMDClause>();
5996 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
5997 PrePostActionTy &Action) {
5998 const CapturedStmt *CS = S.getInnermostCapturedStmt();
5999 if (C) {
6000 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
6001 CGF.GenerateOpenMPCapturedVars(S: *CS, CapturedVars);
6002 llvm::Function *OutlinedFn =
6003 emitOutlinedOrderedFunction(CGM, S: CS, Loc: S.getBeginLoc());
6004 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc: S.getBeginLoc(),
6005 OutlinedFn, Args: CapturedVars);
6006 } else {
6007 Action.Enter(CGF);
6008 CGF.EmitStmt(S: CS->getCapturedStmt());
6009 }
6010 };
6011 OMPLexicalScope Scope(*this, S, OMPD_unknown);
6012 CGM.getOpenMPRuntime().emitOrderedRegion(CGF&: *this, OrderedOpGen: CodeGen, Loc: S.getBeginLoc(), IsThreads: !C);
6013}
6014
6015static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
6016 QualType SrcType, QualType DestType,
6017 SourceLocation Loc) {
6018 assert(CGF.hasScalarEvaluationKind(DestType) &&
6019 "DestType must have scalar evaluation kind.");
6020 assert(!Val.isAggregate() && "Must be a scalar or complex.");
6021 return Val.isScalar() ? CGF.EmitScalarConversion(Src: Val.getScalarVal(), SrcTy: SrcType,
6022 DstTy: DestType, Loc)
6023 : CGF.EmitComplexToScalarConversion(
6024 Src: Val.getComplexVal(), SrcTy: SrcType, DstTy: DestType, Loc);
6025}
6026
6027static CodeGenFunction::ComplexPairTy
6028convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
6029 QualType DestType, SourceLocation Loc) {
6030 assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
6031 "DestType must have complex evaluation kind.");
6032 CodeGenFunction::ComplexPairTy ComplexVal;
6033 if (Val.isScalar()) {
6034 // Convert the input element to the element type of the complex.
6035 QualType DestElementType =
6036 DestType->castAs<ComplexType>()->getElementType();
6037 llvm::Value *ScalarVal = CGF.EmitScalarConversion(
6038 Src: Val.getScalarVal(), SrcTy: SrcType, DstTy: DestElementType, Loc);
6039 ComplexVal = CodeGenFunction::ComplexPairTy(
6040 ScalarVal, llvm::Constant::getNullValue(Ty: ScalarVal->getType()));
6041 } else {
6042 assert(Val.isComplex() && "Must be a scalar or complex.");
6043 QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
6044 QualType DestElementType =
6045 DestType->castAs<ComplexType>()->getElementType();
6046 ComplexVal.first = CGF.EmitScalarConversion(
6047 Src: Val.getComplexVal().first, SrcTy: SrcElementType, DstTy: DestElementType, Loc);
6048 ComplexVal.second = CGF.EmitScalarConversion(
6049 Src: Val.getComplexVal().second, SrcTy: SrcElementType, DstTy: DestElementType, Loc);
6050 }
6051 return ComplexVal;
6052}
6053
6054static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
6055 LValue LVal, RValue RVal) {
6056 if (LVal.isGlobalReg())
6057 CGF.EmitStoreThroughGlobalRegLValue(Src: RVal, Dst: LVal);
6058 else
6059 CGF.EmitAtomicStore(rvalue: RVal, lvalue: LVal, AO, IsVolatile: LVal.isVolatile(), /*isInit=*/false);
6060}
6061
6062static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
6063 llvm::AtomicOrdering AO, LValue LVal,
6064 SourceLocation Loc) {
6065 if (LVal.isGlobalReg())
6066 return CGF.EmitLoadOfLValue(V: LVal, Loc);
6067 return CGF.EmitAtomicLoad(
6068 lvalue: LVal, loc: Loc, AO: llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering: AO),
6069 IsVolatile: LVal.isVolatile());
6070}
6071
6072void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
6073 QualType RValTy, SourceLocation Loc) {
6074 switch (getEvaluationKind(T: LVal.getType())) {
6075 case TEK_Scalar:
6076 EmitStoreThroughLValue(Src: RValue::get(V: convertToScalarValue(
6077 CGF&: *this, Val: RVal, SrcType: RValTy, DestType: LVal.getType(), Loc)),
6078 Dst: LVal);
6079 break;
6080 case TEK_Complex:
6081 EmitStoreOfComplex(
6082 V: convertToComplexValue(CGF&: *this, Val: RVal, SrcType: RValTy, DestType: LVal.getType(), Loc), dest: LVal,
6083 /*isInit=*/false);
6084 break;
6085 case TEK_Aggregate:
6086 llvm_unreachable("Must be a scalar or complex.");
6087 }
6088}
6089
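// For reference (illustrative sketch): an atomic read such as
//   #pragma omp atomic read acquire
//   v = x;
// loads 'x' atomically with the requested ordering and stores the result into
// 'v' non-atomically.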
6090static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
6091 const Expr *X, const Expr *V,
6092 SourceLocation Loc) {
6093 // v = x;
6094 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
6095 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
6096 LValue XLValue = CGF.EmitLValue(E: X);
6097 LValue VLValue = CGF.EmitLValue(E: V);
6098 RValue Res = emitSimpleAtomicLoad(CGF, AO, LVal: XLValue, Loc);
6099 // OpenMP, 2.17.7, atomic Construct
6100 // If the read or capture clause is specified and the acquire, acq_rel, or
6101 // seq_cst clause is specified then the strong flush on exit from the atomic
6102 // operation is also an acquire flush.
6103 switch (AO) {
6104 case llvm::AtomicOrdering::Acquire:
6105 case llvm::AtomicOrdering::AcquireRelease:
6106 case llvm::AtomicOrdering::SequentiallyConsistent:
6107 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: std::nullopt, Loc,
6108 AO: llvm::AtomicOrdering::Acquire);
6109 break;
6110 case llvm::AtomicOrdering::Monotonic:
6111 case llvm::AtomicOrdering::Release:
6112 break;
6113 case llvm::AtomicOrdering::NotAtomic:
6114 case llvm::AtomicOrdering::Unordered:
6115 llvm_unreachable("Unexpected ordering.");
6116 }
6117 CGF.emitOMPSimpleStore(LVal: VLValue, RVal: Res, RValTy: X->getType().getNonReferenceType(), Loc);
6118 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: V);
6119}
6120
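// For reference (illustrative sketch): an atomic write such as
//   #pragma omp atomic write release
//   x = expr;
// evaluates 'expr' non-atomically and stores the result into 'x' atomically.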
6121static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
6122 llvm::AtomicOrdering AO, const Expr *X,
6123 const Expr *E, SourceLocation Loc) {
6124 // x = expr;
6125 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
6126 emitSimpleAtomicStore(CGF, AO, LVal: CGF.EmitLValue(E: X), RVal: CGF.EmitAnyExpr(E));
6127 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X);
6128 // OpenMP, 2.17.7, atomic Construct
6129 // If the write, update, or capture clause is specified and the release,
6130 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6131 // the atomic operation is also a release flush.
6132 switch (AO) {
6133 case llvm::AtomicOrdering::Release:
6134 case llvm::AtomicOrdering::AcquireRelease:
6135 case llvm::AtomicOrdering::SequentiallyConsistent:
6136 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: std::nullopt, Loc,
6137 AO: llvm::AtomicOrdering::Release);
6138 break;
6139 case llvm::AtomicOrdering::Acquire:
6140 case llvm::AtomicOrdering::Monotonic:
6141 break;
6142 case llvm::AtomicOrdering::NotAtomic:
6143 case llvm::AtomicOrdering::Unordered:
6144 llvm_unreachable("Unexpected ordering.");
6145 }
6146}
6147
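// For reference (illustrative sketch; the exact IR depends on the target):
// when the operation maps onto a native read-modify-write, e.g.
//   #pragma omp atomic update
//   x += 1;          // with 'int x'
// this helper emits a single instruction along the lines of
//   %old = atomicrmw add ptr %x, i32 1 monotonic
// instead of a compare-and-swap loop.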
6148static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
6149 RValue Update,
6150 BinaryOperatorKind BO,
6151 llvm::AtomicOrdering AO,
6152 bool IsXLHSInRHSPart) {
6153 ASTContext &Context = CGF.getContext();
// Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue for
// the 'x' expression is simple, and atomics are supported for the given type
// on the target platform.
6157 if (BO == BO_Comma || !Update.isScalar() || !X.isSimple() ||
6158 (!isa<llvm::ConstantInt>(Val: Update.getScalarVal()) &&
6159 (Update.getScalarVal()->getType() != X.getAddress().getElementType())) ||
6160 !Context.getTargetInfo().hasBuiltinAtomic(
6161 AtomicSizeInBits: Context.getTypeSize(T: X.getType()), AlignmentInBits: Context.toBits(CharSize: X.getAlignment())))
6162 return std::make_pair(x: false, y: RValue::get(V: nullptr));
6163
6164 auto &&CheckAtomicSupport = [&CGF](llvm::Type *T, BinaryOperatorKind BO) {
6165 if (T->isIntegerTy())
6166 return true;
6167
6168 if (T->isFloatingPointTy() && (BO == BO_Add || BO == BO_Sub))
6169 return llvm::isPowerOf2_64(Value: CGF.CGM.getDataLayout().getTypeStoreSize(Ty: T));
6170
6171 return false;
6172 };
6173
6174 if (!CheckAtomicSupport(Update.getScalarVal()->getType(), BO) ||
6175 !CheckAtomicSupport(X.getAddress().getElementType(), BO))
6176 return std::make_pair(x: false, y: RValue::get(V: nullptr));
6177
6178 bool IsInteger = X.getAddress().getElementType()->isIntegerTy();
6179 llvm::AtomicRMWInst::BinOp RMWOp;
6180 switch (BO) {
6181 case BO_Add:
6182 RMWOp = IsInteger ? llvm::AtomicRMWInst::Add : llvm::AtomicRMWInst::FAdd;
6183 break;
6184 case BO_Sub:
6185 if (!IsXLHSInRHSPart)
6186 return std::make_pair(x: false, y: RValue::get(V: nullptr));
6187 RMWOp = IsInteger ? llvm::AtomicRMWInst::Sub : llvm::AtomicRMWInst::FSub;
6188 break;
6189 case BO_And:
6190 RMWOp = llvm::AtomicRMWInst::And;
6191 break;
6192 case BO_Or:
6193 RMWOp = llvm::AtomicRMWInst::Or;
6194 break;
6195 case BO_Xor:
6196 RMWOp = llvm::AtomicRMWInst::Xor;
6197 break;
6198 case BO_LT:
6199 if (IsInteger)
6200 RMWOp = X.getType()->hasSignedIntegerRepresentation()
6201 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
6202 : llvm::AtomicRMWInst::Max)
6203 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
6204 : llvm::AtomicRMWInst::UMax);
6205 else
6206 RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMin
6207 : llvm::AtomicRMWInst::FMax;
6208 break;
6209 case BO_GT:
6210 if (IsInteger)
6211 RMWOp = X.getType()->hasSignedIntegerRepresentation()
6212 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
6213 : llvm::AtomicRMWInst::Min)
6214 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
6215 : llvm::AtomicRMWInst::UMin);
6216 else
6217 RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMax
6218 : llvm::AtomicRMWInst::FMin;
6219 break;
6220 case BO_Assign:
6221 RMWOp = llvm::AtomicRMWInst::Xchg;
6222 break;
6223 case BO_Mul:
6224 case BO_Div:
6225 case BO_Rem:
6226 case BO_Shl:
6227 case BO_Shr:
6228 case BO_LAnd:
6229 case BO_LOr:
6230 return std::make_pair(x: false, y: RValue::get(V: nullptr));
6231 case BO_PtrMemD:
6232 case BO_PtrMemI:
6233 case BO_LE:
6234 case BO_GE:
6235 case BO_EQ:
6236 case BO_NE:
6237 case BO_Cmp:
6238 case BO_AddAssign:
6239 case BO_SubAssign:
6240 case BO_AndAssign:
6241 case BO_OrAssign:
6242 case BO_XorAssign:
6243 case BO_MulAssign:
6244 case BO_DivAssign:
6245 case BO_RemAssign:
6246 case BO_ShlAssign:
6247 case BO_ShrAssign:
6248 case BO_Comma:
6249 llvm_unreachable("Unsupported atomic update operation");
6250 }
6251 llvm::Value *UpdateVal = Update.getScalarVal();
6252 if (auto *IC = dyn_cast<llvm::ConstantInt>(Val: UpdateVal)) {
6253 if (IsInteger)
6254 UpdateVal = CGF.Builder.CreateIntCast(
6255 V: IC, DestTy: X.getAddress().getElementType(),
6256 isSigned: X.getType()->hasSignedIntegerRepresentation());
6257 else
6258 UpdateVal = CGF.Builder.CreateCast(Op: llvm::Instruction::CastOps::UIToFP, V: IC,
6259 DestTy: X.getAddress().getElementType());
6260 }
6261 llvm::Value *Res =
6262 CGF.Builder.CreateAtomicRMW(Op: RMWOp, Addr: X.getAddress(), Val: UpdateVal, Ordering: AO);
6263 return std::make_pair(x: true, y: RValue::get(V: Res));
6264}
6265
6266std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
6267 LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
6268 llvm::AtomicOrdering AO, SourceLocation Loc,
6269 const llvm::function_ref<RValue(RValue)> CommonGen) {
6270 // Update expressions are allowed to have the following forms:
// x binop= expr; -> xrval binop expr;
// x++, ++x -> xrval + 1;
// x--, --x -> xrval - 1;
// x = x binop expr; -> xrval binop expr;
// x = expr Op x; -> expr binop xrval;
6276 auto Res = emitOMPAtomicRMW(CGF&: *this, X, Update: E, BO, AO, IsXLHSInRHSPart);
6277 if (!Res.first) {
6278 if (X.isGlobalReg()) {
6279 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
6280 // 'xrval'.
6281 EmitStoreThroughLValue(Src: CommonGen(EmitLoadOfLValue(V: X, Loc)), Dst: X);
6282 } else {
6283 // Perform compare-and-swap procedure.
6284 EmitAtomicUpdate(LVal: X, AO, UpdateOp: CommonGen, IsVolatile: X.getType().isVolatileQualified());
6285 }
6286 }
6287 return Res;
6288}
6289
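// For reference (illustrative sketch): an atomic update such as
//   #pragma omp atomic update
//   x = x + expr;
// either maps onto an atomicrmw (see emitOMPAtomicRMW above) or falls back to
// an atomic compare-and-swap loop around the rewritten update expression.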
6290static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
6291 llvm::AtomicOrdering AO, const Expr *X,
6292 const Expr *E, const Expr *UE,
6293 bool IsXLHSInRHSPart, SourceLocation Loc) {
6294 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
6295 "Update expr in 'atomic update' must be a binary operator.");
6296 const auto *BOUE = cast<BinaryOperator>(Val: UE->IgnoreImpCasts());
6297 // Update expressions are allowed to have the following forms:
// x binop= expr; -> xrval binop expr;
// x++, ++x -> xrval + 1;
// x--, --x -> xrval - 1;
// x = x binop expr; -> xrval binop expr;
// x = expr Op x; -> expr binop xrval;
6303 assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
6304 LValue XLValue = CGF.EmitLValue(E: X);
6305 RValue ExprRValue = CGF.EmitAnyExpr(E);
6306 const auto *LHS = cast<OpaqueValueExpr>(Val: BOUE->getLHS()->IgnoreImpCasts());
6307 const auto *RHS = cast<OpaqueValueExpr>(Val: BOUE->getRHS()->IgnoreImpCasts());
6308 const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
6309 const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
6310 auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
6311 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6312 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
6313 return CGF.EmitAnyExpr(E: UE);
6314 };
6315 (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
6316 X: XLValue, E: ExprRValue, BO: BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, CommonGen: Gen);
6317 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X);
6318 // OpenMP, 2.17.7, atomic Construct
6319 // If the write, update, or capture clause is specified and the release,
6320 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6321 // the atomic operation is also a release flush.
6322 switch (AO) {
6323 case llvm::AtomicOrdering::Release:
6324 case llvm::AtomicOrdering::AcquireRelease:
6325 case llvm::AtomicOrdering::SequentiallyConsistent:
6326 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: std::nullopt, Loc,
6327 AO: llvm::AtomicOrdering::Release);
6328 break;
6329 case llvm::AtomicOrdering::Acquire:
6330 case llvm::AtomicOrdering::Monotonic:
6331 break;
6332 case llvm::AtomicOrdering::NotAtomic:
6333 case llvm::AtomicOrdering::Unordered:
6334 llvm_unreachable("Unexpected ordering.");
6335 }
6336}
6337
6338static RValue convertToType(CodeGenFunction &CGF, RValue Value,
6339 QualType SourceType, QualType ResType,
6340 SourceLocation Loc) {
6341 switch (CGF.getEvaluationKind(T: ResType)) {
6342 case TEK_Scalar:
6343 return RValue::get(
6344 V: convertToScalarValue(CGF, Val: Value, SrcType: SourceType, DestType: ResType, Loc));
6345 case TEK_Complex: {
6346 auto Res = convertToComplexValue(CGF, Val: Value, SrcType: SourceType, DestType: ResType, Loc);
6347 return RValue::getComplex(V1: Res.first, V2: Res.second);
6348 }
6349 case TEK_Aggregate:
6350 break;
6351 }
6352 llvm_unreachable("Must be a scalar or complex.");
6353}
6354
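// For reference (illustrative sketch): atomic captures such as
//   #pragma omp atomic capture
//   v = x++;                 // postfix: 'v' receives the old value of 'x'
//   #pragma omp atomic capture
//   { v = x; x = expr; }     // structured-block form, 'x' is overwritten
// atomically update (or overwrite) 'x' and store the old or new value into
// 'v'.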
6355static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
6356 llvm::AtomicOrdering AO,
6357 bool IsPostfixUpdate, const Expr *V,
6358 const Expr *X, const Expr *E,
6359 const Expr *UE, bool IsXLHSInRHSPart,
6360 SourceLocation Loc) {
6361 assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
6362 assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
6363 RValue NewVVal;
6364 LValue VLValue = CGF.EmitLValue(E: V);
6365 LValue XLValue = CGF.EmitLValue(E: X);
6366 RValue ExprRValue = CGF.EmitAnyExpr(E);
6367 QualType NewVValType;
6368 if (UE) {
6369 // 'x' is updated with some additional value.
6370 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
6371 "Update expr in 'atomic capture' must be a binary operator.");
6372 const auto *BOUE = cast<BinaryOperator>(Val: UE->IgnoreImpCasts());
6373 // Update expressions are allowed to have the following forms:
// x binop= expr; -> xrval binop expr;
// x++, ++x -> xrval + 1;
// x--, --x -> xrval - 1;
// x = x binop expr; -> xrval binop expr;
// x = expr Op x; -> expr binop xrval;
6379 const auto *LHS = cast<OpaqueValueExpr>(Val: BOUE->getLHS()->IgnoreImpCasts());
6380 const auto *RHS = cast<OpaqueValueExpr>(Val: BOUE->getRHS()->IgnoreImpCasts());
6381 const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
6382 NewVValType = XRValExpr->getType();
6383 const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
6384 auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
6385 IsPostfixUpdate](RValue XRValue) {
6386 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6387 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
6388 RValue Res = CGF.EmitAnyExpr(E: UE);
6389 NewVVal = IsPostfixUpdate ? XRValue : Res;
6390 return Res;
6391 };
6392 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
6393 X: XLValue, E: ExprRValue, BO: BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, CommonGen: Gen);
6394 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X);
6395 if (Res.first) {
6396 // 'atomicrmw' instruction was generated.
6397 if (IsPostfixUpdate) {
6398 // Use old value from 'atomicrmw'.
6399 NewVVal = Res.second;
6400 } else {
// 'atomicrmw' does not provide the new value, so evaluate it using the
// old value of 'x'.
6403 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6404 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
6405 NewVVal = CGF.EmitAnyExpr(E: UE);
6406 }
6407 }
6408 } else {
6409 // 'x' is simply rewritten with some 'expr'.
6410 NewVValType = X->getType().getNonReferenceType();
6411 ExprRValue = convertToType(CGF, Value: ExprRValue, SourceType: E->getType(),
6412 ResType: X->getType().getNonReferenceType(), Loc);
6413 auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
6414 NewVVal = XRValue;
6415 return ExprRValue;
6416 };
// Try to perform an atomicrmw xchg; otherwise, fall back to a simple exchange.
6418 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
6419 X: XLValue, E: ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
6420 Loc, CommonGen: Gen);
6421 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X);
6422 if (Res.first) {
6423 // 'atomicrmw' instruction was generated.
6424 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
6425 }
6426 }
6427 // Emit post-update store to 'v' of old/new 'x' value.
6428 CGF.emitOMPSimpleStore(LVal: VLValue, RVal: NewVVal, RValTy: NewVValType, Loc);
6429 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: V);
// OpenMP 5.1 removes the required flush for the capture clause.
6431 if (CGF.CGM.getLangOpts().OpenMP < 51) {
6432 // OpenMP, 2.17.7, atomic Construct
6433 // If the write, update, or capture clause is specified and the release,
6434 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6435 // the atomic operation is also a release flush.
6436 // If the read or capture clause is specified and the acquire, acq_rel, or
6437 // seq_cst clause is specified then the strong flush on exit from the atomic
6438 // operation is also an acquire flush.
6439 switch (AO) {
6440 case llvm::AtomicOrdering::Release:
6441 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: std::nullopt, Loc,
6442 AO: llvm::AtomicOrdering::Release);
6443 break;
6444 case llvm::AtomicOrdering::Acquire:
6445 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: std::nullopt, Loc,
6446 AO: llvm::AtomicOrdering::Acquire);
6447 break;
6448 case llvm::AtomicOrdering::AcquireRelease:
6449 case llvm::AtomicOrdering::SequentiallyConsistent:
6450 CGF.CGM.getOpenMPRuntime().emitFlush(
6451 CGF, Vars: std::nullopt, Loc, AO: llvm::AtomicOrdering::AcquireRelease);
6452 break;
6453 case llvm::AtomicOrdering::Monotonic:
6454 break;
6455 case llvm::AtomicOrdering::NotAtomic:
6456 case llvm::AtomicOrdering::Unordered:
6457 llvm_unreachable("Unexpected ordering.");
6458 }
6459 }
6460}
6461
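// For reference (illustrative sketch): atomic compare forms such as
//   #pragma omp atomic compare
//   x = x > e ? e : x;
//   #pragma omp atomic compare
//   if (x == e) { x = d; }
// are lowered through the OpenMPIRBuilder's createAtomicCompare, optionally
// with a separate failure ordering supplied by a 'fail' clause.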
6462static void emitOMPAtomicCompareExpr(
6463 CodeGenFunction &CGF, llvm::AtomicOrdering AO, llvm::AtomicOrdering FailAO,
6464 const Expr *X, const Expr *V, const Expr *R, const Expr *E, const Expr *D,
6465 const Expr *CE, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly,
6466 SourceLocation Loc) {
6467 llvm::OpenMPIRBuilder &OMPBuilder =
6468 CGF.CGM.getOpenMPRuntime().getOMPBuilder();
6469
6470 OMPAtomicCompareOp Op;
6471 assert(isa<BinaryOperator>(CE) && "CE is not a BinaryOperator");
6472 switch (cast<BinaryOperator>(Val: CE)->getOpcode()) {
6473 case BO_EQ:
6474 Op = OMPAtomicCompareOp::EQ;
6475 break;
6476 case BO_LT:
6477 Op = OMPAtomicCompareOp::MIN;
6478 break;
6479 case BO_GT:
6480 Op = OMPAtomicCompareOp::MAX;
6481 break;
6482 default:
6483 llvm_unreachable("unsupported atomic compare binary operator");
6484 }
6485
6486 LValue XLVal = CGF.EmitLValue(E: X);
6487 Address XAddr = XLVal.getAddress();
6488
6489 auto EmitRValueWithCastIfNeeded = [&CGF, Loc](const Expr *X, const Expr *E) {
6490 if (X->getType() == E->getType())
6491 return CGF.EmitScalarExpr(E);
6492 const Expr *NewE = E->IgnoreImplicitAsWritten();
6493 llvm::Value *V = CGF.EmitScalarExpr(E: NewE);
6494 if (NewE->getType() == X->getType())
6495 return V;
6496 return CGF.EmitScalarConversion(Src: V, SrcTy: NewE->getType(), DstTy: X->getType(), Loc);
6497 };
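  // The comparison and 'desired' operands must agree with the type of 'x':
  // if the frontend wrapped 'e' or 'd' in an implicit conversion, the lambda
  // above emits the sub-expression as written and, if the types still differ,
  // inserts an explicit scalar conversion.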
6498
6499 llvm::Value *EVal = EmitRValueWithCastIfNeeded(X, E);
6500 llvm::Value *DVal = D ? EmitRValueWithCastIfNeeded(X, D) : nullptr;
6501 if (auto *CI = dyn_cast<llvm::ConstantInt>(Val: EVal))
6502 EVal = CGF.Builder.CreateIntCast(
6503 V: CI, DestTy: XLVal.getAddress().getElementType(),
6504 isSigned: E->getType()->hasSignedIntegerRepresentation());
6505 if (DVal)
6506 if (auto *CI = dyn_cast<llvm::ConstantInt>(Val: DVal))
6507 DVal = CGF.Builder.CreateIntCast(
6508 V: CI, DestTy: XLVal.getAddress().getElementType(),
6509 isSigned: D->getType()->hasSignedIntegerRepresentation());
6510
6511 llvm::OpenMPIRBuilder::AtomicOpValue XOpVal{
6512 .Var: XAddr.emitRawPointer(CGF), .ElemTy: XAddr.getElementType(),
6513 .IsSigned: X->getType()->hasSignedIntegerRepresentation(),
6514 .IsVolatile: X->getType().isVolatileQualified()};
6515 llvm::OpenMPIRBuilder::AtomicOpValue VOpVal, ROpVal;
6516 if (V) {
6517 LValue LV = CGF.EmitLValue(E: V);
6518 Address Addr = LV.getAddress();
6519 VOpVal = {.Var: Addr.emitRawPointer(CGF), .ElemTy: Addr.getElementType(),
6520 .IsSigned: V->getType()->hasSignedIntegerRepresentation(),
6521 .IsVolatile: V->getType().isVolatileQualified()};
6522 }
6523 if (R) {
6524 LValue LV = CGF.EmitLValue(E: R);
6525 Address Addr = LV.getAddress();
6526 ROpVal = {.Var: Addr.emitRawPointer(CGF), .ElemTy: Addr.getElementType(),
6527 .IsSigned: R->getType()->hasSignedIntegerRepresentation(),
6528 .IsVolatile: R->getType().isVolatileQualified()};
6529 }
6530
6531 if (FailAO == llvm::AtomicOrdering::NotAtomic) {
    // The 'fail' clause was not specified on the
    // '#pragma omp atomic compare' construct.
6534 CGF.Builder.restoreIP(IP: OMPBuilder.createAtomicCompare(
6535 Loc: CGF.Builder, X&: XOpVal, V&: VOpVal, R&: ROpVal, E: EVal, D: DVal, AO, Op, IsXBinopExpr,
6536 IsPostfixUpdate, IsFailOnly));
6537 } else
6538 CGF.Builder.restoreIP(IP: OMPBuilder.createAtomicCompare(
6539 Loc: CGF.Builder, X&: XOpVal, V&: VOpVal, R&: ROpVal, E: EVal, D: DVal, AO, Op, IsXBinopExpr,
6540 IsPostfixUpdate, IsFailOnly, Failure: FailAO));
6541}
6542
6543static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
6544 llvm::AtomicOrdering AO,
6545 llvm::AtomicOrdering FailAO, bool IsPostfixUpdate,
6546 const Expr *X, const Expr *V, const Expr *R,
6547 const Expr *E, const Expr *UE, const Expr *D,
6548 const Expr *CE, bool IsXLHSInRHSPart,
6549 bool IsFailOnly, SourceLocation Loc) {
6550 switch (Kind) {
6551 case OMPC_read:
6552 emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
6553 break;
6554 case OMPC_write:
6555 emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
6556 break;
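  // An 'atomic' construct without a memory-operation clause behaves like
  // 'atomic update', so OMPC_unknown shares the update path.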
6557 case OMPC_unknown:
6558 case OMPC_update:
6559 emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
6560 break;
6561 case OMPC_capture:
6562 emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
6563 IsXLHSInRHSPart, Loc);
6564 break;
6565 case OMPC_compare: {
6566 emitOMPAtomicCompareExpr(CGF, AO, FailAO, X, V, R, E, D, CE,
6567 IsXBinopExpr: IsXLHSInRHSPart, IsPostfixUpdate, IsFailOnly, Loc);
6568 break;
6569 }
6570 default:
6571 llvm_unreachable("Clause is not allowed in 'omp atomic'.");
6572 }
6573}
6574
6575void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
6576 llvm::AtomicOrdering AO = CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
6577 // Fail Memory Clause Ordering.
6578 llvm::AtomicOrdering FailAO = llvm::AtomicOrdering::NotAtomic;
6579 bool MemOrderingSpecified = false;
6580 if (S.getSingleClause<OMPSeqCstClause>()) {
6581 AO = llvm::AtomicOrdering::SequentiallyConsistent;
6582 MemOrderingSpecified = true;
6583 } else if (S.getSingleClause<OMPAcqRelClause>()) {
6584 AO = llvm::AtomicOrdering::AcquireRelease;
6585 MemOrderingSpecified = true;
6586 } else if (S.getSingleClause<OMPAcquireClause>()) {
6587 AO = llvm::AtomicOrdering::Acquire;
6588 MemOrderingSpecified = true;
6589 } else if (S.getSingleClause<OMPReleaseClause>()) {
6590 AO = llvm::AtomicOrdering::Release;
6591 MemOrderingSpecified = true;
6592 } else if (S.getSingleClause<OMPRelaxedClause>()) {
6593 AO = llvm::AtomicOrdering::Monotonic;
6594 MemOrderingSpecified = true;
6595 }
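  // For example, '#pragma omp atomic update seq_cst' selects
  // SequentiallyConsistent here, while a directive without any memory-order
  // clause keeps the default ordering (possibly set by a 'requires
  // atomic_default_mem_order' directive) and is refined below.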
6596 llvm::SmallSet<OpenMPClauseKind, 2> KindsEncountered;
6597 OpenMPClauseKind Kind = OMPC_unknown;
6598 for (const OMPClause *C : S.clauses()) {
    // Find the first clause (skip the seq_cst|acq_rel|acquire|release|relaxed
    // clause, if it is first).
6601 OpenMPClauseKind K = C->getClauseKind();
    // TODO: Codegen for the 'weak' clause is not implemented yet; bail out.
6603 if (K == OMPC_weak)
6604 return;
6605 if (K == OMPC_seq_cst || K == OMPC_acq_rel || K == OMPC_acquire ||
6606 K == OMPC_release || K == OMPC_relaxed || K == OMPC_hint)
6607 continue;
6608 Kind = K;
6609 KindsEncountered.insert(V: K);
6610 }
6611 // We just need to correct Kind here. No need to set a bool saying it is
6612 // actually compare capture because we can tell from whether V and R are
6613 // nullptr.
6614 if (KindsEncountered.contains(V: OMPC_compare) &&
6615 KindsEncountered.contains(V: OMPC_capture))
6616 Kind = OMPC_compare;
6617 if (!MemOrderingSpecified) {
6618 llvm::AtomicOrdering DefaultOrder =
6619 CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
6620 if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
6621 DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
6622 (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
6623 Kind == OMPC_capture)) {
6624 AO = DefaultOrder;
6625 } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
6626 if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
6627 AO = llvm::AtomicOrdering::Release;
      } else if (Kind == OMPC_read) {
        AO = llvm::AtomicOrdering::Acquire;
      }
6632 }
6633 }
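  // E.g., under 'requires atomic_default_mem_order(acq_rel)' the code above
  // gives a plain 'atomic update' Release semantics and an 'atomic read'
  // Acquire semantics, matching the one-sided nature of each operation.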
6634
6635 if (KindsEncountered.contains(V: OMPC_compare) &&
6636 KindsEncountered.contains(V: OMPC_fail)) {
6637 Kind = OMPC_compare;
6638 const auto *FailClause = S.getSingleClause<OMPFailClause>();
6639 if (FailClause) {
6640 OpenMPClauseKind FailParameter = FailClause->getFailParameter();
6641 if (FailParameter == llvm::omp::OMPC_relaxed)
6642 FailAO = llvm::AtomicOrdering::Monotonic;
6643 else if (FailParameter == llvm::omp::OMPC_acquire)
6644 FailAO = llvm::AtomicOrdering::Acquire;
6645 else if (FailParameter == llvm::omp::OMPC_seq_cst)
6646 FailAO = llvm::AtomicOrdering::SequentiallyConsistent;
6647 }
6648 }
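  // For example, '#pragma omp atomic compare capture fail(relaxed)' yields a
  // Monotonic failure ordering for the generated compare-exchange operation.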
6649
6650 LexicalScope Scope(*this, S.getSourceRange());
6651 EmitStopPoint(S: S.getAssociatedStmt());
6652 emitOMPAtomicExpr(CGF&: *this, Kind, AO, FailAO, IsPostfixUpdate: S.isPostfixUpdate(), X: S.getX(),
6653 V: S.getV(), R: S.getR(), E: S.getExpr(), UE: S.getUpdateExpr(),
6654 D: S.getD(), CE: S.getCondExpr(), IsXLHSInRHSPart: S.isXLHSInRHSPart(),
6655 IsFailOnly: S.isFailOnly(), Loc: S.getBeginLoc());
6656}
6657
6658static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
6659 const OMPExecutableDirective &S,
6660 const RegionCodeGenTy &CodeGen) {
6661 assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
6662 CodeGenModule &CGM = CGF.CGM;
6663
6664 // On device emit this construct as inlined code.
6665 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
6666 OMPLexicalScope Scope(CGF, S, OMPD_target);
6667 CGM.getOpenMPRuntime().emitInlinedDirective(
6668 CGF, InnermostKind: OMPD_target, CodeGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6669 CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
6670 });
6671 return;
6672 }
6673
6674 auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
6675 llvm::Function *Fn = nullptr;
6676 llvm::Constant *FnID = nullptr;
6677
6678 const Expr *IfCond = nullptr;
  // Check for the 'if' clause; at most one can be associated with the target
  // region.
6680 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
6681 if (C->getNameModifier() == OMPD_unknown ||
6682 C->getNameModifier() == OMPD_target) {
6683 IfCond = C->getCondition();
6684 break;
6685 }
6686 }
6687
6688 // Check if we have any device clause associated with the directive.
6689 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
6690 nullptr, OMPC_DEVICE_unknown);
6691 if (auto *C = S.getSingleClause<OMPDeviceClause>())
6692 Device.setPointerAndInt(PtrVal: C->getDevice(), IntVal: C->getModifier());
6693
  // Check if we have an 'if' clause whose condition always evaluates to false
  // or if we do not have any targets specified. If so, the target region is
  // not an offload entry point.
6697 bool IsOffloadEntry = true;
6698 if (IfCond) {
6699 bool Val;
6700 if (CGF.ConstantFoldsToSimpleInteger(Cond: IfCond, Result&: Val) && !Val)
6701 IsOffloadEntry = false;
6702 }
6703 if (CGM.getLangOpts().OMPTargetTriples.empty())
6704 IsOffloadEntry = false;
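  // For instance, '#pragma omp target if(0)' always falls back to the host,
  // so no offload entry is registered for it; the same applies when no
  // offload target triples were given on the command line.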
6705
6706 if (CGM.getLangOpts().OpenMPOffloadMandatory && !IsOffloadEntry) {
6707 unsigned DiagID = CGM.getDiags().getCustomDiagID(
6708 L: DiagnosticsEngine::Error,
6709 FormatString: "No offloading entry generated while offloading is mandatory.");
6710 CGM.getDiags().Report(DiagID);
6711 }
6712
6713 assert(CGF.CurFuncDecl && "No parent declaration for target region!");
6714 StringRef ParentName;
  // If the parent function is a constructor or destructor, use the
  // complete-object variant to produce the mangled name of the device
  // outlined kernel.
6717 if (const auto *D = dyn_cast<CXXConstructorDecl>(Val: CGF.CurFuncDecl))
6718 ParentName = CGM.getMangledName(GD: GlobalDecl(D, Ctor_Complete));
6719 else if (const auto *D = dyn_cast<CXXDestructorDecl>(Val: CGF.CurFuncDecl))
6720 ParentName = CGM.getMangledName(GD: GlobalDecl(D, Dtor_Complete));
6721 else
6722 ParentName =
6723 CGM.getMangledName(GD: GlobalDecl(cast<FunctionDecl>(Val: CGF.CurFuncDecl)));
6724
6725 // Emit target region as a standalone region.
6726 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: FnID,
6727 IsOffloadEntry, CodeGen);
6728 OMPLexicalScope Scope(CGF, S, OMPD_task);
6729 auto &&SizeEmitter =
6730 [IsOffloadEntry](CodeGenFunction &CGF,
6731 const OMPLoopDirective &D) -> llvm::Value * {
6732 if (IsOffloadEntry) {
      // Keep the loop scope alive while the iteration count is emitted; a
      // bare 'OMPLoopScope(CGF, D);' would destroy the temporary immediately.
      OMPLoopScope LoopScope(CGF, D);
6734 // Emit calculation of the iterations count.
6735 llvm::Value *NumIterations = CGF.EmitScalarExpr(E: D.getNumIterations());
6736 NumIterations = CGF.Builder.CreateIntCast(V: NumIterations, DestTy: CGF.Int64Ty,
6737 /*isSigned=*/false);
6738 return NumIterations;
6739 }
6740 return nullptr;
6741 };
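  // The emitted trip count accompanies the kernel launch so that the device
  // runtime can size the launch appropriately (e.g., choose a team count).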
6742 CGM.getOpenMPRuntime().emitTargetCall(CGF, D: S, OutlinedFn: Fn, OutlinedFnID: FnID, IfCond, Device,
6743 SizeEmitter);
6744}
6745
6746static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
6747 PrePostActionTy &Action) {
6748 Action.Enter(CGF);
6749 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6750 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
6751 CGF.EmitOMPPrivateClause(D: S, PrivateScope);
6752 (void)PrivateScope.Privatize();
6753 if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()))
6754 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S);
6755
6756 CGF.EmitStmt(S: S.getCapturedStmt(RegionKind: OMPD_target)->getCapturedStmt());
6757 CGF.EnsureInsertPoint();
6758}
6759
6760void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
6761 StringRef ParentName,
6762 const OMPTargetDirective &S) {
6763 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6764 emitTargetRegion(CGF, S, Action);
6765 };
6766 llvm::Function *Fn;
6767 llvm::Constant *Addr;
6768 // Emit target region as a standalone region.
6769 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6770 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
6771 assert(Fn && Addr && "Target device function emission failed.");
6772}
6773
6774void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
6775 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6776 emitTargetRegion(CGF, S, Action);
6777 };
6778 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
6779}
6780
6781static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
6782 const OMPExecutableDirective &S,
6783 OpenMPDirectiveKind InnermostKind,
6784 const RegionCodeGenTy &CodeGen) {
6785 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_teams);
6786 llvm::Function *OutlinedFn =
6787 CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
6788 CGF, D: S, ThreadIDVar: *CS->getCapturedDecl()->param_begin(), InnermostKind,
6789 CodeGen);
6790
6791 const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
6792 const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
6793 if (NT || TL) {
6794 const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
6795 const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;
6796
6797 CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
6798 Loc: S.getBeginLoc());
6799 }
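  // For example, '#pragma omp teams num_teams(4) thread_limit(64)' makes the
  // runtime bound the league size and the per-team thread count before the
  // outlined teams function is invoked.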
6800
6801 OMPTeamsScope Scope(CGF, S);
6802 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
6803 CGF.GenerateOpenMPCapturedVars(S: *CS, CapturedVars);
6804 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, D: S, Loc: S.getBeginLoc(), OutlinedFn,
6805 CapturedVars);
6806}
6807
6808void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
6809 // Emit teams region as a standalone region.
6810 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6811 Action.Enter(CGF);
6812 OMPPrivateScope PrivateScope(CGF);
6813 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
6814 CGF.EmitOMPPrivateClause(D: S, PrivateScope);
6815 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
6816 (void)PrivateScope.Privatize();
6817 CGF.EmitStmt(S: S.getCapturedStmt(RegionKind: OMPD_teams)->getCapturedStmt());
6818 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
6819 };
6820 emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute, CodeGen);
6821 emitPostUpdateForReductionClause(CGF&: *this, D: S,
6822 CondGen: [](CodeGenFunction &) { return nullptr; });
6823}
6824
6825static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
6826 const OMPTargetTeamsDirective &S) {
6827 auto *CS = S.getCapturedStmt(RegionKind: OMPD_teams);
6828 Action.Enter(CGF);
6829 // Emit teams region as a standalone region.
6830 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
6831 Action.Enter(CGF);
6832 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6833 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
6834 CGF.EmitOMPPrivateClause(D: S, PrivateScope);
6835 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
6836 (void)PrivateScope.Privatize();
6837 if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()))
6838 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S);
6839 CGF.EmitStmt(S: CS->getCapturedStmt());
6840 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
6841 };
6842 emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_teams, CodeGen);
6843 emitPostUpdateForReductionClause(CGF, D: S,
6844 CondGen: [](CodeGenFunction &) { return nullptr; });
6845}
6846
6847void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
6848 CodeGenModule &CGM, StringRef ParentName,
6849 const OMPTargetTeamsDirective &S) {
6850 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6851 emitTargetTeamsRegion(CGF, Action, S);
6852 };
6853 llvm::Function *Fn;
6854 llvm::Constant *Addr;
6855 // Emit target region as a standalone region.
6856 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6857 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
6858 assert(Fn && Addr && "Target device function emission failed.");
6859}
6860
6861void CodeGenFunction::EmitOMPTargetTeamsDirective(
6862 const OMPTargetTeamsDirective &S) {
6863 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6864 emitTargetTeamsRegion(CGF, Action, S);
6865 };
6866 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
6867}
6868
6869static void
6870emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
6871 const OMPTargetTeamsDistributeDirective &S) {
6872 Action.Enter(CGF);
6873 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6874 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
6875 };
6876
6877 // Emit teams region as a standalone region.
6878 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6879 PrePostActionTy &Action) {
6880 Action.Enter(CGF);
6881 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6882 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
6883 (void)PrivateScope.Privatize();
6884 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute,
6885 CodeGen: CodeGenDistribute);
6886 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
6887 };
6888 emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute, CodeGen);
6889 emitPostUpdateForReductionClause(CGF, D: S,
6890 CondGen: [](CodeGenFunction &) { return nullptr; });
6891}
6892
6893void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
6894 CodeGenModule &CGM, StringRef ParentName,
6895 const OMPTargetTeamsDistributeDirective &S) {
6896 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6897 emitTargetTeamsDistributeRegion(CGF, Action, S);
6898 };
6899 llvm::Function *Fn;
6900 llvm::Constant *Addr;
6901 // Emit target region as a standalone region.
6902 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6903 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
6904 assert(Fn && Addr && "Target device function emission failed.");
6905}
6906
6907void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
6908 const OMPTargetTeamsDistributeDirective &S) {
6909 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6910 emitTargetTeamsDistributeRegion(CGF, Action, S);
6911 };
6912 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
6913}
6914
6915static void emitTargetTeamsDistributeSimdRegion(
6916 CodeGenFunction &CGF, PrePostActionTy &Action,
6917 const OMPTargetTeamsDistributeSimdDirective &S) {
6918 Action.Enter(CGF);
6919 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6920 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
6921 };
6922
6923 // Emit teams region as a standalone region.
6924 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6925 PrePostActionTy &Action) {
6926 Action.Enter(CGF);
6927 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6928 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
6929 (void)PrivateScope.Privatize();
6930 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute,
6931 CodeGen: CodeGenDistribute);
6932 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
6933 };
6934 emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute_simd, CodeGen);
6935 emitPostUpdateForReductionClause(CGF, D: S,
6936 CondGen: [](CodeGenFunction &) { return nullptr; });
6937}
6938
6939void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
6940 CodeGenModule &CGM, StringRef ParentName,
6941 const OMPTargetTeamsDistributeSimdDirective &S) {
6942 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6943 emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
6944 };
6945 llvm::Function *Fn;
6946 llvm::Constant *Addr;
6947 // Emit target region as a standalone region.
6948 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6949 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
6950 assert(Fn && Addr && "Target device function emission failed.");
6951}
6952
6953void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
6954 const OMPTargetTeamsDistributeSimdDirective &S) {
6955 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6956 emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
6957 };
6958 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
6959}
6960
6961void CodeGenFunction::EmitOMPTeamsDistributeDirective(
6962 const OMPTeamsDistributeDirective &S) {
6964 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6965 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
6966 };
6967
6968 // Emit teams region as a standalone region.
6969 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6970 PrePostActionTy &Action) {
6971 Action.Enter(CGF);
6972 OMPPrivateScope PrivateScope(CGF);
6973 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
6974 (void)PrivateScope.Privatize();
6975 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute,
6976 CodeGen: CodeGenDistribute);
6977 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
6978 };
6979 emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute, CodeGen);
6980 emitPostUpdateForReductionClause(CGF&: *this, D: S,
6981 CondGen: [](CodeGenFunction &) { return nullptr; });
6982}
6983
6984void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
6985 const OMPTeamsDistributeSimdDirective &S) {
6986 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6987 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
6988 };
6989
6990 // Emit teams region as a standalone region.
6991 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6992 PrePostActionTy &Action) {
6993 Action.Enter(CGF);
6994 OMPPrivateScope PrivateScope(CGF);
6995 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
6996 (void)PrivateScope.Privatize();
6997 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_simd,
6998 CodeGen: CodeGenDistribute);
6999 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7000 };
7001 emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute_simd, CodeGen);
7002 emitPostUpdateForReductionClause(CGF&: *this, D: S,
7003 CondGen: [](CodeGenFunction &) { return nullptr; });
7004}
7005
7006void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
7007 const OMPTeamsDistributeParallelForDirective &S) {
7008 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7009 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
7010 IncExpr: S.getDistInc());
7011 };
7012
7013 // Emit teams region as a standalone region.
7014 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7015 PrePostActionTy &Action) {
7016 Action.Enter(CGF);
7017 OMPPrivateScope PrivateScope(CGF);
7018 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7019 (void)PrivateScope.Privatize();
7020 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute,
7021 CodeGen: CodeGenDistribute);
7022 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7023 };
7024 emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute_parallel_for, CodeGen);
7025 emitPostUpdateForReductionClause(CGF&: *this, D: S,
7026 CondGen: [](CodeGenFunction &) { return nullptr; });
7027}
7028
7029void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
7030 const OMPTeamsDistributeParallelForSimdDirective &S) {
7031 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7032 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
7033 IncExpr: S.getDistInc());
7034 };
7035
7036 // Emit teams region as a standalone region.
7037 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7038 PrePostActionTy &Action) {
7039 Action.Enter(CGF);
7040 OMPPrivateScope PrivateScope(CGF);
7041 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7042 (void)PrivateScope.Privatize();
7043 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
7044 CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false);
7045 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7046 };
7047 emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute_parallel_for_simd,
7048 CodeGen);
7049 emitPostUpdateForReductionClause(CGF&: *this, D: S,
7050 CondGen: [](CodeGenFunction &) { return nullptr; });
7051}
7052
7053void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective &S) {
7054 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
7055 llvm::Value *Device = nullptr;
7056 llvm::Value *NumDependences = nullptr;
7057 llvm::Value *DependenceList = nullptr;
7058
7059 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7060 Device = EmitScalarExpr(E: C->getDevice());
7061
7062 // Build list and emit dependences
7063 OMPTaskDataTy Data;
7064 buildDependences(S, Data);
7065 if (!Data.Dependences.empty()) {
7066 Address DependenciesArray = Address::invalid();
7067 std::tie(args&: NumDependences, args&: DependenciesArray) =
7068 CGM.getOpenMPRuntime().emitDependClause(CGF&: *this, Dependencies: Data.Dependences,
7069 Loc: S.getBeginLoc());
7070 DependenceList = DependenciesArray.emitRawPointer(CGF&: *this);
7071 }
7072 Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();
7073
  assert(!(Data.HasNowaitClause && !(S.getSingleClause<OMPInitClause>() ||
                                     S.getSingleClause<OMPDestroyClause>() ||
                                     S.getSingleClause<OMPUseClause>())) &&
         "The 'nowait' clause on an interop directive requires an 'init', "
         "'use', or 'destroy' clause.");
7078
7079 auto ItOMPInitClause = S.getClausesOfKind<OMPInitClause>();
7080 if (!ItOMPInitClause.empty()) {
7081 // Look at the multiple init clauses
7082 for (const OMPInitClause *C : ItOMPInitClause) {
7083 llvm::Value *InteropvarPtr =
7084 EmitLValue(E: C->getInteropVar()).getPointer(CGF&: *this);
7085 llvm::omp::OMPInteropType InteropType =
7086 llvm::omp::OMPInteropType::Unknown;
7087 if (C->getIsTarget()) {
7088 InteropType = llvm::omp::OMPInteropType::Target;
7089 } else {
7090 assert(C->getIsTargetSync() &&
7091 "Expected interop-type target/targetsync");
7092 InteropType = llvm::omp::OMPInteropType::TargetSync;
7093 }
7094 OMPBuilder.createOMPInteropInit(Loc: Builder, InteropVar: InteropvarPtr, InteropType,
7095 Device, NumDependences, DependenceAddress: DependenceList,
7096 HaveNowaitClause: Data.HasNowaitClause);
7097 }
7098 }
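  // For example, '#pragma omp interop init(targetsync : obj) device(1) nowait'
  // results in a single interop-init runtime call carrying the device number,
  // the dependence list (null when no 'depend' clause is present), and the
  // nowait flag.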
7099 auto ItOMPDestroyClause = S.getClausesOfKind<OMPDestroyClause>();
7100 if (!ItOMPDestroyClause.empty()) {
7101 // Look at the multiple destroy clauses
7102 for (const OMPDestroyClause *C : ItOMPDestroyClause) {
7103 llvm::Value *InteropvarPtr =
7104 EmitLValue(E: C->getInteropVar()).getPointer(CGF&: *this);
7105 OMPBuilder.createOMPInteropDestroy(Loc: Builder, InteropVar: InteropvarPtr, Device,
7106 NumDependences, DependenceAddress: DependenceList,
7107 HaveNowaitClause: Data.HasNowaitClause);
7108 }
7109 }
7110 auto ItOMPUseClause = S.getClausesOfKind<OMPUseClause>();
7111 if (!ItOMPUseClause.empty()) {
7112 // Look at the multiple use clauses
7113 for (const OMPUseClause *C : ItOMPUseClause) {
7114 llvm::Value *InteropvarPtr =
7115 EmitLValue(E: C->getInteropVar()).getPointer(CGF&: *this);
7116 OMPBuilder.createOMPInteropUse(Loc: Builder, InteropVar: InteropvarPtr, Device,
7117 NumDependences, DependenceAddress: DependenceList,
7118 HaveNowaitClause: Data.HasNowaitClause);
7119 }
7120 }
7121}
7122
7123static void emitTargetTeamsDistributeParallelForRegion(
7124 CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
7125 PrePostActionTy &Action) {
7126 Action.Enter(CGF);
7127 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7128 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
7129 IncExpr: S.getDistInc());
7130 };
7131
7132 // Emit teams region as a standalone region.
7133 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7134 PrePostActionTy &Action) {
7135 Action.Enter(CGF);
7136 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7137 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7138 (void)PrivateScope.Privatize();
7139 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
7140 CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false);
7141 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7142 };
7143
7144 emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute_parallel_for,
7145 CodeGen: CodeGenTeams);
7146 emitPostUpdateForReductionClause(CGF, D: S,
7147 CondGen: [](CodeGenFunction &) { return nullptr; });
7148}
7149
7150void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
7151 CodeGenModule &CGM, StringRef ParentName,
7152 const OMPTargetTeamsDistributeParallelForDirective &S) {
7153 // Emit SPMD target teams distribute parallel for region as a standalone
7154 // region.
7155 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7156 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
7157 };
7158 llvm::Function *Fn;
7159 llvm::Constant *Addr;
7160 // Emit target region as a standalone region.
7161 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7162 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7163 assert(Fn && Addr && "Target device function emission failed.");
7164}
7165
7166void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
7167 const OMPTargetTeamsDistributeParallelForDirective &S) {
7168 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7169 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
7170 };
7171 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7172}
7173
7174static void emitTargetTeamsDistributeParallelForSimdRegion(
7175 CodeGenFunction &CGF,
7176 const OMPTargetTeamsDistributeParallelForSimdDirective &S,
7177 PrePostActionTy &Action) {
7178 Action.Enter(CGF);
7179 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7180 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
7181 IncExpr: S.getDistInc());
7182 };
7183
7184 // Emit teams region as a standalone region.
7185 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7186 PrePostActionTy &Action) {
7187 Action.Enter(CGF);
7188 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7189 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7190 (void)PrivateScope.Privatize();
7191 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
7192 CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false);
7193 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7194 };
7195
7196 emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute_parallel_for_simd,
7197 CodeGen: CodeGenTeams);
7198 emitPostUpdateForReductionClause(CGF, D: S,
7199 CondGen: [](CodeGenFunction &) { return nullptr; });
7200}
7201
7202void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
7203 CodeGenModule &CGM, StringRef ParentName,
7204 const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
7205 // Emit SPMD target teams distribute parallel for simd region as a standalone
7206 // region.
7207 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7208 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
7209 };
7210 llvm::Function *Fn;
7211 llvm::Constant *Addr;
7212 // Emit target region as a standalone region.
7213 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7214 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7215 assert(Fn && Addr && "Target device function emission failed.");
7216}
7217
7218void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
7219 const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
7220 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7221 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
7222 };
7223 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7224}
7225
7226void CodeGenFunction::EmitOMPCancellationPointDirective(
7227 const OMPCancellationPointDirective &S) {
7228 CGM.getOpenMPRuntime().emitCancellationPointCall(CGF&: *this, Loc: S.getBeginLoc(),
7229 CancelRegion: S.getCancelRegion());
7230}
7231
7232void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
7233 const Expr *IfCond = nullptr;
7234 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
7235 if (C->getNameModifier() == OMPD_unknown ||
7236 C->getNameModifier() == OMPD_cancel) {
7237 IfCond = C->getCondition();
7238 break;
7239 }
7240 }
7241 if (CGM.getLangOpts().OpenMPIRBuilder) {
7242 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
7243 // TODO: This check is necessary as we only generate `omp parallel` through
7244 // the OpenMPIRBuilder for now.
7245 if (S.getCancelRegion() == OMPD_parallel ||
7246 S.getCancelRegion() == OMPD_sections ||
7247 S.getCancelRegion() == OMPD_section) {
7248 llvm::Value *IfCondition = nullptr;
7249 if (IfCond)
7250 IfCondition = EmitScalarExpr(E: IfCond,
7251 /*IgnoreResultAssign=*/true);
7252 return Builder.restoreIP(
7253 IP: OMPBuilder.createCancel(Loc: Builder, IfCondition, CanceledDirective: S.getCancelRegion()));
7254 }
7255 }
7256
7257 CGM.getOpenMPRuntime().emitCancelCall(CGF&: *this, Loc: S.getBeginLoc(), IfCond,
7258 CancelRegion: S.getCancelRegion());
7259}
7260
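// Cancellation in task-like regions unwinds to the function exit; cancellation
// in worksharing regions branches to the exit block installed by the enclosing
// OMPCancelStackRAII.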
7261CodeGenFunction::JumpDest
7262CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
7263 if (Kind == OMPD_parallel || Kind == OMPD_task ||
7264 Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
7265 Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
7266 return ReturnBlock;
7267 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
7268 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
7269 Kind == OMPD_distribute_parallel_for ||
7270 Kind == OMPD_target_parallel_for ||
7271 Kind == OMPD_teams_distribute_parallel_for ||
7272 Kind == OMPD_target_teams_distribute_parallel_for);
7273 return OMPCancelStack.getExitBlock();
7274}
7275
7276void CodeGenFunction::EmitOMPUseDevicePtrClause(
7277 const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
7278 const llvm::DenseMap<const ValueDecl *, llvm::Value *>
7279 CaptureDeviceAddrMap) {
7280 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
7281 for (const Expr *OrigVarIt : C.varlists()) {
7282 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: OrigVarIt)->getDecl());
7283 if (!Processed.insert(V: OrigVD).second)
7284 continue;
7285
7286 // In order to identify the right initializer we need to match the
7287 // declaration used by the mapping logic. In some cases we may get
7288 // OMPCapturedExprDecl that refers to the original declaration.
7289 const ValueDecl *MatchingVD = OrigVD;
7290 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: MatchingVD)) {
      // OMPCapturedExprDecls are used to privatize fields of the current
      // structure.
7293 const auto *ME = cast<MemberExpr>(Val: OED->getInit());
7294 assert(isa<CXXThisExpr>(ME->getBase()->IgnoreImpCasts()) &&
7295 "Base should be the current struct!");
7296 MatchingVD = ME->getMemberDecl();
7297 }
7298
7299 // If we don't have information about the current list item, move on to
7300 // the next one.
7301 auto InitAddrIt = CaptureDeviceAddrMap.find(Val: MatchingVD);
7302 if (InitAddrIt == CaptureDeviceAddrMap.end())
7303 continue;
7304
7305 llvm::Type *Ty = ConvertTypeForMem(T: OrigVD->getType().getNonReferenceType());
7306
7307 // Return the address of the private variable.
7308 bool IsRegistered = PrivateScope.addPrivate(
7309 LocalVD: OrigVD,
7310 Addr: Address(InitAddrIt->second, Ty,
7311 getContext().getTypeAlignInChars(T: getContext().VoidPtrTy)));
7312 assert(IsRegistered && "firstprivate var already registered as private");
7313 // Silence the warning about unused variable.
7314 (void)IsRegistered;
7315 }
7316}
7317
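// Return the base variable of the given expression, e.g., for a list item
// such as 'a[0][1:n]' this strips the array-subscript and array-section
// expressions and yields the VarDecl of 'a'.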
7318static const VarDecl *getBaseDecl(const Expr *Ref) {
7319 const Expr *Base = Ref->IgnoreParenImpCasts();
7320 while (const auto *OASE = dyn_cast<ArraySectionExpr>(Val: Base))
7321 Base = OASE->getBase()->IgnoreParenImpCasts();
7322 while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Val: Base))
7323 Base = ASE->getBase()->IgnoreParenImpCasts();
7324 return cast<VarDecl>(Val: cast<DeclRefExpr>(Val: Base)->getDecl());
7325}
7326
7327void CodeGenFunction::EmitOMPUseDeviceAddrClause(
7328 const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
7329 const llvm::DenseMap<const ValueDecl *, llvm::Value *>
7330 CaptureDeviceAddrMap) {
7331 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
7332 for (const Expr *Ref : C.varlists()) {
7333 const VarDecl *OrigVD = getBaseDecl(Ref);
7334 if (!Processed.insert(V: OrigVD).second)
7335 continue;
7336 // In order to identify the right initializer we need to match the
7337 // declaration used by the mapping logic. In some cases we may get
7338 // OMPCapturedExprDecl that refers to the original declaration.
7339 const ValueDecl *MatchingVD = OrigVD;
7340 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: MatchingVD)) {
      // OMPCapturedExprDecls are used to privatize fields of the current
      // structure.
7343 const auto *ME = cast<MemberExpr>(Val: OED->getInit());
7344 assert(isa<CXXThisExpr>(ME->getBase()) &&
7345 "Base should be the current struct!");
7346 MatchingVD = ME->getMemberDecl();
7347 }
7348
7349 // If we don't have information about the current list item, move on to
7350 // the next one.
7351 auto InitAddrIt = CaptureDeviceAddrMap.find(Val: MatchingVD);
7352 if (InitAddrIt == CaptureDeviceAddrMap.end())
7353 continue;
7354
7355 llvm::Type *Ty = ConvertTypeForMem(T: OrigVD->getType().getNonReferenceType());
7356
7357 Address PrivAddr =
7358 Address(InitAddrIt->second, Ty,
7359 getContext().getTypeAlignInChars(T: getContext().VoidPtrTy));
    // For DeclRefExprs and variable-length arrays we need to load the pointer
    // to get the correct mapping, since the pointer to the data itself was
    // passed to the runtime.
7362 if (isa<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts()) ||
7363 MatchingVD->getType()->isArrayType()) {
7364 QualType PtrTy = getContext().getPointerType(
7365 T: OrigVD->getType().getNonReferenceType());
7366 PrivAddr =
7367 EmitLoadOfPointer(Ptr: PrivAddr.withElementType(ElemTy: ConvertTypeForMem(T: PtrTy)),
7368 PtrTy: PtrTy->castAs<PointerType>());
7369 }
7370
7371 (void)PrivateScope.addPrivate(LocalVD: OrigVD, Addr: PrivAddr);
7372 }
7373}
7374
7375// Generate the instructions for '#pragma omp target data' directive.
7376void CodeGenFunction::EmitOMPTargetDataDirective(
7377 const OMPTargetDataDirective &S) {
7378 CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
7379 /*SeparateBeginEndCalls=*/true);
7380
7381 // Create a pre/post action to signal the privatization of the device pointer.
7382 // This action can be replaced by the OpenMP runtime code generation to
7383 // deactivate privatization.
7384 bool PrivatizeDevicePointers = false;
7385 class DevicePointerPrivActionTy : public PrePostActionTy {
7386 bool &PrivatizeDevicePointers;
7387
7388 public:
7389 explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
7390 : PrivatizeDevicePointers(PrivatizeDevicePointers) {}
7391 void Enter(CodeGenFunction &CGF) override {
7392 PrivatizeDevicePointers = true;
7393 }
7394 };
7395 DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
7396
7397 auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
7398 auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7399 CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
7400 };
7401
7402 // Codegen that selects whether to generate the privatization code or not.
7403 auto &&PrivCodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
7404 RegionCodeGenTy RCG(InnermostCodeGen);
7405 PrivatizeDevicePointers = false;
7406
7407 // Call the pre-action to change the status of PrivatizeDevicePointers if
7408 // needed.
7409 Action.Enter(CGF);
7410
7411 if (PrivatizeDevicePointers) {
7412 OMPPrivateScope PrivateScope(CGF);
7413 // Emit all instances of the use_device_ptr clause.
7414 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
7415 CGF.EmitOMPUseDevicePtrClause(C: *C, PrivateScope,
7416 CaptureDeviceAddrMap: Info.CaptureDeviceAddrMap);
7417 for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
7418 CGF.EmitOMPUseDeviceAddrClause(C: *C, PrivateScope,
7419 CaptureDeviceAddrMap: Info.CaptureDeviceAddrMap);
7420 (void)PrivateScope.Privatize();
7421 RCG(CGF);
7422 } else {
7423 // If we don't have target devices, don't bother emitting the data
7424 // mapping code.
7425 std::optional<OpenMPDirectiveKind> CaptureRegion;
7426 if (CGM.getLangOpts().OMPTargetTriples.empty()) {
7427 // Emit helper decls of the use_device_ptr/use_device_addr clauses.
7428 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
7429 for (const Expr *E : C->varlists()) {
7430 const Decl *D = cast<DeclRefExpr>(Val: E)->getDecl();
7431 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: D))
7432 CGF.EmitVarDecl(D: *OED);
7433 }
7434 for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
7435 for (const Expr *E : C->varlists()) {
7436 const Decl *D = getBaseDecl(Ref: E);
7437 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: D))
7438 CGF.EmitVarDecl(D: *OED);
7439 }
7440 } else {
7441 CaptureRegion = OMPD_unknown;
7442 }
7443
7444 OMPLexicalScope Scope(CGF, S, CaptureRegion);
7445 RCG(CGF);
7446 }
7447 };
7448
7449 // Forward the provided action to the privatization codegen.
7450 RegionCodeGenTy PrivRCG(PrivCodeGen);
7451 PrivRCG.setAction(Action);
7452
    // Although the body of the region is emitted as an inlined directive, we
    // don't use an inline scope: changes to the references inside the region
    // are expected to be visible outside, so we do not privatize them.
7456 OMPLexicalScope Scope(CGF, S);
7457 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_target_data,
7458 CodeGen: PrivRCG);
7459 };
7460
7461 RegionCodeGenTy RCG(CodeGen);
7462
7463 // If we don't have target devices, don't bother emitting the data mapping
7464 // code.
7465 if (CGM.getLangOpts().OMPTargetTriples.empty()) {
7466 RCG(*this);
7467 return;
7468 }
7469
7470 // Check if we have any if clause associated with the directive.
7471 const Expr *IfCond = nullptr;
7472 if (const auto *C = S.getSingleClause<OMPIfClause>())
7473 IfCond = C->getCondition();
7474
7475 // Check if we have any device clause associated with the directive.
7476 const Expr *Device = nullptr;
7477 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7478 Device = C->getDevice();
7479
7480 // Set the action to signal privatization of device pointers.
7481 RCG.setAction(PrivAction);
7482
7483 // Emit region code.
7484 CGM.getOpenMPRuntime().emitTargetDataCalls(CGF&: *this, D: S, IfCond, Device, CodeGen: RCG,
7485 Info);
7486}
7487
7488void CodeGenFunction::EmitOMPTargetEnterDataDirective(
7489 const OMPTargetEnterDataDirective &S) {
7490 // If we don't have target devices, don't bother emitting the data mapping
7491 // code.
7492 if (CGM.getLangOpts().OMPTargetTriples.empty())
7493 return;
7494
7495 // Check if we have any if clause associated with the directive.
7496 const Expr *IfCond = nullptr;
7497 if (const auto *C = S.getSingleClause<OMPIfClause>())
7498 IfCond = C->getCondition();
7499
7500 // Check if we have any device clause associated with the directive.
7501 const Expr *Device = nullptr;
7502 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7503 Device = C->getDevice();
7504
7505 OMPLexicalScope Scope(*this, S, OMPD_task);
7506 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF&: *this, D: S, IfCond, Device);
7507}
7508
7509void CodeGenFunction::EmitOMPTargetExitDataDirective(
7510 const OMPTargetExitDataDirective &S) {
7511 // If we don't have target devices, don't bother emitting the data mapping
7512 // code.
7513 if (CGM.getLangOpts().OMPTargetTriples.empty())
7514 return;
7515
7516 // Check if we have any if clause associated with the directive.
7517 const Expr *IfCond = nullptr;
7518 if (const auto *C = S.getSingleClause<OMPIfClause>())
7519 IfCond = C->getCondition();
7520
7521 // Check if we have any device clause associated with the directive.
7522 const Expr *Device = nullptr;
7523 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7524 Device = C->getDevice();
7525
7526 OMPLexicalScope Scope(*this, S, OMPD_task);
7527 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF&: *this, D: S, IfCond, Device);
7528}
7529
7530static void emitTargetParallelRegion(CodeGenFunction &CGF,
7531 const OMPTargetParallelDirective &S,
7532 PrePostActionTy &Action) {
7533 // Get the captured statement associated with the 'parallel' region.
7534 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_parallel);
7535 Action.Enter(CGF);
7536 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
7537 Action.Enter(CGF);
7538 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7539 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
7540 CGF.EmitOMPPrivateClause(D: S, PrivateScope);
7541 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7542 (void)PrivateScope.Privatize();
7543 if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()))
7544 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S);
7545 // TODO: Add support for clauses.
7546 CGF.EmitStmt(S: CS->getCapturedStmt());
7547 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
7548 };
7549 emitCommonOMPParallelDirective(CGF, S, InnermostKind: OMPD_parallel, CodeGen,
7550 CodeGenBoundParameters: emitEmptyBoundParameters);
7551 emitPostUpdateForReductionClause(CGF, D: S,
7552 CondGen: [](CodeGenFunction &) { return nullptr; });
7553}
7554
7555void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
7556 CodeGenModule &CGM, StringRef ParentName,
7557 const OMPTargetParallelDirective &S) {
7558 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7559 emitTargetParallelRegion(CGF, S, Action);
7560 };
7561 llvm::Function *Fn;
7562 llvm::Constant *Addr;
7563 // Emit target region as a standalone region.
7564 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7565 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7566 assert(Fn && Addr && "Target device function emission failed.");
7567}
7568
7569void CodeGenFunction::EmitOMPTargetParallelDirective(
7570 const OMPTargetParallelDirective &S) {
7571 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7572 emitTargetParallelRegion(CGF, S, Action);
7573 };
7574 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7575}
7576
7577static void emitTargetParallelForRegion(CodeGenFunction &CGF,
7578 const OMPTargetParallelForDirective &S,
7579 PrePostActionTy &Action) {
7580 Action.Enter(CGF);
  // Emit the directive as a combined directive consisting of two implicit
  // directives: 'parallel' and 'for'.
7583 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7584 Action.Enter(CGF);
7585 CodeGenFunction::OMPCancelStackRAII CancelRegion(
7586 CGF, OMPD_target_parallel_for, S.hasCancel());
7587 CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), CodeGenLoopBounds: emitForLoopBounds,
7588 CGDispatchBounds: emitDispatchForLoopBounds);
7589 };
7590 emitCommonOMPParallelDirective(CGF, S, InnermostKind: OMPD_for, CodeGen,
7591 CodeGenBoundParameters: emitEmptyBoundParameters);
7592}
7593
7594void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
7595 CodeGenModule &CGM, StringRef ParentName,
7596 const OMPTargetParallelForDirective &S) {
7597 // Emit SPMD target parallel for region as a standalone region.
7598 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7599 emitTargetParallelForRegion(CGF, S, Action);
7600 };
7601 llvm::Function *Fn;
7602 llvm::Constant *Addr;
7603 // Emit target region as a standalone region.
7604 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7605 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7606 assert(Fn && Addr && "Target device function emission failed.");
7607}
7608
7609void CodeGenFunction::EmitOMPTargetParallelForDirective(
7610 const OMPTargetParallelForDirective &S) {
7611 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7612 emitTargetParallelForRegion(CGF, S, Action);
7613 };
7614 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7615}
7616
7617static void
7618emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
7619 const OMPTargetParallelForSimdDirective &S,
7620 PrePostActionTy &Action) {
7621 Action.Enter(CGF);
  // Emit the directive as a combined directive consisting of two implicit
  // directives: 'parallel' and 'for'.
7624 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7625 Action.Enter(CGF);
7626 CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), CodeGenLoopBounds: emitForLoopBounds,
7627 CGDispatchBounds: emitDispatchForLoopBounds);
7628 };
7629 emitCommonOMPParallelDirective(CGF, S, InnermostKind: OMPD_simd, CodeGen,
7630 CodeGenBoundParameters: emitEmptyBoundParameters);
7631}
7632
7633void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
7634 CodeGenModule &CGM, StringRef ParentName,
7635 const OMPTargetParallelForSimdDirective &S) {
7636 // Emit SPMD target parallel for region as a standalone region.
7637 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7638 emitTargetParallelForSimdRegion(CGF, S, Action);
7639 };
7640 llvm::Function *Fn;
7641 llvm::Constant *Addr;
7642 // Emit target region as a standalone region.
7643 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7644 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7645 assert(Fn && Addr && "Target device function emission failed.");
7646}
7647
7648void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
7649 const OMPTargetParallelForSimdDirective &S) {
7650 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7651 emitTargetParallelForSimdRegion(CGF, S, Action);
7652 };
7653 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7654}
7655
/// Map a loop helper variable to the corresponding implicit parameter of the
/// outlined function within the given privates scope.
7657static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
7658 const ImplicitParamDecl *PVD,
7659 CodeGenFunction::OMPPrivateScope &Privates) {
7660 const auto *VDecl = cast<VarDecl>(Val: Helper->getDecl());
7661 Privates.addPrivate(LocalVD: VDecl, Addr: CGF.GetAddrOfLocalVar(VD: PVD));
7662}
7663
7664void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
7665 assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
7666 // Emit outlined function for task construct.
7667 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_taskloop);
7668 Address CapturedStruct = Address::invalid();
7669 {
7670 OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
7671 CapturedStruct = GenerateCapturedStmtArgument(S: *CS);
7672 }
7673 QualType SharedsTy = getContext().getRecordType(Decl: CS->getCapturedRecordDecl());
7674 const Expr *IfCond = nullptr;
7675 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
7676 if (C->getNameModifier() == OMPD_unknown ||
7677 C->getNameModifier() == OMPD_taskloop) {
7678 IfCond = C->getCondition();
7679 break;
7680 }
7681 }
7682
7683 OMPTaskDataTy Data;
7684 // Check if taskloop must be emitted without taskgroup.
7685 Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
7686 // TODO: Check if we should emit tied or untied task.
7687 Data.Tied = true;
7688 // Set scheduling for taskloop
7689 if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
7690 // grainsize clause
7691 Data.Schedule.setInt(/*IntVal=*/false);
7692 Data.Schedule.setPointer(EmitScalarExpr(E: Clause->getGrainsize()));
7693 } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
7694 // num_tasks clause
7695 Data.Schedule.setInt(/*IntVal=*/true);
7696 Data.Schedule.setPointer(EmitScalarExpr(E: Clause->getNumTasks()));
7697 }
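  // E.g., '#pragma omp taskloop grainsize(4)' stores the value 4 with the
  // discriminator bit cleared, while 'num_tasks(8)' stores 8 with the bit
  // set, letting the runtime tell the two scheduling modes apart.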
7698
7699 auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
7700 // if (PreCond) {
7701 // for (IV in 0..LastIteration) BODY;
7702 // <Final counter/linear vars updates>;
7703 // }
7704 //
7705
7706 // Emit: if (PreCond) - begin.
7707 // If the condition constant folds and can be elided, avoid emitting the
7708 // whole loop.
7709 bool CondConstant;
7710 llvm::BasicBlock *ContBlock = nullptr;
7711 OMPLoopScope PreInitScope(CGF, S);
7712 if (CGF.ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) {
7713 if (!CondConstant)
7714 return;
7715 } else {
7716 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock(name: "taskloop.if.then");
7717 ContBlock = CGF.createBasicBlock(name: "taskloop.if.end");
7718 emitPreCond(CGF, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock,
7719 TrueCount: CGF.getProfileCount(S: &S));
7720 CGF.EmitBlock(BB: ThenBlock);
7721 CGF.incrementProfileCounter(S: &S);
7722 }
7723
7724 (void)CGF.EmitOMPLinearClauseInit(D: S);
7725
7726 OMPPrivateScope LoopScope(CGF);
7727 // Emit helper vars inits.
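    // Indices of the implicit parameters that carry the lower bound, upper
    // bound, stride, and last-iteration flag within the captured
    // declaration's parameter list (see the std::next calls below).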
7728 enum { LowerBound = 5, UpperBound, Stride, LastIter };
7729 auto *I = CS->getCapturedDecl()->param_begin();
7730 auto *LBP = std::next(x: I, n: LowerBound);
7731 auto *UBP = std::next(x: I, n: UpperBound);
7732 auto *STP = std::next(x: I, n: Stride);
7733 auto *LIP = std::next(x: I, n: LastIter);
7734 mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getLowerBoundVariable()), PVD: *LBP,
7735 Privates&: LoopScope);
7736 mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getUpperBoundVariable()), PVD: *UBP,
7737 Privates&: LoopScope);
7738 mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getStrideVariable()), PVD: *STP, Privates&: LoopScope);
7739 mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getIsLastIterVariable()), PVD: *LIP,
7740 Privates&: LoopScope);
7741 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
7742 CGF.EmitOMPLinearClause(D: S, PrivateScope&: LoopScope);
7743 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope);
7744 (void)LoopScope.Privatize();
7745 // Emit the loop iteration variable.
7746 const Expr *IVExpr = S.getIterationVariable();
7747 const auto *IVDecl = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IVExpr)->getDecl());
7748 CGF.EmitVarDecl(D: *IVDecl);
7749 CGF.EmitIgnoredExpr(E: S.getInit());
7750
7751 // Emit the iterations count variable.
    // If it is not a variable, Sema decided to calculate the iterations count
    // on each iteration (e.g., it is foldable into a constant).
7754 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) {
7755 CGF.EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl()));
7756 // Emit calculation of the iterations count.
7757 CGF.EmitIgnoredExpr(E: S.getCalcLastIteration());
7758 }
7759
7760 {
7761 OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
7762 emitCommonSimdLoop(
7763 CGF, S,
7764 SimdInitGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7765 if (isOpenMPSimdDirective(DKind: S.getDirectiveKind()))
7766 CGF.EmitOMPSimdInit(D: S);
7767 },
7768 BodyCodeGen: [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
7769 CGF.EmitOMPInnerLoop(
7770 S, RequiresCleanup: LoopScope.requiresCleanups(), LoopCond: S.getCond(), IncExpr: S.getInc(),
7771 BodyGen: [&S](CodeGenFunction &CGF) {
7772 emitOMPLoopBodyWithStopPoint(CGF, S,
7773 LoopExit: CodeGenFunction::JumpDest());
7774 },
7775 PostIncGen: [](CodeGenFunction &) {});
7776 });
7777 }
7778 // Emit: if (PreCond) - end.
7779 if (ContBlock) {
7780 CGF.EmitBranch(Block: ContBlock);
7781 CGF.EmitBlock(BB: ContBlock, IsFinished: true);
7782 }
7783 // Emit final copy of the lastprivate variables if IsLastIter != 0.
7784 if (HasLastprivateClause) {
7785 CGF.EmitOMPLastprivateClauseFinal(
7786 D: S, NoFinals: isOpenMPSimdDirective(DKind: S.getDirectiveKind()),
7787 IsLastIterCond: CGF.Builder.CreateIsNotNull(Arg: CGF.EmitLoadOfScalar(
7788 Addr: CGF.GetAddrOfLocalVar(VD: *LIP), /*Volatile=*/false,
7789 Ty: (*LIP)->getType(), Loc: S.getBeginLoc())));
7790 }
7791 LoopScope.restoreMap();
7792 CGF.EmitOMPLinearClauseFinal(D: S, CondGen: [LIP, &S](CodeGenFunction &CGF) {
7793 return CGF.Builder.CreateIsNotNull(
7794 Arg: CGF.EmitLoadOfScalar(Addr: CGF.GetAddrOfLocalVar(VD: *LIP), /*Volatile=*/false,
7795 Ty: (*LIP)->getType(), Loc: S.getBeginLoc()));
7796 });
7797 };
7798 auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
7799 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
7800 const OMPTaskDataTy &Data) {
7801 auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
7802 &Data](CodeGenFunction &CGF, PrePostActionTy &) {
7803 OMPLoopScope PreInitScope(CGF, S);
7804 CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, Loc: S.getBeginLoc(), D: S,
7805 TaskFunction: OutlinedFn, SharedsTy,
7806 Shareds: CapturedStruct, IfCond, Data);
7807 };
7808 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_taskloop,
7809 CodeGen);
7810 };
7811 if (Data.Nogroup) {
7812 EmitOMPTaskBasedDirective(S, CapturedRegion: OMPD_taskloop, BodyGen, TaskGen, Data);
7813 } else {
7814 CGM.getOpenMPRuntime().emitTaskgroupRegion(
7815 CGF&: *this,
7816 TaskgroupOpGen: [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
7817 PrePostActionTy &Action) {
7818 Action.Enter(CGF);
7819 CGF.EmitOMPTaskBasedDirective(S, CapturedRegion: OMPD_taskloop, BodyGen, TaskGen,
7820 Data);
7821 },
7822 Loc: S.getBeginLoc());
7823 }
7824}
7825
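// Lower '#pragma omp taskloop': the associated loop is packaged as an
// explicit task and scheduled through the taskloop runtime entry point via
// EmitOMPTaskLoopBasedDirective above. For example:
//   #pragma omp taskloop grainsize(4)
//   for (int i = 0; i < n; ++i)
//     work(i);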
void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  EmitOMPTaskLoopBasedDirective(S);
}

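// Lower '#pragma omp taskloop simd', e.g.:
//   #pragma omp taskloop simd num_tasks(8)
//   for (int i = 0; i < n; ++i)
//     a[i] += b[i];
// The task-based lowering is shared with 'taskloop'; the simd part mainly
// contributes the vectorization hints emitted by EmitOMPSimdInit in the loop
// body generator above.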
void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
    const OMPTaskLoopSimdDirective &S) {
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S);
  EmitOMPTaskLoopBasedDirective(S);
}

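// Lower '#pragma omp master taskloop': the taskloop emission is wrapped in a
// master region so that only the master thread creates the tasks, e.g.:
//   #pragma omp master taskloop
//   for (int i = 0; i < n; ++i)
//     work(i);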
void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
    const OMPMasterTaskLoopDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}

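// Lower '#pragma omp master taskloop simd', e.g.:
//   #pragma omp master taskloop simd
//   for (int i = 0; i < n; ++i)
//     a[i] += b[i];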
void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
    const OMPMasterTaskLoopSimdDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    EmitOMPTaskLoopBasedDirective(S);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  OMPLexicalScope Scope(*this, S);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}

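// Lower '#pragma omp parallel master taskloop': a parallel region is emitted
// whose body is a master region containing the taskloop, e.g.:
//   #pragma omp parallel master taskloop num_tasks(8)
//   for (int i = 0; i < n; ++i)
//     work(i);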
void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
    const OMPParallelMasterTaskLoopDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
                                            S.getBeginLoc());
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen,
                                 emitEmptyBoundParameters);
}

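// Lower '#pragma omp parallel master taskloop simd', e.g.:
//   #pragma omp parallel master taskloop simd
//   for (int i = 0; i < n; ++i)
//     a[i] += b[i];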
void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
    const OMPParallelMasterTaskLoopSimdDirective &S) {
  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
      Action.Enter(CGF);
      CGF.EmitOMPTaskLoopBasedDirective(S);
    };
    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
                                            S.getBeginLoc());
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen,
                                 emitEmptyBoundParameters);
}

// Generate the instructions for '#pragma omp target update' directive.
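// For example:
//   #pragma omp target update to(a) from(b) device(dev)
// copies 'a' to the device and 'b' back to the host without entering a
// target region.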
void CodeGenFunction::EmitOMPTargetUpdateDirective(
    const OMPTargetUpdateDirective &S) {
  // If we don't have target devices, don't bother emitting the data mapping
  // code.
  if (CGM.getLangOpts().OMPTargetTriples.empty())
    return;

  // Check if we have any if clause associated with the directive.
  const Expr *IfCond = nullptr;
  if (const auto *C = S.getSingleClause<OMPIfClause>())
    IfCond = C->getCondition();

  // Check if we have any device clause associated with the directive.
  const Expr *Device = nullptr;
  if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = C->getDevice();

  OMPLexicalScope Scope(*this, S, OMPD_task);
  CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

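// Lower the OpenMP 5.0 generic loop construct '#pragma omp loop', e.g.:
//   #pragma omp loop bind(thread)
//   for (int i = 0; i < n; ++i)
//     work(i);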
void CodeGenFunction::EmitOMPGenericLoopDirective(
    const OMPGenericLoopDirective &S) {
  // Unimplemented, just inline the underlying statement for now.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Emit the loop iteration variable.
    const Stmt *CS =
        cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
    const auto *ForS = dyn_cast<ForStmt>(CS);
    if (ForS && !isa<DeclStmt>(ForS->getInit())) {
      OMPPrivateScope LoopScope(CGF);
      CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
      (void)LoopScope.Privatize();
      CGF.EmitStmt(CS);
      LoopScope.restoreMap();
    } else {
      CGF.EmitStmt(CS);
    }
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop, CodeGen);
}

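// Lower '#pragma omp parallel loop' as 'parallel for', e.g.:
//   #pragma omp parallel loop
//   for (int i = 0; i < n; ++i)
//     work(i);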
void CodeGenFunction::EmitOMPParallelGenericLoopDirective(
    const OMPLoopDirective &S) {
  // Emit combined directive as if its constituent constructs are 'parallel'
  // and 'for'.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
                                   emitEmptyBoundParameters);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

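// Lower '#pragma omp teams loop' as 'teams distribute', e.g.:
//   #pragma omp teams loop
//   for (int i = 0; i < n; ++i)
//     work(i);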
void CodeGenFunction::EmitOMPTeamsGenericLoopDirective(
    const OMPTeamsGenericLoopDirective &S) {
  // To be consistent with current behavior of 'target teams loop', emit
  // 'teams loop' as if its constituent constructs are 'teams' and
  // 'distribute'.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                    CodeGenDistribute);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(*this, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

#ifndef NDEBUG
static void emitTargetTeamsLoopCodegenStatus(CodeGenFunction &CGF,
                                             std::string StatusMsg,
                                             const OMPExecutableDirective &D) {
  bool IsDevice = CGF.CGM.getLangOpts().OpenMPIsTargetDevice;
  if (IsDevice)
    StatusMsg += ": DEVICE";
  else
    StatusMsg += ": HOST";
  SourceLocation L = D.getBeginLoc();
  auto &SM = CGF.getContext().getSourceManager();
  PresumedLoc PLoc = SM.getPresumedLoc(L);
  // Guard against an invalid presumed location so a null C string is never
  // streamed into the debug output.
  const char *FileName = PLoc.isValid() ? PLoc.getFilename() : "<unknown>";
  unsigned LineNo =
      PLoc.isValid() ? PLoc.getLine() : SM.getExpansionLineNumber(L);
  llvm::dbgs() << StatusMsg << ": " << FileName << ": " << LineNo << "\n";
}
#endif

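// 'target teams loop' has two lowerings: when Sema determined that the loop
// region can be emitted as 'parallel for' (S.canBeParallelFor()), the
// SPMD-friendly 'distribute parallel for' emission below is used; otherwise
// codegen falls back to a plain 'distribute' emission.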
static void emitTargetTeamsGenericLoopRegionAsParallel(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsGenericLoopDirective &S) {
  Action.Enter(CGF);
  // Emit 'teams loop' as if its constituent constructs are 'distribute',
  // 'parallel', and 'for'.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
                  emitTargetTeamsLoopCodegenStatus(
                      CGF, TTL_CODEGEN_TYPE " as parallel for", S));
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

static void emitTargetTeamsGenericLoopRegionAsDistribute(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsGenericLoopDirective &S) {
  Action.Enter(CGF);
  // Emit 'teams loop' as if its constituent construct is 'distribute'.
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
                  emitTargetTeamsLoopCodegenStatus(
                      CGF, TTL_CODEGEN_TYPE " as distribute", S));
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

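// For example:
//   #pragma omp target teams loop
//   for (int i = 0; i < n; ++i)
//     work(i);
// is emitted either as 'target teams distribute parallel for' or as 'target
// teams distribute', depending on S.canBeParallelFor().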
void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective(
    const OMPTargetTeamsGenericLoopDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    if (S.canBeParallelFor())
      emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
    else
      emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsGenericLoopDirective &S) {
  // Emit SPMD target parallel loop region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    if (S.canBeParallelFor())
      emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
    else
      emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr &&
         "Target device function emission failed for 'target teams loop'.");
}

static void emitTargetParallelGenericLoopRegion(
    CodeGenFunction &CGF, const OMPTargetParallelGenericLoopDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit as 'parallel for'.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_loop, /*hasCancel=*/false);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelGenericLoopDirective &S) {
  // Emit target parallel loop region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelGenericLoopRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

/// Emit combined directive 'target parallel loop' as if its constituent
/// constructs are 'target', 'parallel', and 'for'.
void CodeGenFunction::EmitOMPTargetParallelGenericLoopDirective(
    const OMPTargetParallelGenericLoopDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelGenericLoopRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

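// Emit a directive with no special codegen of its own: scan directives are
// dispatched to EmitOMPScanDirective, simd-like directives go through
// emitOMPSimdRegion, and everything else simply emits its captured statement
// after privatizing any global loop counters and firstprivate globals
// referenced by the clauses.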
void CodeGenFunction::EmitSimpleOMPExecutableDirective(
    const OMPExecutableDirective &D) {
  if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {
    EmitOMPScanDirective(*SD);
    return;
  }
  if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
    return;
  auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
    OMPPrivateScope GlobalsScope(CGF);
    if (isOpenMPTaskingDirective(D.getDirectiveKind())) {
      // Capture global firstprivates to avoid crash.
      for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
        for (const Expr *Ref : C->varlists()) {
          // Use dyn_cast here: cast<> asserts on a type mismatch and can
          // never return null, so the null check below would otherwise be
          // dead.
          const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
          if (!DRE)
            continue;
          const auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
          if (!VD || VD->hasLocalStorage())
            continue;
          if (!CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(Ref);
            GlobalsScope.addPrivate(VD, GlobLVal.getAddress());
          }
        }
      }
    }
    if (isOpenMPSimdDirective(D.getDirectiveKind())) {
      (void)GlobalsScope.Privatize();
      ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
      emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
    } else {
      if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
        for (const Expr *E : LD->counters()) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
          if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(E);
            GlobalsScope.addPrivate(VD, GlobLVal.getAddress());
          }
          if (isa<OMPCapturedExprDecl>(VD)) {
            // Emit only those that were not explicitly referenced in clauses.
            if (!CGF.LocalDeclMap.count(VD))
              CGF.EmitVarDecl(*VD);
          }
        }
        for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
          if (!C->getNumForLoops())
            continue;
          for (unsigned I = LD->getLoopsNumber(),
                        E = C->getLoopNumIterations().size();
               I < E; ++I) {
            if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
                    cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
              // Emit only those that were not explicitly referenced in
              // clauses.
              if (!CGF.LocalDeclMap.count(VD))
                CGF.EmitVarDecl(*VD);
            }
          }
        }
      }
      (void)GlobalsScope.Privatize();
      CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
    }
  };
  if (D.getDirectiveKind() == OMPD_atomic ||
      D.getDirectiveKind() == OMPD_critical ||
      D.getDirectiveKind() == OMPD_section ||
      D.getDirectiveKind() == OMPD_master ||
      D.getDirectiveKind() == OMPD_masked ||
      D.getDirectiveKind() == OMPD_unroll) {
    EmitStmt(D.getAssociatedStmt());
  } else {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D);
    OMPSimdLexicalScope Scope(*this, D);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        *this,
        isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
                                                    : D.getDirectiveKind(),
        CodeGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, D);
}
