1//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit OpenMP nodes as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGCleanup.h"
14#include "CGDebugInfo.h"
15#include "CGOpenMPRuntime.h"
16#include "CodeGenFunction.h"
17#include "CodeGenModule.h"
18#include "CodeGenPGO.h"
19#include "TargetInfo.h"
20#include "clang/AST/ASTContext.h"
21#include "clang/AST/Attr.h"
22#include "clang/AST/DeclOpenMP.h"
23#include "clang/AST/OpenMPClause.h"
24#include "clang/AST/Stmt.h"
25#include "clang/AST/StmtOpenMP.h"
26#include "clang/AST/StmtVisitor.h"
27#include "clang/Basic/OpenMPKinds.h"
28#include "clang/Basic/PrettyStackTrace.h"
29#include "clang/Basic/SourceManager.h"
30#include "llvm/ADT/SmallSet.h"
31#include "llvm/BinaryFormat/Dwarf.h"
32#include "llvm/Frontend/OpenMP/OMPConstants.h"
33#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DebugInfoMetadata.h"
36#include "llvm/IR/Instructions.h"
37#include "llvm/IR/IntrinsicInst.h"
38#include "llvm/IR/Metadata.h"
39#include "llvm/Support/AtomicOrdering.h"
40#include "llvm/Support/Debug.h"
41#include <optional>
42using namespace clang;
43using namespace CodeGen;
44using namespace llvm::omp;
45
46#define TTL_CODEGEN_TYPE "target-teams-loop-codegen"
47
48static const VarDecl *getBaseDecl(const Expr *Ref);
49static OpenMPDirectiveKind
50getEffectiveDirectiveKind(const OMPExecutableDirective &S);
51
52namespace {
53/// Lexical scope for OpenMP executable constructs, that handles correct codegen
54/// for captured expressions.
55class OMPLexicalScope : public CodeGenFunction::LexicalScope {
56 void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
57 for (const auto *C : S.clauses()) {
58 if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
59 if (const auto *PreInit =
60 cast_or_null<DeclStmt>(Val: CPI->getPreInitStmt())) {
61 for (const auto *I : PreInit->decls()) {
62 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
63 CGF.EmitVarDecl(D: cast<VarDecl>(Val: *I));
64 } else {
65 CodeGenFunction::AutoVarEmission Emission =
66 CGF.EmitAutoVarAlloca(var: cast<VarDecl>(Val: *I));
67 CGF.EmitAutoVarCleanups(emission: Emission);
68 }
69 }
70 }
71 }
72 }
73 }
74 CodeGenFunction::OMPPrivateScope InlinedShareds;
75
76 static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
77 return CGF.LambdaCaptureFields.lookup(Val: VD) ||
78 (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
79 (isa_and_nonnull<BlockDecl>(Val: CGF.CurCodeDecl) &&
80 cast<BlockDecl>(Val: CGF.CurCodeDecl)->capturesVariable(var: VD));
81 }
82
83public:
84 OMPLexicalScope(
85 CodeGenFunction &CGF, const OMPExecutableDirective &S,
86 const std::optional<OpenMPDirectiveKind> CapturedRegion = std::nullopt,
87 const bool EmitPreInitStmt = true)
88 : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
89 InlinedShareds(CGF) {
90 if (EmitPreInitStmt)
91 emitPreInitStmt(CGF, S);
92 if (!CapturedRegion)
93 return;
94 assert(S.hasAssociatedStmt() &&
95 "Expected associated statement for inlined directive.");
96 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: *CapturedRegion);
97 for (const auto &C : CS->captures()) {
98 if (C.capturesVariable() || C.capturesVariableByCopy()) {
99 auto *VD = C.getCapturedVar();
100 assert(VD == VD->getCanonicalDecl() &&
101 "Canonical decl must be captured.");
102 DeclRefExpr DRE(
103 CGF.getContext(), const_cast<VarDecl *>(VD),
104 isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
105 InlinedShareds.isGlobalVarCaptured(VD)),
106 VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
107 InlinedShareds.addPrivate(LocalVD: VD, Addr: CGF.EmitLValue(E: &DRE).getAddress());
108 }
109 }
110 (void)InlinedShareds.Privatize();
111 }
112};
113
114/// Lexical scope for OpenMP parallel construct, that handles correct codegen
115/// for captured expressions.
116class OMPParallelScope final : public OMPLexicalScope {
117 bool EmitPreInitStmt(const OMPExecutableDirective &S) {
118 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
119 return !(isOpenMPTargetExecutionDirective(DKind: EKind) ||
120 isOpenMPLoopBoundSharingDirective(Kind: EKind)) &&
121 isOpenMPParallelDirective(DKind: EKind);
122 }
123
124public:
125 OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
126 : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
127 EmitPreInitStmt(S)) {}
128};
129
130/// Lexical scope for OpenMP teams construct, that handles correct codegen
131/// for captured expressions.
132class OMPTeamsScope final : public OMPLexicalScope {
133 bool EmitPreInitStmt(const OMPExecutableDirective &S) {
134 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
135 return !isOpenMPTargetExecutionDirective(DKind: EKind) &&
136 isOpenMPTeamsDirective(DKind: EKind);
137 }
138
139public:
140 OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
141 : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
142 EmitPreInitStmt(S)) {}
143};
144
145/// Private scope for OpenMP loop-based directives, that supports capturing
146/// of used expression from loop statement.
147class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
148 void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
149 const Stmt *PreInits;
150 CodeGenFunction::OMPMapVars PreCondVars;
151 if (auto *LD = dyn_cast<OMPLoopDirective>(Val: &S)) {
152 llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
153 for (const auto *E : LD->counters()) {
154 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
155 EmittedAsPrivate.insert(V: VD->getCanonicalDecl());
156 (void)PreCondVars.setVarAddr(
157 CGF, LocalVD: VD, TempAddr: CGF.CreateMemTemp(T: VD->getType().getNonReferenceType()));
158 }
159 // Mark private vars as undefs.
160 for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
161 for (const Expr *IRef : C->varlist()) {
162 const auto *OrigVD =
163 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IRef)->getDecl());
164 if (EmittedAsPrivate.insert(V: OrigVD->getCanonicalDecl()).second) {
165 QualType OrigVDTy = OrigVD->getType().getNonReferenceType();
166 (void)PreCondVars.setVarAddr(
167 CGF, LocalVD: OrigVD,
168 TempAddr: Address(llvm::UndefValue::get(T: CGF.ConvertTypeForMem(
169 T: CGF.getContext().getPointerType(T: OrigVDTy))),
170 CGF.ConvertTypeForMem(T: OrigVDTy),
171 CGF.getContext().getDeclAlign(D: OrigVD)));
172 }
173 }
174 }
175 (void)PreCondVars.apply(CGF);
176 // Emit init, __range and __end variables for C++ range loops.
177 (void)OMPLoopBasedDirective::doForAllLoops(
178 CurStmt: LD->getInnermostCapturedStmt()->getCapturedStmt(),
179 /*TryImperfectlyNestedLoops=*/true, NumLoops: LD->getLoopsNumber(),
180 Callback: [&CGF](unsigned Cnt, const Stmt *CurStmt) {
181 if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(Val: CurStmt)) {
182 if (const Stmt *Init = CXXFor->getInit())
183 CGF.EmitStmt(S: Init);
184 CGF.EmitStmt(S: CXXFor->getRangeStmt());
185 CGF.EmitStmt(S: CXXFor->getEndStmt());
186 }
187 return false;
188 });
189 PreInits = LD->getPreInits();
190 } else if (const auto *Tile = dyn_cast<OMPTileDirective>(Val: &S)) {
191 PreInits = Tile->getPreInits();
192 } else if (const auto *Stripe = dyn_cast<OMPStripeDirective>(Val: &S)) {
193 PreInits = Stripe->getPreInits();
194 } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(Val: &S)) {
195 PreInits = Unroll->getPreInits();
196 } else if (const auto *Reverse = dyn_cast<OMPReverseDirective>(Val: &S)) {
197 PreInits = Reverse->getPreInits();
198 } else if (const auto *Interchange =
199 dyn_cast<OMPInterchangeDirective>(Val: &S)) {
200 PreInits = Interchange->getPreInits();
201 } else {
202 llvm_unreachable("Unknown loop-based directive kind.");
203 }
204 if (PreInits) {
205 // CompoundStmts and DeclStmts are used as lists of PreInit statements and
206 // declarations. Since declarations must be visible in the the following
207 // that they initialize, unpack the CompoundStmt they are nested in.
208 SmallVector<const Stmt *> PreInitStmts;
209 if (auto *PreInitCompound = dyn_cast<CompoundStmt>(Val: PreInits))
210 llvm::append_range(C&: PreInitStmts, R: PreInitCompound->body());
211 else
212 PreInitStmts.push_back(Elt: PreInits);
213
214 for (const Stmt *S : PreInitStmts) {
215 // EmitStmt skips any OMPCapturedExprDecls, but needs to be emitted
216 // here.
217 if (auto *PreInitDecl = dyn_cast<DeclStmt>(Val: S)) {
218 for (Decl *I : PreInitDecl->decls())
219 CGF.EmitVarDecl(D: cast<VarDecl>(Val&: *I));
220 continue;
221 }
222 CGF.EmitStmt(S);
223 }
224 }
225 PreCondVars.restore(CGF);
226 }
227
228public:
229 OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
230 : CodeGenFunction::RunCleanupsScope(CGF) {
231 emitPreInitStmt(CGF, S);
232 }
233};
234
235class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
236 CodeGenFunction::OMPPrivateScope InlinedShareds;
237
238 static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
239 return CGF.LambdaCaptureFields.lookup(Val: VD) ||
240 (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
241 (isa_and_nonnull<BlockDecl>(Val: CGF.CurCodeDecl) &&
242 cast<BlockDecl>(Val: CGF.CurCodeDecl)->capturesVariable(var: VD));
243 }
244
245public:
246 OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
247 : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
248 InlinedShareds(CGF) {
249 for (const auto *C : S.clauses()) {
250 if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
251 if (const auto *PreInit =
252 cast_or_null<DeclStmt>(Val: CPI->getPreInitStmt())) {
253 for (const auto *I : PreInit->decls()) {
254 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
255 CGF.EmitVarDecl(D: cast<VarDecl>(Val: *I));
256 } else {
257 CodeGenFunction::AutoVarEmission Emission =
258 CGF.EmitAutoVarAlloca(var: cast<VarDecl>(Val: *I));
259 CGF.EmitAutoVarCleanups(emission: Emission);
260 }
261 }
262 }
263 } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(Val: C)) {
264 for (const Expr *E : UDP->varlist()) {
265 const Decl *D = cast<DeclRefExpr>(Val: E)->getDecl();
266 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: D))
267 CGF.EmitVarDecl(D: *OED);
268 }
269 } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(Val: C)) {
270 for (const Expr *E : UDP->varlist()) {
271 const Decl *D = getBaseDecl(Ref: E);
272 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: D))
273 CGF.EmitVarDecl(D: *OED);
274 }
275 }
276 }
277 if (!isOpenMPSimdDirective(DKind: getEffectiveDirectiveKind(S)))
278 CGF.EmitOMPPrivateClause(D: S, PrivateScope&: InlinedShareds);
279 if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(Val: &S)) {
280 if (const Expr *E = TG->getReductionRef())
281 CGF.EmitVarDecl(D: *cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()));
282 }
283 // Temp copy arrays for inscan reductions should not be emitted as they are
284 // not used in simd only mode.
285 llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
286 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
287 if (C->getModifier() != OMPC_REDUCTION_inscan)
288 continue;
289 for (const Expr *E : C->copy_array_temps())
290 CopyArrayTemps.insert(V: cast<DeclRefExpr>(Val: E)->getDecl());
291 }
292 const auto *CS = cast_or_null<CapturedStmt>(Val: S.getAssociatedStmt());
293 while (CS) {
294 for (auto &C : CS->captures()) {
295 if (C.capturesVariable() || C.capturesVariableByCopy()) {
296 auto *VD = C.getCapturedVar();
297 if (CopyArrayTemps.contains(V: VD))
298 continue;
299 assert(VD == VD->getCanonicalDecl() &&
300 "Canonical decl must be captured.");
301 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
302 isCapturedVar(CGF, VD) ||
303 (CGF.CapturedStmtInfo &&
304 InlinedShareds.isGlobalVarCaptured(VD)),
305 VD->getType().getNonReferenceType(), VK_LValue,
306 C.getLocation());
307 InlinedShareds.addPrivate(LocalVD: VD, Addr: CGF.EmitLValue(E: &DRE).getAddress());
308 }
309 }
310 CS = dyn_cast<CapturedStmt>(Val: CS->getCapturedStmt());
311 }
312 (void)InlinedShareds.Privatize();
313 }
314};
315
316} // namespace
317
318// The loop directive with a bind clause will be mapped to a different
319// directive with corresponding semantics.
320static OpenMPDirectiveKind
321getEffectiveDirectiveKind(const OMPExecutableDirective &S) {
322 OpenMPDirectiveKind Kind = S.getDirectiveKind();
323 if (Kind != OMPD_loop)
324 return Kind;
325
326 OpenMPBindClauseKind BindKind = OMPC_BIND_unknown;
327 if (const auto *C = S.getSingleClause<OMPBindClause>())
328 BindKind = C->getBindKind();
329
330 switch (BindKind) {
331 case OMPC_BIND_parallel:
332 return OMPD_for;
333 case OMPC_BIND_teams:
334 return OMPD_distribute;
335 case OMPC_BIND_thread:
336 return OMPD_simd;
337 default:
338 return OMPD_loop;
339 }
340}
341
342static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
343 const OMPExecutableDirective &S,
344 const RegionCodeGenTy &CodeGen);
345
346LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
347 if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(Val: E)) {
348 if (const auto *OrigVD = dyn_cast<VarDecl>(Val: OrigDRE->getDecl())) {
349 OrigVD = OrigVD->getCanonicalDecl();
350 bool IsCaptured =
351 LambdaCaptureFields.lookup(Val: OrigVD) ||
352 (CapturedStmtInfo && CapturedStmtInfo->lookup(VD: OrigVD)) ||
353 (isa_and_nonnull<BlockDecl>(Val: CurCodeDecl));
354 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
355 OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
356 return EmitLValue(E: &DRE);
357 }
358 }
359 return EmitLValue(E);
360}
361
362llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
363 ASTContext &C = getContext();
364 llvm::Value *Size = nullptr;
365 auto SizeInChars = C.getTypeSizeInChars(T: Ty);
366 if (SizeInChars.isZero()) {
367 // getTypeSizeInChars() returns 0 for a VLA.
368 while (const VariableArrayType *VAT = C.getAsVariableArrayType(T: Ty)) {
369 VlaSizePair VlaSize = getVLASize(vla: VAT);
370 Ty = VlaSize.Type;
371 Size =
372 Size ? Builder.CreateNUWMul(LHS: Size, RHS: VlaSize.NumElts) : VlaSize.NumElts;
373 }
374 SizeInChars = C.getTypeSizeInChars(T: Ty);
375 if (SizeInChars.isZero())
376 return llvm::ConstantInt::get(Ty: SizeTy, /*V=*/0);
377 return Builder.CreateNUWMul(LHS: Size, RHS: CGM.getSize(numChars: SizeInChars));
378 }
379 return CGM.getSize(numChars: SizeInChars);
380}
381
382void CodeGenFunction::GenerateOpenMPCapturedVars(
383 const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
384 const RecordDecl *RD = S.getCapturedRecordDecl();
385 auto CurField = RD->field_begin();
386 auto CurCap = S.captures().begin();
387 for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
388 E = S.capture_init_end();
389 I != E; ++I, ++CurField, ++CurCap) {
390 if (CurField->hasCapturedVLAType()) {
391 const VariableArrayType *VAT = CurField->getCapturedVLAType();
392 llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
393 CapturedVars.push_back(Elt: Val);
394 } else if (CurCap->capturesThis()) {
395 CapturedVars.push_back(Elt: CXXThisValue);
396 } else if (CurCap->capturesVariableByCopy()) {
397 llvm::Value *CV = EmitLoadOfScalar(lvalue: EmitLValue(E: *I), Loc: CurCap->getLocation());
398
399 // If the field is not a pointer, we need to save the actual value
400 // and load it as a void pointer.
401 if (!CurField->getType()->isAnyPointerType()) {
402 ASTContext &Ctx = getContext();
403 Address DstAddr = CreateMemTemp(
404 T: Ctx.getUIntPtrType(),
405 Name: Twine(CurCap->getCapturedVar()->getName(), ".casted"));
406 LValue DstLV = MakeAddrLValue(Addr: DstAddr, T: Ctx.getUIntPtrType());
407
408 llvm::Value *SrcAddrVal = EmitScalarConversion(
409 Src: DstAddr.emitRawPointer(CGF&: *this),
410 SrcTy: Ctx.getPointerType(T: Ctx.getUIntPtrType()),
411 DstTy: Ctx.getPointerType(T: CurField->getType()), Loc: CurCap->getLocation());
412 LValue SrcLV =
413 MakeNaturalAlignAddrLValue(V: SrcAddrVal, T: CurField->getType());
414
415 // Store the value using the source type pointer.
416 EmitStoreThroughLValue(Src: RValue::get(V: CV), Dst: SrcLV);
417
418 // Load the value using the destination type pointer.
419 CV = EmitLoadOfScalar(lvalue: DstLV, Loc: CurCap->getLocation());
420 }
421 CapturedVars.push_back(Elt: CV);
422 } else {
423 assert(CurCap->capturesVariable() && "Expected capture by reference.");
424 CapturedVars.push_back(Elt: EmitLValue(E: *I).getAddress().emitRawPointer(CGF&: *this));
425 }
426 }
427}
428
429static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
430 QualType DstType, StringRef Name,
431 LValue AddrLV) {
432 ASTContext &Ctx = CGF.getContext();
433
434 llvm::Value *CastedPtr = CGF.EmitScalarConversion(
435 Src: AddrLV.getAddress().emitRawPointer(CGF), SrcTy: Ctx.getUIntPtrType(),
436 DstTy: Ctx.getPointerType(T: DstType), Loc);
437 // FIXME: should the pointee type (DstType) be passed?
438 Address TmpAddr =
439 CGF.MakeNaturalAlignAddrLValue(V: CastedPtr, T: DstType).getAddress();
440 return TmpAddr;
441}
442
443static QualType getCanonicalParamType(ASTContext &C, QualType T) {
444 if (T->isLValueReferenceType())
445 return C.getLValueReferenceType(
446 T: getCanonicalParamType(C, T: T.getNonReferenceType()),
447 /*SpelledAsLValue=*/false);
448 if (T->isPointerType())
449 return C.getPointerType(T: getCanonicalParamType(C, T: T->getPointeeType()));
450 if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
451 if (const auto *VLA = dyn_cast<VariableArrayType>(Val: A))
452 return getCanonicalParamType(C, T: VLA->getElementType());
453 if (!A->isVariablyModifiedType())
454 return C.getCanonicalType(T);
455 }
456 return C.getCanonicalParamType(T);
457}
458
459namespace {
460/// Contains required data for proper outlined function codegen.
461struct FunctionOptions {
462 /// Captured statement for which the function is generated.
463 const CapturedStmt *S = nullptr;
464 /// true if cast to/from UIntPtr is required for variables captured by
465 /// value.
466 const bool UIntPtrCastRequired = true;
467 /// true if only casted arguments must be registered as local args or VLA
468 /// sizes.
469 const bool RegisterCastedArgsOnly = false;
470 /// Name of the generated function.
471 const StringRef FunctionName;
472 /// Location of the non-debug version of the outlined function.
473 SourceLocation Loc;
474 explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
475 bool RegisterCastedArgsOnly, StringRef FunctionName,
476 SourceLocation Loc)
477 : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
478 RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
479 FunctionName(FunctionName), Loc(Loc) {}
480};
481} // namespace
482
483static llvm::Function *emitOutlinedFunctionPrologue(
484 CodeGenFunction &CGF, FunctionArgList &Args,
485 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
486 &LocalAddrs,
487 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
488 &VLASizes,
489 llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
490 const CapturedDecl *CD = FO.S->getCapturedDecl();
491 const RecordDecl *RD = FO.S->getCapturedRecordDecl();
492 assert(CD->hasBody() && "missing CapturedDecl body");
493
494 CXXThisValue = nullptr;
495 // Build the argument list.
496 CodeGenModule &CGM = CGF.CGM;
497 ASTContext &Ctx = CGM.getContext();
498 FunctionArgList TargetArgs;
499 Args.append(in_start: CD->param_begin(),
500 in_end: std::next(x: CD->param_begin(), n: CD->getContextParamPosition()));
501 TargetArgs.append(
502 in_start: CD->param_begin(),
503 in_end: std::next(x: CD->param_begin(), n: CD->getContextParamPosition()));
504 auto I = FO.S->captures().begin();
505 FunctionDecl *DebugFunctionDecl = nullptr;
506 if (!FO.UIntPtrCastRequired) {
507 FunctionProtoType::ExtProtoInfo EPI;
508 QualType FunctionTy = Ctx.getFunctionType(ResultTy: Ctx.VoidTy, Args: {}, EPI);
509 DebugFunctionDecl = FunctionDecl::Create(
510 C&: Ctx, DC: Ctx.getTranslationUnitDecl(), StartLoc: FO.S->getBeginLoc(),
511 NLoc: SourceLocation(), N: DeclarationName(), T: FunctionTy,
512 TInfo: Ctx.getTrivialTypeSourceInfo(T: FunctionTy), SC: SC_Static,
513 /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
514 /*hasWrittenPrototype=*/false);
515 }
516 for (const FieldDecl *FD : RD->fields()) {
517 QualType ArgType = FD->getType();
518 IdentifierInfo *II = nullptr;
519 VarDecl *CapVar = nullptr;
520
521 // If this is a capture by copy and the type is not a pointer, the outlined
522 // function argument type should be uintptr and the value properly casted to
523 // uintptr. This is necessary given that the runtime library is only able to
524 // deal with pointers. We can pass in the same way the VLA type sizes to the
525 // outlined function.
526 if (FO.UIntPtrCastRequired &&
527 ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
528 I->capturesVariableArrayType()))
529 ArgType = Ctx.getUIntPtrType();
530
531 if (I->capturesVariable() || I->capturesVariableByCopy()) {
532 CapVar = I->getCapturedVar();
533 II = CapVar->getIdentifier();
534 } else if (I->capturesThis()) {
535 II = &Ctx.Idents.get(Name: "this");
536 } else {
537 assert(I->capturesVariableArrayType());
538 II = &Ctx.Idents.get(Name: "vla");
539 }
540 if (ArgType->isVariablyModifiedType())
541 ArgType = getCanonicalParamType(C&: Ctx, T: ArgType);
542 VarDecl *Arg;
543 if (CapVar && (CapVar->getTLSKind() != clang::VarDecl::TLS_None)) {
544 Arg = ImplicitParamDecl::Create(C&: Ctx, /*DC=*/nullptr, IdLoc: FD->getLocation(),
545 Id: II, T: ArgType,
546 ParamKind: ImplicitParamKind::ThreadPrivateVar);
547 } else if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
548 Arg = ParmVarDecl::Create(
549 C&: Ctx, DC: DebugFunctionDecl,
550 StartLoc: CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
551 IdLoc: CapVar ? CapVar->getLocation() : FD->getLocation(), Id: II, T: ArgType,
552 /*TInfo=*/nullptr, S: SC_None, /*DefArg=*/nullptr);
553 } else {
554 Arg = ImplicitParamDecl::Create(C&: Ctx, /*DC=*/nullptr, IdLoc: FD->getLocation(),
555 Id: II, T: ArgType, ParamKind: ImplicitParamKind::Other);
556 }
557 Args.emplace_back(Args&: Arg);
558 // Do not cast arguments if we emit function with non-original types.
559 TargetArgs.emplace_back(
560 Args: FO.UIntPtrCastRequired
561 ? Arg
562 : CGM.getOpenMPRuntime().translateParameter(FD, NativeParam: Arg));
563 ++I;
564 }
565 Args.append(in_start: std::next(x: CD->param_begin(), n: CD->getContextParamPosition() + 1),
566 in_end: CD->param_end());
567 TargetArgs.append(
568 in_start: std::next(x: CD->param_begin(), n: CD->getContextParamPosition() + 1),
569 in_end: CD->param_end());
570
571 // Create the function declaration.
572 const CGFunctionInfo &FuncInfo =
573 CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: Ctx.VoidTy, args: TargetArgs);
574 llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(Info: FuncInfo);
575
576 auto *F =
577 llvm::Function::Create(Ty: FuncLLVMTy, Linkage: llvm::GlobalValue::InternalLinkage,
578 N: FO.FunctionName, M: &CGM.getModule());
579 CGM.SetInternalFunctionAttributes(GD: CD, F, FI: FuncInfo);
580 if (CD->isNothrow())
581 F->setDoesNotThrow();
582 F->setDoesNotRecurse();
583
584 // Always inline the outlined function if optimizations are enabled.
585 if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
586 F->removeFnAttr(Kind: llvm::Attribute::NoInline);
587 F->addFnAttr(Kind: llvm::Attribute::AlwaysInline);
588 }
589
590 // Generate the function.
591 CGF.StartFunction(GD: CD, RetTy: Ctx.VoidTy, Fn: F, FnInfo: FuncInfo, Args: TargetArgs,
592 Loc: FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
593 StartLoc: FO.UIntPtrCastRequired ? FO.Loc
594 : CD->getBody()->getBeginLoc());
595 unsigned Cnt = CD->getContextParamPosition();
596 I = FO.S->captures().begin();
597 for (const FieldDecl *FD : RD->fields()) {
598 // Do not map arguments if we emit function with non-original types.
599 Address LocalAddr(Address::invalid());
600 if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
601 LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, NativeParam: Args[Cnt],
602 TargetParam: TargetArgs[Cnt]);
603 } else {
604 LocalAddr = CGF.GetAddrOfLocalVar(VD: Args[Cnt]);
605 }
606 // If we are capturing a pointer by copy we don't need to do anything, just
607 // use the value that we get from the arguments.
608 if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
609 const VarDecl *CurVD = I->getCapturedVar();
610 if (!FO.RegisterCastedArgsOnly)
611 LocalAddrs.insert(KV: {Args[Cnt], {CurVD, LocalAddr}});
612 ++Cnt;
613 ++I;
614 continue;
615 }
616
617 LValue ArgLVal = CGF.MakeAddrLValue(Addr: LocalAddr, T: Args[Cnt]->getType(),
618 Source: AlignmentSource::Decl);
619 if (FD->hasCapturedVLAType()) {
620 if (FO.UIntPtrCastRequired) {
621 ArgLVal = CGF.MakeAddrLValue(
622 Addr: castValueFromUintptr(CGF, Loc: I->getLocation(), DstType: FD->getType(),
623 Name: Args[Cnt]->getName(), AddrLV: ArgLVal),
624 T: FD->getType(), Source: AlignmentSource::Decl);
625 }
626 llvm::Value *ExprArg = CGF.EmitLoadOfScalar(lvalue: ArgLVal, Loc: I->getLocation());
627 const VariableArrayType *VAT = FD->getCapturedVLAType();
628 VLASizes.try_emplace(Key: Args[Cnt], Args: VAT->getSizeExpr(), Args&: ExprArg);
629 } else if (I->capturesVariable()) {
630 const VarDecl *Var = I->getCapturedVar();
631 QualType VarTy = Var->getType();
632 Address ArgAddr = ArgLVal.getAddress();
633 if (ArgLVal.getType()->isLValueReferenceType()) {
634 ArgAddr = CGF.EmitLoadOfReference(RefLVal: ArgLVal);
635 } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
636 assert(ArgLVal.getType()->isPointerType());
637 ArgAddr = CGF.EmitLoadOfPointer(
638 Ptr: ArgAddr, PtrTy: ArgLVal.getType()->castAs<PointerType>());
639 }
640 if (!FO.RegisterCastedArgsOnly) {
641 LocalAddrs.insert(
642 KV: {Args[Cnt], {Var, ArgAddr.withAlignment(NewAlignment: Ctx.getDeclAlign(D: Var))}});
643 }
644 } else if (I->capturesVariableByCopy()) {
645 assert(!FD->getType()->isAnyPointerType() &&
646 "Not expecting a captured pointer.");
647 const VarDecl *Var = I->getCapturedVar();
648 LocalAddrs.insert(KV: {Args[Cnt],
649 {Var, FO.UIntPtrCastRequired
650 ? castValueFromUintptr(
651 CGF, Loc: I->getLocation(), DstType: FD->getType(),
652 Name: Args[Cnt]->getName(), AddrLV: ArgLVal)
653 : ArgLVal.getAddress()}});
654 } else {
655 // If 'this' is captured, load it into CXXThisValue.
656 assert(I->capturesThis());
657 CXXThisValue = CGF.EmitLoadOfScalar(lvalue: ArgLVal, Loc: I->getLocation());
658 LocalAddrs.insert(KV: {Args[Cnt], {nullptr, ArgLVal.getAddress()}});
659 }
660 ++Cnt;
661 ++I;
662 }
663
664 return F;
665}
666
667llvm::Function *
668CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
669 SourceLocation Loc) {
670 assert(
671 CapturedStmtInfo &&
672 "CapturedStmtInfo should be set when generating the captured function");
673 const CapturedDecl *CD = S.getCapturedDecl();
674 // Build the argument list.
675 bool NeedWrapperFunction =
676 getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
677 FunctionArgList Args, WrapperArgs;
678 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs,
679 WrapperLocalAddrs;
680 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes,
681 WrapperVLASizes;
682 SmallString<256> Buffer;
683 llvm::raw_svector_ostream Out(Buffer);
684 Out << CapturedStmtInfo->getHelperName();
685
686 CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
687 llvm::Function *WrapperF = nullptr;
688 if (NeedWrapperFunction) {
689 // Emit the final kernel early to allow attributes to be added by the
690 // OpenMPI-IR-Builder.
691 FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
692 /*RegisterCastedArgsOnly=*/true,
693 CapturedStmtInfo->getHelperName(), Loc);
694 WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
695 WrapperF =
696 emitOutlinedFunctionPrologue(CGF&: WrapperCGF, Args, LocalAddrs, VLASizes,
697 CXXThisValue&: WrapperCGF.CXXThisValue, FO: WrapperFO);
698 Out << "_debug__";
699 }
700 FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
701 Out.str(), Loc);
702 llvm::Function *F = emitOutlinedFunctionPrologue(
703 CGF&: *this, Args&: WrapperArgs, LocalAddrs&: WrapperLocalAddrs, VLASizes&: WrapperVLASizes, CXXThisValue, FO);
704 CodeGenFunction::OMPPrivateScope LocalScope(*this);
705 for (const auto &LocalAddrPair : WrapperLocalAddrs) {
706 if (LocalAddrPair.second.first) {
707 LocalScope.addPrivate(LocalVD: LocalAddrPair.second.first,
708 Addr: LocalAddrPair.second.second);
709 }
710 }
711 (void)LocalScope.Privatize();
712 for (const auto &VLASizePair : WrapperVLASizes)
713 VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
714 PGO->assignRegionCounters(GD: GlobalDecl(CD), Fn: F);
715 CapturedStmtInfo->EmitBody(CGF&: *this, S: CD->getBody());
716 LocalScope.ForceCleanup();
717 FinishFunction(EndLoc: CD->getBodyRBrace());
718 if (!NeedWrapperFunction)
719 return F;
720
721 // Reverse the order.
722 WrapperF->removeFromParent();
723 F->getParent()->getFunctionList().insertAfter(where: F->getIterator(), New: WrapperF);
724
725 llvm::SmallVector<llvm::Value *, 4> CallArgs;
726 auto *PI = F->arg_begin();
727 for (const auto *Arg : Args) {
728 llvm::Value *CallArg;
729 auto I = LocalAddrs.find(Key: Arg);
730 if (I != LocalAddrs.end()) {
731 LValue LV = WrapperCGF.MakeAddrLValue(
732 Addr: I->second.second,
733 T: I->second.first ? I->second.first->getType() : Arg->getType(),
734 Source: AlignmentSource::Decl);
735 if (LV.getType()->isAnyComplexType())
736 LV.setAddress(LV.getAddress().withElementType(ElemTy: PI->getType()));
737 CallArg = WrapperCGF.EmitLoadOfScalar(lvalue: LV, Loc: S.getBeginLoc());
738 } else {
739 auto EI = VLASizes.find(Val: Arg);
740 if (EI != VLASizes.end()) {
741 CallArg = EI->second.second;
742 } else {
743 LValue LV =
744 WrapperCGF.MakeAddrLValue(Addr: WrapperCGF.GetAddrOfLocalVar(VD: Arg),
745 T: Arg->getType(), Source: AlignmentSource::Decl);
746 CallArg = WrapperCGF.EmitLoadOfScalar(lvalue: LV, Loc: S.getBeginLoc());
747 }
748 }
749 CallArgs.emplace_back(Args: WrapperCGF.EmitFromMemory(Value: CallArg, Ty: Arg->getType()));
750 ++PI;
751 }
752 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF&: WrapperCGF, Loc, OutlinedFn: F, Args: CallArgs);
753 WrapperCGF.FinishFunction();
754 return WrapperF;
755}
756
757//===----------------------------------------------------------------------===//
758// OpenMP Directive Emission
759//===----------------------------------------------------------------------===//
760void CodeGenFunction::EmitOMPAggregateAssign(
761 Address DestAddr, Address SrcAddr, QualType OriginalType,
762 const llvm::function_ref<void(Address, Address)> CopyGen) {
763 // Perform element-by-element initialization.
764 QualType ElementTy;
765
766 // Drill down to the base element type on both arrays.
767 const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
768 llvm::Value *NumElements = emitArrayLength(arrayType: ArrayTy, baseType&: ElementTy, addr&: DestAddr);
769 SrcAddr = SrcAddr.withElementType(ElemTy: DestAddr.getElementType());
770
771 llvm::Value *SrcBegin = SrcAddr.emitRawPointer(CGF&: *this);
772 llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF&: *this);
773 // Cast from pointer to array type to pointer to single element.
774 llvm::Value *DestEnd = Builder.CreateInBoundsGEP(Ty: DestAddr.getElementType(),
775 Ptr: DestBegin, IdxList: NumElements);
776
777 // The basic structure here is a while-do loop.
778 llvm::BasicBlock *BodyBB = createBasicBlock(name: "omp.arraycpy.body");
779 llvm::BasicBlock *DoneBB = createBasicBlock(name: "omp.arraycpy.done");
780 llvm::Value *IsEmpty =
781 Builder.CreateICmpEQ(LHS: DestBegin, RHS: DestEnd, Name: "omp.arraycpy.isempty");
782 Builder.CreateCondBr(Cond: IsEmpty, True: DoneBB, False: BodyBB);
783
784 // Enter the loop body, making that address the current address.
785 llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
786 EmitBlock(BB: BodyBB);
787
788 CharUnits ElementSize = getContext().getTypeSizeInChars(T: ElementTy);
789
790 llvm::PHINode *SrcElementPHI =
791 Builder.CreatePHI(Ty: SrcBegin->getType(), NumReservedValues: 2, Name: "omp.arraycpy.srcElementPast");
792 SrcElementPHI->addIncoming(V: SrcBegin, BB: EntryBB);
793 Address SrcElementCurrent =
794 Address(SrcElementPHI, SrcAddr.getElementType(),
795 SrcAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize));
796
797 llvm::PHINode *DestElementPHI = Builder.CreatePHI(
798 Ty: DestBegin->getType(), NumReservedValues: 2, Name: "omp.arraycpy.destElementPast");
799 DestElementPHI->addIncoming(V: DestBegin, BB: EntryBB);
800 Address DestElementCurrent =
801 Address(DestElementPHI, DestAddr.getElementType(),
802 DestAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize));
803
804 // Emit copy.
805 CopyGen(DestElementCurrent, SrcElementCurrent);
806
807 // Shift the address forward by one element.
808 llvm::Value *DestElementNext =
809 Builder.CreateConstGEP1_32(Ty: DestAddr.getElementType(), Ptr: DestElementPHI,
810 /*Idx0=*/1, Name: "omp.arraycpy.dest.element");
811 llvm::Value *SrcElementNext =
812 Builder.CreateConstGEP1_32(Ty: SrcAddr.getElementType(), Ptr: SrcElementPHI,
813 /*Idx0=*/1, Name: "omp.arraycpy.src.element");
814 // Check whether we've reached the end.
815 llvm::Value *Done =
816 Builder.CreateICmpEQ(LHS: DestElementNext, RHS: DestEnd, Name: "omp.arraycpy.done");
817 Builder.CreateCondBr(Cond: Done, True: DoneBB, False: BodyBB);
818 DestElementPHI->addIncoming(V: DestElementNext, BB: Builder.GetInsertBlock());
819 SrcElementPHI->addIncoming(V: SrcElementNext, BB: Builder.GetInsertBlock());
820
821 // Done.
822 EmitBlock(BB: DoneBB, /*IsFinished=*/true);
823}
824
825void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
826 Address SrcAddr, const VarDecl *DestVD,
827 const VarDecl *SrcVD, const Expr *Copy) {
828 if (OriginalType->isArrayType()) {
829 const auto *BO = dyn_cast<BinaryOperator>(Val: Copy);
830 if (BO && BO->getOpcode() == BO_Assign) {
831 // Perform simple memcpy for simple copying.
832 LValue Dest = MakeAddrLValue(Addr: DestAddr, T: OriginalType);
833 LValue Src = MakeAddrLValue(Addr: SrcAddr, T: OriginalType);
834 EmitAggregateAssign(Dest, Src, EltTy: OriginalType);
835 } else {
836 // For arrays with complex element types perform element by element
837 // copying.
838 EmitOMPAggregateAssign(
839 DestAddr, SrcAddr, OriginalType,
840 CopyGen: [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
841 // Working with the single array element, so have to remap
842 // destination and source variables to corresponding array
843 // elements.
844 CodeGenFunction::OMPPrivateScope Remap(*this);
845 Remap.addPrivate(LocalVD: DestVD, Addr: DestElement);
846 Remap.addPrivate(LocalVD: SrcVD, Addr: SrcElement);
847 (void)Remap.Privatize();
848 EmitIgnoredExpr(E: Copy);
849 });
850 }
851 } else {
852 // Remap pseudo source variable to private copy.
853 CodeGenFunction::OMPPrivateScope Remap(*this);
854 Remap.addPrivate(LocalVD: SrcVD, Addr: SrcAddr);
855 Remap.addPrivate(LocalVD: DestVD, Addr: DestAddr);
856 (void)Remap.Privatize();
857 // Emit copying of the whole variable.
858 EmitIgnoredExpr(E: Copy);
859 }
860}
861
862bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
863 OMPPrivateScope &PrivateScope) {
864 if (!HaveInsertPoint())
865 return false;
866 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
867 bool DeviceConstTarget = getLangOpts().OpenMPIsTargetDevice &&
868 isOpenMPTargetExecutionDirective(DKind: EKind);
869 bool FirstprivateIsLastprivate = false;
870 llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
871 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
872 for (const auto *D : C->varlist())
873 Lastprivates.try_emplace(
874 Key: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D)->getDecl())->getCanonicalDecl(),
875 Args: C->getKind());
876 }
877 llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
878 llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
879 getOpenMPCaptureRegions(CaptureRegions, DKind: EKind);
880 // Force emission of the firstprivate copy if the directive does not emit
881 // outlined function, like omp for, omp simd, omp distribute etc.
882 bool MustEmitFirstprivateCopy =
883 CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
884 for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
885 const auto *IRef = C->varlist_begin();
886 const auto *InitsRef = C->inits().begin();
887 for (const Expr *IInit : C->private_copies()) {
888 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
889 bool ThisFirstprivateIsLastprivate =
890 Lastprivates.count(Val: OrigVD->getCanonicalDecl()) > 0;
891 const FieldDecl *FD = CapturedStmtInfo->lookup(VD: OrigVD);
892 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IInit)->getDecl());
893 if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
894 !FD->getType()->isReferenceType() &&
895 (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
896 EmittedAsFirstprivate.insert(V: OrigVD->getCanonicalDecl());
897 ++IRef;
898 ++InitsRef;
899 continue;
900 }
901 // Do not emit copy for firstprivate constant variables in target regions,
902 // captured by reference.
903 if (DeviceConstTarget && OrigVD->getType().isConstant(Ctx: getContext()) &&
904 FD && FD->getType()->isReferenceType() &&
905 (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
906 EmittedAsFirstprivate.insert(V: OrigVD->getCanonicalDecl());
907 ++IRef;
908 ++InitsRef;
909 continue;
910 }
911 FirstprivateIsLastprivate =
912 FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
913 if (EmittedAsFirstprivate.insert(V: OrigVD->getCanonicalDecl()).second) {
914 const auto *VDInit =
915 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *InitsRef)->getDecl());
916 bool IsRegistered;
917 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
918 /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
919 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
920 LValue OriginalLVal;
921 if (!FD) {
922 // Check if the firstprivate variable is just a constant value.
923 ConstantEmission CE = tryEmitAsConstant(RefExpr: &DRE);
924 if (CE && !CE.isReference()) {
925 // Constant value, no need to create a copy.
926 ++IRef;
927 ++InitsRef;
928 continue;
929 }
930 if (CE && CE.isReference()) {
931 OriginalLVal = CE.getReferenceLValue(CGF&: *this, RefExpr: &DRE);
932 } else {
933 assert(!CE && "Expected non-constant firstprivate.");
934 OriginalLVal = EmitLValue(E: &DRE);
935 }
936 } else {
937 OriginalLVal = EmitLValue(E: &DRE);
938 }
939 QualType Type = VD->getType();
940 if (Type->isArrayType()) {
941 // Emit VarDecl with copy init for arrays.
942 // Get the address of the original variable captured in current
943 // captured region.
944 AutoVarEmission Emission = EmitAutoVarAlloca(var: *VD);
945 const Expr *Init = VD->getInit();
946 if (!isa<CXXConstructExpr>(Val: Init) || isTrivialInitializer(Init)) {
947 // Perform simple memcpy.
948 LValue Dest = MakeAddrLValue(Addr: Emission.getAllocatedAddress(), T: Type);
949 EmitAggregateAssign(Dest, Src: OriginalLVal, EltTy: Type);
950 } else {
951 EmitOMPAggregateAssign(
952 DestAddr: Emission.getAllocatedAddress(), SrcAddr: OriginalLVal.getAddress(), OriginalType: Type,
953 CopyGen: [this, VDInit, Init](Address DestElement, Address SrcElement) {
954 // Clean up any temporaries needed by the
955 // initialization.
956 RunCleanupsScope InitScope(*this);
957 // Emit initialization for single element.
958 setAddrOfLocalVar(VD: VDInit, Addr: SrcElement);
959 EmitAnyExprToMem(E: Init, Location: DestElement,
960 Quals: Init->getType().getQualifiers(),
961 /*IsInitializer*/ false);
962 LocalDeclMap.erase(Val: VDInit);
963 });
964 }
965 EmitAutoVarCleanups(emission: Emission);
966 IsRegistered =
967 PrivateScope.addPrivate(LocalVD: OrigVD, Addr: Emission.getAllocatedAddress());
968 } else {
969 Address OriginalAddr = OriginalLVal.getAddress();
970 // Emit private VarDecl with copy init.
971 // Remap temp VDInit variable to the address of the original
972 // variable (for proper handling of captured global variables).
973 setAddrOfLocalVar(VD: VDInit, Addr: OriginalAddr);
974 EmitDecl(D: *VD);
975 LocalDeclMap.erase(Val: VDInit);
976 Address VDAddr = GetAddrOfLocalVar(VD);
977 if (ThisFirstprivateIsLastprivate &&
978 Lastprivates[OrigVD->getCanonicalDecl()] ==
979 OMPC_LASTPRIVATE_conditional) {
980 // Create/init special variable for lastprivate conditionals.
981 llvm::Value *V =
982 EmitLoadOfScalar(lvalue: MakeAddrLValue(Addr: VDAddr, T: (*IRef)->getType(),
983 Source: AlignmentSource::Decl),
984 Loc: (*IRef)->getExprLoc());
985 VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
986 CGF&: *this, VD: OrigVD);
987 EmitStoreOfScalar(value: V, lvalue: MakeAddrLValue(Addr: VDAddr, T: (*IRef)->getType(),
988 Source: AlignmentSource::Decl));
989 LocalDeclMap.erase(Val: VD);
990 setAddrOfLocalVar(VD, Addr: VDAddr);
991 }
992 IsRegistered = PrivateScope.addPrivate(LocalVD: OrigVD, Addr: VDAddr);
993 }
994 assert(IsRegistered &&
995 "firstprivate var already registered as private");
996 // Silence the warning about unused variable.
997 (void)IsRegistered;
998 }
999 ++IRef;
1000 ++InitsRef;
1001 }
1002 }
1003 return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
1004}
1005
1006void CodeGenFunction::EmitOMPPrivateClause(
1007 const OMPExecutableDirective &D,
1008 CodeGenFunction::OMPPrivateScope &PrivateScope) {
1009 if (!HaveInsertPoint())
1010 return;
1011 llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
1012 for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
1013 auto IRef = C->varlist_begin();
1014 for (const Expr *IInit : C->private_copies()) {
1015 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
1016 if (EmittedAsPrivate.insert(V: OrigVD->getCanonicalDecl()).second) {
1017 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IInit)->getDecl());
1018 EmitDecl(D: *VD);
1019 // Emit private VarDecl with copy init.
1020 bool IsRegistered =
1021 PrivateScope.addPrivate(LocalVD: OrigVD, Addr: GetAddrOfLocalVar(VD));
1022 assert(IsRegistered && "private var already registered as private");
1023 // Silence the warning about unused variable.
1024 (void)IsRegistered;
1025 }
1026 ++IRef;
1027 }
1028 }
1029}
1030
1031bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
1032 if (!HaveInsertPoint())
1033 return false;
1034 // threadprivate_var1 = master_threadprivate_var1;
1035 // operator=(threadprivate_var2, master_threadprivate_var2);
1036 // ...
1037 // __kmpc_barrier(&loc, global_tid);
1038 llvm::DenseSet<const VarDecl *> CopiedVars;
1039 llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
1040 for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
1041 auto IRef = C->varlist_begin();
1042 auto ISrcRef = C->source_exprs().begin();
1043 auto IDestRef = C->destination_exprs().begin();
1044 for (const Expr *AssignOp : C->assignment_ops()) {
1045 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
1046 QualType Type = VD->getType();
1047 if (CopiedVars.insert(V: VD->getCanonicalDecl()).second) {
1048 // Get the address of the master variable. If we are emitting code with
1049 // TLS support, the address is passed from the master as field in the
1050 // captured declaration.
1051 Address MasterAddr = Address::invalid();
1052 if (getLangOpts().OpenMPUseTLS &&
1053 getContext().getTargetInfo().isTLSSupported()) {
1054 assert(CapturedStmtInfo->lookup(VD) &&
1055 "Copyin threadprivates should have been captured!");
1056 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
1057 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
1058 MasterAddr = EmitLValue(E: &DRE).getAddress();
1059 LocalDeclMap.erase(Val: VD);
1060 } else {
1061 MasterAddr =
1062 Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(D: VD)
1063 : CGM.GetAddrOfGlobal(GD: VD),
1064 CGM.getTypes().ConvertTypeForMem(T: VD->getType()),
1065 getContext().getDeclAlign(D: VD));
1066 }
1067 // Get the address of the threadprivate variable.
1068 Address PrivateAddr = EmitLValue(E: *IRef).getAddress();
1069 if (CopiedVars.size() == 1) {
1070 // At first check if current thread is a master thread. If it is, no
1071 // need to copy data.
1072 CopyBegin = createBasicBlock(name: "copyin.not.master");
1073 CopyEnd = createBasicBlock(name: "copyin.not.master.end");
1074 // TODO: Avoid ptrtoint conversion.
1075 auto *MasterAddrInt = Builder.CreatePtrToInt(
1076 V: MasterAddr.emitRawPointer(CGF&: *this), DestTy: CGM.IntPtrTy);
1077 auto *PrivateAddrInt = Builder.CreatePtrToInt(
1078 V: PrivateAddr.emitRawPointer(CGF&: *this), DestTy: CGM.IntPtrTy);
1079 Builder.CreateCondBr(
1080 Cond: Builder.CreateICmpNE(LHS: MasterAddrInt, RHS: PrivateAddrInt), True: CopyBegin,
1081 False: CopyEnd);
1082 EmitBlock(BB: CopyBegin);
1083 }
1084 const auto *SrcVD =
1085 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ISrcRef)->getDecl());
1086 const auto *DestVD =
1087 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IDestRef)->getDecl());
1088 EmitOMPCopy(OriginalType: Type, DestAddr: PrivateAddr, SrcAddr: MasterAddr, DestVD, SrcVD, Copy: AssignOp);
1089 }
1090 ++IRef;
1091 ++ISrcRef;
1092 ++IDestRef;
1093 }
1094 }
1095 if (CopyEnd) {
1096 // Exit out of copying procedure for non-master thread.
1097 EmitBlock(BB: CopyEnd, /*IsFinished=*/true);
1098 return true;
1099 }
1100 return false;
1101}
1102
1103bool CodeGenFunction::EmitOMPLastprivateClauseInit(
1104 const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
1105 if (!HaveInsertPoint())
1106 return false;
1107 bool HasAtLeastOneLastprivate = false;
1108 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
1109 llvm::DenseSet<const VarDecl *> SIMDLCVs;
1110 if (isOpenMPSimdDirective(DKind: EKind)) {
1111 const auto *LoopDirective = cast<OMPLoopDirective>(Val: &D);
1112 for (const Expr *C : LoopDirective->counters()) {
1113 SIMDLCVs.insert(
1114 V: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: C)->getDecl())->getCanonicalDecl());
1115 }
1116 }
1117 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
1118 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1119 HasAtLeastOneLastprivate = true;
1120 if (isOpenMPTaskLoopDirective(DKind: EKind) && !getLangOpts().OpenMPSimd)
1121 break;
1122 const auto *IRef = C->varlist_begin();
1123 const auto *IDestRef = C->destination_exprs().begin();
1124 for (const Expr *IInit : C->private_copies()) {
1125 // Keep the address of the original variable for future update at the end
1126 // of the loop.
1127 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
1128 // Taskloops do not require additional initialization, it is done in
1129 // runtime support library.
1130 if (AlreadyEmittedVars.insert(V: OrigVD->getCanonicalDecl()).second) {
1131 const auto *DestVD =
1132 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IDestRef)->getDecl());
1133 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
1134 /*RefersToEnclosingVariableOrCapture=*/
1135 CapturedStmtInfo->lookup(VD: OrigVD) != nullptr,
1136 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
1137 PrivateScope.addPrivate(LocalVD: DestVD, Addr: EmitLValue(E: &DRE).getAddress());
1138 // Check if the variable is also a firstprivate: in this case IInit is
1139 // not generated. Initialization of this variable will happen in codegen
1140 // for 'firstprivate' clause.
1141 if (IInit && !SIMDLCVs.count(V: OrigVD->getCanonicalDecl())) {
1142 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IInit)->getDecl());
1143 Address VDAddr = Address::invalid();
1144 if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
1145 VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
1146 CGF&: *this, VD: OrigVD);
1147 setAddrOfLocalVar(VD, Addr: VDAddr);
1148 } else {
1149 // Emit private VarDecl with copy init.
1150 EmitDecl(D: *VD);
1151 VDAddr = GetAddrOfLocalVar(VD);
1152 }
1153 bool IsRegistered = PrivateScope.addPrivate(LocalVD: OrigVD, Addr: VDAddr);
1154 assert(IsRegistered &&
1155 "lastprivate var already registered as private");
1156 (void)IsRegistered;
1157 }
1158 }
1159 ++IRef;
1160 ++IDestRef;
1161 }
1162 }
1163 return HasAtLeastOneLastprivate;
1164}
1165
1166void CodeGenFunction::EmitOMPLastprivateClauseFinal(
1167 const OMPExecutableDirective &D, bool NoFinals,
1168 llvm::Value *IsLastIterCond) {
1169 if (!HaveInsertPoint())
1170 return;
1171 // Emit following code:
1172 // if (<IsLastIterCond>) {
1173 // orig_var1 = private_orig_var1;
1174 // ...
1175 // orig_varn = private_orig_varn;
1176 // }
1177 llvm::BasicBlock *ThenBB = nullptr;
1178 llvm::BasicBlock *DoneBB = nullptr;
1179 if (IsLastIterCond) {
1180 // Emit implicit barrier if at least one lastprivate conditional is found
1181 // and this is not a simd mode.
1182 if (!getLangOpts().OpenMPSimd &&
1183 llvm::any_of(Range: D.getClausesOfKind<OMPLastprivateClause>(),
1184 P: [](const OMPLastprivateClause *C) {
1185 return C->getKind() == OMPC_LASTPRIVATE_conditional;
1186 })) {
1187 CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: D.getBeginLoc(),
1188 Kind: OMPD_unknown,
1189 /*EmitChecks=*/false,
1190 /*ForceSimpleCall=*/true);
1191 }
1192 ThenBB = createBasicBlock(name: ".omp.lastprivate.then");
1193 DoneBB = createBasicBlock(name: ".omp.lastprivate.done");
1194 Builder.CreateCondBr(Cond: IsLastIterCond, True: ThenBB, False: DoneBB);
1195 EmitBlock(BB: ThenBB);
1196 }
1197 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
1198 llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
1199 if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(Val: &D)) {
1200 auto IC = LoopDirective->counters().begin();
1201 for (const Expr *F : LoopDirective->finals()) {
1202 const auto *D =
1203 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IC)->getDecl())->getCanonicalDecl();
1204 if (NoFinals)
1205 AlreadyEmittedVars.insert(V: D);
1206 else
1207 LoopCountersAndUpdates[D] = F;
1208 ++IC;
1209 }
1210 }
1211 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1212 auto IRef = C->varlist_begin();
1213 auto ISrcRef = C->source_exprs().begin();
1214 auto IDestRef = C->destination_exprs().begin();
1215 for (const Expr *AssignOp : C->assignment_ops()) {
1216 const auto *PrivateVD =
1217 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
1218 QualType Type = PrivateVD->getType();
1219 const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
1220 if (AlreadyEmittedVars.insert(V: CanonicalVD).second) {
1221 // If lastprivate variable is a loop control variable for loop-based
1222 // directive, update its value before copyin back to original
1223 // variable.
1224 if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(Val: CanonicalVD))
1225 EmitIgnoredExpr(E: FinalExpr);
1226 const auto *SrcVD =
1227 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ISrcRef)->getDecl());
1228 const auto *DestVD =
1229 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IDestRef)->getDecl());
1230 // Get the address of the private variable.
1231 Address PrivateAddr = GetAddrOfLocalVar(VD: PrivateVD);
1232 if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
1233 PrivateAddr = Address(
1234 Builder.CreateLoad(Addr: PrivateAddr),
1235 CGM.getTypes().ConvertTypeForMem(T: RefTy->getPointeeType()),
1236 CGM.getNaturalTypeAlignment(T: RefTy->getPointeeType()));
1237 // Store the last value to the private copy in the last iteration.
1238 if (C->getKind() == OMPC_LASTPRIVATE_conditional)
1239 CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
1240 CGF&: *this, PrivLVal: MakeAddrLValue(Addr: PrivateAddr, T: (*IRef)->getType()), VD: PrivateVD,
1241 Loc: (*IRef)->getExprLoc());
1242 // Get the address of the original variable.
1243 Address OriginalAddr = GetAddrOfLocalVar(VD: DestVD);
1244 EmitOMPCopy(OriginalType: Type, DestAddr: OriginalAddr, SrcAddr: PrivateAddr, DestVD, SrcVD, Copy: AssignOp);
1245 }
1246 ++IRef;
1247 ++ISrcRef;
1248 ++IDestRef;
1249 }
1250 if (const Expr *PostUpdate = C->getPostUpdateExpr())
1251 EmitIgnoredExpr(E: PostUpdate);
1252 }
1253 if (IsLastIterCond)
1254 EmitBlock(BB: DoneBB, /*IsFinished=*/true);
1255}
1256
1257void CodeGenFunction::EmitOMPReductionClauseInit(
1258 const OMPExecutableDirective &D,
1259 CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
1260 if (!HaveInsertPoint())
1261 return;
1262 SmallVector<const Expr *, 4> Shareds;
1263 SmallVector<const Expr *, 4> Privates;
1264 SmallVector<const Expr *, 4> ReductionOps;
1265 SmallVector<const Expr *, 4> LHSs;
1266 SmallVector<const Expr *, 4> RHSs;
1267 OMPTaskDataTy Data;
1268 SmallVector<const Expr *, 4> TaskLHSs;
1269 SmallVector<const Expr *, 4> TaskRHSs;
1270 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1271 if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
1272 continue;
1273 Shareds.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
1274 Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
1275 ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
1276 LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
1277 RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
1278 if (C->getModifier() == OMPC_REDUCTION_task) {
1279 Data.ReductionVars.append(in_start: C->privates().begin(), in_end: C->privates().end());
1280 Data.ReductionOrigs.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
1281 Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end());
1282 Data.ReductionOps.append(in_start: C->reduction_ops().begin(),
1283 in_end: C->reduction_ops().end());
1284 TaskLHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
1285 TaskRHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
1286 }
1287 }
1288 ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
1289 unsigned Count = 0;
1290 auto *ILHS = LHSs.begin();
1291 auto *IRHS = RHSs.begin();
1292 auto *IPriv = Privates.begin();
1293 for (const Expr *IRef : Shareds) {
1294 const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IPriv)->getDecl());
1295 // Emit private VarDecl with reduction init.
1296 RedCG.emitSharedOrigLValue(CGF&: *this, N: Count);
1297 RedCG.emitAggregateType(CGF&: *this, N: Count);
1298 AutoVarEmission Emission = EmitAutoVarAlloca(var: *PrivateVD);
1299 RedCG.emitInitialization(CGF&: *this, N: Count, PrivateAddr: Emission.getAllocatedAddress(),
1300 SharedAddr: RedCG.getSharedLValue(N: Count).getAddress(),
1301 DefaultInit: [&Emission](CodeGenFunction &CGF) {
1302 CGF.EmitAutoVarInit(emission: Emission);
1303 return true;
1304 });
1305 EmitAutoVarCleanups(emission: Emission);
1306 Address BaseAddr = RedCG.adjustPrivateAddress(
1307 CGF&: *this, N: Count, PrivateAddr: Emission.getAllocatedAddress());
1308 bool IsRegistered =
1309 PrivateScope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Count), Addr: BaseAddr);
1310 assert(IsRegistered && "private var already registered as private");
1311 // Silence the warning about unused variable.
1312 (void)IsRegistered;
1313
1314 const auto *LHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl());
1315 const auto *RHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl());
1316 QualType Type = PrivateVD->getType();
1317 bool isaOMPArraySectionExpr = isa<ArraySectionExpr>(Val: IRef);
1318 if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
1319 // Store the address of the original variable associated with the LHS
1320 // implicit variable.
1321 PrivateScope.addPrivate(LocalVD: LHSVD, Addr: RedCG.getSharedLValue(N: Count).getAddress());
1322 PrivateScope.addPrivate(LocalVD: RHSVD, Addr: GetAddrOfLocalVar(VD: PrivateVD));
1323 } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
1324 isa<ArraySubscriptExpr>(Val: IRef)) {
1325 // Store the address of the original variable associated with the LHS
1326 // implicit variable.
1327 PrivateScope.addPrivate(LocalVD: LHSVD, Addr: RedCG.getSharedLValue(N: Count).getAddress());
1328 PrivateScope.addPrivate(LocalVD: RHSVD,
1329 Addr: GetAddrOfLocalVar(VD: PrivateVD).withElementType(
1330 ElemTy: ConvertTypeForMem(T: RHSVD->getType())));
1331 } else {
1332 QualType Type = PrivateVD->getType();
1333 bool IsArray = getContext().getAsArrayType(T: Type) != nullptr;
1334 Address OriginalAddr = RedCG.getSharedLValue(N: Count).getAddress();
1335 // Store the address of the original variable associated with the LHS
1336 // implicit variable.
1337 if (IsArray) {
1338 OriginalAddr =
1339 OriginalAddr.withElementType(ElemTy: ConvertTypeForMem(T: LHSVD->getType()));
1340 }
1341 PrivateScope.addPrivate(LocalVD: LHSVD, Addr: OriginalAddr);
1342 PrivateScope.addPrivate(
1343 LocalVD: RHSVD, Addr: IsArray ? GetAddrOfLocalVar(VD: PrivateVD).withElementType(
1344 ElemTy: ConvertTypeForMem(T: RHSVD->getType()))
1345 : GetAddrOfLocalVar(VD: PrivateVD));
1346 }
1347 ++ILHS;
1348 ++IRHS;
1349 ++IPriv;
1350 ++Count;
1351 }
1352 if (!Data.ReductionVars.empty()) {
1353 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
1354 Data.IsReductionWithTaskMod = true;
1355 Data.IsWorksharingReduction = isOpenMPWorksharingDirective(DKind: EKind);
1356 llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
1357 CGF&: *this, Loc: D.getBeginLoc(), LHSExprs: TaskLHSs, RHSExprs: TaskRHSs, Data);
1358 const Expr *TaskRedRef = nullptr;
1359 switch (EKind) {
1360 case OMPD_parallel:
1361 TaskRedRef = cast<OMPParallelDirective>(Val: D).getTaskReductionRefExpr();
1362 break;
1363 case OMPD_for:
1364 TaskRedRef = cast<OMPForDirective>(Val: D).getTaskReductionRefExpr();
1365 break;
1366 case OMPD_sections:
1367 TaskRedRef = cast<OMPSectionsDirective>(Val: D).getTaskReductionRefExpr();
1368 break;
1369 case OMPD_parallel_for:
1370 TaskRedRef = cast<OMPParallelForDirective>(Val: D).getTaskReductionRefExpr();
1371 break;
1372 case OMPD_parallel_master:
1373 TaskRedRef =
1374 cast<OMPParallelMasterDirective>(Val: D).getTaskReductionRefExpr();
1375 break;
1376 case OMPD_parallel_sections:
1377 TaskRedRef =
1378 cast<OMPParallelSectionsDirective>(Val: D).getTaskReductionRefExpr();
1379 break;
1380 case OMPD_target_parallel:
1381 TaskRedRef =
1382 cast<OMPTargetParallelDirective>(Val: D).getTaskReductionRefExpr();
1383 break;
1384 case OMPD_target_parallel_for:
1385 TaskRedRef =
1386 cast<OMPTargetParallelForDirective>(Val: D).getTaskReductionRefExpr();
1387 break;
1388 case OMPD_distribute_parallel_for:
1389 TaskRedRef =
1390 cast<OMPDistributeParallelForDirective>(Val: D).getTaskReductionRefExpr();
1391 break;
1392 case OMPD_teams_distribute_parallel_for:
1393 TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(Val: D)
1394 .getTaskReductionRefExpr();
1395 break;
1396 case OMPD_target_teams_distribute_parallel_for:
1397 TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(Val: D)
1398 .getTaskReductionRefExpr();
1399 break;
1400 case OMPD_simd:
1401 case OMPD_for_simd:
1402 case OMPD_section:
1403 case OMPD_single:
1404 case OMPD_master:
1405 case OMPD_critical:
1406 case OMPD_parallel_for_simd:
1407 case OMPD_task:
1408 case OMPD_taskyield:
1409 case OMPD_error:
1410 case OMPD_barrier:
1411 case OMPD_taskwait:
1412 case OMPD_taskgroup:
1413 case OMPD_flush:
1414 case OMPD_depobj:
1415 case OMPD_scan:
1416 case OMPD_ordered:
1417 case OMPD_atomic:
1418 case OMPD_teams:
1419 case OMPD_target:
1420 case OMPD_cancellation_point:
1421 case OMPD_cancel:
1422 case OMPD_target_data:
1423 case OMPD_target_enter_data:
1424 case OMPD_target_exit_data:
1425 case OMPD_taskloop:
1426 case OMPD_taskloop_simd:
1427 case OMPD_master_taskloop:
1428 case OMPD_master_taskloop_simd:
1429 case OMPD_parallel_master_taskloop:
1430 case OMPD_parallel_master_taskloop_simd:
1431 case OMPD_distribute:
1432 case OMPD_target_update:
1433 case OMPD_distribute_parallel_for_simd:
1434 case OMPD_distribute_simd:
1435 case OMPD_target_parallel_for_simd:
1436 case OMPD_target_simd:
1437 case OMPD_teams_distribute:
1438 case OMPD_teams_distribute_simd:
1439 case OMPD_teams_distribute_parallel_for_simd:
1440 case OMPD_target_teams:
1441 case OMPD_target_teams_distribute:
1442 case OMPD_target_teams_distribute_parallel_for_simd:
1443 case OMPD_target_teams_distribute_simd:
1444 case OMPD_declare_target:
1445 case OMPD_end_declare_target:
1446 case OMPD_threadprivate:
1447 case OMPD_allocate:
1448 case OMPD_declare_reduction:
1449 case OMPD_declare_mapper:
1450 case OMPD_declare_simd:
1451 case OMPD_requires:
1452 case OMPD_declare_variant:
1453 case OMPD_begin_declare_variant:
1454 case OMPD_end_declare_variant:
1455 case OMPD_unknown:
1456 default:
1457 llvm_unreachable("Unexpected directive with task reductions.");
1458 }
1459
1460 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: TaskRedRef)->getDecl());
1461 EmitVarDecl(D: *VD);
1462 EmitStoreOfScalar(Value: ReductionDesc, Addr: GetAddrOfLocalVar(VD),
1463 /*Volatile=*/false, Ty: TaskRedRef->getType());
1464 }
1465}
1466
1467void CodeGenFunction::EmitOMPReductionClauseFinal(
1468 const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1469 if (!HaveInsertPoint())
1470 return;
1471 llvm::SmallVector<const Expr *, 8> Privates;
1472 llvm::SmallVector<const Expr *, 8> LHSExprs;
1473 llvm::SmallVector<const Expr *, 8> RHSExprs;
1474 llvm::SmallVector<const Expr *, 8> ReductionOps;
1475 llvm::SmallVector<bool, 8> IsPrivateVarReduction;
1476 bool HasAtLeastOneReduction = false;
1477 bool IsReductionWithTaskMod = false;
1478 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1479 // Do not emit for inscan reductions.
1480 if (C->getModifier() == OMPC_REDUCTION_inscan)
1481 continue;
1482 HasAtLeastOneReduction = true;
1483 Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
1484 LHSExprs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
1485 RHSExprs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
1486 IsPrivateVarReduction.append(in_start: C->private_var_reduction_flags().begin(),
1487 in_end: C->private_var_reduction_flags().end());
1488 ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
1489 IsReductionWithTaskMod =
1490 IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
1491 }
1492 if (HasAtLeastOneReduction) {
1493 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
1494 if (IsReductionWithTaskMod) {
1495 CGM.getOpenMPRuntime().emitTaskReductionFini(
1496 CGF&: *this, Loc: D.getBeginLoc(), IsWorksharingReduction: isOpenMPWorksharingDirective(DKind: EKind));
1497 }
1498 bool TeamsLoopCanBeParallel = false;
1499 if (auto *TTLD = dyn_cast<OMPTargetTeamsGenericLoopDirective>(Val: &D))
1500 TeamsLoopCanBeParallel = TTLD->canBeParallelFor();
1501 bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1502 isOpenMPParallelDirective(DKind: EKind) ||
1503 TeamsLoopCanBeParallel || ReductionKind == OMPD_simd;
1504 bool SimpleReduction = ReductionKind == OMPD_simd;
1505 // Emit nowait reduction if nowait clause is present or directive is a
1506 // parallel directive (it always has implicit barrier).
1507 CGM.getOpenMPRuntime().emitReduction(
1508 CGF&: *this, Loc: D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
1509 Options: {.WithNowait: WithNowait, .SimpleReduction: SimpleReduction, .IsPrivateVarReduction: IsPrivateVarReduction, .ReductionKind: ReductionKind});
1510 }
1511}
1512
1513static void emitPostUpdateForReductionClause(
1514 CodeGenFunction &CGF, const OMPExecutableDirective &D,
1515 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1516 if (!CGF.HaveInsertPoint())
1517 return;
1518 llvm::BasicBlock *DoneBB = nullptr;
1519 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1520 if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
1521 if (!DoneBB) {
1522 if (llvm::Value *Cond = CondGen(CGF)) {
1523 // If the first post-update expression is found, emit conditional
1524 // block if it was requested.
1525 llvm::BasicBlock *ThenBB = CGF.createBasicBlock(name: ".omp.reduction.pu");
1526 DoneBB = CGF.createBasicBlock(name: ".omp.reduction.pu.done");
1527 CGF.Builder.CreateCondBr(Cond, True: ThenBB, False: DoneBB);
1528 CGF.EmitBlock(BB: ThenBB);
1529 }
1530 }
1531 CGF.EmitIgnoredExpr(E: PostUpdate);
1532 }
1533 }
1534 if (DoneBB)
1535 CGF.EmitBlock(BB: DoneBB, /*IsFinished=*/true);
1536}
1537
1538namespace {
1539/// Codegen lambda for appending distribute lower and upper bounds to outlined
1540/// parallel function. This is necessary for combined constructs such as
1541/// 'distribute parallel for'
1542typedef llvm::function_ref<void(CodeGenFunction &,
1543 const OMPExecutableDirective &,
1544 llvm::SmallVectorImpl<llvm::Value *> &)>
1545 CodeGenBoundParametersTy;
1546} // anonymous namespace
1547
1548static void
1549checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
1550 const OMPExecutableDirective &S) {
1551 if (CGF.getLangOpts().OpenMP < 50)
1552 return;
1553 llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
1554 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
1555 for (const Expr *Ref : C->varlist()) {
1556 if (!Ref->getType()->isScalarType())
1557 continue;
1558 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
1559 if (!DRE)
1560 continue;
1561 PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl()));
1562 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: Ref);
1563 }
1564 }
1565 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
1566 for (const Expr *Ref : C->varlist()) {
1567 if (!Ref->getType()->isScalarType())
1568 continue;
1569 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
1570 if (!DRE)
1571 continue;
1572 PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl()));
1573 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: Ref);
1574 }
1575 }
1576 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
1577 for (const Expr *Ref : C->varlist()) {
1578 if (!Ref->getType()->isScalarType())
1579 continue;
1580 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
1581 if (!DRE)
1582 continue;
1583 PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl()));
1584 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: Ref);
1585 }
1586 }
1587 // Privates should ne analyzed since they are not captured at all.
1588 // Task reductions may be skipped - tasks are ignored.
1589 // Firstprivates do not return value but may be passed by reference - no need
1590 // to check for updated lastprivate conditional.
1591 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
1592 for (const Expr *Ref : C->varlist()) {
1593 if (!Ref->getType()->isScalarType())
1594 continue;
1595 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
1596 if (!DRE)
1597 continue;
1598 PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl()));
1599 }
1600 }
1601 CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
1602 CGF, D: S, IgnoredDecls: PrivateDecls);
1603}
1604
1605static void emitCommonOMPParallelDirective(
1606 CodeGenFunction &CGF, const OMPExecutableDirective &S,
1607 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1608 const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1609 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_parallel);
1610 llvm::Value *NumThreads = nullptr;
1611 llvm::Function *OutlinedFn =
1612 CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1613 CGF, D: S, ThreadIDVar: *CS->getCapturedDecl()->param_begin(), InnermostKind,
1614 CodeGen);
1615 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1616 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1617 NumThreads = CGF.EmitScalarExpr(E: NumThreadsClause->getNumThreads(),
1618 /*IgnoreResultAssign=*/true);
1619 CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1620 CGF, NumThreads, Loc: NumThreadsClause->getBeginLoc());
1621 }
1622 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1623 CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1624 CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1625 CGF, ProcBind: ProcBindClause->getProcBindKind(), Loc: ProcBindClause->getBeginLoc());
1626 }
1627 const Expr *IfCond = nullptr;
1628 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1629 if (C->getNameModifier() == OMPD_unknown ||
1630 C->getNameModifier() == OMPD_parallel) {
1631 IfCond = C->getCondition();
1632 break;
1633 }
1634 }
1635
1636 OMPParallelScope Scope(CGF, S);
1637 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1638 // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1639 // lower and upper bounds with the pragma 'for' chunking mechanism.
1640 // The following lambda takes care of appending the lower and upper bound
1641 // parameters when necessary
1642 CodeGenBoundParameters(CGF, S, CapturedVars);
1643 CGF.GenerateOpenMPCapturedVars(S: *CS, CapturedVars);
1644 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, Loc: S.getBeginLoc(), OutlinedFn,
1645 CapturedVars, IfCond, NumThreads);
1646}
1647
1648static bool isAllocatableDecl(const VarDecl *VD) {
1649 const VarDecl *CVD = VD->getCanonicalDecl();
1650 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
1651 return false;
1652 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1653 // Use the default allocation.
1654 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
1655 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
1656 !AA->getAllocator());
1657}
1658
1659static void emitEmptyBoundParameters(CodeGenFunction &,
1660 const OMPExecutableDirective &,
1661 llvm::SmallVectorImpl<llvm::Value *> &) {}
1662
1663static void emitOMPCopyinClause(CodeGenFunction &CGF,
1664 const OMPExecutableDirective &S) {
1665 bool Copyins = CGF.EmitOMPCopyinClause(D: S);
1666 if (Copyins) {
1667 // Emit implicit barrier to synchronize threads and avoid data races on
1668 // propagation master's thread values of threadprivate variables to local
1669 // instances of that variables of all other implicit threads.
1670 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1671 CGF, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false,
1672 /*ForceSimpleCall=*/true);
1673 }
1674}
1675
1676Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
1677 CodeGenFunction &CGF, const VarDecl *VD) {
1678 CodeGenModule &CGM = CGF.CGM;
1679 auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1680
1681 if (!VD)
1682 return Address::invalid();
1683 const VarDecl *CVD = VD->getCanonicalDecl();
1684 if (!isAllocatableDecl(VD: CVD))
1685 return Address::invalid();
1686 llvm::Value *Size;
1687 CharUnits Align = CGM.getContext().getDeclAlign(D: CVD);
1688 if (CVD->getType()->isVariablyModifiedType()) {
1689 Size = CGF.getTypeSize(Ty: CVD->getType());
1690 // Align the size: ((size + align - 1) / align) * align
1691 Size = CGF.Builder.CreateNUWAdd(
1692 LHS: Size, RHS: CGM.getSize(numChars: Align - CharUnits::fromQuantity(Quantity: 1)));
1693 Size = CGF.Builder.CreateUDiv(LHS: Size, RHS: CGM.getSize(numChars: Align));
1694 Size = CGF.Builder.CreateNUWMul(LHS: Size, RHS: CGM.getSize(numChars: Align));
1695 } else {
1696 CharUnits Sz = CGM.getContext().getTypeSizeInChars(T: CVD->getType());
1697 Size = CGM.getSize(numChars: Sz.alignTo(Align));
1698 }
1699
1700 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1701 assert(AA->getAllocator() &&
1702 "Expected allocator expression for non-default allocator.");
1703 llvm::Value *Allocator = CGF.EmitScalarExpr(E: AA->getAllocator());
1704 // According to the standard, the original allocator type is a enum (integer).
1705 // Convert to pointer type, if required.
1706 if (Allocator->getType()->isIntegerTy())
1707 Allocator = CGF.Builder.CreateIntToPtr(V: Allocator, DestTy: CGM.VoidPtrTy);
1708 else if (Allocator->getType()->isPointerTy())
1709 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(V: Allocator,
1710 DestTy: CGM.VoidPtrTy);
1711
1712 llvm::Value *Addr = OMPBuilder.createOMPAlloc(
1713 Loc: CGF.Builder, Size, Allocator,
1714 Name: getNameWithSeparators(Parts: {CVD->getName(), ".void.addr"}, FirstSeparator: ".", Separator: "."));
1715 llvm::CallInst *FreeCI =
1716 OMPBuilder.createOMPFree(Loc: CGF.Builder, Addr, Allocator);
1717
1718 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(Kind: NormalAndEHCleanup, A: FreeCI);
1719 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1720 V: Addr,
1721 DestTy: CGF.ConvertTypeForMem(T: CGM.getContext().getPointerType(T: CVD->getType())),
1722 Name: getNameWithSeparators(Parts: {CVD->getName(), ".addr"}, FirstSeparator: ".", Separator: "."));
1723 return Address(Addr, CGF.ConvertTypeForMem(T: CVD->getType()), Align);
1724}
1725
1726Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
1727 CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
1728 SourceLocation Loc) {
1729 CodeGenModule &CGM = CGF.CGM;
1730 if (CGM.getLangOpts().OpenMPUseTLS &&
1731 CGM.getContext().getTargetInfo().isTLSSupported())
1732 return VDAddr;
1733
1734 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1735
1736 llvm::Type *VarTy = VDAddr.getElementType();
1737 llvm::Value *Data =
1738 CGF.Builder.CreatePointerCast(V: VDAddr.emitRawPointer(CGF), DestTy: CGM.Int8PtrTy);
1739 llvm::ConstantInt *Size = CGM.getSize(numChars: CGM.GetTargetTypeStoreSize(Ty: VarTy));
1740 std::string Suffix = getNameWithSeparators(Parts: {"cache", ""});
1741 llvm::Twine CacheName = Twine(CGM.getMangledName(GD: VD)).concat(Suffix);
1742
1743 llvm::CallInst *ThreadPrivateCacheCall =
1744 OMPBuilder.createCachedThreadPrivate(Loc: CGF.Builder, Pointer: Data, Size, Name: CacheName);
1745
1746 return Address(ThreadPrivateCacheCall, CGM.Int8Ty, VDAddr.getAlignment());
1747}
1748
1749std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
1750 ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
1751 SmallString<128> Buffer;
1752 llvm::raw_svector_ostream OS(Buffer);
1753 StringRef Sep = FirstSeparator;
1754 for (StringRef Part : Parts) {
1755 OS << Sep << Part;
1756 Sep = Separator;
1757 }
1758 return OS.str().str();
1759}
1760
1761void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
1762 CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
1763 InsertPointTy CodeGenIP, Twine RegionName) {
1764 CGBuilderTy &Builder = CGF.Builder;
1765 Builder.restoreIP(IP: CodeGenIP);
1766 llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
1767 Suffix: "." + RegionName + ".after");
1768
1769 {
1770 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
1771 CGF.EmitStmt(S: RegionBodyStmt);
1772 }
1773
1774 if (Builder.saveIP().isSet())
1775 Builder.CreateBr(Dest: FiniBB);
1776}
1777
1778void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
1779 CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
1780 InsertPointTy CodeGenIP, Twine RegionName) {
1781 CGBuilderTy &Builder = CGF.Builder;
1782 Builder.restoreIP(IP: CodeGenIP);
1783 llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
1784 Suffix: "." + RegionName + ".after");
1785
1786 {
1787 OMPBuilderCBHelpers::OutlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
1788 CGF.EmitStmt(S: RegionBodyStmt);
1789 }
1790
1791 if (Builder.saveIP().isSet())
1792 Builder.CreateBr(Dest: FiniBB);
1793}
1794
1795void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1796 if (CGM.getLangOpts().OpenMPIRBuilder) {
1797 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1798 // Check if we have any if clause associated with the directive.
1799 llvm::Value *IfCond = nullptr;
1800 if (const auto *C = S.getSingleClause<OMPIfClause>())
1801 IfCond = EmitScalarExpr(E: C->getCondition(),
1802 /*IgnoreResultAssign=*/true);
1803
1804 llvm::Value *NumThreads = nullptr;
1805 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
1806 NumThreads = EmitScalarExpr(E: NumThreadsClause->getNumThreads(),
1807 /*IgnoreResultAssign=*/true);
1808
1809 ProcBindKind ProcBind = OMP_PROC_BIND_default;
1810 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
1811 ProcBind = ProcBindClause->getProcBindKind();
1812
1813 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1814
1815 // The cleanup callback that finalizes all variables at the given location,
1816 // thus calls destructors etc.
1817 auto FiniCB = [this](InsertPointTy IP) {
1818 OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
1819 return llvm::Error::success();
1820 };
1821
1822 // Privatization callback that performs appropriate action for
1823 // shared/private/firstprivate/lastprivate/copyin/... variables.
1824 //
1825 // TODO: This defaults to shared right now.
1826 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1827 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
1828 // The next line is appropriate only for variables (Val) with the
1829 // data-sharing attribute "shared".
1830 ReplVal = &Val;
1831
1832 return CodeGenIP;
1833 };
1834
1835 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_parallel);
1836 const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();
1837
1838 auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
1839 InsertPointTy CodeGenIP) {
1840 OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
1841 CGF&: *this, RegionBodyStmt: ParallelRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "parallel");
1842 return llvm::Error::success();
1843 };
1844
1845 CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
1846 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
1847 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
1848 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
1849 llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(
1850 ValOrErr: OMPBuilder.createParallel(Loc: Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
1851 IfCondition: IfCond, NumThreads, ProcBind, IsCancellable: S.hasCancel()));
1852 Builder.restoreIP(IP: AfterIP);
1853 return;
1854 }
1855
1856 // Emit parallel region as a standalone region.
1857 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
1858 Action.Enter(CGF);
1859 OMPPrivateScope PrivateScope(CGF);
1860 emitOMPCopyinClause(CGF, S);
1861 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
1862 CGF.EmitOMPPrivateClause(D: S, PrivateScope);
1863 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
1864 (void)PrivateScope.Privatize();
1865 CGF.EmitStmt(S: S.getCapturedStmt(RegionKind: OMPD_parallel)->getCapturedStmt());
1866 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
1867 };
1868 {
1869 auto LPCRegion =
1870 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
1871 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_parallel, CodeGen,
1872 CodeGenBoundParameters: emitEmptyBoundParameters);
1873 emitPostUpdateForReductionClause(CGF&: *this, D: S,
1874 CondGen: [](CodeGenFunction &) { return nullptr; });
1875 }
1876 // Check for outer lastprivate conditional update.
1877 checkForLastprivateConditionalUpdate(CGF&: *this, S);
1878}
1879
1880void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) {
1881 EmitStmt(S: S.getIfStmt());
1882}
1883
1884namespace {
1885/// RAII to handle scopes for loop transformation directives.
1886class OMPTransformDirectiveScopeRAII {
1887 OMPLoopScope *Scope = nullptr;
1888 CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr;
1889 CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;
1890
1891 OMPTransformDirectiveScopeRAII(const OMPTransformDirectiveScopeRAII &) =
1892 delete;
1893 OMPTransformDirectiveScopeRAII &
1894 operator=(const OMPTransformDirectiveScopeRAII &) = delete;
1895
1896public:
1897 OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
1898 if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(Val: S)) {
1899 Scope = new OMPLoopScope(CGF, *Dir);
1900 CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
1901 CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
1902 }
1903 }
1904 ~OMPTransformDirectiveScopeRAII() {
1905 if (!Scope)
1906 return;
1907 delete CapInfoRAII;
1908 delete CGSI;
1909 delete Scope;
1910 }
1911};
1912} // namespace
1913
1914static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
1915 int MaxLevel, int Level = 0) {
1916 assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
1917 const Stmt *SimplifiedS = S->IgnoreContainers();
1918 if (const auto *CS = dyn_cast<CompoundStmt>(Val: SimplifiedS)) {
1919 PrettyStackTraceLoc CrashInfo(
1920 CGF.getContext().getSourceManager(), CS->getLBracLoc(),
1921 "LLVM IR generation of compound statement ('{}')");
1922
1923 // Keep track of the current cleanup stack depth, including debug scopes.
1924 CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
1925 for (const Stmt *CurStmt : CS->body())
1926 emitBody(CGF, S: CurStmt, NextLoop, MaxLevel, Level);
1927 return;
1928 }
1929 if (SimplifiedS == NextLoop) {
1930 if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(Val: SimplifiedS))
1931 SimplifiedS = Dir->getTransformedStmt();
1932 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(Val: SimplifiedS))
1933 SimplifiedS = CanonLoop->getLoopStmt();
1934 if (const auto *For = dyn_cast<ForStmt>(Val: SimplifiedS)) {
1935 S = For->getBody();
1936 } else {
1937 assert(isa<CXXForRangeStmt>(SimplifiedS) &&
1938 "Expected canonical for loop or range-based for loop.");
1939 const auto *CXXFor = cast<CXXForRangeStmt>(Val: SimplifiedS);
1940 CGF.EmitStmt(S: CXXFor->getLoopVarStmt());
1941 S = CXXFor->getBody();
1942 }
1943 if (Level + 1 < MaxLevel) {
1944 NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
1945 CurStmt: S, /*TryImperfectlyNestedLoops=*/true);
1946 emitBody(CGF, S, NextLoop, MaxLevel, Level: Level + 1);
1947 return;
1948 }
1949 }
1950 CGF.EmitStmt(S);
1951}
1952
1953void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1954 JumpDest LoopExit) {
1955 RunCleanupsScope BodyScope(*this);
1956 // Update counters values on current iteration.
1957 for (const Expr *UE : D.updates())
1958 EmitIgnoredExpr(E: UE);
1959 // Update the linear variables.
1960 // In distribute directives only loop counters may be marked as linear, no
1961 // need to generate the code for them.
1962 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
1963 if (!isOpenMPDistributeDirective(DKind: EKind)) {
1964 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1965 for (const Expr *UE : C->updates())
1966 EmitIgnoredExpr(E: UE);
1967 }
1968 }
1969
1970 // On a continue in the body, jump to the end.
1971 JumpDest Continue = getJumpDestInCurrentScope(Name: "omp.body.continue");
1972 BreakContinueStack.push_back(Elt: BreakContinue(LoopExit, Continue));
1973 for (const Expr *E : D.finals_conditions()) {
1974 if (!E)
1975 continue;
1976 // Check that loop counter in non-rectangular nest fits into the iteration
1977 // space.
1978 llvm::BasicBlock *NextBB = createBasicBlock(name: "omp.body.next");
1979 EmitBranchOnBoolExpr(Cond: E, TrueBlock: NextBB, FalseBlock: Continue.getBlock(),
1980 TrueCount: getProfileCount(S: D.getBody()));
1981 EmitBlock(BB: NextBB);
1982 }
1983
1984 OMPPrivateScope InscanScope(*this);
1985 EmitOMPReductionClauseInit(D, PrivateScope&: InscanScope, /*ForInscan=*/true);
1986 bool IsInscanRegion = InscanScope.Privatize();
1987 if (IsInscanRegion) {
1988 // Need to remember the block before and after scan directive
1989 // to dispatch them correctly depending on the clause used in
1990 // this directive, inclusive or exclusive. For inclusive scan the natural
1991 // order of the blocks is used, for exclusive clause the blocks must be
1992 // executed in reverse order.
1993 OMPBeforeScanBlock = createBasicBlock(name: "omp.before.scan.bb");
1994 OMPAfterScanBlock = createBasicBlock(name: "omp.after.scan.bb");
1995 // No need to allocate inscan exit block, in simd mode it is selected in the
1996 // codegen for the scan directive.
1997 if (EKind != OMPD_simd && !getLangOpts().OpenMPSimd)
1998 OMPScanExitBlock = createBasicBlock(name: "omp.exit.inscan.bb");
1999 OMPScanDispatch = createBasicBlock(name: "omp.inscan.dispatch");
2000 EmitBranch(Block: OMPScanDispatch);
2001 EmitBlock(BB: OMPBeforeScanBlock);
2002 }
2003
2004 // Emit loop variables for C++ range loops.
2005 const Stmt *Body =
2006 D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
2007 // Emit loop body.
2008 emitBody(CGF&: *this, S: Body,
2009 NextLoop: OMPLoopBasedDirective::tryToFindNextInnerLoop(
2010 CurStmt: Body, /*TryImperfectlyNestedLoops=*/true),
2011 MaxLevel: D.getLoopsNumber());
2012
2013 // Jump to the dispatcher at the end of the loop body.
2014 if (IsInscanRegion)
2015 EmitBranch(Block: OMPScanExitBlock);
2016
2017 // The end (updates/cleanups).
2018 EmitBlock(BB: Continue.getBlock());
2019 BreakContinueStack.pop_back();
2020}
2021
2022using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;
2023
2024/// Emit a captured statement and return the function as well as its captured
2025/// closure context.
2026static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF,
2027 const CapturedStmt *S) {
2028 LValue CapStruct = ParentCGF.InitCapturedStruct(S: *S);
2029 CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true);
2030 std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI =
2031 std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(args: *S);
2032 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get());
2033 llvm::Function *F = CGF.GenerateCapturedStmtFunction(S: *S);
2034
2035 return {F, CapStruct.getPointer(CGF&: ParentCGF)};
2036}
2037
2038/// Emit a call to a previously captured closure.
2039static llvm::CallInst *
2040emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap,
2041 llvm::ArrayRef<llvm::Value *> Args) {
2042 // Append the closure context to the argument.
2043 SmallVector<llvm::Value *> EffectiveArgs;
2044 EffectiveArgs.reserve(N: Args.size() + 1);
2045 llvm::append_range(C&: EffectiveArgs, R&: Args);
2046 EffectiveArgs.push_back(Elt: Cap.second);
2047
2048 return ParentCGF.Builder.CreateCall(Callee: Cap.first, Args: EffectiveArgs);
2049}
2050
2051llvm::CanonicalLoopInfo *
2052CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
2053 assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");
2054
2055 // The caller is processing the loop-associated directive processing the \p
2056 // Depth loops nested in \p S. Put the previous pending loop-associated
2057 // directive to the stack. If the current loop-associated directive is a loop
2058 // transformation directive, it will push its generated loops onto the stack
2059 // such that together with the loops left here they form the combined loop
2060 // nest for the parent loop-associated directive.
2061 int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth;
2062 ExpectedOMPLoopDepth = Depth;
2063
2064 EmitStmt(S);
2065 assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");
2066
2067 // The last added loop is the outermost one.
2068 llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back();
2069
2070 // Pop the \p Depth loops requested by the call from that stack and restore
2071 // the previous context.
2072 OMPLoopNestStack.pop_back_n(NumItems: Depth);
2073 ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth;
2074
2075 return Result;
2076}
2077
2078void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
2079 const Stmt *SyntacticalLoop = S->getLoopStmt();
2080 if (!getLangOpts().OpenMPIRBuilder) {
2081 // Ignore if OpenMPIRBuilder is not enabled.
2082 EmitStmt(S: SyntacticalLoop);
2083 return;
2084 }
2085
2086 LexicalScope ForScope(*this, S->getSourceRange());
2087
2088 // Emit init statements. The Distance/LoopVar funcs may reference variable
2089 // declarations they contain.
2090 const Stmt *BodyStmt;
2091 if (const auto *For = dyn_cast<ForStmt>(Val: SyntacticalLoop)) {
2092 if (const Stmt *InitStmt = For->getInit())
2093 EmitStmt(S: InitStmt);
2094 BodyStmt = For->getBody();
2095 } else if (const auto *RangeFor =
2096 dyn_cast<CXXForRangeStmt>(Val: SyntacticalLoop)) {
2097 if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt())
2098 EmitStmt(S: RangeStmt);
2099 if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt())
2100 EmitStmt(S: BeginStmt);
2101 if (const DeclStmt *EndStmt = RangeFor->getEndStmt())
2102 EmitStmt(S: EndStmt);
2103 if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt())
2104 EmitStmt(S: LoopVarStmt);
2105 BodyStmt = RangeFor->getBody();
2106 } else
2107 llvm_unreachable("Expected for-stmt or range-based for-stmt");
2108
2109 // Emit closure for later use. By-value captures will be captured here.
2110 const CapturedStmt *DistanceFunc = S->getDistanceFunc();
2111 EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(ParentCGF&: *this, S: DistanceFunc);
2112 const CapturedStmt *LoopVarFunc = S->getLoopVarFunc();
2113 EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(ParentCGF&: *this, S: LoopVarFunc);
2114
2115 // Call the distance function to get the number of iterations of the loop to
2116 // come.
2117 QualType LogicalTy = DistanceFunc->getCapturedDecl()
2118 ->getParam(i: 0)
2119 ->getType()
2120 .getNonReferenceType();
2121 RawAddress CountAddr = CreateMemTemp(T: LogicalTy, Name: ".count.addr");
2122 emitCapturedStmtCall(ParentCGF&: *this, Cap: DistanceClosure, Args: {CountAddr.getPointer()});
2123 llvm::Value *DistVal = Builder.CreateLoad(Addr: CountAddr, Name: ".count");
2124
2125 // Emit the loop structure.
2126 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
2127 auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
2128 llvm::Value *IndVar) {
2129 Builder.restoreIP(IP: CodeGenIP);
2130
2131 // Emit the loop body: Convert the logical iteration number to the loop
2132 // variable and emit the body.
2133 const DeclRefExpr *LoopVarRef = S->getLoopVarRef();
2134 LValue LCVal = EmitLValue(E: LoopVarRef);
2135 Address LoopVarAddress = LCVal.getAddress();
2136 emitCapturedStmtCall(ParentCGF&: *this, Cap: LoopVarClosure,
2137 Args: {LoopVarAddress.emitRawPointer(CGF&: *this), IndVar});
2138
2139 RunCleanupsScope BodyScope(*this);
2140 EmitStmt(S: BodyStmt);
2141 return llvm::Error::success();
2142 };
2143
2144 llvm::CanonicalLoopInfo *CL =
2145 cantFail(ValOrErr: OMPBuilder.createCanonicalLoop(Loc: Builder, BodyGenCB: BodyGen, TripCount: DistVal));
2146
2147 // Finish up the loop.
2148 Builder.restoreIP(IP: CL->getAfterIP());
2149 ForScope.ForceCleanup();
2150
2151 // Remember the CanonicalLoopInfo for parent AST nodes consuming it.
2152 OMPLoopNestStack.push_back(Elt: CL);
2153}
2154
2155void CodeGenFunction::EmitOMPInnerLoop(
2156 const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
2157 const Expr *IncExpr,
2158 const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
2159 const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
2160 auto LoopExit = getJumpDestInCurrentScope(Name: "omp.inner.for.end");
2161
2162 // Start the loop with a block that tests the condition.
2163 auto CondBlock = createBasicBlock(name: "omp.inner.for.cond");
2164 EmitBlock(BB: CondBlock);
2165 const SourceRange R = S.getSourceRange();
2166
2167 // If attributes are attached, push to the basic block with them.
2168 const auto &OMPED = cast<OMPExecutableDirective>(Val: S);
2169 const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
2170 const Stmt *SS = ICS->getCapturedStmt();
2171 const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(Val: SS);
2172 OMPLoopNestStack.clear();
2173 if (AS)
2174 LoopStack.push(Header: CondBlock, Ctx&: CGM.getContext(), CGOpts: CGM.getCodeGenOpts(),
2175 Attrs: AS->getAttrs(), StartLoc: SourceLocToDebugLoc(Location: R.getBegin()),
2176 EndLoc: SourceLocToDebugLoc(Location: R.getEnd()));
2177 else
2178 LoopStack.push(Header: CondBlock, StartLoc: SourceLocToDebugLoc(Location: R.getBegin()),
2179 EndLoc: SourceLocToDebugLoc(Location: R.getEnd()));
2180
2181 // If there are any cleanups between here and the loop-exit scope,
2182 // create a block to stage a loop exit along.
2183 llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2184 if (RequiresCleanup)
2185 ExitBlock = createBasicBlock(name: "omp.inner.for.cond.cleanup");
2186
2187 llvm::BasicBlock *LoopBody = createBasicBlock(name: "omp.inner.for.body");
2188
2189 // Emit condition.
2190 EmitBranchOnBoolExpr(Cond: LoopCond, TrueBlock: LoopBody, FalseBlock: ExitBlock, TrueCount: getProfileCount(S: &S));
2191 if (ExitBlock != LoopExit.getBlock()) {
2192 EmitBlock(BB: ExitBlock);
2193 EmitBranchThroughCleanup(Dest: LoopExit);
2194 }
2195
2196 EmitBlock(BB: LoopBody);
2197 incrementProfileCounter(S: &S);
2198
2199 // Create a block for the increment.
2200 JumpDest Continue = getJumpDestInCurrentScope(Name: "omp.inner.for.inc");
2201 BreakContinueStack.push_back(Elt: BreakContinue(LoopExit, Continue));
2202
2203 BodyGen(*this);
2204
2205 // Emit "IV = IV + 1" and a back-edge to the condition block.
2206 EmitBlock(BB: Continue.getBlock());
2207 EmitIgnoredExpr(E: IncExpr);
2208 PostIncGen(*this);
2209 BreakContinueStack.pop_back();
2210 EmitBranch(Block: CondBlock);
2211 LoopStack.pop();
2212 // Emit the fall-through block.
2213 EmitBlock(BB: LoopExit.getBlock());
2214}
2215
2216bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
2217 if (!HaveInsertPoint())
2218 return false;
2219 // Emit inits for the linear variables.
2220 bool HasLinears = false;
2221 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2222 for (const Expr *Init : C->inits()) {
2223 HasLinears = true;
2224 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: Init)->getDecl());
2225 if (const auto *Ref =
2226 dyn_cast<DeclRefExpr>(Val: VD->getInit()->IgnoreImpCasts())) {
2227 AutoVarEmission Emission = EmitAutoVarAlloca(var: *VD);
2228 const auto *OrigVD = cast<VarDecl>(Val: Ref->getDecl());
2229 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
2230 CapturedStmtInfo->lookup(VD: OrigVD) != nullptr,
2231 VD->getInit()->getType(), VK_LValue,
2232 VD->getInit()->getExprLoc());
2233 EmitExprAsInit(
2234 init: &DRE, D: VD,
2235 lvalue: MakeAddrLValue(Addr: Emission.getAllocatedAddress(), T: VD->getType()),
2236 /*capturedByInit=*/false);
2237 EmitAutoVarCleanups(emission: Emission);
2238 } else {
2239 EmitVarDecl(D: *VD);
2240 }
2241 }
2242 // Emit the linear steps for the linear clauses.
2243 // If a step is not constant, it is pre-calculated before the loop.
2244 if (const auto *CS = cast_or_null<BinaryOperator>(Val: C->getCalcStep()))
2245 if (const auto *SaveRef = cast<DeclRefExpr>(Val: CS->getLHS())) {
2246 EmitVarDecl(D: *cast<VarDecl>(Val: SaveRef->getDecl()));
2247 // Emit calculation of the linear step.
2248 EmitIgnoredExpr(E: CS);
2249 }
2250 }
2251 return HasLinears;
2252}
2253
2254void CodeGenFunction::EmitOMPLinearClauseFinal(
2255 const OMPLoopDirective &D,
2256 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2257 if (!HaveInsertPoint())
2258 return;
2259 llvm::BasicBlock *DoneBB = nullptr;
2260 // Emit the final values of the linear variables.
2261 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2262 auto IC = C->varlist_begin();
2263 for (const Expr *F : C->finals()) {
2264 if (!DoneBB) {
2265 if (llvm::Value *Cond = CondGen(*this)) {
2266 // If the first post-update expression is found, emit conditional
2267 // block if it was requested.
2268 llvm::BasicBlock *ThenBB = createBasicBlock(name: ".omp.linear.pu");
2269 DoneBB = createBasicBlock(name: ".omp.linear.pu.done");
2270 Builder.CreateCondBr(Cond, True: ThenBB, False: DoneBB);
2271 EmitBlock(BB: ThenBB);
2272 }
2273 }
2274 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IC)->getDecl());
2275 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
2276 CapturedStmtInfo->lookup(VD: OrigVD) != nullptr,
2277 (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
2278 Address OrigAddr = EmitLValue(E: &DRE).getAddress();
2279 CodeGenFunction::OMPPrivateScope VarScope(*this);
2280 VarScope.addPrivate(LocalVD: OrigVD, Addr: OrigAddr);
2281 (void)VarScope.Privatize();
2282 EmitIgnoredExpr(E: F);
2283 ++IC;
2284 }
2285 if (const Expr *PostUpdate = C->getPostUpdateExpr())
2286 EmitIgnoredExpr(E: PostUpdate);
2287 }
2288 if (DoneBB)
2289 EmitBlock(BB: DoneBB, /*IsFinished=*/true);
2290}
2291
2292static void emitAlignedClause(CodeGenFunction &CGF,
2293 const OMPExecutableDirective &D) {
2294 if (!CGF.HaveInsertPoint())
2295 return;
2296 for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
2297 llvm::APInt ClauseAlignment(64, 0);
2298 if (const Expr *AlignmentExpr = Clause->getAlignment()) {
2299 auto *AlignmentCI =
2300 cast<llvm::ConstantInt>(Val: CGF.EmitScalarExpr(E: AlignmentExpr));
2301 ClauseAlignment = AlignmentCI->getValue();
2302 }
2303 for (const Expr *E : Clause->varlist()) {
2304 llvm::APInt Alignment(ClauseAlignment);
2305 if (Alignment == 0) {
2306 // OpenMP [2.8.1, Description]
2307 // If no optional parameter is specified, implementation-defined default
2308 // alignments for SIMD instructions on the target platforms are assumed.
2309 Alignment =
2310 CGF.getContext()
2311 .toCharUnitsFromBits(BitSize: CGF.getContext().getOpenMPDefaultSimdAlign(
2312 T: E->getType()->getPointeeType()))
2313 .getQuantity();
2314 }
2315 assert((Alignment == 0 || Alignment.isPowerOf2()) &&
2316 "alignment is not power of 2");
2317 if (Alignment != 0) {
2318 llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
2319 CGF.emitAlignmentAssumption(
2320 PtrValue, E, /*No second loc needed*/ AssumptionLoc: SourceLocation(),
2321 Alignment: llvm::ConstantInt::get(Context&: CGF.getLLVMContext(), V: Alignment));
2322 }
2323 }
2324 }
2325}
2326
2327void CodeGenFunction::EmitOMPPrivateLoopCounters(
2328 const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
2329 if (!HaveInsertPoint())
2330 return;
2331 auto I = S.private_counters().begin();
2332 for (const Expr *E : S.counters()) {
2333 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
2334 const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *I)->getDecl());
2335 // Emit var without initialization.
2336 AutoVarEmission VarEmission = EmitAutoVarAlloca(var: *PrivateVD);
2337 EmitAutoVarCleanups(emission: VarEmission);
2338 LocalDeclMap.erase(Val: PrivateVD);
2339 (void)LoopScope.addPrivate(LocalVD: VD, Addr: VarEmission.getAllocatedAddress());
2340 if (LocalDeclMap.count(Val: VD) || CapturedStmtInfo->lookup(VD) ||
2341 VD->hasGlobalStorage()) {
2342 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
2343 LocalDeclMap.count(Val: VD) || CapturedStmtInfo->lookup(VD),
2344 E->getType(), VK_LValue, E->getExprLoc());
2345 (void)LoopScope.addPrivate(LocalVD: PrivateVD, Addr: EmitLValue(E: &DRE).getAddress());
2346 } else {
2347 (void)LoopScope.addPrivate(LocalVD: PrivateVD, Addr: VarEmission.getAllocatedAddress());
2348 }
2349 ++I;
2350 }
2351 // Privatize extra loop counters used in loops for ordered(n) clauses.
2352 for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
2353 if (!C->getNumForLoops())
2354 continue;
2355 for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size();
2356 I < E; ++I) {
2357 const auto *DRE = cast<DeclRefExpr>(Val: C->getLoopCounter(NumLoop: I));
2358 const auto *VD = cast<VarDecl>(Val: DRE->getDecl());
2359 // Override only those variables that can be captured to avoid re-emission
2360 // of the variables declared within the loops.
2361 if (DRE->refersToEnclosingVariableOrCapture()) {
2362 (void)LoopScope.addPrivate(
2363 LocalVD: VD, Addr: CreateMemTemp(T: DRE->getType(), Name: VD->getName()));
2364 }
2365 }
2366 }
2367}
2368
2369static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
2370 const Expr *Cond, llvm::BasicBlock *TrueBlock,
2371 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
2372 if (!CGF.HaveInsertPoint())
2373 return;
2374 {
2375 CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
2376 CGF.EmitOMPPrivateLoopCounters(S, LoopScope&: PreCondScope);
2377 (void)PreCondScope.Privatize();
2378 // Get initial values of real counters.
2379 for (const Expr *I : S.inits()) {
2380 CGF.EmitIgnoredExpr(E: I);
2381 }
2382 }
2383 // Create temp loop control variables with their init values to support
2384 // non-rectangular loops.
2385 CodeGenFunction::OMPMapVars PreCondVars;
2386 for (const Expr *E : S.dependent_counters()) {
2387 if (!E)
2388 continue;
2389 assert(!E->getType().getNonReferenceType()->isRecordType() &&
2390 "dependent counter must not be an iterator.");
2391 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
2392 Address CounterAddr =
2393 CGF.CreateMemTemp(T: VD->getType().getNonReferenceType());
2394 (void)PreCondVars.setVarAddr(CGF, LocalVD: VD, TempAddr: CounterAddr);
2395 }
2396 (void)PreCondVars.apply(CGF);
2397 for (const Expr *E : S.dependent_inits()) {
2398 if (!E)
2399 continue;
2400 CGF.EmitIgnoredExpr(E);
2401 }
2402 // Check that loop is executed at least one time.
2403 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
2404 PreCondVars.restore(CGF);
2405}
2406
2407void CodeGenFunction::EmitOMPLinearClause(
2408 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
2409 if (!HaveInsertPoint())
2410 return;
2411 llvm::DenseSet<const VarDecl *> SIMDLCVs;
2412 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
2413 if (isOpenMPSimdDirective(DKind: EKind)) {
2414 const auto *LoopDirective = cast<OMPLoopDirective>(Val: &D);
2415 for (const Expr *C : LoopDirective->counters()) {
2416 SIMDLCVs.insert(
2417 V: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: C)->getDecl())->getCanonicalDecl());
2418 }
2419 }
2420 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2421 auto CurPrivate = C->privates().begin();
2422 for (const Expr *E : C->varlist()) {
2423 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
2424 const auto *PrivateVD =
2425 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *CurPrivate)->getDecl());
2426 if (!SIMDLCVs.count(V: VD->getCanonicalDecl())) {
2427 // Emit private VarDecl with copy init.
2428 EmitVarDecl(D: *PrivateVD);
2429 bool IsRegistered =
2430 PrivateScope.addPrivate(LocalVD: VD, Addr: GetAddrOfLocalVar(VD: PrivateVD));
2431 assert(IsRegistered && "linear var already registered as private");
2432 // Silence the warning about unused variable.
2433 (void)IsRegistered;
2434 } else {
2435 EmitVarDecl(D: *PrivateVD);
2436 }
2437 ++CurPrivate;
2438 }
2439 }
2440}
2441
2442static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
2443 const OMPExecutableDirective &D) {
2444 if (!CGF.HaveInsertPoint())
2445 return;
2446 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
2447 RValue Len = CGF.EmitAnyExpr(E: C->getSimdlen(), aggSlot: AggValueSlot::ignored(),
2448 /*ignoreResult=*/true);
2449 auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal());
2450 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2451 // In presence of finite 'safelen', it may be unsafe to mark all
2452 // the memory instructions parallel, because loop-carried
2453 // dependences of 'safelen' iterations are possible.
2454 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
2455 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
2456 RValue Len = CGF.EmitAnyExpr(E: C->getSafelen(), aggSlot: AggValueSlot::ignored(),
2457 /*ignoreResult=*/true);
2458 auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal());
2459 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2460 // In presence of finite 'safelen', it may be unsafe to mark all
2461 // the memory instructions parallel, because loop-carried
2462 // dependences of 'safelen' iterations are possible.
2463 CGF.LoopStack.setParallel(/*Enable=*/false);
2464 }
2465}
2466
2467// Check for the presence of an `OMPOrderedDirective`,
2468// i.e., `ordered` in `#pragma omp ordered simd`.
2469//
2470// Consider the following source code:
2471// ```
2472// __attribute__((noinline)) void omp_simd_loop(float X[ARRAY_SIZE][ARRAY_SIZE])
2473// {
2474// for (int r = 1; r < ARRAY_SIZE; ++r) {
2475// for (int c = 1; c < ARRAY_SIZE; ++c) {
2476// #pragma omp simd
2477// for (int k = 2; k < ARRAY_SIZE; ++k) {
2478// #pragma omp ordered simd
2479// X[r][k] = X[r][k - 2] + sinf((float)(r / c));
2480// }
2481// }
2482// }
2483// }
2484// ```
2485//
2486// Suppose we are in `CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective
2487// &D)`. By examining `D.dump()` we have the following AST containing
2488// `OMPOrderedDirective`:
2489//
2490// ```
2491// OMPSimdDirective 0x1c32950
2492// `-CapturedStmt 0x1c32028
2493// |-CapturedDecl 0x1c310e8
2494// | |-ForStmt 0x1c31e30
2495// | | |-DeclStmt 0x1c31298
2496// | | | `-VarDecl 0x1c31208 used k 'int' cinit
2497// | | | `-IntegerLiteral 0x1c31278 'int' 2
2498// | | |-<<<NULL>>>
2499// | | |-BinaryOperator 0x1c31308 'int' '<'
2500// | | | |-ImplicitCastExpr 0x1c312f0 'int' <LValueToRValue>
2501// | | | | `-DeclRefExpr 0x1c312b0 'int' lvalue Var 0x1c31208 'k' 'int'
2502// | | | `-IntegerLiteral 0x1c312d0 'int' 256
2503// | | |-UnaryOperator 0x1c31348 'int' prefix '++'
2504// | | | `-DeclRefExpr 0x1c31328 'int' lvalue Var 0x1c31208 'k' 'int'
2505// | | `-CompoundStmt 0x1c31e18
2506// | | `-OMPOrderedDirective 0x1c31dd8
2507// | | |-OMPSimdClause 0x1c31380
2508// | | `-CapturedStmt 0x1c31cd0
2509// ```
2510//
2511// Note the presence of `OMPOrderedDirective` above:
2512// It's (transitively) nested in a `CapturedStmt` representing the pragma
2513// annotated compound statement. Thus, we need to consider this nesting and
2514// include checking the `getCapturedStmt` in this case.
2515static bool hasOrderedDirective(const Stmt *S) {
2516 if (isa<OMPOrderedDirective>(Val: S))
2517 return true;
2518
2519 if (const auto *CS = dyn_cast<CapturedStmt>(Val: S))
2520 return hasOrderedDirective(S: CS->getCapturedStmt());
2521
2522 for (const Stmt *Child : S->children()) {
2523 if (Child && hasOrderedDirective(S: Child))
2524 return true;
2525 }
2526
2527 return false;
2528}
2529
2530static void applyConservativeSimdOrderedDirective(const Stmt &AssociatedStmt,
2531 LoopInfoStack &LoopStack) {
2532 // Check for the presence of an `OMPOrderedDirective`
2533 // i.e., `ordered` in `#pragma omp ordered simd`
2534 bool HasOrderedDirective = hasOrderedDirective(S: &AssociatedStmt);
2535 // If present then conservatively disable loop vectorization
2536 // analogously to how `emitSimdlenSafelenClause` does.
2537 if (HasOrderedDirective)
2538 LoopStack.setParallel(/*Enable=*/false);
2539}
2540
2541void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
2542 // Walk clauses and process safelen/lastprivate.
2543 LoopStack.setParallel(/*Enable=*/true);
2544 LoopStack.setVectorizeEnable();
2545 const Stmt *AssociatedStmt = D.getAssociatedStmt();
2546 applyConservativeSimdOrderedDirective(AssociatedStmt: *AssociatedStmt, LoopStack);
2547 emitSimdlenSafelenClause(CGF&: *this, D);
2548 if (const auto *C = D.getSingleClause<OMPOrderClause>())
2549 if (C->getKind() == OMPC_ORDER_concurrent)
2550 LoopStack.setParallel(/*Enable=*/true);
2551 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
2552 if ((EKind == OMPD_simd ||
2553 (getLangOpts().OpenMPSimd && isOpenMPSimdDirective(DKind: EKind))) &&
2554 llvm::any_of(Range: D.getClausesOfKind<OMPReductionClause>(),
2555 P: [](const OMPReductionClause *C) {
2556 return C->getModifier() == OMPC_REDUCTION_inscan;
2557 }))
2558 // Disable parallel access in case of prefix sum.
2559 LoopStack.setParallel(/*Enable=*/false);
2560}
2561
2562void CodeGenFunction::EmitOMPSimdFinal(
2563 const OMPLoopDirective &D,
2564 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2565 if (!HaveInsertPoint())
2566 return;
2567 llvm::BasicBlock *DoneBB = nullptr;
2568 auto IC = D.counters().begin();
2569 auto IPC = D.private_counters().begin();
2570 for (const Expr *F : D.finals()) {
2571 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: (*IC))->getDecl());
2572 const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: (*IPC))->getDecl());
2573 const auto *CED = dyn_cast<OMPCapturedExprDecl>(Val: OrigVD);
2574 if (LocalDeclMap.count(Val: OrigVD) || CapturedStmtInfo->lookup(VD: OrigVD) ||
2575 OrigVD->hasGlobalStorage() || CED) {
2576 if (!DoneBB) {
2577 if (llvm::Value *Cond = CondGen(*this)) {
2578 // If the first post-update expression is found, emit conditional
2579 // block if it was requested.
2580 llvm::BasicBlock *ThenBB = createBasicBlock(name: ".omp.final.then");
2581 DoneBB = createBasicBlock(name: ".omp.final.done");
2582 Builder.CreateCondBr(Cond, True: ThenBB, False: DoneBB);
2583 EmitBlock(BB: ThenBB);
2584 }
2585 }
2586 Address OrigAddr = Address::invalid();
2587 if (CED) {
2588 OrigAddr = EmitLValue(E: CED->getInit()->IgnoreImpCasts()).getAddress();
2589 } else {
2590 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
2591 /*RefersToEnclosingVariableOrCapture=*/false,
2592 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
2593 OrigAddr = EmitLValue(E: &DRE).getAddress();
2594 }
2595 OMPPrivateScope VarScope(*this);
2596 VarScope.addPrivate(LocalVD: OrigVD, Addr: OrigAddr);
2597 (void)VarScope.Privatize();
2598 EmitIgnoredExpr(E: F);
2599 }
2600 ++IC;
2601 ++IPC;
2602 }
2603 if (DoneBB)
2604 EmitBlock(BB: DoneBB, /*IsFinished=*/true);
2605}
2606
2607static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
2608 const OMPLoopDirective &S,
2609 CodeGenFunction::JumpDest LoopExit) {
2610 CGF.EmitOMPLoopBody(D: S, LoopExit);
2611 CGF.EmitStopPoint(S: &S);
2612}
2613
2614/// Emit a helper variable and return corresponding lvalue.
2615static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
2616 const DeclRefExpr *Helper) {
2617 auto VDecl = cast<VarDecl>(Val: Helper->getDecl());
2618 CGF.EmitVarDecl(D: *VDecl);
2619 return CGF.EmitLValue(E: Helper);
2620}
2621
2622static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
2623 const RegionCodeGenTy &SimdInitGen,
2624 const RegionCodeGenTy &BodyCodeGen) {
2625 auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
2626 PrePostActionTy &) {
2627 CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
2628 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2629 SimdInitGen(CGF);
2630
2631 BodyCodeGen(CGF);
2632 };
2633 auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
2634 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2635 CGF.LoopStack.setVectorizeEnable(/*Enable=*/false);
2636
2637 BodyCodeGen(CGF);
2638 };
2639 const Expr *IfCond = nullptr;
2640 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
2641 if (isOpenMPSimdDirective(DKind: EKind)) {
2642 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2643 if (CGF.getLangOpts().OpenMP >= 50 &&
2644 (C->getNameModifier() == OMPD_unknown ||
2645 C->getNameModifier() == OMPD_simd)) {
2646 IfCond = C->getCondition();
2647 break;
2648 }
2649 }
2650 }
2651 if (IfCond) {
2652 CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, Cond: IfCond, ThenGen, ElseGen);
2653 } else {
2654 RegionCodeGenTy ThenRCG(ThenGen);
2655 ThenRCG(CGF);
2656 }
2657}
2658
2659static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
2660 PrePostActionTy &Action) {
2661 Action.Enter(CGF);
2662 OMPLoopScope PreInitScope(CGF, S);
2663 // if (PreCond) {
2664 // for (IV in 0..LastIteration) BODY;
2665 // <Final counter/linear vars updates>;
2666 // }
2667
2668 // The presence of lower/upper bound variable depends on the actual directive
2669 // kind in the AST node. The variables must be emitted because some of the
2670 // expressions associated with the loop will use them.
2671 OpenMPDirectiveKind DKind = S.getDirectiveKind();
2672 if (isOpenMPDistributeDirective(DKind) ||
2673 isOpenMPWorksharingDirective(DKind) || isOpenMPTaskLoopDirective(DKind) ||
2674 isOpenMPGenericLoopDirective(DKind)) {
2675 (void)EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: S.getLowerBoundVariable()));
2676 (void)EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: S.getUpperBoundVariable()));
2677 }
2678
2679 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
2680 // Emit: if (PreCond) - begin.
2681 // If the condition constant folds and can be elided, avoid emitting the
2682 // whole loop.
2683 bool CondConstant;
2684 llvm::BasicBlock *ContBlock = nullptr;
2685 if (CGF.ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) {
2686 if (!CondConstant)
2687 return;
2688 } else {
2689 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock(name: "simd.if.then");
2690 ContBlock = CGF.createBasicBlock(name: "simd.if.end");
2691 emitPreCond(CGF, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock,
2692 TrueCount: CGF.getProfileCount(S: &S));
2693 CGF.EmitBlock(BB: ThenBlock);
2694 CGF.incrementProfileCounter(S: &S);
2695 }
2696
2697 // Emit the loop iteration variable.
2698 const Expr *IVExpr = S.getIterationVariable();
2699 const auto *IVDecl = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IVExpr)->getDecl());
2700 CGF.EmitVarDecl(D: *IVDecl);
2701 CGF.EmitIgnoredExpr(E: S.getInit());
2702
2703 // Emit the iterations count variable.
2704 // If it is not a variable, Sema decided to calculate iterations count on
2705 // each iteration (e.g., it is foldable into a constant).
2706 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) {
2707 CGF.EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl()));
2708 // Emit calculation of the iterations count.
2709 CGF.EmitIgnoredExpr(E: S.getCalcLastIteration());
2710 }
2711
2712 emitAlignedClause(CGF, D: S);
2713 (void)CGF.EmitOMPLinearClauseInit(D: S);
2714 {
2715 CodeGenFunction::OMPPrivateScope LoopScope(CGF);
2716 CGF.EmitOMPPrivateClause(D: S, PrivateScope&: LoopScope);
2717 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
2718 CGF.EmitOMPLinearClause(D: S, PrivateScope&: LoopScope);
2719 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope&: LoopScope);
2720 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
2721 CGF, S, CGF.EmitLValue(E: S.getIterationVariable()));
2722 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope);
2723 (void)LoopScope.Privatize();
2724 if (isOpenMPTargetExecutionDirective(DKind: EKind))
2725 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S);
2726
2727 emitCommonSimdLoop(
2728 CGF, S,
2729 SimdInitGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2730 CGF.EmitOMPSimdInit(D: S);
2731 },
2732 BodyCodeGen: [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2733 CGF.EmitOMPInnerLoop(
2734 S, RequiresCleanup: LoopScope.requiresCleanups(), LoopCond: S.getCond(), IncExpr: S.getInc(),
2735 BodyGen: [&S](CodeGenFunction &CGF) {
2736 emitOMPLoopBodyWithStopPoint(CGF, S,
2737 LoopExit: CodeGenFunction::JumpDest());
2738 },
2739 PostIncGen: [](CodeGenFunction &) {});
2740 });
2741 CGF.EmitOMPSimdFinal(D: S, CondGen: [](CodeGenFunction &) { return nullptr; });
2742 // Emit final copy of the lastprivate variables at the end of loops.
2743 if (HasLastprivateClause)
2744 CGF.EmitOMPLastprivateClauseFinal(D: S, /*NoFinals=*/true);
2745 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_simd);
2746 emitPostUpdateForReductionClause(CGF, D: S,
2747 CondGen: [](CodeGenFunction &) { return nullptr; });
2748 LoopScope.restoreMap();
2749 CGF.EmitOMPLinearClauseFinal(D: S, CondGen: [](CodeGenFunction &) { return nullptr; });
2750 }
2751 // Emit: if (PreCond) - end.
2752 if (ContBlock) {
2753 CGF.EmitBranch(Block: ContBlock);
2754 CGF.EmitBlock(BB: ContBlock, IsFinished: true);
2755 }
2756}
2757
2758// Pass OMPLoopDirective (instead of OMPSimdDirective) to make this function
2759// available for "loop bind(thread)", which maps to "simd".
2760static bool isSimdSupportedByOpenMPIRBuilder(const OMPLoopDirective &S) {
2761 // Check for unsupported clauses
2762 for (OMPClause *C : S.clauses()) {
2763 // Currently only order, simdlen and safelen clauses are supported
2764 if (!(isa<OMPSimdlenClause>(Val: C) || isa<OMPSafelenClause>(Val: C) ||
2765 isa<OMPOrderClause>(Val: C) || isa<OMPAlignedClause>(Val: C)))
2766 return false;
2767 }
2768
2769 // Check if we have a statement with the ordered directive.
2770 // Visit the statement hierarchy to find a compound statement
2771 // with a ordered directive in it.
2772 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(Val: S.getRawStmt())) {
2773 if (const Stmt *SyntacticalLoop = CanonLoop->getLoopStmt()) {
2774 for (const Stmt *SubStmt : SyntacticalLoop->children()) {
2775 if (!SubStmt)
2776 continue;
2777 if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(Val: SubStmt)) {
2778 for (const Stmt *CSSubStmt : CS->children()) {
2779 if (!CSSubStmt)
2780 continue;
2781 if (isa<OMPOrderedDirective>(Val: CSSubStmt)) {
2782 return false;
2783 }
2784 }
2785 }
2786 }
2787 }
2788 }
2789 return true;
2790}
2791
2792static llvm::MapVector<llvm::Value *, llvm::Value *>
2793GetAlignedMapping(const OMPLoopDirective &S, CodeGenFunction &CGF) {
2794 llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars;
2795 for (const auto *Clause : S.getClausesOfKind<OMPAlignedClause>()) {
2796 llvm::APInt ClauseAlignment(64, 0);
2797 if (const Expr *AlignmentExpr = Clause->getAlignment()) {
2798 auto *AlignmentCI =
2799 cast<llvm::ConstantInt>(Val: CGF.EmitScalarExpr(E: AlignmentExpr));
2800 ClauseAlignment = AlignmentCI->getValue();
2801 }
2802 for (const Expr *E : Clause->varlist()) {
2803 llvm::APInt Alignment(ClauseAlignment);
2804 if (Alignment == 0) {
2805 // OpenMP [2.8.1, Description]
2806 // If no optional parameter is specified, implementation-defined default
2807 // alignments for SIMD instructions on the target platforms are assumed.
2808 Alignment =
2809 CGF.getContext()
2810 .toCharUnitsFromBits(BitSize: CGF.getContext().getOpenMPDefaultSimdAlign(
2811 T: E->getType()->getPointeeType()))
2812 .getQuantity();
2813 }
2814 assert((Alignment == 0 || Alignment.isPowerOf2()) &&
2815 "alignment is not power of 2");
2816 llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
2817 AlignedVars[PtrValue] = CGF.Builder.getInt64(C: Alignment.getSExtValue());
2818 }
2819 }
2820 return AlignedVars;
2821}
2822
2823// Pass OMPLoopDirective (instead of OMPSimdDirective) to make this function
2824// available for "loop bind(thread)", which maps to "simd".
2825static void emitOMPSimdDirective(const OMPLoopDirective &S,
2826 CodeGenFunction &CGF, CodeGenModule &CGM) {
2827 bool UseOMPIRBuilder =
2828 CGM.getLangOpts().OpenMPIRBuilder && isSimdSupportedByOpenMPIRBuilder(S);
2829 if (UseOMPIRBuilder) {
2830 auto &&CodeGenIRBuilder = [&S, &CGM, UseOMPIRBuilder](CodeGenFunction &CGF,
2831 PrePostActionTy &) {
2832 // Use the OpenMPIRBuilder if enabled.
2833 if (UseOMPIRBuilder) {
2834 llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars =
2835 GetAlignedMapping(S, CGF);
2836 // Emit the associated statement and get its loop representation.
2837 const Stmt *Inner = S.getRawStmt();
2838 llvm::CanonicalLoopInfo *CLI =
2839 CGF.EmitOMPCollapsedCanonicalLoopNest(S: Inner, Depth: 1);
2840
2841 llvm::OpenMPIRBuilder &OMPBuilder =
2842 CGM.getOpenMPRuntime().getOMPBuilder();
2843 // Add SIMD specific metadata
2844 llvm::ConstantInt *Simdlen = nullptr;
2845 if (const auto *C = S.getSingleClause<OMPSimdlenClause>()) {
2846 RValue Len = CGF.EmitAnyExpr(E: C->getSimdlen(), aggSlot: AggValueSlot::ignored(),
2847 /*ignoreResult=*/true);
2848 auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal());
2849 Simdlen = Val;
2850 }
2851 llvm::ConstantInt *Safelen = nullptr;
2852 if (const auto *C = S.getSingleClause<OMPSafelenClause>()) {
2853 RValue Len = CGF.EmitAnyExpr(E: C->getSafelen(), aggSlot: AggValueSlot::ignored(),
2854 /*ignoreResult=*/true);
2855 auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal());
2856 Safelen = Val;
2857 }
2858 llvm::omp::OrderKind Order = llvm::omp::OrderKind::OMP_ORDER_unknown;
2859 if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
2860 if (C->getKind() == OpenMPOrderClauseKind::OMPC_ORDER_concurrent) {
2861 Order = llvm::omp::OrderKind::OMP_ORDER_concurrent;
2862 }
2863 }
2864 // Add simd metadata to the collapsed loop. Do not generate
2865 // another loop for if clause. Support for if clause is done earlier.
2866 OMPBuilder.applySimd(Loop: CLI, AlignedVars,
2867 /*IfCond*/ nullptr, Order, Simdlen, Safelen);
2868 return;
2869 }
2870 };
2871 {
2872 auto LPCRegion =
2873 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
2874 OMPLexicalScope Scope(CGF, S, OMPD_unknown);
2875 CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_simd,
2876 CodeGen: CodeGenIRBuilder);
2877 }
2878 return;
2879 }
2880
2881 CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
2882 CGF.OMPFirstScanLoop = true;
2883 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2884 emitOMPSimdRegion(CGF, S, Action);
2885 };
2886 {
2887 auto LPCRegion =
2888 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
2889 OMPLexicalScope Scope(CGF, S, OMPD_unknown);
2890 CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_simd, CodeGen);
2891 }
2892 // Check for outer lastprivate conditional update.
2893 checkForLastprivateConditionalUpdate(CGF, S);
2894}
2895
2896void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
2897 emitOMPSimdDirective(S, CGF&: *this, CGM);
2898}
2899
2900void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
2901 // Emit the de-sugared statement.
2902 OMPTransformDirectiveScopeRAII TileScope(*this, &S);
2903 EmitStmt(S: S.getTransformedStmt());
2904}
2905
2906void CodeGenFunction::EmitOMPStripeDirective(const OMPStripeDirective &S) {
2907 // Emit the de-sugared statement.
2908 OMPTransformDirectiveScopeRAII StripeScope(*this, &S);
2909 EmitStmt(S: S.getTransformedStmt());
2910}
2911
2912void CodeGenFunction::EmitOMPReverseDirective(const OMPReverseDirective &S) {
2913 // Emit the de-sugared statement.
2914 OMPTransformDirectiveScopeRAII ReverseScope(*this, &S);
2915 EmitStmt(S: S.getTransformedStmt());
2916}
2917
2918void CodeGenFunction::EmitOMPInterchangeDirective(
2919 const OMPInterchangeDirective &S) {
2920 // Emit the de-sugared statement.
2921 OMPTransformDirectiveScopeRAII InterchangeScope(*this, &S);
2922 EmitStmt(S: S.getTransformedStmt());
2923}
2924
2925void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
2926 bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder;
2927
2928 if (UseOMPIRBuilder) {
2929 auto DL = SourceLocToDebugLoc(Location: S.getBeginLoc());
2930 const Stmt *Inner = S.getRawStmt();
2931
2932 // Consume nested loop. Clear the entire remaining loop stack because a
2933 // fully unrolled loop is non-transformable. For partial unrolling the
2934 // generated outer loop is pushed back to the stack.
2935 llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(S: Inner, Depth: 1);
2936 OMPLoopNestStack.clear();
2937
2938 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
2939
2940 bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1;
2941 llvm::CanonicalLoopInfo *UnrolledCLI = nullptr;
2942
2943 if (S.hasClausesOfKind<OMPFullClause>()) {
2944 assert(ExpectedOMPLoopDepth == 0);
2945 OMPBuilder.unrollLoopFull(DL, Loop: CLI);
2946 } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
2947 uint64_t Factor = 0;
2948 if (Expr *FactorExpr = PartialClause->getFactor()) {
2949 Factor = FactorExpr->EvaluateKnownConstInt(Ctx: getContext()).getZExtValue();
2950 assert(Factor >= 1 && "Only positive factors are valid");
2951 }
2952 OMPBuilder.unrollLoopPartial(DL, Loop: CLI, Factor,
2953 UnrolledCLI: NeedsUnrolledCLI ? &UnrolledCLI : nullptr);
2954 } else {
2955 OMPBuilder.unrollLoopHeuristic(DL, Loop: CLI);
2956 }
2957
2958 assert((!NeedsUnrolledCLI || UnrolledCLI) &&
2959 "NeedsUnrolledCLI implies UnrolledCLI to be set");
2960 if (UnrolledCLI)
2961 OMPLoopNestStack.push_back(Elt: UnrolledCLI);
2962
2963 return;
2964 }
2965
2966 // This function is only called if the unrolled loop is not consumed by any
2967 // other loop-associated construct. Such a loop-associated construct will have
2968 // used the transformed AST.
2969
2970 // Set the unroll metadata for the next emitted loop.
2971 LoopStack.setUnrollState(LoopAttributes::Enable);
2972
2973 if (S.hasClausesOfKind<OMPFullClause>()) {
2974 LoopStack.setUnrollState(LoopAttributes::Full);
2975 } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
2976 if (Expr *FactorExpr = PartialClause->getFactor()) {
2977 uint64_t Factor =
2978 FactorExpr->EvaluateKnownConstInt(Ctx: getContext()).getZExtValue();
2979 assert(Factor >= 1 && "Only positive factors are valid");
2980 LoopStack.setUnrollCount(Factor);
2981 }
2982 }
2983
2984 EmitStmt(S: S.getAssociatedStmt());
2985}
2986
2987void CodeGenFunction::EmitOMPOuterLoop(
2988 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
2989 CodeGenFunction::OMPPrivateScope &LoopScope,
2990 const CodeGenFunction::OMPLoopArguments &LoopArgs,
2991 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
2992 const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
2993 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2994
2995 const Expr *IVExpr = S.getIterationVariable();
2996 const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType());
2997 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2998
2999 JumpDest LoopExit = getJumpDestInCurrentScope(Name: "omp.dispatch.end");
3000
3001 // Start the loop with a block that tests the condition.
3002 llvm::BasicBlock *CondBlock = createBasicBlock(name: "omp.dispatch.cond");
3003 EmitBlock(BB: CondBlock);
3004 const SourceRange R = S.getSourceRange();
3005 OMPLoopNestStack.clear();
3006 LoopStack.push(Header: CondBlock, StartLoc: SourceLocToDebugLoc(Location: R.getBegin()),
3007 EndLoc: SourceLocToDebugLoc(Location: R.getEnd()));
3008
3009 llvm::Value *BoolCondVal = nullptr;
3010 if (!DynamicOrOrdered) {
3011 // UB = min(UB, GlobalUB) or
3012 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
3013 // 'distribute parallel for')
3014 EmitIgnoredExpr(E: LoopArgs.EUB);
3015 // IV = LB
3016 EmitIgnoredExpr(E: LoopArgs.Init);
3017 // IV < UB
3018 BoolCondVal = EvaluateExprAsBool(E: LoopArgs.Cond);
3019 } else {
3020 BoolCondVal =
3021 RT.emitForNext(CGF&: *this, Loc: S.getBeginLoc(), IVSize, IVSigned, IL: LoopArgs.IL,
3022 LB: LoopArgs.LB, UB: LoopArgs.UB, ST: LoopArgs.ST);
3023 }
3024
3025 // If there are any cleanups between here and the loop-exit scope,
3026 // create a block to stage a loop exit along.
3027 llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
3028 if (LoopScope.requiresCleanups())
3029 ExitBlock = createBasicBlock(name: "omp.dispatch.cleanup");
3030
3031 llvm::BasicBlock *LoopBody = createBasicBlock(name: "omp.dispatch.body");
3032 Builder.CreateCondBr(Cond: BoolCondVal, True: LoopBody, False: ExitBlock);
3033 if (ExitBlock != LoopExit.getBlock()) {
3034 EmitBlock(BB: ExitBlock);
3035 EmitBranchThroughCleanup(Dest: LoopExit);
3036 }
3037 EmitBlock(BB: LoopBody);
3038
3039 // Emit "IV = LB" (in case of static schedule, we have already calculated new
3040 // LB for loop condition and emitted it above).
3041 if (DynamicOrOrdered)
3042 EmitIgnoredExpr(E: LoopArgs.Init);
3043
3044 // Create a block for the increment.
3045 JumpDest Continue = getJumpDestInCurrentScope(Name: "omp.dispatch.inc");
3046 BreakContinueStack.push_back(Elt: BreakContinue(LoopExit, Continue));
3047
3048 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
3049 emitCommonSimdLoop(
3050 CGF&: *this, S,
3051 SimdInitGen: [&S, IsMonotonic, EKind](CodeGenFunction &CGF, PrePostActionTy &) {
3052 // Generate !llvm.loop.parallel metadata for loads and stores for loops
3053 // with dynamic/guided scheduling and without ordered clause.
3054 if (!isOpenMPSimdDirective(DKind: EKind)) {
3055 CGF.LoopStack.setParallel(!IsMonotonic);
3056 if (const auto *C = S.getSingleClause<OMPOrderClause>())
3057 if (C->getKind() == OMPC_ORDER_concurrent)
3058 CGF.LoopStack.setParallel(/*Enable=*/true);
3059 } else {
3060 CGF.EmitOMPSimdInit(D: S);
3061 }
3062 },
3063 BodyCodeGen: [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
3064 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
3065 SourceLocation Loc = S.getBeginLoc();
3066 // when 'distribute' is not combined with a 'for':
3067 // while (idx <= UB) { BODY; ++idx; }
3068 // when 'distribute' is combined with a 'for'
3069 // (e.g. 'distribute parallel for')
3070 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
3071 CGF.EmitOMPInnerLoop(
3072 S, RequiresCleanup: LoopScope.requiresCleanups(), LoopCond: LoopArgs.Cond, IncExpr: LoopArgs.IncExpr,
3073 BodyGen: [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
3074 CodeGenLoop(CGF, S, LoopExit);
3075 },
3076 PostIncGen: [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
3077 CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
3078 });
3079 });
3080
3081 EmitBlock(BB: Continue.getBlock());
3082 BreakContinueStack.pop_back();
3083 if (!DynamicOrOrdered) {
3084 // Emit "LB = LB + Stride", "UB = UB + Stride".
3085 EmitIgnoredExpr(E: LoopArgs.NextLB);
3086 EmitIgnoredExpr(E: LoopArgs.NextUB);
3087 }
3088
3089 EmitBranch(Block: CondBlock);
3090 OMPLoopNestStack.clear();
3091 LoopStack.pop();
3092 // Emit the fall-through block.
3093 EmitBlock(BB: LoopExit.getBlock());
3094
3095 // Tell the runtime we are done.
3096 auto &&CodeGen = [DynamicOrOrdered, &S, &LoopArgs](CodeGenFunction &CGF) {
3097 if (!DynamicOrOrdered)
3098 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, Loc: S.getEndLoc(),
3099 DKind: LoopArgs.DKind);
3100 };
3101 OMPCancelStack.emitExit(CGF&: *this, Kind: EKind, CodeGen);
3102}
3103
3104void CodeGenFunction::EmitOMPForOuterLoop(
3105 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
3106 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
3107 const OMPLoopArguments &LoopArgs,
3108 const CodeGenDispatchBoundsTy &CGDispatchBounds) {
3109 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3110
3111 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
3112 const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind: ScheduleKind.Schedule);
3113
3114 assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule,
3115 LoopArgs.Chunk != nullptr)) &&
3116 "static non-chunked schedule does not need outer loop");
3117
3118 // Emit outer loop.
3119 //
3120 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
3121 // When schedule(dynamic,chunk_size) is specified, the iterations are
3122 // distributed to threads in the team in chunks as the threads request them.
3123 // Each thread executes a chunk of iterations, then requests another chunk,
3124 // until no chunks remain to be distributed. Each chunk contains chunk_size
3125 // iterations, except for the last chunk to be distributed, which may have
3126 // fewer iterations. When no chunk_size is specified, it defaults to 1.
3127 //
3128 // When schedule(guided,chunk_size) is specified, the iterations are assigned
3129 // to threads in the team in chunks as the executing threads request them.
3130 // Each thread executes a chunk of iterations, then requests another chunk,
3131 // until no chunks remain to be assigned. For a chunk_size of 1, the size of
3132 // each chunk is proportional to the number of unassigned iterations divided
3133 // by the number of threads in the team, decreasing to 1. For a chunk_size
3134 // with value k (greater than 1), the size of each chunk is determined in the
3135 // same way, with the restriction that the chunks do not contain fewer than k
3136 // iterations (except for the last chunk to be assigned, which may have fewer
3137 // than k iterations).
3138 //
3139 // When schedule(auto) is specified, the decision regarding scheduling is
3140 // delegated to the compiler and/or runtime system. The programmer gives the
3141 // implementation the freedom to choose any possible mapping of iterations to
3142 // threads in the team.
3143 //
3144 // When schedule(runtime) is specified, the decision regarding scheduling is
3145 // deferred until run time, and the schedule and chunk size are taken from the
3146 // run-sched-var ICV. If the ICV is set to auto, the schedule is
3147 // implementation defined
3148 //
3149 // __kmpc_dispatch_init();
3150 // while(__kmpc_dispatch_next(&LB, &UB)) {
3151 // idx = LB;
3152 // while (idx <= UB) { BODY; ++idx;
3153 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
3154 // } // inner loop
3155 // }
3156 // __kmpc_dispatch_deinit();
3157 //
3158 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
3159 // When schedule(static, chunk_size) is specified, iterations are divided into
3160 // chunks of size chunk_size, and the chunks are assigned to the threads in
3161 // the team in a round-robin fashion in the order of the thread number.
3162 //
3163 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
3164 // while (idx <= UB) { BODY; ++idx; } // inner loop
3165 // LB = LB + ST;
3166 // UB = UB + ST;
3167 // }
3168 //
3169
3170 const Expr *IVExpr = S.getIterationVariable();
3171 const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType());
3172 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3173
3174 if (DynamicOrOrdered) {
3175 const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
3176 CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
3177 llvm::Value *LBVal = DispatchBounds.first;
3178 llvm::Value *UBVal = DispatchBounds.second;
3179 CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
3180 LoopArgs.Chunk};
3181 RT.emitForDispatchInit(CGF&: *this, Loc: S.getBeginLoc(), ScheduleKind, IVSize,
3182 IVSigned, Ordered, DispatchValues: DipatchRTInputValues);
3183 } else {
3184 CGOpenMPRuntime::StaticRTInput StaticInit(
3185 IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
3186 LoopArgs.ST, LoopArgs.Chunk);
3187 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
3188 RT.emitForStaticInit(CGF&: *this, Loc: S.getBeginLoc(), DKind: EKind, ScheduleKind,
3189 Values: StaticInit);
3190 }
3191
3192 auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
3193 const unsigned IVSize,
3194 const bool IVSigned) {
3195 if (Ordered) {
3196 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
3197 IVSigned);
3198 }
3199 };
3200
3201 OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
3202 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
3203 OuterLoopArgs.IncExpr = S.getInc();
3204 OuterLoopArgs.Init = S.getInit();
3205 OuterLoopArgs.Cond = S.getCond();
3206 OuterLoopArgs.NextLB = S.getNextLowerBound();
3207 OuterLoopArgs.NextUB = S.getNextUpperBound();
3208 OuterLoopArgs.DKind = LoopArgs.DKind;
3209 EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, LoopArgs: OuterLoopArgs,
3210 CodeGenLoop: emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
3211 if (DynamicOrOrdered) {
3212 RT.emitForDispatchDeinit(CGF&: *this, Loc: S.getBeginLoc());
3213 }
3214}
3215
3216static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
3217 const unsigned IVSize, const bool IVSigned) {}
3218
3219void CodeGenFunction::EmitOMPDistributeOuterLoop(
3220 OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
3221 OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
3222 const CodeGenLoopTy &CodeGenLoopContent) {
3223
3224 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3225
3226 // Emit outer loop.
3227 // Same behavior as a OMPForOuterLoop, except that schedule cannot be
3228 // dynamic
3229 //
3230
3231 const Expr *IVExpr = S.getIterationVariable();
3232 const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType());
3233 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3234 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
3235
3236 CGOpenMPRuntime::StaticRTInput StaticInit(
3237 IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
3238 LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
3239 RT.emitDistributeStaticInit(CGF&: *this, Loc: S.getBeginLoc(), SchedKind: ScheduleKind, Values: StaticInit);
3240
3241 // for combined 'distribute' and 'for' the increment expression of distribute
3242 // is stored in DistInc. For 'distribute' alone, it is in Inc.
3243 Expr *IncExpr;
3244 if (isOpenMPLoopBoundSharingDirective(Kind: EKind))
3245 IncExpr = S.getDistInc();
3246 else
3247 IncExpr = S.getInc();
3248
3249 // this routine is shared by 'omp distribute parallel for' and
3250 // 'omp distribute': select the right EUB expression depending on the
3251 // directive
3252 OMPLoopArguments OuterLoopArgs;
3253 OuterLoopArgs.LB = LoopArgs.LB;
3254 OuterLoopArgs.UB = LoopArgs.UB;
3255 OuterLoopArgs.ST = LoopArgs.ST;
3256 OuterLoopArgs.IL = LoopArgs.IL;
3257 OuterLoopArgs.Chunk = LoopArgs.Chunk;
3258 OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(Kind: EKind)
3259 ? S.getCombinedEnsureUpperBound()
3260 : S.getEnsureUpperBound();
3261 OuterLoopArgs.IncExpr = IncExpr;
3262 OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(Kind: EKind)
3263 ? S.getCombinedInit()
3264 : S.getInit();
3265 OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(Kind: EKind)
3266 ? S.getCombinedCond()
3267 : S.getCond();
3268 OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(Kind: EKind)
3269 ? S.getCombinedNextLowerBound()
3270 : S.getNextLowerBound();
3271 OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(Kind: EKind)
3272 ? S.getCombinedNextUpperBound()
3273 : S.getNextUpperBound();
3274 OuterLoopArgs.DKind = OMPD_distribute;
3275
3276 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
3277 LoopScope, LoopArgs: OuterLoopArgs, CodeGenLoop: CodeGenLoopContent,
3278 CodeGenOrdered: emitEmptyOrdered);
3279}
3280
3281static std::pair<LValue, LValue>
3282emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
3283 const OMPExecutableDirective &S) {
3284 const OMPLoopDirective &LS = cast<OMPLoopDirective>(Val: S);
3285 LValue LB =
3286 EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getLowerBoundVariable()));
3287 LValue UB =
3288 EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getUpperBoundVariable()));
3289
3290 // When composing 'distribute' with 'for' (e.g. as in 'distribute
3291 // parallel for') we need to use the 'distribute'
3292 // chunk lower and upper bounds rather than the whole loop iteration
3293 // space. These are parameters to the outlined function for 'parallel'
3294 // and we copy the bounds of the previous schedule into the
3295 // the current ones.
3296 LValue PrevLB = CGF.EmitLValue(E: LS.getPrevLowerBoundVariable());
3297 LValue PrevUB = CGF.EmitLValue(E: LS.getPrevUpperBoundVariable());
3298 llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
3299 lvalue: PrevLB, Loc: LS.getPrevLowerBoundVariable()->getExprLoc());
3300 PrevLBVal = CGF.EmitScalarConversion(
3301 Src: PrevLBVal, SrcTy: LS.getPrevLowerBoundVariable()->getType(),
3302 DstTy: LS.getIterationVariable()->getType(),
3303 Loc: LS.getPrevLowerBoundVariable()->getExprLoc());
3304 llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
3305 lvalue: PrevUB, Loc: LS.getPrevUpperBoundVariable()->getExprLoc());
3306 PrevUBVal = CGF.EmitScalarConversion(
3307 Src: PrevUBVal, SrcTy: LS.getPrevUpperBoundVariable()->getType(),
3308 DstTy: LS.getIterationVariable()->getType(),
3309 Loc: LS.getPrevUpperBoundVariable()->getExprLoc());
3310
3311 CGF.EmitStoreOfScalar(value: PrevLBVal, lvalue: LB);
3312 CGF.EmitStoreOfScalar(value: PrevUBVal, lvalue: UB);
3313
3314 return {LB, UB};
3315}
3316
3317/// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
3318/// we need to use the LB and UB expressions generated by the worksharing
3319/// code generation support, whereas in non combined situations we would
3320/// just emit 0 and the LastIteration expression
3321/// This function is necessary due to the difference of the LB and UB
3322/// types for the RT emission routines for 'for_static_init' and
3323/// 'for_dispatch_init'
3324static std::pair<llvm::Value *, llvm::Value *>
3325emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
3326 const OMPExecutableDirective &S,
3327 Address LB, Address UB) {
3328 const OMPLoopDirective &LS = cast<OMPLoopDirective>(Val: S);
3329 const Expr *IVExpr = LS.getIterationVariable();
3330 // when implementing a dynamic schedule for a 'for' combined with a
3331 // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
3332 // is not normalized as each team only executes its own assigned
3333 // distribute chunk
3334 QualType IteratorTy = IVExpr->getType();
3335 llvm::Value *LBVal =
3336 CGF.EmitLoadOfScalar(Addr: LB, /*Volatile=*/false, Ty: IteratorTy, Loc: S.getBeginLoc());
3337 llvm::Value *UBVal =
3338 CGF.EmitLoadOfScalar(Addr: UB, /*Volatile=*/false, Ty: IteratorTy, Loc: S.getBeginLoc());
3339 return {LBVal, UBVal};
3340}
3341
3342static void emitDistributeParallelForDistributeInnerBoundParams(
3343 CodeGenFunction &CGF, const OMPExecutableDirective &S,
3344 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
3345 const auto &Dir = cast<OMPLoopDirective>(Val: S);
3346 LValue LB =
3347 CGF.EmitLValue(E: cast<DeclRefExpr>(Val: Dir.getCombinedLowerBoundVariable()));
3348 llvm::Value *LBCast = CGF.Builder.CreateIntCast(
3349 V: CGF.Builder.CreateLoad(Addr: LB.getAddress()), DestTy: CGF.SizeTy, /*isSigned=*/false);
3350 CapturedVars.push_back(Elt: LBCast);
3351 LValue UB =
3352 CGF.EmitLValue(E: cast<DeclRefExpr>(Val: Dir.getCombinedUpperBoundVariable()));
3353
3354 llvm::Value *UBCast = CGF.Builder.CreateIntCast(
3355 V: CGF.Builder.CreateLoad(Addr: UB.getAddress()), DestTy: CGF.SizeTy, /*isSigned=*/false);
3356 CapturedVars.push_back(Elt: UBCast);
3357}
3358
3359static void
3360emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
3361 const OMPLoopDirective &S,
3362 CodeGenFunction::JumpDest LoopExit) {
3363 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
3364 auto &&CGInlinedWorksharingLoop = [&S, EKind](CodeGenFunction &CGF,
3365 PrePostActionTy &Action) {
3366 Action.Enter(CGF);
3367 bool HasCancel = false;
3368 if (!isOpenMPSimdDirective(DKind: EKind)) {
3369 if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(Val: &S))
3370 HasCancel = D->hasCancel();
3371 else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(Val: &S))
3372 HasCancel = D->hasCancel();
3373 else if (const auto *D =
3374 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(Val: &S))
3375 HasCancel = D->hasCancel();
3376 }
3377 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
3378 CGF.EmitOMPWorksharingLoop(S, EUB: S.getPrevEnsureUpperBound(),
3379 CodeGenLoopBounds: emitDistributeParallelForInnerBounds,
3380 CGDispatchBounds: emitDistributeParallelForDispatchBounds);
3381 };
3382
3383 emitCommonOMPParallelDirective(
3384 CGF, S, InnermostKind: isOpenMPSimdDirective(DKind: EKind) ? OMPD_for_simd : OMPD_for,
3385 CodeGen: CGInlinedWorksharingLoop,
3386 CodeGenBoundParameters: emitDistributeParallelForDistributeInnerBoundParams);
3387}
3388
3389void CodeGenFunction::EmitOMPDistributeParallelForDirective(
3390 const OMPDistributeParallelForDirective &S) {
3391 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3392 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
3393 IncExpr: S.getDistInc());
3394 };
3395 OMPLexicalScope Scope(*this, S, OMPD_parallel);
3396 CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_distribute, CodeGen);
3397}
3398
3399void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
3400 const OMPDistributeParallelForSimdDirective &S) {
3401 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3402 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
3403 IncExpr: S.getDistInc());
3404 };
3405 OMPLexicalScope Scope(*this, S, OMPD_parallel);
3406 CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_distribute, CodeGen);
3407}
3408
3409void CodeGenFunction::EmitOMPDistributeSimdDirective(
3410 const OMPDistributeSimdDirective &S) {
3411 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3412 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
3413 };
3414 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3415 CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_simd, CodeGen);
3416}
3417
3418void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
3419 CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
3420 // Emit SPMD target parallel for region as a standalone region.
3421 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3422 emitOMPSimdRegion(CGF, S, Action);
3423 };
3424 llvm::Function *Fn;
3425 llvm::Constant *Addr;
3426 // Emit target region as a standalone region.
3427 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3428 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
3429 assert(Fn && Addr && "Target device function emission failed.");
3430}
3431
3432void CodeGenFunction::EmitOMPTargetSimdDirective(
3433 const OMPTargetSimdDirective &S) {
3434 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3435 emitOMPSimdRegion(CGF, S, Action);
3436 };
3437 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
3438}
3439
3440namespace {
3441struct ScheduleKindModifiersTy {
3442 OpenMPScheduleClauseKind Kind;
3443 OpenMPScheduleClauseModifier M1;
3444 OpenMPScheduleClauseModifier M2;
3445 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
3446 OpenMPScheduleClauseModifier M1,
3447 OpenMPScheduleClauseModifier M2)
3448 : Kind(Kind), M1(M1), M2(M2) {}
3449};
3450} // namespace
3451
3452bool CodeGenFunction::EmitOMPWorksharingLoop(
3453 const OMPLoopDirective &S, Expr *EUB,
3454 const CodeGenLoopBoundsTy &CodeGenLoopBounds,
3455 const CodeGenDispatchBoundsTy &CGDispatchBounds) {
3456 // Emit the loop iteration variable.
3457 const auto *IVExpr = cast<DeclRefExpr>(Val: S.getIterationVariable());
3458 const auto *IVDecl = cast<VarDecl>(Val: IVExpr->getDecl());
3459 EmitVarDecl(D: *IVDecl);
3460
3461 // Emit the iterations count variable.
3462 // If it is not a variable, Sema decided to calculate iterations count on each
3463 // iteration (e.g., it is foldable into a constant).
3464 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) {
3465 EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl()));
3466 // Emit calculation of the iterations count.
3467 EmitIgnoredExpr(E: S.getCalcLastIteration());
3468 }
3469
3470 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3471
3472 bool HasLastprivateClause;
3473 // Check pre-condition.
3474 {
3475 OMPLoopScope PreInitScope(*this, S);
3476 // Skip the entire loop if we don't meet the precondition.
3477 // If the condition constant folds and can be elided, avoid emitting the
3478 // whole loop.
3479 bool CondConstant;
3480 llvm::BasicBlock *ContBlock = nullptr;
3481 if (ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) {
3482 if (!CondConstant)
3483 return false;
3484 } else {
3485 llvm::BasicBlock *ThenBlock = createBasicBlock(name: "omp.precond.then");
3486 ContBlock = createBasicBlock(name: "omp.precond.end");
3487 emitPreCond(CGF&: *this, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock,
3488 TrueCount: getProfileCount(S: &S));
3489 EmitBlock(BB: ThenBlock);
3490 incrementProfileCounter(S: &S);
3491 }
3492
3493 RunCleanupsScope DoacrossCleanupScope(*this);
3494 bool Ordered = false;
3495 if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
3496 if (OrderedClause->getNumForLoops())
3497 RT.emitDoacrossInit(CGF&: *this, D: S, NumIterations: OrderedClause->getLoopNumIterations());
3498 else
3499 Ordered = true;
3500 }
3501
3502 emitAlignedClause(CGF&: *this, D: S);
3503 bool HasLinears = EmitOMPLinearClauseInit(D: S);
3504 // Emit helper vars inits.
3505
3506 std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
3507 LValue LB = Bounds.first;
3508 LValue UB = Bounds.second;
3509 LValue ST =
3510 EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getStrideVariable()));
3511 LValue IL =
3512 EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getIsLastIterVariable()));
3513
3514 // Emit 'then' code.
3515 {
3516 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
3517 OMPPrivateScope LoopScope(*this);
3518 if (EmitOMPFirstprivateClause(D: S, PrivateScope&: LoopScope) || HasLinears) {
3519 // Emit implicit barrier to synchronize threads and avoid data races on
3520 // initialization of firstprivate variables and post-update of
3521 // lastprivate variables.
3522 CGM.getOpenMPRuntime().emitBarrierCall(
3523 CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false,
3524 /*ForceSimpleCall=*/true);
3525 }
3526 EmitOMPPrivateClause(D: S, PrivateScope&: LoopScope);
3527 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
3528 *this, S, EmitLValue(E: S.getIterationVariable()));
3529 HasLastprivateClause = EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope);
3530 EmitOMPReductionClauseInit(D: S, PrivateScope&: LoopScope);
3531 EmitOMPPrivateLoopCounters(S, LoopScope);
3532 EmitOMPLinearClause(D: S, PrivateScope&: LoopScope);
3533 (void)LoopScope.Privatize();
3534 if (isOpenMPTargetExecutionDirective(DKind: EKind))
3535 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF&: *this, D: S);
3536
3537 // Detect the loop schedule kind and chunk.
3538 const Expr *ChunkExpr = nullptr;
3539 OpenMPScheduleTy ScheduleKind;
3540 if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
3541 ScheduleKind.Schedule = C->getScheduleKind();
3542 ScheduleKind.M1 = C->getFirstScheduleModifier();
3543 ScheduleKind.M2 = C->getSecondScheduleModifier();
3544 ChunkExpr = C->getChunkSize();
3545 } else {
3546 // Default behaviour for schedule clause.
3547 CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
3548 CGF&: *this, S, ScheduleKind&: ScheduleKind.Schedule, ChunkExpr);
3549 }
3550 bool HasChunkSizeOne = false;
3551 llvm::Value *Chunk = nullptr;
3552 if (ChunkExpr) {
3553 Chunk = EmitScalarExpr(E: ChunkExpr);
3554 Chunk = EmitScalarConversion(Src: Chunk, SrcTy: ChunkExpr->getType(),
3555 DstTy: S.getIterationVariable()->getType(),
3556 Loc: S.getBeginLoc());
3557 Expr::EvalResult Result;
3558 if (ChunkExpr->EvaluateAsInt(Result, Ctx: getContext())) {
3559 llvm::APSInt EvaluatedChunk = Result.Val.getInt();
3560 HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
3561 }
3562 }
3563 const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType());
3564 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3565 // OpenMP 4.5, 2.7.1 Loop Construct, Description.
3566 // If the static schedule kind is specified or if the ordered clause is
3567 // specified, and if no monotonic modifier is specified, the effect will
3568 // be as if the monotonic modifier was specified.
3569 bool StaticChunkedOne =
3570 RT.isStaticChunked(ScheduleKind: ScheduleKind.Schedule,
3571 /* Chunked */ Chunk != nullptr) &&
3572 HasChunkSizeOne && isOpenMPLoopBoundSharingDirective(Kind: EKind);
3573 bool IsMonotonic =
3574 Ordered ||
3575 (ScheduleKind.Schedule == OMPC_SCHEDULE_static &&
3576 !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
3577 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
3578 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
3579 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
3580 if ((RT.isStaticNonchunked(ScheduleKind: ScheduleKind.Schedule,
3581 /* Chunked */ Chunk != nullptr) ||
3582 StaticChunkedOne) &&
3583 !Ordered) {
3584 JumpDest LoopExit =
3585 getJumpDestInCurrentScope(Target: createBasicBlock(name: "omp.loop.exit"));
3586 emitCommonSimdLoop(
3587 CGF&: *this, S,
3588 SimdInitGen: [&S, EKind](CodeGenFunction &CGF, PrePostActionTy &) {
3589 if (isOpenMPSimdDirective(DKind: EKind)) {
3590 CGF.EmitOMPSimdInit(D: S);
3591 } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
3592 if (C->getKind() == OMPC_ORDER_concurrent)
3593 CGF.LoopStack.setParallel(/*Enable=*/true);
3594 }
3595 },
3596 BodyCodeGen: [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
3597 &S, ScheduleKind, LoopExit, EKind,
3598 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
3599 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
3600 // When no chunk_size is specified, the iteration space is divided
3601 // into chunks that are approximately equal in size, and at most
3602 // one chunk is distributed to each thread. Note that the size of
3603 // the chunks is unspecified in this case.
3604 CGOpenMPRuntime::StaticRTInput StaticInit(
3605 IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
3606 UB.getAddress(), ST.getAddress(),
3607 StaticChunkedOne ? Chunk : nullptr);
3608 CGF.CGM.getOpenMPRuntime().emitForStaticInit(
3609 CGF, Loc: S.getBeginLoc(), DKind: EKind, ScheduleKind, Values: StaticInit);
3610 // UB = min(UB, GlobalUB);
3611 if (!StaticChunkedOne)
3612 CGF.EmitIgnoredExpr(E: S.getEnsureUpperBound());
3613 // IV = LB;
3614 CGF.EmitIgnoredExpr(E: S.getInit());
3615 // For unchunked static schedule generate:
3616 //
3617 // while (idx <= UB) {
3618 // BODY;
3619 // ++idx;
3620 // }
3621 //
3622 // For static schedule with chunk one:
3623 //
3624 // while (IV <= PrevUB) {
3625 // BODY;
3626 // IV += ST;
3627 // }
3628 CGF.EmitOMPInnerLoop(
3629 S, RequiresCleanup: LoopScope.requiresCleanups(),
3630 LoopCond: StaticChunkedOne ? S.getCombinedParForInDistCond()
3631 : S.getCond(),
3632 IncExpr: StaticChunkedOne ? S.getDistInc() : S.getInc(),
3633 BodyGen: [&S, LoopExit](CodeGenFunction &CGF) {
3634 emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
3635 },
3636 PostIncGen: [](CodeGenFunction &) {});
3637 });
3638 EmitBlock(BB: LoopExit.getBlock());
3639 // Tell the runtime we are done.
3640 auto &&CodeGen = [&S](CodeGenFunction &CGF) {
3641 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, Loc: S.getEndLoc(),
3642 DKind: OMPD_for);
3643 };
3644 OMPCancelStack.emitExit(CGF&: *this, Kind: EKind, CodeGen);
3645 } else {
3646 // Emit the outer loop, which requests its work chunk [LB..UB] from
3647 // runtime and runs the inner loop to process it.
3648 OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
3649 ST.getAddress(), IL.getAddress(), Chunk,
3650 EUB);
3651 LoopArguments.DKind = OMPD_for;
3652 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
3653 LoopArgs: LoopArguments, CGDispatchBounds);
3654 }
3655 if (isOpenMPSimdDirective(DKind: EKind)) {
3656 EmitOMPSimdFinal(D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
3657 return CGF.Builder.CreateIsNotNull(
3658 Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
3659 });
3660 }
3661 EmitOMPReductionClauseFinal(
3662 D: S, /*ReductionKind=*/isOpenMPSimdDirective(DKind: EKind)
3663 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
3664 : /*Parallel only*/ OMPD_parallel);
3665 // Emit post-update of the reduction variables if IsLastIter != 0.
3666 emitPostUpdateForReductionClause(
3667 CGF&: *this, D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
3668 return CGF.Builder.CreateIsNotNull(
3669 Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
3670 });
3671 // Emit final copy of the lastprivate variables if IsLastIter != 0.
3672 if (HasLastprivateClause)
3673 EmitOMPLastprivateClauseFinal(
3674 D: S, NoFinals: isOpenMPSimdDirective(DKind: EKind),
3675 IsLastIterCond: Builder.CreateIsNotNull(Arg: EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc())));
3676 LoopScope.restoreMap();
3677 EmitOMPLinearClauseFinal(D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
3678 return CGF.Builder.CreateIsNotNull(
3679 Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
3680 });
3681 }
3682 DoacrossCleanupScope.ForceCleanup();
3683 // We're now done with the loop, so jump to the continuation block.
3684 if (ContBlock) {
3685 EmitBranch(Block: ContBlock);
3686 EmitBlock(BB: ContBlock, /*IsFinished=*/true);
3687 }
3688 }
3689 return HasLastprivateClause;
3690}
3691
3692/// The following two functions generate expressions for the loop lower
3693/// and upper bounds in case of static and dynamic (dispatch) schedule
3694/// of the associated 'for' or 'distribute' loop.
3695static std::pair<LValue, LValue>
3696emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
3697 const auto &LS = cast<OMPLoopDirective>(Val: S);
3698 LValue LB =
3699 EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getLowerBoundVariable()));
3700 LValue UB =
3701 EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getUpperBoundVariable()));
3702 return {LB, UB};
3703}
3704
3705/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
3706/// consider the lower and upper bound expressions generated by the
3707/// worksharing loop support, but we use 0 and the iteration space size as
3708/// constants
3709static std::pair<llvm::Value *, llvm::Value *>
3710emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
3711 Address LB, Address UB) {
3712 const auto &LS = cast<OMPLoopDirective>(Val: S);
3713 const Expr *IVExpr = LS.getIterationVariable();
3714 const unsigned IVSize = CGF.getContext().getTypeSize(T: IVExpr->getType());
3715 llvm::Value *LBVal = CGF.Builder.getIntN(N: IVSize, C: 0);
3716 llvm::Value *UBVal = CGF.EmitScalarExpr(E: LS.getLastIteration());
3717 return {LBVal, UBVal};
3718}
3719
3720/// Emits internal temp array declarations for the directive with inscan
3721/// reductions.
3722/// The code is the following:
3723/// \code
3724/// size num_iters = <num_iters>;
3725/// <type> buffer[num_iters];
3726/// \endcode
3727static void emitScanBasedDirectiveDecls(
3728 CodeGenFunction &CGF, const OMPLoopDirective &S,
3729 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
3730 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3731 V: NumIteratorsGen(CGF), DestTy: CGF.SizeTy, /*isSigned=*/false);
3732 SmallVector<const Expr *, 4> Shareds;
3733 SmallVector<const Expr *, 4> Privates;
3734 SmallVector<const Expr *, 4> ReductionOps;
3735 SmallVector<const Expr *, 4> CopyArrayTemps;
3736 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3737 assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3738 "Only inscan reductions are expected.");
3739 Shareds.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
3740 Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
3741 ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
3742 CopyArrayTemps.append(in_start: C->copy_array_temps().begin(),
3743 in_end: C->copy_array_temps().end());
3744 }
3745 {
3746 // Emit buffers for each reduction variables.
3747 // ReductionCodeGen is required to emit correctly the code for array
3748 // reductions.
3749 ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
3750 unsigned Count = 0;
3751 auto *ITA = CopyArrayTemps.begin();
3752 for (const Expr *IRef : Privates) {
3753 const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IRef)->getDecl());
3754 // Emit variably modified arrays, used for arrays/array sections
3755 // reductions.
3756 if (PrivateVD->getType()->isVariablyModifiedType()) {
3757 RedCG.emitSharedOrigLValue(CGF, N: Count);
3758 RedCG.emitAggregateType(CGF, N: Count);
3759 }
3760 CodeGenFunction::OpaqueValueMapping DimMapping(
3761 CGF,
3762 cast<OpaqueValueExpr>(
3763 Val: cast<VariableArrayType>(Val: (*ITA)->getType()->getAsArrayTypeUnsafe())
3764 ->getSizeExpr()),
3765 RValue::get(V: OMPScanNumIterations));
3766 // Emit temp buffer.
3767 CGF.EmitVarDecl(D: *cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ITA)->getDecl()));
3768 ++ITA;
3769 ++Count;
3770 }
3771 }
3772}
3773
3774/// Copies final inscan reductions values to the original variables.
3775/// The code is the following:
3776/// \code
3777/// <orig_var> = buffer[num_iters-1];
3778/// \endcode
3779static void emitScanBasedDirectiveFinals(
3780 CodeGenFunction &CGF, const OMPLoopDirective &S,
3781 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
3782 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3783 V: NumIteratorsGen(CGF), DestTy: CGF.SizeTy, /*isSigned=*/false);
3784 SmallVector<const Expr *, 4> Shareds;
3785 SmallVector<const Expr *, 4> LHSs;
3786 SmallVector<const Expr *, 4> RHSs;
3787 SmallVector<const Expr *, 4> Privates;
3788 SmallVector<const Expr *, 4> CopyOps;
3789 SmallVector<const Expr *, 4> CopyArrayElems;
3790 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3791 assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3792 "Only inscan reductions are expected.");
3793 Shareds.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
3794 LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
3795 RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
3796 Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
3797 CopyOps.append(in_start: C->copy_ops().begin(), in_end: C->copy_ops().end());
3798 CopyArrayElems.append(in_start: C->copy_array_elems().begin(),
3799 in_end: C->copy_array_elems().end());
3800 }
3801 // Create temp var and copy LHS value to this temp value.
3802 // LHS = TMP[LastIter];
3803 llvm::Value *OMPLast = CGF.Builder.CreateNSWSub(
3804 LHS: OMPScanNumIterations,
3805 RHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1, /*isSigned=*/IsSigned: false));
3806 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
3807 const Expr *PrivateExpr = Privates[I];
3808 const Expr *OrigExpr = Shareds[I];
3809 const Expr *CopyArrayElem = CopyArrayElems[I];
3810 CodeGenFunction::OpaqueValueMapping IdxMapping(
3811 CGF,
3812 cast<OpaqueValueExpr>(
3813 Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
3814 RValue::get(V: OMPLast));
3815 LValue DestLVal = CGF.EmitLValue(E: OrigExpr);
3816 LValue SrcLVal = CGF.EmitLValue(E: CopyArrayElem);
3817 CGF.EmitOMPCopy(
3818 OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), SrcAddr: SrcLVal.getAddress(),
3819 DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
3820 SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), Copy: CopyOps[I]);
3821 }
3822}
3823
3824/// Emits the code for the directive with inscan reductions.
3825/// The code is the following:
3826/// \code
3827/// #pragma omp ...
3828/// for (i: 0..<num_iters>) {
3829/// <input phase>;
3830/// buffer[i] = red;
3831/// }
3832/// #pragma omp master // in parallel region
3833/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
3834/// for (size cnt = last_iter; cnt >= pow(2, k); --k)
3835/// buffer[i] op= buffer[i-pow(2,k)];
3836/// #pragma omp barrier // in parallel region
3837/// #pragma omp ...
3838/// for (0..<num_iters>) {
3839/// red = InclusiveScan ? buffer[i] : buffer[i-1];
3840/// <scan phase>;
3841/// }
3842/// \endcode
3843static void emitScanBasedDirective(
3844 CodeGenFunction &CGF, const OMPLoopDirective &S,
3845 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
3846 llvm::function_ref<void(CodeGenFunction &)> FirstGen,
3847 llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
3848 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3849 V: NumIteratorsGen(CGF), DestTy: CGF.SizeTy, /*isSigned=*/false);
3850 SmallVector<const Expr *, 4> Privates;
3851 SmallVector<const Expr *, 4> ReductionOps;
3852 SmallVector<const Expr *, 4> LHSs;
3853 SmallVector<const Expr *, 4> RHSs;
3854 SmallVector<const Expr *, 4> CopyArrayElems;
3855 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3856 assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3857 "Only inscan reductions are expected.");
3858 Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
3859 ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
3860 LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
3861 RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
3862 CopyArrayElems.append(in_start: C->copy_array_elems().begin(),
3863 in_end: C->copy_array_elems().end());
3864 }
3865 CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
3866 {
3867 // Emit loop with input phase:
3868 // #pragma omp ...
3869 // for (i: 0..<num_iters>) {
3870 // <input phase>;
3871 // buffer[i] = red;
3872 // }
3873 CGF.OMPFirstScanLoop = true;
3874 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3875 FirstGen(CGF);
3876 }
3877 // #pragma omp barrier // in parallel region
3878 auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems,
3879 &ReductionOps,
3880 &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) {
3881 Action.Enter(CGF);
3882 // Emit prefix reduction:
3883 // #pragma omp master // in parallel region
3884 // for (int k = 0; k <= ceil(log2(n)); ++k)
3885 llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
3886 llvm::BasicBlock *LoopBB = CGF.createBasicBlock(name: "omp.outer.log.scan.body");
3887 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: "omp.outer.log.scan.exit");
3888 llvm::Function *F =
3889 CGF.CGM.getIntrinsic(IID: llvm::Intrinsic::log2, Tys: CGF.DoubleTy);
3890 llvm::Value *Arg =
3891 CGF.Builder.CreateUIToFP(V: OMPScanNumIterations, DestTy: CGF.DoubleTy);
3892 llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(callee: F, args: Arg);
3893 F = CGF.CGM.getIntrinsic(IID: llvm::Intrinsic::ceil, Tys: CGF.DoubleTy);
3894 LogVal = CGF.EmitNounwindRuntimeCall(callee: F, args: LogVal);
3895 LogVal = CGF.Builder.CreateFPToUI(V: LogVal, DestTy: CGF.IntTy);
3896 llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
3897 LHS: OMPScanNumIterations, RHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1));
3898 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: S.getBeginLoc());
3899 CGF.EmitBlock(BB: LoopBB);
3900 auto *Counter = CGF.Builder.CreatePHI(Ty: CGF.IntTy, NumReservedValues: 2);
3901 // size pow2k = 1;
3902 auto *Pow2K = CGF.Builder.CreatePHI(Ty: CGF.SizeTy, NumReservedValues: 2);
3903 Counter->addIncoming(V: llvm::ConstantInt::get(Ty: CGF.IntTy, V: 0), BB: InputBB);
3904 Pow2K->addIncoming(V: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1), BB: InputBB);
3905 // for (size i = n - 1; i >= 2 ^ k; --i)
3906 // tmp[i] op= tmp[i-pow2k];
3907 llvm::BasicBlock *InnerLoopBB =
3908 CGF.createBasicBlock(name: "omp.inner.log.scan.body");
3909 llvm::BasicBlock *InnerExitBB =
3910 CGF.createBasicBlock(name: "omp.inner.log.scan.exit");
3911 llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(LHS: NMin1, RHS: Pow2K);
3912 CGF.Builder.CreateCondBr(Cond: CmpI, True: InnerLoopBB, False: InnerExitBB);
3913 CGF.EmitBlock(BB: InnerLoopBB);
3914 auto *IVal = CGF.Builder.CreatePHI(Ty: CGF.SizeTy, NumReservedValues: 2);
3915 IVal->addIncoming(V: NMin1, BB: LoopBB);
3916 {
3917 CodeGenFunction::OMPPrivateScope PrivScope(CGF);
3918 auto *ILHS = LHSs.begin();
3919 auto *IRHS = RHSs.begin();
3920 for (const Expr *CopyArrayElem : CopyArrayElems) {
3921 const auto *LHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl());
3922 const auto *RHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl());
3923 Address LHSAddr = Address::invalid();
3924 {
3925 CodeGenFunction::OpaqueValueMapping IdxMapping(
3926 CGF,
3927 cast<OpaqueValueExpr>(
3928 Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
3929 RValue::get(V: IVal));
3930 LHSAddr = CGF.EmitLValue(E: CopyArrayElem).getAddress();
3931 }
3932 PrivScope.addPrivate(LocalVD: LHSVD, Addr: LHSAddr);
3933 Address RHSAddr = Address::invalid();
3934 {
3935 llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(LHS: IVal, RHS: Pow2K);
3936 CodeGenFunction::OpaqueValueMapping IdxMapping(
3937 CGF,
3938 cast<OpaqueValueExpr>(
3939 Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
3940 RValue::get(V: OffsetIVal));
3941 RHSAddr = CGF.EmitLValue(E: CopyArrayElem).getAddress();
3942 }
3943 PrivScope.addPrivate(LocalVD: RHSVD, Addr: RHSAddr);
3944 ++ILHS;
3945 ++IRHS;
3946 }
3947 PrivScope.Privatize();
3948 CGF.CGM.getOpenMPRuntime().emitReduction(
3949 CGF, Loc: S.getEndLoc(), Privates, LHSExprs: LHSs, RHSExprs: RHSs, ReductionOps,
3950 Options: {/*WithNowait=*/true, /*SimpleReduction=*/true,
3951 /*IsPrivateVarReduction*/ {}, .ReductionKind: OMPD_unknown});
3952 }
3953 llvm::Value *NextIVal =
3954 CGF.Builder.CreateNUWSub(LHS: IVal, RHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1));
3955 IVal->addIncoming(V: NextIVal, BB: CGF.Builder.GetInsertBlock());
3956 CmpI = CGF.Builder.CreateICmpUGE(LHS: NextIVal, RHS: Pow2K);
3957 CGF.Builder.CreateCondBr(Cond: CmpI, True: InnerLoopBB, False: InnerExitBB);
3958 CGF.EmitBlock(BB: InnerExitBB);
3959 llvm::Value *Next =
3960 CGF.Builder.CreateNUWAdd(LHS: Counter, RHS: llvm::ConstantInt::get(Ty: CGF.IntTy, V: 1));
3961 Counter->addIncoming(V: Next, BB: CGF.Builder.GetInsertBlock());
3962 // pow2k <<= 1;
3963 llvm::Value *NextPow2K =
3964 CGF.Builder.CreateShl(LHS: Pow2K, RHS: 1, Name: "", /*HasNUW=*/true);
3965 Pow2K->addIncoming(V: NextPow2K, BB: CGF.Builder.GetInsertBlock());
3966 llvm::Value *Cmp = CGF.Builder.CreateICmpNE(LHS: Next, RHS: LogVal);
3967 CGF.Builder.CreateCondBr(Cond: Cmp, True: LoopBB, False: ExitBB);
3968 auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: S.getEndLoc());
3969 CGF.EmitBlock(BB: ExitBB);
3970 };
3971 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
3972 if (isOpenMPParallelDirective(DKind: EKind)) {
3973 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: CodeGen, Loc: S.getBeginLoc());
3974 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
3975 CGF, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false,
3976 /*ForceSimpleCall=*/true);
3977 } else {
3978 RegionCodeGenTy RCG(CodeGen);
3979 RCG(CGF);
3980 }
3981
3982 CGF.OMPFirstScanLoop = false;
3983 SecondGen(CGF);
3984}
3985
3986static bool emitWorksharingDirective(CodeGenFunction &CGF,
3987 const OMPLoopDirective &S,
3988 bool HasCancel) {
3989 bool HasLastprivates;
3990 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
3991 if (llvm::any_of(Range: S.getClausesOfKind<OMPReductionClause>(),
3992 P: [](const OMPReductionClause *C) {
3993 return C->getModifier() == OMPC_REDUCTION_inscan;
3994 })) {
3995 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
3996 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3997 OMPLoopScope LoopScope(CGF, S);
3998 return CGF.EmitScalarExpr(E: S.getNumIterations());
3999 };
4000 const auto &&FirstGen = [&S, HasCancel, EKind](CodeGenFunction &CGF) {
4001 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
4002 (void)CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(),
4003 CodeGenLoopBounds: emitForLoopBounds,
4004 CGDispatchBounds: emitDispatchForLoopBounds);
4005 // Emit an implicit barrier at the end.
4006 CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc: S.getBeginLoc(),
4007 Kind: OMPD_for);
4008 };
4009 const auto &&SecondGen = [&S, HasCancel, EKind,
4010 &HasLastprivates](CodeGenFunction &CGF) {
4011 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
4012 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(),
4013 CodeGenLoopBounds: emitForLoopBounds,
4014 CGDispatchBounds: emitDispatchForLoopBounds);
4015 };
4016 if (!isOpenMPParallelDirective(DKind: EKind))
4017 emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
4018 emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
4019 if (!isOpenMPParallelDirective(DKind: EKind))
4020 emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen);
4021 } else {
4022 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
4023 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(),
4024 CodeGenLoopBounds: emitForLoopBounds,
4025 CGDispatchBounds: emitDispatchForLoopBounds);
4026 }
4027 return HasLastprivates;
4028}
4029
4030// Pass OMPLoopDirective (instead of OMPForDirective) to make this check
4031// available for "loop bind(parallel)", which maps to "for".
4032static bool isForSupportedByOpenMPIRBuilder(const OMPLoopDirective &S,
4033 bool HasCancel) {
4034 if (HasCancel)
4035 return false;
4036 for (OMPClause *C : S.clauses()) {
4037 if (isa<OMPNowaitClause, OMPBindClause>(Val: C))
4038 continue;
4039
4040 if (auto *SC = dyn_cast<OMPScheduleClause>(Val: C)) {
4041 if (SC->getFirstScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
4042 return false;
4043 if (SC->getSecondScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
4044 return false;
4045 switch (SC->getScheduleKind()) {
4046 case OMPC_SCHEDULE_auto:
4047 case OMPC_SCHEDULE_dynamic:
4048 case OMPC_SCHEDULE_runtime:
4049 case OMPC_SCHEDULE_guided:
4050 case OMPC_SCHEDULE_static:
4051 continue;
4052 case OMPC_SCHEDULE_unknown:
4053 return false;
4054 }
4055 }
4056
4057 return false;
4058 }
4059
4060 return true;
4061}
4062
4063static llvm::omp::ScheduleKind
4064convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind) {
4065 switch (ScheduleClauseKind) {
4066 case OMPC_SCHEDULE_unknown:
4067 return llvm::omp::OMP_SCHEDULE_Default;
4068 case OMPC_SCHEDULE_auto:
4069 return llvm::omp::OMP_SCHEDULE_Auto;
4070 case OMPC_SCHEDULE_dynamic:
4071 return llvm::omp::OMP_SCHEDULE_Dynamic;
4072 case OMPC_SCHEDULE_guided:
4073 return llvm::omp::OMP_SCHEDULE_Guided;
4074 case OMPC_SCHEDULE_runtime:
4075 return llvm::omp::OMP_SCHEDULE_Runtime;
4076 case OMPC_SCHEDULE_static:
4077 return llvm::omp::OMP_SCHEDULE_Static;
4078 }
4079 llvm_unreachable("Unhandled schedule kind");
4080}
4081
4082// Pass OMPLoopDirective (instead of OMPForDirective) to make this function
4083// available for "loop bind(parallel)", which maps to "for".
4084static void emitOMPForDirective(const OMPLoopDirective &S, CodeGenFunction &CGF,
4085 CodeGenModule &CGM, bool HasCancel) {
4086 bool HasLastprivates = false;
4087 bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder &&
4088 isForSupportedByOpenMPIRBuilder(S, HasCancel);
4089 auto &&CodeGen = [&S, &CGM, HasCancel, &HasLastprivates,
4090 UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) {
4091 // Use the OpenMPIRBuilder if enabled.
4092 if (UseOMPIRBuilder) {
4093 bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>();
4094
4095 llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default;
4096 llvm::Value *ChunkSize = nullptr;
4097 if (auto *SchedClause = S.getSingleClause<OMPScheduleClause>()) {
4098 SchedKind =
4099 convertClauseKindToSchedKind(ScheduleClauseKind: SchedClause->getScheduleKind());
4100 if (const Expr *ChunkSizeExpr = SchedClause->getChunkSize())
4101 ChunkSize = CGF.EmitScalarExpr(E: ChunkSizeExpr);
4102 }
4103
4104 // Emit the associated statement and get its loop representation.
4105 const Stmt *Inner = S.getRawStmt();
4106 llvm::CanonicalLoopInfo *CLI =
4107 CGF.EmitOMPCollapsedCanonicalLoopNest(S: Inner, Depth: 1);
4108
4109 llvm::OpenMPIRBuilder &OMPBuilder =
4110 CGM.getOpenMPRuntime().getOMPBuilder();
4111 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
4112 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
4113 cantFail(ValOrErr: OMPBuilder.applyWorkshareLoop(
4114 DL: CGF.Builder.getCurrentDebugLocation(), CLI, AllocaIP, NeedsBarrier,
4115 SchedKind, ChunkSize, /*HasSimdModifier=*/false,
4116 /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false,
4117 /*HasOrderedClause=*/false));
4118 return;
4119 }
4120
4121 HasLastprivates = emitWorksharingDirective(CGF, S, HasCancel);
4122 };
4123 {
4124 auto LPCRegion =
4125 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
4126 OMPLexicalScope Scope(CGF, S, OMPD_unknown);
4127 CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_for, CodeGen,
4128 HasCancel);
4129 }
4130
4131 if (!UseOMPIRBuilder) {
4132 // Emit an implicit barrier at the end.
4133 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
4134 CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc: S.getBeginLoc(), Kind: OMPD_for);
4135 }
4136 // Check for outer lastprivate conditional update.
4137 checkForLastprivateConditionalUpdate(CGF, S);
4138}
4139
4140void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
4141 return emitOMPForDirective(S, CGF&: *this, CGM, HasCancel: S.hasCancel());
4142}
4143
4144void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
4145 bool HasLastprivates = false;
4146 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
4147 PrePostActionTy &) {
4148 HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
4149 };
4150 {
4151 auto LPCRegion =
4152 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
4153 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4154 CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_simd, CodeGen);
4155 }
4156
4157 // Emit an implicit barrier at the end.
4158 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
4159 CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_for);
4160 // Check for outer lastprivate conditional update.
4161 checkForLastprivateConditionalUpdate(CGF&: *this, S);
4162}
4163
4164static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
4165 const Twine &Name,
4166 llvm::Value *Init = nullptr) {
4167 LValue LVal = CGF.MakeAddrLValue(Addr: CGF.CreateMemTemp(T: Ty, Name), T: Ty);
4168 if (Init)
4169 CGF.EmitStoreThroughLValue(Src: RValue::get(V: Init), Dst: LVal, /*isInit*/ true);
4170 return LVal;
4171}
4172
4173void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
4174 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
4175 const auto *CS = dyn_cast<CompoundStmt>(Val: CapturedStmt);
4176 bool HasLastprivates = false;
4177 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
4178 auto &&CodeGen = [&S, CapturedStmt, CS, EKind,
4179 &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
4180 const ASTContext &C = CGF.getContext();
4181 QualType KmpInt32Ty =
4182 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4183 // Emit helper vars inits.
4184 LValue LB = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.lb.",
4185 Init: CGF.Builder.getInt32(C: 0));
4186 llvm::ConstantInt *GlobalUBVal = CS != nullptr
4187 ? CGF.Builder.getInt32(C: CS->size() - 1)
4188 : CGF.Builder.getInt32(C: 0);
4189 LValue UB =
4190 createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.ub.", Init: GlobalUBVal);
4191 LValue ST = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.st.",
4192 Init: CGF.Builder.getInt32(C: 1));
4193 LValue IL = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.il.",
4194 Init: CGF.Builder.getInt32(C: 0));
4195 // Loop counter.
4196 LValue IV = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.iv.");
4197 OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
4198 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
4199 OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
4200 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
4201 // Generate condition for loop.
4202 BinaryOperator *Cond = BinaryOperator::Create(
4203 C, lhs: &IVRefExpr, rhs: &UBRefExpr, opc: BO_LE, ResTy: C.BoolTy, VK: VK_PRValue, OK: OK_Ordinary,
4204 opLoc: S.getBeginLoc(), FPFeatures: FPOptionsOverride());
4205 // Increment for loop counter.
4206 UnaryOperator *Inc = UnaryOperator::Create(
4207 C, input: &IVRefExpr, opc: UO_PreInc, type: KmpInt32Ty, VK: VK_PRValue, OK: OK_Ordinary,
4208 l: S.getBeginLoc(), CanOverflow: true, FPFeatures: FPOptionsOverride());
4209 auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
4210 // Iterate through all sections and emit a switch construct:
4211 // switch (IV) {
4212 // case 0:
4213 // <SectionStmt[0]>;
4214 // break;
4215 // ...
4216 // case <NumSection> - 1:
4217 // <SectionStmt[<NumSection> - 1]>;
4218 // break;
4219 // }
4220 // .omp.sections.exit:
4221 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: ".omp.sections.exit");
4222 llvm::SwitchInst *SwitchStmt =
4223 CGF.Builder.CreateSwitch(V: CGF.EmitLoadOfScalar(lvalue: IV, Loc: S.getBeginLoc()),
4224 Dest: ExitBB, NumCases: CS == nullptr ? 1 : CS->size());
4225 if (CS) {
4226 unsigned CaseNumber = 0;
4227 for (const Stmt *SubStmt : CS->children()) {
4228 auto CaseBB = CGF.createBasicBlock(name: ".omp.sections.case");
4229 CGF.EmitBlock(BB: CaseBB);
4230 SwitchStmt->addCase(OnVal: CGF.Builder.getInt32(C: CaseNumber), Dest: CaseBB);
4231 CGF.EmitStmt(S: SubStmt);
4232 CGF.EmitBranch(Block: ExitBB);
4233 ++CaseNumber;
4234 }
4235 } else {
4236 llvm::BasicBlock *CaseBB = CGF.createBasicBlock(name: ".omp.sections.case");
4237 CGF.EmitBlock(BB: CaseBB);
4238 SwitchStmt->addCase(OnVal: CGF.Builder.getInt32(C: 0), Dest: CaseBB);
4239 CGF.EmitStmt(S: CapturedStmt);
4240 CGF.EmitBranch(Block: ExitBB);
4241 }
4242 CGF.EmitBlock(BB: ExitBB, /*IsFinished=*/true);
4243 };
4244
4245 CodeGenFunction::OMPPrivateScope LoopScope(CGF);
4246 if (CGF.EmitOMPFirstprivateClause(D: S, PrivateScope&: LoopScope)) {
4247 // Emit implicit barrier to synchronize threads and avoid data races on
4248 // initialization of firstprivate variables and post-update of lastprivate
4249 // variables.
4250 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
4251 CGF, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false,
4252 /*ForceSimpleCall=*/true);
4253 }
4254 CGF.EmitOMPPrivateClause(D: S, PrivateScope&: LoopScope);
4255 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
4256 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope);
4257 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope&: LoopScope);
4258 (void)LoopScope.Privatize();
4259 if (isOpenMPTargetExecutionDirective(DKind: EKind))
4260 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S);
4261
4262 // Emit static non-chunked loop.
4263 OpenMPScheduleTy ScheduleKind;
4264 ScheduleKind.Schedule = OMPC_SCHEDULE_static;
4265 CGOpenMPRuntime::StaticRTInput StaticInit(
4266 /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
4267 LB.getAddress(), UB.getAddress(), ST.getAddress());
4268 CGF.CGM.getOpenMPRuntime().emitForStaticInit(CGF, Loc: S.getBeginLoc(), DKind: EKind,
4269 ScheduleKind, Values: StaticInit);
4270 // UB = min(UB, GlobalUB);
4271 llvm::Value *UBVal = CGF.EmitLoadOfScalar(lvalue: UB, Loc: S.getBeginLoc());
4272 llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
4273 C: CGF.Builder.CreateICmpSLT(LHS: UBVal, RHS: GlobalUBVal), True: UBVal, False: GlobalUBVal);
4274 CGF.EmitStoreOfScalar(value: MinUBGlobalUB, lvalue: UB);
4275 // IV = LB;
4276 CGF.EmitStoreOfScalar(value: CGF.EmitLoadOfScalar(lvalue: LB, Loc: S.getBeginLoc()), lvalue: IV);
4277 // while (idx <= UB) { BODY; ++idx; }
4278 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, LoopCond: Cond, IncExpr: Inc, BodyGen,
4279 PostIncGen: [](CodeGenFunction &) {});
4280 // Tell the runtime we are done.
4281 auto &&CodeGen = [&S](CodeGenFunction &CGF) {
4282 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, Loc: S.getEndLoc(),
4283 DKind: OMPD_sections);
4284 };
4285 CGF.OMPCancelStack.emitExit(CGF, Kind: EKind, CodeGen);
4286 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
4287 // Emit post-update of the reduction variables if IsLastIter != 0.
4288 emitPostUpdateForReductionClause(CGF, D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
4289 return CGF.Builder.CreateIsNotNull(
4290 Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
4291 });
4292
4293 // Emit final copy of the lastprivate variables if IsLastIter != 0.
4294 if (HasLastprivates)
4295 CGF.EmitOMPLastprivateClauseFinal(
4296 D: S, /*NoFinals=*/false,
4297 IsLastIterCond: CGF.Builder.CreateIsNotNull(
4298 Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc())));
4299 };
4300
4301 bool HasCancel = false;
4302 if (auto *OSD = dyn_cast<OMPSectionsDirective>(Val: &S))
4303 HasCancel = OSD->hasCancel();
4304 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(Val: &S))
4305 HasCancel = OPSD->hasCancel();
4306 OMPCancelStackRAII CancelRegion(*this, EKind, HasCancel);
4307 CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_sections, CodeGen,
4308 HasCancel);
4309 // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
4310 // clause. Otherwise the barrier will be generated by the codegen for the
4311 // directive.
4312 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
4313 // Emit implicit barrier to synchronize threads and avoid data races on
4314 // initialization of firstprivate variables.
4315 CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(),
4316 Kind: OMPD_unknown);
4317 }
4318}
4319
4320void CodeGenFunction::EmitOMPScopeDirective(const OMPScopeDirective &S) {
4321 {
4322 // Emit code for 'scope' region
4323 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4324 Action.Enter(CGF);
4325 OMPPrivateScope PrivateScope(CGF);
4326 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
4327 CGF.EmitOMPPrivateClause(D: S, PrivateScope);
4328 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
4329 (void)PrivateScope.Privatize();
4330 CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
4331 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
4332 };
4333 auto LPCRegion =
4334 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
4335 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4336 CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_scope, CodeGen);
4337 }
4338 // Emit an implicit barrier at the end.
4339 if (!S.getSingleClause<OMPNowaitClause>()) {
4340 CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_scope);
4341 }
4342 // Check for outer lastprivate conditional update.
4343 checkForLastprivateConditionalUpdate(CGF&: *this, S);
4344}
4345
4346void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
4347 if (CGM.getLangOpts().OpenMPIRBuilder) {
4348 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4349 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4350 using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
4351
4352 auto FiniCB = [](InsertPointTy IP) {
4353 // Don't FinalizeOMPRegion because this is done inside of OMPIRBuilder for
4354 // sections.
4355 return llvm::Error::success();
4356 };
4357
4358 const CapturedStmt *ICS = S.getInnermostCapturedStmt();
4359 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
4360 const auto *CS = dyn_cast<CompoundStmt>(Val: CapturedStmt);
4361 llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
4362 if (CS) {
4363 for (const Stmt *SubStmt : CS->children()) {
4364 auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP,
4365 InsertPointTy CodeGenIP) {
4366 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4367 CGF&: *this, RegionBodyStmt: SubStmt, AllocaIP, CodeGenIP, RegionName: "section");
4368 return llvm::Error::success();
4369 };
4370 SectionCBVector.push_back(Elt: SectionCB);
4371 }
4372 } else {
4373 auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP,
4374 InsertPointTy CodeGenIP) {
4375 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4376 CGF&: *this, RegionBodyStmt: CapturedStmt, AllocaIP, CodeGenIP, RegionName: "section");
4377 return llvm::Error::success();
4378 };
4379 SectionCBVector.push_back(Elt: SectionCB);
4380 }
4381
4382 // Privatization callback that performs appropriate action for
4383 // shared/private/firstprivate/lastprivate/copyin/... variables.
4384 //
4385 // TODO: This defaults to shared right now.
4386 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
4387 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
4388 // The next line is appropriate only for variables (Val) with the
4389 // data-sharing attribute "shared".
4390 ReplVal = &Val;
4391
4392 return CodeGenIP;
4393 };
4394
4395 CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP);
4396 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
4397 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
4398 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
4399 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
4400 cantFail(ValOrErr: OMPBuilder.createSections(
4401 Loc: Builder, AllocaIP, SectionCBs: SectionCBVector, PrivCB, FiniCB, IsCancellable: S.hasCancel(),
4402 IsNowait: S.getSingleClause<OMPNowaitClause>()));
4403 Builder.restoreIP(IP: AfterIP);
4404 return;
4405 }
4406 {
4407 auto LPCRegion =
4408 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
4409 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4410 EmitSections(S);
4411 }
4412 // Emit an implicit barrier at the end.
4413 if (!S.getSingleClause<OMPNowaitClause>()) {
4414 CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(),
4415 Kind: OMPD_sections);
4416 }
4417 // Check for outer lastprivate conditional update.
4418 checkForLastprivateConditionalUpdate(CGF&: *this, S);
4419}
4420
4421void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
4422 if (CGM.getLangOpts().OpenMPIRBuilder) {
4423 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4424 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4425
4426 const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
4427 auto FiniCB = [this](InsertPointTy IP) {
4428 OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
4429 return llvm::Error::success();
4430 };
4431
4432 auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
4433 InsertPointTy CodeGenIP) {
4434 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4435 CGF&: *this, RegionBodyStmt: SectionRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "section");
4436 return llvm::Error::success();
4437 };
4438
4439 LexicalScope Scope(*this, S.getSourceRange());
4440 EmitStopPoint(S: &S);
4441 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
4442 cantFail(ValOrErr: OMPBuilder.createSection(Loc: Builder, BodyGenCB, FiniCB));
4443 Builder.restoreIP(IP: AfterIP);
4444
4445 return;
4446 }
4447 LexicalScope Scope(*this, S.getSourceRange());
4448 EmitStopPoint(S: &S);
4449 EmitStmt(S: S.getAssociatedStmt());
4450}
4451
4452void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
4453 llvm::SmallVector<const Expr *, 8> CopyprivateVars;
4454 llvm::SmallVector<const Expr *, 8> DestExprs;
4455 llvm::SmallVector<const Expr *, 8> SrcExprs;
4456 llvm::SmallVector<const Expr *, 8> AssignmentOps;
4457 // Check if there are any 'copyprivate' clauses associated with this
4458 // 'single' construct.
4459 // Build a list of copyprivate variables along with helper expressions
4460 // (<source>, <destination>, <destination>=<source> expressions)
4461 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
4462 CopyprivateVars.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
4463 DestExprs.append(in_start: C->destination_exprs().begin(),
4464 in_end: C->destination_exprs().end());
4465 SrcExprs.append(in_start: C->source_exprs().begin(), in_end: C->source_exprs().end());
4466 AssignmentOps.append(in_start: C->assignment_ops().begin(),
4467 in_end: C->assignment_ops().end());
4468 }
4469 // Emit code for 'single' region along with 'copyprivate' clauses
4470 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4471 Action.Enter(CGF);
4472 OMPPrivateScope SingleScope(CGF);
4473 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope&: SingleScope);
4474 CGF.EmitOMPPrivateClause(D: S, PrivateScope&: SingleScope);
4475 (void)SingleScope.Privatize();
4476 CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
4477 };
4478 {
4479 auto LPCRegion =
4480 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
4481 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4482 CGM.getOpenMPRuntime().emitSingleRegion(CGF&: *this, SingleOpGen: CodeGen, Loc: S.getBeginLoc(),
4483 CopyprivateVars, DestExprs,
4484 SrcExprs, AssignmentOps);
4485 }
4486 // Emit an implicit barrier at the end (to avoid data race on firstprivate
4487 // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
4488 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
4489 CGM.getOpenMPRuntime().emitBarrierCall(
4490 CGF&: *this, Loc: S.getBeginLoc(),
4491 Kind: S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
4492 }
4493 // Check for outer lastprivate conditional update.
4494 checkForLastprivateConditionalUpdate(CGF&: *this, S);
4495}
4496
4497static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
4498 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4499 Action.Enter(CGF);
4500 CGF.EmitStmt(S: S.getRawStmt());
4501 };
4502 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: CodeGen, Loc: S.getBeginLoc());
4503}
4504
4505void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
4506 if (CGM.getLangOpts().OpenMPIRBuilder) {
4507 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4508 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4509
4510 const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt();
4511
4512 auto FiniCB = [this](InsertPointTy IP) {
4513 OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
4514 return llvm::Error::success();
4515 };
4516
4517 auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
4518 InsertPointTy CodeGenIP) {
4519 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4520 CGF&: *this, RegionBodyStmt: MasterRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "master");
4521 return llvm::Error::success();
4522 };
4523
4524 LexicalScope Scope(*this, S.getSourceRange());
4525 EmitStopPoint(S: &S);
4526 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
4527 cantFail(ValOrErr: OMPBuilder.createMaster(Loc: Builder, BodyGenCB, FiniCB));
4528 Builder.restoreIP(IP: AfterIP);
4529
4530 return;
4531 }
4532 LexicalScope Scope(*this, S.getSourceRange());
4533 EmitStopPoint(S: &S);
4534 emitMaster(CGF&: *this, S);
4535}
4536
4537static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
4538 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4539 Action.Enter(CGF);
4540 CGF.EmitStmt(S: S.getRawStmt());
4541 };
4542 Expr *Filter = nullptr;
4543 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
4544 Filter = FilterClause->getThreadID();
4545 CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, MaskedOpGen: CodeGen, Loc: S.getBeginLoc(),
4546 Filter);
4547}
4548
4549void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) {
4550 if (CGM.getLangOpts().OpenMPIRBuilder) {
4551 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4552 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4553
4554 const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt();
4555 const Expr *Filter = nullptr;
4556 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
4557 Filter = FilterClause->getThreadID();
4558 llvm::Value *FilterVal = Filter
4559 ? EmitScalarExpr(E: Filter, IgnoreResultAssign: CGM.Int32Ty)
4560 : llvm::ConstantInt::get(Ty: CGM.Int32Ty, /*V=*/0);
4561
4562 auto FiniCB = [this](InsertPointTy IP) {
4563 OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
4564 return llvm::Error::success();
4565 };
4566
4567 auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP,
4568 InsertPointTy CodeGenIP) {
4569 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4570 CGF&: *this, RegionBodyStmt: MaskedRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "masked");
4571 return llvm::Error::success();
4572 };
4573
4574 LexicalScope Scope(*this, S.getSourceRange());
4575 EmitStopPoint(S: &S);
4576 llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(
4577 ValOrErr: OMPBuilder.createMasked(Loc: Builder, BodyGenCB, FiniCB, Filter: FilterVal));
4578 Builder.restoreIP(IP: AfterIP);
4579
4580 return;
4581 }
4582 LexicalScope Scope(*this, S.getSourceRange());
4583 EmitStopPoint(S: &S);
4584 emitMasked(CGF&: *this, S);
4585}
4586
4587void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
4588 if (CGM.getLangOpts().OpenMPIRBuilder) {
4589 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4590 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4591
4592 const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt();
4593 const Expr *Hint = nullptr;
4594 if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
4595 Hint = HintClause->getHint();
4596
4597 // TODO: This is slightly different from what's currently being done in
4598 // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
4599 // about typing is final.
4600 llvm::Value *HintInst = nullptr;
4601 if (Hint)
4602 HintInst =
4603 Builder.CreateIntCast(V: EmitScalarExpr(E: Hint), DestTy: CGM.Int32Ty, isSigned: false);
4604
4605 auto FiniCB = [this](InsertPointTy IP) {
4606 OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
4607 return llvm::Error::success();
4608 };
4609
4610 auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
4611 InsertPointTy CodeGenIP) {
4612 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4613 CGF&: *this, RegionBodyStmt: CriticalRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "critical");
4614 return llvm::Error::success();
4615 };
4616
4617 LexicalScope Scope(*this, S.getSourceRange());
4618 EmitStopPoint(S: &S);
4619 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
4620 cantFail(ValOrErr: OMPBuilder.createCritical(Loc: Builder, BodyGenCB, FiniCB,
4621 CriticalName: S.getDirectiveName().getAsString(),
4622 HintInst));
4623 Builder.restoreIP(IP: AfterIP);
4624
4625 return;
4626 }
4627
4628 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4629 Action.Enter(CGF);
4630 CGF.EmitStmt(S: S.getAssociatedStmt());
4631 };
4632 const Expr *Hint = nullptr;
4633 if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
4634 Hint = HintClause->getHint();
4635 LexicalScope Scope(*this, S.getSourceRange());
4636 EmitStopPoint(S: &S);
4637 CGM.getOpenMPRuntime().emitCriticalRegion(CGF&: *this,
4638 CriticalName: S.getDirectiveName().getAsString(),
4639 CriticalOpGen: CodeGen, Loc: S.getBeginLoc(), Hint);
4640}
4641
4642void CodeGenFunction::EmitOMPParallelForDirective(
4643 const OMPParallelForDirective &S) {
4644 // Emit directive as a combined directive that consists of two implicit
4645 // directives: 'parallel' with 'for' directive.
4646 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4647 Action.Enter(CGF);
4648 emitOMPCopyinClause(CGF, S);
4649 (void)emitWorksharingDirective(CGF, S, HasCancel: S.hasCancel());
4650 };
4651 {
4652 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4653 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4654 CGCapturedStmtInfo CGSI(CR_OpenMP);
4655 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4656 OMPLoopScope LoopScope(CGF, S);
4657 return CGF.EmitScalarExpr(E: S.getNumIterations());
4658 };
4659 bool IsInscan = llvm::any_of(Range: S.getClausesOfKind<OMPReductionClause>(),
4660 P: [](const OMPReductionClause *C) {
4661 return C->getModifier() == OMPC_REDUCTION_inscan;
4662 });
4663 if (IsInscan)
4664 emitScanBasedDirectiveDecls(CGF&: *this, S, NumIteratorsGen);
4665 auto LPCRegion =
4666 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
4667 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_for, CodeGen,
4668 CodeGenBoundParameters: emitEmptyBoundParameters);
4669 if (IsInscan)
4670 emitScanBasedDirectiveFinals(CGF&: *this, S, NumIteratorsGen);
4671 }
4672 // Check for outer lastprivate conditional update.
4673 checkForLastprivateConditionalUpdate(CGF&: *this, S);
4674}
4675
4676void CodeGenFunction::EmitOMPParallelForSimdDirective(
4677 const OMPParallelForSimdDirective &S) {
4678 // Emit directive as a combined directive that consists of two implicit
4679 // directives: 'parallel' with 'for' directive.
4680 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4681 Action.Enter(CGF);
4682 emitOMPCopyinClause(CGF, S);
4683 (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
4684 };
4685 {
4686 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4687 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4688 CGCapturedStmtInfo CGSI(CR_OpenMP);
4689 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4690 OMPLoopScope LoopScope(CGF, S);
4691 return CGF.EmitScalarExpr(E: S.getNumIterations());
4692 };
4693 bool IsInscan = llvm::any_of(Range: S.getClausesOfKind<OMPReductionClause>(),
4694 P: [](const OMPReductionClause *C) {
4695 return C->getModifier() == OMPC_REDUCTION_inscan;
4696 });
4697 if (IsInscan)
4698 emitScanBasedDirectiveDecls(CGF&: *this, S, NumIteratorsGen);
4699 auto LPCRegion =
4700 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
4701 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_for_simd, CodeGen,
4702 CodeGenBoundParameters: emitEmptyBoundParameters);
4703 if (IsInscan)
4704 emitScanBasedDirectiveFinals(CGF&: *this, S, NumIteratorsGen);
4705 }
4706 // Check for outer lastprivate conditional update.
4707 checkForLastprivateConditionalUpdate(CGF&: *this, S);
4708}
4709
4710void CodeGenFunction::EmitOMPParallelMasterDirective(
4711 const OMPParallelMasterDirective &S) {
4712 // Emit directive as a combined directive that consists of two implicit
4713 // directives: 'parallel' with 'master' directive.
4714 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4715 Action.Enter(CGF);
4716 OMPPrivateScope PrivateScope(CGF);
4717 emitOMPCopyinClause(CGF, S);
4718 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
4719 CGF.EmitOMPPrivateClause(D: S, PrivateScope);
4720 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
4721 (void)PrivateScope.Privatize();
4722 emitMaster(CGF, S);
4723 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
4724 };
4725 {
4726 auto LPCRegion =
4727 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
4728 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_master, CodeGen,
4729 CodeGenBoundParameters: emitEmptyBoundParameters);
4730 emitPostUpdateForReductionClause(CGF&: *this, D: S,
4731 CondGen: [](CodeGenFunction &) { return nullptr; });
4732 }
4733 // Check for outer lastprivate conditional update.
4734 checkForLastprivateConditionalUpdate(CGF&: *this, S);
4735}
4736
4737void CodeGenFunction::EmitOMPParallelMaskedDirective(
4738 const OMPParallelMaskedDirective &S) {
4739 // Emit directive as a combined directive that consists of two implicit
4740 // directives: 'parallel' with 'masked' directive.
4741 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4742 Action.Enter(CGF);
4743 OMPPrivateScope PrivateScope(CGF);
4744 emitOMPCopyinClause(CGF, S);
4745 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
4746 CGF.EmitOMPPrivateClause(D: S, PrivateScope);
4747 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
4748 (void)PrivateScope.Privatize();
4749 emitMasked(CGF, S);
4750 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
4751 };
4752 {
4753 auto LPCRegion =
4754 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
4755 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_masked, CodeGen,
4756 CodeGenBoundParameters: emitEmptyBoundParameters);
4757 emitPostUpdateForReductionClause(CGF&: *this, D: S,
4758 CondGen: [](CodeGenFunction &) { return nullptr; });
4759 }
4760 // Check for outer lastprivate conditional update.
4761 checkForLastprivateConditionalUpdate(CGF&: *this, S);
4762}
4763
4764void CodeGenFunction::EmitOMPParallelSectionsDirective(
4765 const OMPParallelSectionsDirective &S) {
4766 // Emit directive as a combined directive that consists of two implicit
4767 // directives: 'parallel' with 'sections' directive.
4768 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4769 Action.Enter(CGF);
4770 emitOMPCopyinClause(CGF, S);
4771 CGF.EmitSections(S);
4772 };
4773 {
4774 auto LPCRegion =
4775 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
4776 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_sections, CodeGen,
4777 CodeGenBoundParameters: emitEmptyBoundParameters);
4778 }
4779 // Check for outer lastprivate conditional update.
4780 checkForLastprivateConditionalUpdate(CGF&: *this, S);
4781}
4782
4783namespace {
4784/// Get the list of variables declared in the context of the untied tasks.
4785class CheckVarsEscapingUntiedTaskDeclContext final
4786 : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> {
4787 llvm::SmallVector<const VarDecl *, 4> PrivateDecls;
4788
4789public:
4790 explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
4791 ~CheckVarsEscapingUntiedTaskDeclContext() = default;
4792 void VisitDeclStmt(const DeclStmt *S) {
4793 if (!S)
4794 return;
4795 // Need to privatize only local vars, static locals can be processed as is.
4796 for (const Decl *D : S->decls()) {
4797 if (const auto *VD = dyn_cast_or_null<VarDecl>(Val: D))
4798 if (VD->hasLocalStorage())
4799 PrivateDecls.push_back(Elt: VD);
4800 }
4801 }
4802 void VisitOMPExecutableDirective(const OMPExecutableDirective *) {}
4803 void VisitCapturedStmt(const CapturedStmt *) {}
4804 void VisitLambdaExpr(const LambdaExpr *) {}
4805 void VisitBlockExpr(const BlockExpr *) {}
4806 void VisitStmt(const Stmt *S) {
4807 if (!S)
4808 return;
4809 for (const Stmt *Child : S->children())
4810 if (Child)
4811 Visit(S: Child);
4812 }
4813
4814 /// Swaps list of vars with the provided one.
4815 ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
4816};
4817} // anonymous namespace
4818
4819static void buildDependences(const OMPExecutableDirective &S,
4820 OMPTaskDataTy &Data) {
4821
4822 // First look for 'omp_all_memory' and add this first.
4823 bool OmpAllMemory = false;
4824 if (llvm::any_of(
4825 Range: S.getClausesOfKind<OMPDependClause>(), P: [](const OMPDependClause *C) {
4826 return C->getDependencyKind() == OMPC_DEPEND_outallmemory ||
4827 C->getDependencyKind() == OMPC_DEPEND_inoutallmemory;
4828 })) {
4829 OmpAllMemory = true;
4830 // Since both OMPC_DEPEND_outallmemory and OMPC_DEPEND_inoutallmemory are
4831 // equivalent to the runtime, always use OMPC_DEPEND_outallmemory to
4832 // simplify.
4833 OMPTaskDataTy::DependData &DD =
4834 Data.Dependences.emplace_back(Args: OMPC_DEPEND_outallmemory,
4835 /*IteratorExpr=*/Args: nullptr);
4836 // Add a nullptr Expr to simplify the codegen in emitDependData.
4837 DD.DepExprs.push_back(Elt: nullptr);
4838 }
4839 // Add remaining dependences skipping any 'out' or 'inout' if they are
4840 // overridden by 'omp_all_memory'.
4841 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
4842 OpenMPDependClauseKind Kind = C->getDependencyKind();
4843 if (Kind == OMPC_DEPEND_outallmemory || Kind == OMPC_DEPEND_inoutallmemory)
4844 continue;
4845 if (OmpAllMemory && (Kind == OMPC_DEPEND_out || Kind == OMPC_DEPEND_inout))
4846 continue;
4847 OMPTaskDataTy::DependData &DD =
4848 Data.Dependences.emplace_back(Args: C->getDependencyKind(), Args: C->getModifier());
4849 DD.DepExprs.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
4850 }
4851}
4852
4853void CodeGenFunction::EmitOMPTaskBasedDirective(
4854 const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
4855 const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
4856 OMPTaskDataTy &Data) {
4857 // Emit outlined function for task construct.
4858 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: CapturedRegion);
4859 auto I = CS->getCapturedDecl()->param_begin();
4860 auto PartId = std::next(x: I);
4861 auto TaskT = std::next(x: I, n: 4);
4862 // Check if the task is final
4863 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
4864 // If the condition constant folds and can be elided, try to avoid emitting
4865 // the condition and the dead arm of the if/else.
4866 const Expr *Cond = Clause->getCondition();
4867 bool CondConstant;
4868 if (ConstantFoldsToSimpleInteger(Cond, Result&: CondConstant))
4869 Data.Final.setInt(CondConstant);
4870 else
4871 Data.Final.setPointer(EvaluateExprAsBool(E: Cond));
4872 } else {
4873 // By default the task is not final.
4874 Data.Final.setInt(/*IntVal=*/false);
4875 }
4876 // Check if the task has 'priority' clause.
4877 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
4878 const Expr *Prio = Clause->getPriority();
4879 Data.Priority.setInt(/*IntVal=*/true);
4880 Data.Priority.setPointer(EmitScalarConversion(
4881 Src: EmitScalarExpr(E: Prio), SrcTy: Prio->getType(),
4882 DstTy: getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
4883 Loc: Prio->getExprLoc()));
4884 }
4885 // The first function argument for tasks is a thread id, the second one is a
4886 // part id (0 for tied tasks, >=0 for untied task).
4887 llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
4888 // Get list of private variables.
4889 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
4890 auto IRef = C->varlist_begin();
4891 for (const Expr *IInit : C->private_copies()) {
4892 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
4893 if (EmittedAsPrivate.insert(V: OrigVD->getCanonicalDecl()).second) {
4894 Data.PrivateVars.push_back(Elt: *IRef);
4895 Data.PrivateCopies.push_back(Elt: IInit);
4896 }
4897 ++IRef;
4898 }
4899 }
4900 EmittedAsPrivate.clear();
4901 // Get list of firstprivate variables.
4902 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
4903 auto IRef = C->varlist_begin();
4904 auto IElemInitRef = C->inits().begin();
4905 for (const Expr *IInit : C->private_copies()) {
4906 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
4907 if (EmittedAsPrivate.insert(V: OrigVD->getCanonicalDecl()).second) {
4908 Data.FirstprivateVars.push_back(Elt: *IRef);
4909 Data.FirstprivateCopies.push_back(Elt: IInit);
4910 Data.FirstprivateInits.push_back(Elt: *IElemInitRef);
4911 }
4912 ++IRef;
4913 ++IElemInitRef;
4914 }
4915 }
4916 // Get list of lastprivate variables (for taskloops).
4917 llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
4918 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
4919 auto IRef = C->varlist_begin();
4920 auto ID = C->destination_exprs().begin();
4921 for (const Expr *IInit : C->private_copies()) {
4922 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
4923 if (EmittedAsPrivate.insert(V: OrigVD->getCanonicalDecl()).second) {
4924 Data.LastprivateVars.push_back(Elt: *IRef);
4925 Data.LastprivateCopies.push_back(Elt: IInit);
4926 }
4927 LastprivateDstsOrigs.insert(
4928 KV: std::make_pair(x: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ID)->getDecl()),
4929 y: cast<DeclRefExpr>(Val: *IRef)));
4930 ++IRef;
4931 ++ID;
4932 }
4933 }
4934 SmallVector<const Expr *, 4> LHSs;
4935 SmallVector<const Expr *, 4> RHSs;
4936 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
4937 Data.ReductionVars.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
4938 Data.ReductionOrigs.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
4939 Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end());
4940 Data.ReductionOps.append(in_start: C->reduction_ops().begin(),
4941 in_end: C->reduction_ops().end());
4942 LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
4943 RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
4944 }
4945 Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
4946 CGF&: *this, Loc: S.getBeginLoc(), LHSExprs: LHSs, RHSExprs: RHSs, Data);
4947 // Build list of dependences.
4948 buildDependences(S, Data);
4949 // Get list of local vars for untied tasks.
4950 if (!Data.Tied) {
4951 CheckVarsEscapingUntiedTaskDeclContext Checker;
4952 Checker.Visit(S: S.getInnermostCapturedStmt()->getCapturedStmt());
4953 Data.PrivateLocals.append(in_start: Checker.getPrivateDecls().begin(),
4954 in_end: Checker.getPrivateDecls().end());
4955 }
4956 auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
4957 CapturedRegion](CodeGenFunction &CGF,
4958 PrePostActionTy &Action) {
4959 llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
4960 std::pair<Address, Address>>
4961 UntiedLocalVars;
4962 // Set proper addresses for generated private copies.
4963 OMPPrivateScope Scope(CGF);
4964 // Generate debug info for variables present in shared clause.
4965 if (auto *DI = CGF.getDebugInfo()) {
4966 llvm::SmallDenseMap<const VarDecl *, FieldDecl *> CaptureFields =
4967 CGF.CapturedStmtInfo->getCaptureFields();
4968 llvm::Value *ContextValue = CGF.CapturedStmtInfo->getContextValue();
4969 if (CaptureFields.size() && ContextValue) {
4970 unsigned CharWidth = CGF.getContext().getCharWidth();
4971 // The shared variables are packed together as members of structure.
4972 // So the address of each shared variable can be computed by adding
4973 // offset of it (within record) to the base address of record. For each
4974 // shared variable, debug intrinsic llvm.dbg.declare is generated with
4975 // appropriate expressions (DIExpression).
4976 // Ex:
4977 // %12 = load %struct.anon*, %struct.anon** %__context.addr.i
4978 // call void @llvm.dbg.declare(metadata %struct.anon* %12,
4979 // metadata !svar1,
4980 // metadata !DIExpression(DW_OP_deref))
4981 // call void @llvm.dbg.declare(metadata %struct.anon* %12,
4982 // metadata !svar2,
4983 // metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref))
4984 for (auto It = CaptureFields.begin(); It != CaptureFields.end(); ++It) {
4985 const VarDecl *SharedVar = It->first;
4986 RecordDecl *CaptureRecord = It->second->getParent();
4987 const ASTRecordLayout &Layout =
4988 CGF.getContext().getASTRecordLayout(D: CaptureRecord);
4989 unsigned Offset =
4990 Layout.getFieldOffset(FieldNo: It->second->getFieldIndex()) / CharWidth;
4991 if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
4992 (void)DI->EmitDeclareOfAutoVariable(Decl: SharedVar, AI: ContextValue,
4993 Builder&: CGF.Builder, UsePointerValue: false);
4994 // Get the call dbg.declare instruction we just created and update
4995 // its DIExpression to add offset to base address.
4996 auto UpdateExpr = [](llvm::LLVMContext &Ctx, auto *Declare,
4997 unsigned Offset) {
4998 SmallVector<uint64_t, 8> Ops;
4999 // Add offset to the base address if non zero.
5000 if (Offset) {
5001 Ops.push_back(Elt: llvm::dwarf::DW_OP_plus_uconst);
5002 Ops.push_back(Elt: Offset);
5003 }
5004 Ops.push_back(Elt: llvm::dwarf::DW_OP_deref);
5005 Declare->setExpression(llvm::DIExpression::get(Context&: Ctx, Elements: Ops));
5006 };
5007 llvm::Instruction &Last = CGF.Builder.GetInsertBlock()->back();
5008 if (auto DDI = dyn_cast<llvm::DbgVariableIntrinsic>(Val: &Last))
5009 UpdateExpr(DDI->getContext(), DDI, Offset);
5010 // If we're emitting using the new debug info format into a block
5011 // without a terminator, the record will be "trailing".
5012 assert(!Last.isTerminator() && "unexpected terminator");
5013 if (auto *Marker =
5014 CGF.Builder.GetInsertBlock()->getTrailingDbgRecords()) {
5015 for (llvm::DbgVariableRecord &DVR : llvm::reverse(
5016 C: llvm::filterDbgVars(R: Marker->getDbgRecordRange()))) {
5017 UpdateExpr(Last.getContext(), &DVR, Offset);
5018 break;
5019 }
5020 }
5021 }
5022 }
5023 }
5024 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
5025 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
5026 !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) {
5027 enum { PrivatesParam = 2, CopyFnParam = 3 };
5028 llvm::Value *CopyFn = CGF.Builder.CreateLoad(
5029 Addr: CGF.GetAddrOfLocalVar(VD: CS->getCapturedDecl()->getParam(i: CopyFnParam)));
5030 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(
5031 VD: CS->getCapturedDecl()->getParam(i: PrivatesParam)));
5032 // Map privates.
5033 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
5034 llvm::SmallVector<llvm::Value *, 16> CallArgs;
5035 llvm::SmallVector<llvm::Type *, 4> ParamTypes;
5036 CallArgs.push_back(Elt: PrivatesPtr);
5037 ParamTypes.push_back(Elt: PrivatesPtr->getType());
5038 for (const Expr *E : Data.PrivateVars) {
5039 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
5040 RawAddress PrivatePtr = CGF.CreateMemTemp(
5041 T: CGF.getContext().getPointerType(T: E->getType()), Name: ".priv.ptr.addr");
5042 PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr);
5043 CallArgs.push_back(Elt: PrivatePtr.getPointer());
5044 ParamTypes.push_back(Elt: PrivatePtr.getType());
5045 }
5046 for (const Expr *E : Data.FirstprivateVars) {
5047 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
5048 RawAddress PrivatePtr =
5049 CGF.CreateMemTemp(T: CGF.getContext().getPointerType(T: E->getType()),
5050 Name: ".firstpriv.ptr.addr");
5051 PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr);
5052 FirstprivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr);
5053 CallArgs.push_back(Elt: PrivatePtr.getPointer());
5054 ParamTypes.push_back(Elt: PrivatePtr.getType());
5055 }
5056 for (const Expr *E : Data.LastprivateVars) {
5057 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
5058 RawAddress PrivatePtr =
5059 CGF.CreateMemTemp(T: CGF.getContext().getPointerType(T: E->getType()),
5060 Name: ".lastpriv.ptr.addr");
5061 PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr);
5062 CallArgs.push_back(Elt: PrivatePtr.getPointer());
5063 ParamTypes.push_back(Elt: PrivatePtr.getType());
5064 }
5065 for (const VarDecl *VD : Data.PrivateLocals) {
5066 QualType Ty = VD->getType().getNonReferenceType();
5067 if (VD->getType()->isLValueReferenceType())
5068 Ty = CGF.getContext().getPointerType(T: Ty);
5069 if (isAllocatableDecl(VD))
5070 Ty = CGF.getContext().getPointerType(T: Ty);
5071 RawAddress PrivatePtr = CGF.CreateMemTemp(
5072 T: CGF.getContext().getPointerType(T: Ty), Name: ".local.ptr.addr");
5073 auto Result = UntiedLocalVars.insert(
5074 KV: std::make_pair(x&: VD, y: std::make_pair(x&: PrivatePtr, y: Address::invalid())));
5075 // If key exists update in place.
5076 if (Result.second == false)
5077 *Result.first = std::make_pair(
5078 x&: VD, y: std::make_pair(x&: PrivatePtr, y: Address::invalid()));
5079 CallArgs.push_back(Elt: PrivatePtr.getPointer());
5080 ParamTypes.push_back(Elt: PrivatePtr.getType());
5081 }
5082 auto *CopyFnTy = llvm::FunctionType::get(Result: CGF.Builder.getVoidTy(),
5083 Params: ParamTypes, /*isVarArg=*/false);
5084 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
5085 CGF, Loc: S.getBeginLoc(), OutlinedFn: {CopyFnTy, CopyFn}, Args: CallArgs);
5086 for (const auto &Pair : LastprivateDstsOrigs) {
5087 const auto *OrigVD = cast<VarDecl>(Val: Pair.second->getDecl());
5088 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
5089 /*RefersToEnclosingVariableOrCapture=*/
5090 CGF.CapturedStmtInfo->lookup(VD: OrigVD) != nullptr,
5091 Pair.second->getType(), VK_LValue,
5092 Pair.second->getExprLoc());
5093 Scope.addPrivate(LocalVD: Pair.first, Addr: CGF.EmitLValue(E: &DRE).getAddress());
5094 }
5095 for (const auto &Pair : PrivatePtrs) {
5096 Address Replacement = Address(
5097 CGF.Builder.CreateLoad(Addr: Pair.second),
5098 CGF.ConvertTypeForMem(T: Pair.first->getType().getNonReferenceType()),
5099 CGF.getContext().getDeclAlign(D: Pair.first));
5100 Scope.addPrivate(LocalVD: Pair.first, Addr: Replacement);
5101 if (auto *DI = CGF.getDebugInfo())
5102 if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
5103 (void)DI->EmitDeclareOfAutoVariable(
5104 Decl: Pair.first, AI: Pair.second.getBasePointer(), Builder&: CGF.Builder,
5105 /*UsePointerValue*/ true);
5106 }
5107 // Adjust mapping for internal locals by mapping actual memory instead of
5108 // a pointer to this memory.
5109 for (auto &Pair : UntiedLocalVars) {
5110 QualType VDType = Pair.first->getType().getNonReferenceType();
5111 if (Pair.first->getType()->isLValueReferenceType())
5112 VDType = CGF.getContext().getPointerType(T: VDType);
5113 if (isAllocatableDecl(VD: Pair.first)) {
5114 llvm::Value *Ptr = CGF.Builder.CreateLoad(Addr: Pair.second.first);
5115 Address Replacement(
5116 Ptr,
5117 CGF.ConvertTypeForMem(T: CGF.getContext().getPointerType(T: VDType)),
5118 CGF.getPointerAlign());
5119 Pair.second.first = Replacement;
5120 Ptr = CGF.Builder.CreateLoad(Addr: Replacement);
5121 Replacement = Address(Ptr, CGF.ConvertTypeForMem(T: VDType),
5122 CGF.getContext().getDeclAlign(D: Pair.first));
5123 Pair.second.second = Replacement;
5124 } else {
5125 llvm::Value *Ptr = CGF.Builder.CreateLoad(Addr: Pair.second.first);
5126 Address Replacement(Ptr, CGF.ConvertTypeForMem(T: VDType),
5127 CGF.getContext().getDeclAlign(D: Pair.first));
5128 Pair.second.first = Replacement;
5129 }
5130 }
5131 }
5132 if (Data.Reductions) {
5133 OMPPrivateScope FirstprivateScope(CGF);
5134 for (const auto &Pair : FirstprivatePtrs) {
5135 Address Replacement(
5136 CGF.Builder.CreateLoad(Addr: Pair.second),
5137 CGF.ConvertTypeForMem(T: Pair.first->getType().getNonReferenceType()),
5138 CGF.getContext().getDeclAlign(D: Pair.first));
5139 FirstprivateScope.addPrivate(LocalVD: Pair.first, Addr: Replacement);
5140 }
5141 (void)FirstprivateScope.Privatize();
5142 OMPLexicalScope LexScope(CGF, S, CapturedRegion);
5143 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
5144 Data.ReductionCopies, Data.ReductionOps);
5145 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
5146 Addr: CGF.GetAddrOfLocalVar(VD: CS->getCapturedDecl()->getParam(i: 9)));
5147 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
5148 RedCG.emitSharedOrigLValue(CGF, N: Cnt);
5149 RedCG.emitAggregateType(CGF, N: Cnt);
5150 // FIXME: This must removed once the runtime library is fixed.
5151 // Emit required threadprivate variables for
5152 // initializer/combiner/finalizer.
5153 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(),
5154 RCG&: RedCG, N: Cnt);
5155 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5156 CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt));
5157 Replacement = Address(
5158 CGF.EmitScalarConversion(Src: Replacement.emitRawPointer(CGF),
5159 SrcTy: CGF.getContext().VoidPtrTy,
5160 DstTy: CGF.getContext().getPointerType(
5161 T: Data.ReductionCopies[Cnt]->getType()),
5162 Loc: Data.ReductionCopies[Cnt]->getExprLoc()),
5163 CGF.ConvertTypeForMem(T: Data.ReductionCopies[Cnt]->getType()),
5164 Replacement.getAlignment());
5165 Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement);
5166 Scope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement);
5167 }
5168 }
5169 // Privatize all private variables except for in_reduction items.
5170 (void)Scope.Privatize();
5171 SmallVector<const Expr *, 4> InRedVars;
5172 SmallVector<const Expr *, 4> InRedPrivs;
5173 SmallVector<const Expr *, 4> InRedOps;
5174 SmallVector<const Expr *, 4> TaskgroupDescriptors;
5175 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
5176 auto IPriv = C->privates().begin();
5177 auto IRed = C->reduction_ops().begin();
5178 auto ITD = C->taskgroup_descriptors().begin();
5179 for (const Expr *Ref : C->varlist()) {
5180 InRedVars.emplace_back(Args&: Ref);
5181 InRedPrivs.emplace_back(Args: *IPriv);
5182 InRedOps.emplace_back(Args: *IRed);
5183 TaskgroupDescriptors.emplace_back(Args: *ITD);
5184 std::advance(i&: IPriv, n: 1);
5185 std::advance(i&: IRed, n: 1);
5186 std::advance(i&: ITD, n: 1);
5187 }
5188 }
5189 // Privatize in_reduction items here, because taskgroup descriptors must be
5190 // privatized earlier.
5191 OMPPrivateScope InRedScope(CGF);
5192 if (!InRedVars.empty()) {
5193 ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
5194 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
5195 RedCG.emitSharedOrigLValue(CGF, N: Cnt);
5196 RedCG.emitAggregateType(CGF, N: Cnt);
5197 // The taskgroup descriptor variable is always implicit firstprivate and
5198 // privatized already during processing of the firstprivates.
5199 // FIXME: This must removed once the runtime library is fixed.
5200 // Emit required threadprivate variables for
5201 // initializer/combiner/finalizer.
5202 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(),
5203 RCG&: RedCG, N: Cnt);
5204 llvm::Value *ReductionsPtr;
5205 if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
5206 ReductionsPtr = CGF.EmitLoadOfScalar(lvalue: CGF.EmitLValue(E: TRExpr),
5207 Loc: TRExpr->getExprLoc());
5208 } else {
5209 ReductionsPtr = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
5210 }
5211 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5212 CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt));
5213 Replacement = Address(
5214 CGF.EmitScalarConversion(
5215 Src: Replacement.emitRawPointer(CGF), SrcTy: CGF.getContext().VoidPtrTy,
5216 DstTy: CGF.getContext().getPointerType(T: InRedPrivs[Cnt]->getType()),
5217 Loc: InRedPrivs[Cnt]->getExprLoc()),
5218 CGF.ConvertTypeForMem(T: InRedPrivs[Cnt]->getType()),
5219 Replacement.getAlignment());
5220 Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement);
5221 InRedScope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement);
5222 }
5223 }
5224 (void)InRedScope.Privatize();
5225
5226 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF,
5227 UntiedLocalVars);
5228 Action.Enter(CGF);
5229 BodyGen(CGF);
5230 };
5231 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
5232 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
5233 D: S, ThreadIDVar: *I, PartIDVar: *PartId, TaskTVar: *TaskT, InnermostKind: EKind, CodeGen, Tied: Data.Tied, NumberOfParts&: Data.NumberOfParts);
5234 OMPLexicalScope Scope(*this, S, std::nullopt,
5235 !isOpenMPParallelDirective(DKind: EKind) &&
5236 !isOpenMPSimdDirective(DKind: EKind));
5237 TaskGen(*this, OutlinedFn, Data);
5238}
5239
5240static ImplicitParamDecl *
5241createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
5242 QualType Ty, CapturedDecl *CD,
5243 SourceLocation Loc) {
5244 auto *OrigVD = ImplicitParamDecl::Create(C, DC: CD, IdLoc: Loc, /*Id=*/nullptr, T: Ty,
5245 ParamKind: ImplicitParamKind::Other);
5246 auto *OrigRef = DeclRefExpr::Create(
5247 Context: C, QualifierLoc: NestedNameSpecifierLoc(), TemplateKWLoc: SourceLocation(), D: OrigVD,
5248 /*RefersToEnclosingVariableOrCapture=*/false, NameLoc: Loc, T: Ty, VK: VK_LValue);
5249 auto *PrivateVD = ImplicitParamDecl::Create(C, DC: CD, IdLoc: Loc, /*Id=*/nullptr, T: Ty,
5250 ParamKind: ImplicitParamKind::Other);
5251 auto *PrivateRef = DeclRefExpr::Create(
5252 Context: C, QualifierLoc: NestedNameSpecifierLoc(), TemplateKWLoc: SourceLocation(), D: PrivateVD,
5253 /*RefersToEnclosingVariableOrCapture=*/false, NameLoc: Loc, T: Ty, VK: VK_LValue);
5254 QualType ElemType = C.getBaseElementType(QT: Ty);
5255 auto *InitVD = ImplicitParamDecl::Create(C, DC: CD, IdLoc: Loc, /*Id=*/nullptr, T: ElemType,
5256 ParamKind: ImplicitParamKind::Other);
5257 auto *InitRef = DeclRefExpr::Create(
5258 Context: C, QualifierLoc: NestedNameSpecifierLoc(), TemplateKWLoc: SourceLocation(), D: InitVD,
5259 /*RefersToEnclosingVariableOrCapture=*/false, NameLoc: Loc, T: ElemType, VK: VK_LValue);
5260 PrivateVD->setInitStyle(VarDecl::CInit);
5261 PrivateVD->setInit(ImplicitCastExpr::Create(Context: C, T: ElemType, Kind: CK_LValueToRValue,
5262 Operand: InitRef, /*BasePath=*/nullptr,
5263 Cat: VK_PRValue, FPO: FPOptionsOverride()));
5264 Data.FirstprivateVars.emplace_back(Args&: OrigRef);
5265 Data.FirstprivateCopies.emplace_back(Args&: PrivateRef);
5266 Data.FirstprivateInits.emplace_back(Args&: InitRef);
5267 return OrigVD;
5268}
5269
5270void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
5271 const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
5272 OMPTargetDataInfo &InputInfo) {
5273 // Emit outlined function for task construct.
5274 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_task);
5275 Address CapturedStruct = GenerateCapturedStmtArgument(S: *CS);
5276 QualType SharedsTy = getContext().getRecordType(Decl: CS->getCapturedRecordDecl());
5277 auto I = CS->getCapturedDecl()->param_begin();
5278 auto PartId = std::next(x: I);
5279 auto TaskT = std::next(x: I, n: 4);
5280 OMPTaskDataTy Data;
5281 // The task is not final.
5282 Data.Final.setInt(/*IntVal=*/false);
5283 // Get list of firstprivate variables.
5284 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
5285 auto IRef = C->varlist_begin();
5286 auto IElemInitRef = C->inits().begin();
5287 for (auto *IInit : C->private_copies()) {
5288 Data.FirstprivateVars.push_back(Elt: *IRef);
5289 Data.FirstprivateCopies.push_back(Elt: IInit);
5290 Data.FirstprivateInits.push_back(Elt: *IElemInitRef);
5291 ++IRef;
5292 ++IElemInitRef;
5293 }
5294 }
5295 SmallVector<const Expr *, 4> LHSs;
5296 SmallVector<const Expr *, 4> RHSs;
5297 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
5298 Data.ReductionVars.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
5299 Data.ReductionOrigs.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
5300 Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end());
5301 Data.ReductionOps.append(in_start: C->reduction_ops().begin(),
5302 in_end: C->reduction_ops().end());
5303 LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
5304 RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
5305 }
5306 OMPPrivateScope TargetScope(*this);
5307 VarDecl *BPVD = nullptr;
5308 VarDecl *PVD = nullptr;
5309 VarDecl *SVD = nullptr;
5310 VarDecl *MVD = nullptr;
5311 if (InputInfo.NumberOfTargetItems > 0) {
5312 auto *CD = CapturedDecl::Create(
5313 C&: getContext(), DC: getContext().getTranslationUnitDecl(), /*NumParams=*/0);
5314 llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
5315 QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
5316 EltTy: getContext().VoidPtrTy, ArySize: ArrSize, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal,
5317 /*IndexTypeQuals=*/0);
5318 BPVD = createImplicitFirstprivateForType(
5319 C&: getContext(), Data, Ty: BaseAndPointerAndMapperType, CD, Loc: S.getBeginLoc());
5320 PVD = createImplicitFirstprivateForType(
5321 C&: getContext(), Data, Ty: BaseAndPointerAndMapperType, CD, Loc: S.getBeginLoc());
5322 QualType SizesType = getContext().getConstantArrayType(
5323 EltTy: getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
5324 ArySize: ArrSize, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal,
5325 /*IndexTypeQuals=*/0);
5326 SVD = createImplicitFirstprivateForType(C&: getContext(), Data, Ty: SizesType, CD,
5327 Loc: S.getBeginLoc());
5328 TargetScope.addPrivate(LocalVD: BPVD, Addr: InputInfo.BasePointersArray);
5329 TargetScope.addPrivate(LocalVD: PVD, Addr: InputInfo.PointersArray);
5330 TargetScope.addPrivate(LocalVD: SVD, Addr: InputInfo.SizesArray);
5331 // If there is no user-defined mapper, the mapper array will be nullptr. In
5332 // this case, we don't need to privatize it.
5333 if (!isa_and_nonnull<llvm::ConstantPointerNull>(
5334 Val: InputInfo.MappersArray.emitRawPointer(CGF&: *this))) {
5335 MVD = createImplicitFirstprivateForType(
5336 C&: getContext(), Data, Ty: BaseAndPointerAndMapperType, CD, Loc: S.getBeginLoc());
5337 TargetScope.addPrivate(LocalVD: MVD, Addr: InputInfo.MappersArray);
5338 }
5339 }
5340 (void)TargetScope.Privatize();
5341 buildDependences(S, Data);
5342 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
5343 auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD, EKind,
5344 &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
5345 // Set proper addresses for generated private copies.
5346 OMPPrivateScope Scope(CGF);
5347 if (!Data.FirstprivateVars.empty()) {
5348 enum { PrivatesParam = 2, CopyFnParam = 3 };
5349 llvm::Value *CopyFn = CGF.Builder.CreateLoad(
5350 Addr: CGF.GetAddrOfLocalVar(VD: CS->getCapturedDecl()->getParam(i: CopyFnParam)));
5351 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(
5352 VD: CS->getCapturedDecl()->getParam(i: PrivatesParam)));
5353 // Map privates.
5354 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
5355 llvm::SmallVector<llvm::Value *, 16> CallArgs;
5356 llvm::SmallVector<llvm::Type *, 4> ParamTypes;
5357 CallArgs.push_back(Elt: PrivatesPtr);
5358 ParamTypes.push_back(Elt: PrivatesPtr->getType());
5359 for (const Expr *E : Data.FirstprivateVars) {
5360 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
5361 RawAddress PrivatePtr =
5362 CGF.CreateMemTemp(T: CGF.getContext().getPointerType(T: E->getType()),
5363 Name: ".firstpriv.ptr.addr");
5364 PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr);
5365 CallArgs.push_back(Elt: PrivatePtr.getPointer());
5366 ParamTypes.push_back(Elt: PrivatePtr.getType());
5367 }
5368 auto *CopyFnTy = llvm::FunctionType::get(Result: CGF.Builder.getVoidTy(),
5369 Params: ParamTypes, /*isVarArg=*/false);
5370 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
5371 CGF, Loc: S.getBeginLoc(), OutlinedFn: {CopyFnTy, CopyFn}, Args: CallArgs);
5372 for (const auto &Pair : PrivatePtrs) {
5373 Address Replacement(
5374 CGF.Builder.CreateLoad(Addr: Pair.second),
5375 CGF.ConvertTypeForMem(T: Pair.first->getType().getNonReferenceType()),
5376 CGF.getContext().getDeclAlign(D: Pair.first));
5377 Scope.addPrivate(LocalVD: Pair.first, Addr: Replacement);
5378 }
5379 }
5380 CGF.processInReduction(S, Data, CGF, CS, Scope);
5381 if (InputInfo.NumberOfTargetItems > 0) {
5382 InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
5383 Addr: CGF.GetAddrOfLocalVar(VD: BPVD), /*Index=*/0);
5384 InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
5385 Addr: CGF.GetAddrOfLocalVar(VD: PVD), /*Index=*/0);
5386 InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
5387 Addr: CGF.GetAddrOfLocalVar(VD: SVD), /*Index=*/0);
5388 // If MVD is nullptr, the mapper array is not privatized
5389 if (MVD)
5390 InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP(
5391 Addr: CGF.GetAddrOfLocalVar(VD: MVD), /*Index=*/0);
5392 }
5393
5394 Action.Enter(CGF);
5395 OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
5396 auto *TL = S.getSingleClause<OMPThreadLimitClause>();
5397 if (CGF.CGM.getLangOpts().OpenMP >= 51 &&
5398 needsTaskBasedThreadLimit(DKind: EKind) && TL) {
5399 // Emit __kmpc_set_thread_limit() to set the thread_limit for the task
5400 // enclosing this target region. This will indirectly set the thread_limit
5401 // for every applicable construct within target region.
5402 CGF.CGM.getOpenMPRuntime().emitThreadLimitClause(
5403 CGF, ThreadLimit: TL->getThreadLimit().front(), Loc: S.getBeginLoc());
5404 }
5405 BodyGen(CGF);
5406 };
5407 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
5408 D: S, ThreadIDVar: *I, PartIDVar: *PartId, TaskTVar: *TaskT, InnermostKind: EKind, CodeGen, /*Tied=*/true,
5409 NumberOfParts&: Data.NumberOfParts);
5410 llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
5411 IntegerLiteral IfCond(getContext(), TrueOrFalse,
5412 getContext().getIntTypeForBitwidth(DestWidth: 32, /*Signed=*/0),
5413 SourceLocation());
5414 CGM.getOpenMPRuntime().emitTaskCall(CGF&: *this, Loc: S.getBeginLoc(), D: S, TaskFunction: OutlinedFn,
5415 SharedsTy, Shareds: CapturedStruct, IfCond: &IfCond, Data);
5416}
5417
5418void CodeGenFunction::processInReduction(const OMPExecutableDirective &S,
5419 OMPTaskDataTy &Data,
5420 CodeGenFunction &CGF,
5421 const CapturedStmt *CS,
5422 OMPPrivateScope &Scope) {
5423 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
5424 if (Data.Reductions) {
5425 OpenMPDirectiveKind CapturedRegion = EKind;
5426 OMPLexicalScope LexScope(CGF, S, CapturedRegion);
5427 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
5428 Data.ReductionCopies, Data.ReductionOps);
5429 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
5430 Addr: CGF.GetAddrOfLocalVar(VD: CS->getCapturedDecl()->getParam(i: 4)));
5431 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
5432 RedCG.emitSharedOrigLValue(CGF, N: Cnt);
5433 RedCG.emitAggregateType(CGF, N: Cnt);
5434 // FIXME: This must removed once the runtime library is fixed.
5435 // Emit required threadprivate variables for
5436 // initializer/combiner/finalizer.
5437 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(),
5438 RCG&: RedCG, N: Cnt);
5439 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5440 CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt));
5441 Replacement = Address(
5442 CGF.EmitScalarConversion(Src: Replacement.emitRawPointer(CGF),
5443 SrcTy: CGF.getContext().VoidPtrTy,
5444 DstTy: CGF.getContext().getPointerType(
5445 T: Data.ReductionCopies[Cnt]->getType()),
5446 Loc: Data.ReductionCopies[Cnt]->getExprLoc()),
5447 CGF.ConvertTypeForMem(T: Data.ReductionCopies[Cnt]->getType()),
5448 Replacement.getAlignment());
5449 Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement);
5450 Scope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement);
5451 }
5452 }
5453 (void)Scope.Privatize();
5454 SmallVector<const Expr *, 4> InRedVars;
5455 SmallVector<const Expr *, 4> InRedPrivs;
5456 SmallVector<const Expr *, 4> InRedOps;
5457 SmallVector<const Expr *, 4> TaskgroupDescriptors;
5458 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
5459 auto IPriv = C->privates().begin();
5460 auto IRed = C->reduction_ops().begin();
5461 auto ITD = C->taskgroup_descriptors().begin();
5462 for (const Expr *Ref : C->varlist()) {
5463 InRedVars.emplace_back(Args&: Ref);
5464 InRedPrivs.emplace_back(Args: *IPriv);
5465 InRedOps.emplace_back(Args: *IRed);
5466 TaskgroupDescriptors.emplace_back(Args: *ITD);
5467 std::advance(i&: IPriv, n: 1);
5468 std::advance(i&: IRed, n: 1);
5469 std::advance(i&: ITD, n: 1);
5470 }
5471 }
5472 OMPPrivateScope InRedScope(CGF);
5473 if (!InRedVars.empty()) {
5474 ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
5475 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
5476 RedCG.emitSharedOrigLValue(CGF, N: Cnt);
5477 RedCG.emitAggregateType(CGF, N: Cnt);
5478 // FIXME: This must removed once the runtime library is fixed.
5479 // Emit required threadprivate variables for
5480 // initializer/combiner/finalizer.
5481 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(),
5482 RCG&: RedCG, N: Cnt);
5483 llvm::Value *ReductionsPtr;
5484 if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
5485 ReductionsPtr =
5486 CGF.EmitLoadOfScalar(lvalue: CGF.EmitLValue(E: TRExpr), Loc: TRExpr->getExprLoc());
5487 } else {
5488 ReductionsPtr = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
5489 }
5490 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5491 CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt));
5492 Replacement = Address(
5493 CGF.EmitScalarConversion(
5494 Src: Replacement.emitRawPointer(CGF), SrcTy: CGF.getContext().VoidPtrTy,
5495 DstTy: CGF.getContext().getPointerType(T: InRedPrivs[Cnt]->getType()),
5496 Loc: InRedPrivs[Cnt]->getExprLoc()),
5497 CGF.ConvertTypeForMem(T: InRedPrivs[Cnt]->getType()),
5498 Replacement.getAlignment());
5499 Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement);
5500 InRedScope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement);
5501 }
5502 }
5503 (void)InRedScope.Privatize();
5504}
5505
5506void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
5507 // Emit outlined function for task construct.
5508 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_task);
5509 Address CapturedStruct = GenerateCapturedStmtArgument(S: *CS);
5510 QualType SharedsTy = getContext().getRecordType(Decl: CS->getCapturedRecordDecl());
5511 const Expr *IfCond = nullptr;
5512 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
5513 if (C->getNameModifier() == OMPD_unknown ||
5514 C->getNameModifier() == OMPD_task) {
5515 IfCond = C->getCondition();
5516 break;
5517 }
5518 }
5519
5520 OMPTaskDataTy Data;
5521 // Check if we should emit tied or untied task.
5522 Data.Tied = !S.getSingleClause<OMPUntiedClause>();
5523 auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
5524 CGF.EmitStmt(S: CS->getCapturedStmt());
5525 };
5526 auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
5527 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
5528 const OMPTaskDataTy &Data) {
5529 CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, Loc: S.getBeginLoc(), D: S, TaskFunction: OutlinedFn,
5530 SharedsTy, Shareds: CapturedStruct, IfCond,
5531 Data);
5532 };
5533 auto LPCRegion =
5534 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
5535 EmitOMPTaskBasedDirective(S, CapturedRegion: OMPD_task, BodyGen, TaskGen, Data);
5536}
5537
5538void CodeGenFunction::EmitOMPTaskyieldDirective(
5539 const OMPTaskyieldDirective &S) {
5540 CGM.getOpenMPRuntime().emitTaskyieldCall(CGF&: *this, Loc: S.getBeginLoc());
5541}
5542
5543void CodeGenFunction::EmitOMPErrorDirective(const OMPErrorDirective &S) {
5544 const OMPMessageClause *MC = S.getSingleClause<OMPMessageClause>();
5545 Expr *ME = MC ? MC->getMessageString() : nullptr;
5546 const OMPSeverityClause *SC = S.getSingleClause<OMPSeverityClause>();
5547 bool IsFatal = false;
5548 if (!SC || SC->getSeverityKind() == OMPC_SEVERITY_fatal)
5549 IsFatal = true;
5550 CGM.getOpenMPRuntime().emitErrorCall(CGF&: *this, Loc: S.getBeginLoc(), ME, IsFatal);
5551}
5552
5553void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
5554 CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_barrier);
5555}
5556
5557void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
5558 OMPTaskDataTy Data;
5559 // Build list of dependences
5560 buildDependences(S, Data);
5561 Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();
5562 CGM.getOpenMPRuntime().emitTaskwaitCall(CGF&: *this, Loc: S.getBeginLoc(), Data);
5563}
5564
5565static bool isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective &T) {
5566 return T.clauses().empty();
5567}
5568
5569void CodeGenFunction::EmitOMPTaskgroupDirective(
5570 const OMPTaskgroupDirective &S) {
5571 OMPLexicalScope Scope(*this, S, OMPD_unknown);
5572 if (CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(T: S)) {
5573 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
5574 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
5575 InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
5576 AllocaInsertPt->getIterator());
5577
5578 auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
5579 InsertPointTy CodeGenIP) {
5580 Builder.restoreIP(IP: CodeGenIP);
5581 EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
5582 return llvm::Error::success();
5583 };
5584 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
5585 if (!CapturedStmtInfo)
5586 CapturedStmtInfo = &CapStmtInfo;
5587 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
5588 cantFail(ValOrErr: OMPBuilder.createTaskgroup(Loc: Builder, AllocaIP, BodyGenCB));
5589 Builder.restoreIP(IP: AfterIP);
5590 return;
5591 }
5592 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5593 Action.Enter(CGF);
5594 if (const Expr *E = S.getReductionRef()) {
5595 SmallVector<const Expr *, 4> LHSs;
5596 SmallVector<const Expr *, 4> RHSs;
5597 OMPTaskDataTy Data;
5598 for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
5599 Data.ReductionVars.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
5600 Data.ReductionOrigs.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
5601 Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end());
5602 Data.ReductionOps.append(in_start: C->reduction_ops().begin(),
5603 in_end: C->reduction_ops().end());
5604 LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
5605 RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
5606 }
5607 llvm::Value *ReductionDesc =
5608 CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, Loc: S.getBeginLoc(),
5609 LHSExprs: LHSs, RHSExprs: RHSs, Data);
5610 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
5611 CGF.EmitVarDecl(D: *VD);
5612 CGF.EmitStoreOfScalar(Value: ReductionDesc, Addr: CGF.GetAddrOfLocalVar(VD),
5613 /*Volatile=*/false, Ty: E->getType());
5614 }
5615 CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
5616 };
5617 CGM.getOpenMPRuntime().emitTaskgroupRegion(CGF&: *this, TaskgroupOpGen: CodeGen, Loc: S.getBeginLoc());
5618}
5619
5620void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
5621 llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
5622 ? llvm::AtomicOrdering::NotAtomic
5623 : llvm::AtomicOrdering::AcquireRelease;
5624 CGM.getOpenMPRuntime().emitFlush(
5625 CGF&: *this,
5626 Vars: [&S]() -> ArrayRef<const Expr *> {
5627 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
5628 return llvm::ArrayRef(FlushClause->varlist_begin(),
5629 FlushClause->varlist_end());
5630 return {};
5631 }(),
5632 Loc: S.getBeginLoc(), AO);
5633}
5634
5635void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
5636 const auto *DO = S.getSingleClause<OMPDepobjClause>();
5637 LValue DOLVal = EmitLValue(E: DO->getDepobj());
5638 if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
5639 // Build list and emit dependences
5640 OMPTaskDataTy Data;
5641 buildDependences(S, Data);
5642 for (auto &Dep : Data.Dependences) {
5643 Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
5644 CGF&: *this, Dependencies: Dep, Loc: DC->getBeginLoc());
5645 EmitStoreOfScalar(value: DepAddr.emitRawPointer(CGF&: *this), lvalue: DOLVal);
5646 }
5647 return;
5648 }
5649 if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
5650 CGM.getOpenMPRuntime().emitDestroyClause(CGF&: *this, DepobjLVal: DOLVal, Loc: DC->getBeginLoc());
5651 return;
5652 }
5653 if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
5654 CGM.getOpenMPRuntime().emitUpdateClause(
5655 CGF&: *this, DepobjLVal: DOLVal, NewDepKind: UC->getDependencyKind(), Loc: UC->getBeginLoc());
5656 return;
5657 }
5658}
5659
5660void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
5661 if (!OMPParentLoopDirectiveForScan)
5662 return;
5663 const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
5664 bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
5665 SmallVector<const Expr *, 4> Shareds;
5666 SmallVector<const Expr *, 4> Privates;
5667 SmallVector<const Expr *, 4> LHSs;
5668 SmallVector<const Expr *, 4> RHSs;
5669 SmallVector<const Expr *, 4> ReductionOps;
5670 SmallVector<const Expr *, 4> CopyOps;
5671 SmallVector<const Expr *, 4> CopyArrayTemps;
5672 SmallVector<const Expr *, 4> CopyArrayElems;
5673 for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
5674 if (C->getModifier() != OMPC_REDUCTION_inscan)
5675 continue;
5676 Shareds.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
5677 Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
5678 LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
5679 RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
5680 ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
5681 CopyOps.append(in_start: C->copy_ops().begin(), in_end: C->copy_ops().end());
5682 CopyArrayTemps.append(in_start: C->copy_array_temps().begin(),
5683 in_end: C->copy_array_temps().end());
5684 CopyArrayElems.append(in_start: C->copy_array_elems().begin(),
5685 in_end: C->copy_array_elems().end());
5686 }
5687 if (ParentDir.getDirectiveKind() == OMPD_simd ||
5688 (getLangOpts().OpenMPSimd &&
5689 isOpenMPSimdDirective(DKind: ParentDir.getDirectiveKind()))) {
5690 // For simd directive and simd-based directives in simd only mode, use the
5691 // following codegen:
5692 // int x = 0;
5693 // #pragma omp simd reduction(inscan, +: x)
5694 // for (..) {
5695 // <first part>
5696 // #pragma omp scan inclusive(x)
5697 // <second part>
5698 // }
5699 // is transformed to:
5700 // int x = 0;
5701 // for (..) {
5702 // int x_priv = 0;
5703 // <first part>
5704 // x = x_priv + x;
5705 // x_priv = x;
5706 // <second part>
5707 // }
5708 // and
5709 // int x = 0;
5710 // #pragma omp simd reduction(inscan, +: x)
5711 // for (..) {
5712 // <first part>
5713 // #pragma omp scan exclusive(x)
5714 // <second part>
5715 // }
5716 // to
5717 // int x = 0;
5718 // for (..) {
5719 // int x_priv = 0;
5720 // <second part>
5721 // int temp = x;
5722 // x = x_priv + x;
5723 // x_priv = temp;
5724 // <first part>
5725 // }
5726 llvm::BasicBlock *OMPScanReduce = createBasicBlock(name: "omp.inscan.reduce");
5727 EmitBranch(Block: IsInclusive
5728 ? OMPScanReduce
5729 : BreakContinueStack.back().ContinueBlock.getBlock());
5730 EmitBlock(BB: OMPScanDispatch);
5731 {
5732 // New scope for correct construction/destruction of temp variables for
5733 // exclusive scan.
5734 LexicalScope Scope(*this, S.getSourceRange());
5735 EmitBranch(Block: IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
5736 EmitBlock(BB: OMPScanReduce);
5737 if (!IsInclusive) {
5738 // Create temp var and copy LHS value to this temp value.
5739 // TMP = LHS;
5740 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5741 const Expr *PrivateExpr = Privates[I];
5742 const Expr *TempExpr = CopyArrayTemps[I];
5743 EmitAutoVarDecl(
5744 D: *cast<VarDecl>(Val: cast<DeclRefExpr>(Val: TempExpr)->getDecl()));
5745 LValue DestLVal = EmitLValue(E: TempExpr);
5746 LValue SrcLVal = EmitLValue(E: LHSs[I]);
5747 EmitOMPCopy(OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(),
5748 SrcAddr: SrcLVal.getAddress(),
5749 DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
5750 SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()),
5751 Copy: CopyOps[I]);
5752 }
5753 }
5754 CGM.getOpenMPRuntime().emitReduction(
5755 CGF&: *this, Loc: ParentDir.getEndLoc(), Privates, LHSExprs: LHSs, RHSExprs: RHSs, ReductionOps,
5756 Options: {/*WithNowait=*/true, /*SimpleReduction=*/true,
5757 /*IsPrivateVarReduction*/ {}, .ReductionKind: OMPD_simd});
5758 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5759 const Expr *PrivateExpr = Privates[I];
5760 LValue DestLVal;
5761 LValue SrcLVal;
5762 if (IsInclusive) {
5763 DestLVal = EmitLValue(E: RHSs[I]);
5764 SrcLVal = EmitLValue(E: LHSs[I]);
5765 } else {
5766 const Expr *TempExpr = CopyArrayTemps[I];
5767 DestLVal = EmitLValue(E: RHSs[I]);
5768 SrcLVal = EmitLValue(E: TempExpr);
5769 }
5770 EmitOMPCopy(
5771 OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), SrcAddr: SrcLVal.getAddress(),
5772 DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
5773 SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), Copy: CopyOps[I]);
5774 }
5775 }
5776 EmitBranch(Block: IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
5777 OMPScanExitBlock = IsInclusive
5778 ? BreakContinueStack.back().ContinueBlock.getBlock()
5779 : OMPScanReduce;
5780 EmitBlock(BB: OMPAfterScanBlock);
5781 return;
5782 }
5783 if (!IsInclusive) {
5784 EmitBranch(Block: BreakContinueStack.back().ContinueBlock.getBlock());
5785 EmitBlock(BB: OMPScanExitBlock);
5786 }
5787 if (OMPFirstScanLoop) {
5788 // Emit buffer[i] = red; at the end of the input phase.
5789 const auto *IVExpr = cast<OMPLoopDirective>(Val: ParentDir)
5790 .getIterationVariable()
5791 ->IgnoreParenImpCasts();
5792 LValue IdxLVal = EmitLValue(E: IVExpr);
5793 llvm::Value *IdxVal = EmitLoadOfScalar(lvalue: IdxLVal, Loc: IVExpr->getExprLoc());
5794 IdxVal = Builder.CreateIntCast(V: IdxVal, DestTy: SizeTy, /*isSigned=*/false);
5795 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5796 const Expr *PrivateExpr = Privates[I];
5797 const Expr *OrigExpr = Shareds[I];
5798 const Expr *CopyArrayElem = CopyArrayElems[I];
5799 OpaqueValueMapping IdxMapping(
5800 *this,
5801 cast<OpaqueValueExpr>(
5802 Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
5803 RValue::get(V: IdxVal));
5804 LValue DestLVal = EmitLValue(E: CopyArrayElem);
5805 LValue SrcLVal = EmitLValue(E: OrigExpr);
5806 EmitOMPCopy(
5807 OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), SrcAddr: SrcLVal.getAddress(),
5808 DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
5809 SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), Copy: CopyOps[I]);
5810 }
5811 }
5812 EmitBranch(Block: BreakContinueStack.back().ContinueBlock.getBlock());
5813 if (IsInclusive) {
5814 EmitBlock(BB: OMPScanExitBlock);
5815 EmitBranch(Block: BreakContinueStack.back().ContinueBlock.getBlock());
5816 }
5817 EmitBlock(BB: OMPScanDispatch);
5818 if (!OMPFirstScanLoop) {
5819 // Emit red = buffer[i]; at the entrance to the scan phase.
5820 const auto *IVExpr = cast<OMPLoopDirective>(Val: ParentDir)
5821 .getIterationVariable()
5822 ->IgnoreParenImpCasts();
5823 LValue IdxLVal = EmitLValue(E: IVExpr);
5824 llvm::Value *IdxVal = EmitLoadOfScalar(lvalue: IdxLVal, Loc: IVExpr->getExprLoc());
5825 IdxVal = Builder.CreateIntCast(V: IdxVal, DestTy: SizeTy, /*isSigned=*/false);
5826 llvm::BasicBlock *ExclusiveExitBB = nullptr;
5827 if (!IsInclusive) {
5828 llvm::BasicBlock *ContBB = createBasicBlock(name: "omp.exclusive.dec");
5829 ExclusiveExitBB = createBasicBlock(name: "omp.exclusive.copy.exit");
5830 llvm::Value *Cmp = Builder.CreateIsNull(Arg: IdxVal);
5831 Builder.CreateCondBr(Cond: Cmp, True: ExclusiveExitBB, False: ContBB);
5832 EmitBlock(BB: ContBB);
5833 // Use idx - 1 iteration for exclusive scan.
5834 IdxVal = Builder.CreateNUWSub(LHS: IdxVal, RHS: llvm::ConstantInt::get(Ty: SizeTy, V: 1));
5835 }
5836 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5837 const Expr *PrivateExpr = Privates[I];
5838 const Expr *OrigExpr = Shareds[I];
5839 const Expr *CopyArrayElem = CopyArrayElems[I];
5840 OpaqueValueMapping IdxMapping(
5841 *this,
5842 cast<OpaqueValueExpr>(
5843 Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
5844 RValue::get(V: IdxVal));
5845 LValue SrcLVal = EmitLValue(E: CopyArrayElem);
5846 LValue DestLVal = EmitLValue(E: OrigExpr);
5847 EmitOMPCopy(
5848 OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), SrcAddr: SrcLVal.getAddress(),
5849 DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
5850 SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), Copy: CopyOps[I]);
5851 }
5852 if (!IsInclusive) {
5853 EmitBlock(BB: ExclusiveExitBB);
5854 }
5855 }
5856 EmitBranch(Block: (OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
5857 : OMPAfterScanBlock);
5858 EmitBlock(BB: OMPAfterScanBlock);
5859}
5860
5861void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
5862 const CodeGenLoopTy &CodeGenLoop,
5863 Expr *IncExpr) {
5864 // Emit the loop iteration variable.
5865 const auto *IVExpr = cast<DeclRefExpr>(Val: S.getIterationVariable());
5866 const auto *IVDecl = cast<VarDecl>(Val: IVExpr->getDecl());
5867 EmitVarDecl(D: *IVDecl);
5868
5869 // Emit the iterations count variable.
5870 // If it is not a variable, Sema decided to calculate iterations count on each
5871 // iteration (e.g., it is foldable into a constant).
5872 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) {
5873 EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl()));
5874 // Emit calculation of the iterations count.
5875 EmitIgnoredExpr(E: S.getCalcLastIteration());
5876 }
5877
5878 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
5879
5880 bool HasLastprivateClause = false;
5881 // Check pre-condition.
5882 {
5883 OMPLoopScope PreInitScope(*this, S);
5884 // Skip the entire loop if we don't meet the precondition.
5885 // If the condition constant folds and can be elided, avoid emitting the
5886 // whole loop.
5887 bool CondConstant;
5888 llvm::BasicBlock *ContBlock = nullptr;
5889 if (ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) {
5890 if (!CondConstant)
5891 return;
5892 } else {
5893 llvm::BasicBlock *ThenBlock = createBasicBlock(name: "omp.precond.then");
5894 ContBlock = createBasicBlock(name: "omp.precond.end");
5895 emitPreCond(CGF&: *this, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock,
5896 TrueCount: getProfileCount(S: &S));
5897 EmitBlock(BB: ThenBlock);
5898 incrementProfileCounter(S: &S);
5899 }
5900
5901 emitAlignedClause(CGF&: *this, D: S);
5902 // Emit 'then' code.
5903 {
5904 // Emit helper vars inits.
5905
5906 LValue LB = EmitOMPHelperVar(
5907 CGF&: *this, Helper: cast<DeclRefExpr>(
5908 Val: (isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
5909 ? S.getCombinedLowerBoundVariable()
5910 : S.getLowerBoundVariable())));
5911 LValue UB = EmitOMPHelperVar(
5912 CGF&: *this, Helper: cast<DeclRefExpr>(
5913 Val: (isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
5914 ? S.getCombinedUpperBoundVariable()
5915 : S.getUpperBoundVariable())));
5916 LValue ST =
5917 EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getStrideVariable()));
5918 LValue IL =
5919 EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getIsLastIterVariable()));
5920
5921 OMPPrivateScope LoopScope(*this);
5922 if (EmitOMPFirstprivateClause(D: S, PrivateScope&: LoopScope)) {
5923 // Emit implicit barrier to synchronize threads and avoid data races
5924 // on initialization of firstprivate variables and post-update of
5925 // lastprivate variables.
5926 CGM.getOpenMPRuntime().emitBarrierCall(
5927 CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false,
5928 /*ForceSimpleCall=*/true);
5929 }
5930 EmitOMPPrivateClause(D: S, PrivateScope&: LoopScope);
5931 if (isOpenMPSimdDirective(DKind: S.getDirectiveKind()) &&
5932 !isOpenMPParallelDirective(DKind: S.getDirectiveKind()) &&
5933 !isOpenMPTeamsDirective(DKind: S.getDirectiveKind()))
5934 EmitOMPReductionClauseInit(D: S, PrivateScope&: LoopScope);
5935 HasLastprivateClause = EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope);
5936 EmitOMPPrivateLoopCounters(S, LoopScope);
5937 (void)LoopScope.Privatize();
5938 if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()))
5939 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF&: *this, D: S);
5940
5941 // Detect the distribute schedule kind and chunk.
5942 llvm::Value *Chunk = nullptr;
5943 OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
5944 if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
5945 ScheduleKind = C->getDistScheduleKind();
5946 if (const Expr *Ch = C->getChunkSize()) {
5947 Chunk = EmitScalarExpr(E: Ch);
5948 Chunk = EmitScalarConversion(Src: Chunk, SrcTy: Ch->getType(),
5949 DstTy: S.getIterationVariable()->getType(),
5950 Loc: S.getBeginLoc());
5951 }
5952 } else {
5953 // Default behaviour for dist_schedule clause.
5954 CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
5955 CGF&: *this, S, ScheduleKind, Chunk);
5956 }
5957 const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType());
5958 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
5959
5960 // OpenMP [2.10.8, distribute Construct, Description]
5961 // If dist_schedule is specified, kind must be static. If specified,
5962 // iterations are divided into chunks of size chunk_size, chunks are
5963 // assigned to the teams of the league in a round-robin fashion in the
5964 // order of the team number. When no chunk_size is specified, the
5965 // iteration space is divided into chunks that are approximately equal
5966 // in size, and at most one chunk is distributed to each team of the
5967 // league. The size of the chunks is unspecified in this case.
5968 bool StaticChunked =
5969 RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
5970 isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind());
5971 if (RT.isStaticNonchunked(ScheduleKind,
5972 /* Chunked */ Chunk != nullptr) ||
5973 StaticChunked) {
5974 CGOpenMPRuntime::StaticRTInput StaticInit(
5975 IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
5976 LB.getAddress(), UB.getAddress(), ST.getAddress(),
5977 StaticChunked ? Chunk : nullptr);
5978 RT.emitDistributeStaticInit(CGF&: *this, Loc: S.getBeginLoc(), SchedKind: ScheduleKind,
5979 Values: StaticInit);
5980 JumpDest LoopExit =
5981 getJumpDestInCurrentScope(Target: createBasicBlock(name: "omp.loop.exit"));
5982 // UB = min(UB, GlobalUB);
5983 EmitIgnoredExpr(E: isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
5984 ? S.getCombinedEnsureUpperBound()
5985 : S.getEnsureUpperBound());
5986 // IV = LB;
5987 EmitIgnoredExpr(E: isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
5988 ? S.getCombinedInit()
5989 : S.getInit());
5990
5991 const Expr *Cond =
5992 isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
5993 ? S.getCombinedCond()
5994 : S.getCond();
5995
5996 if (StaticChunked)
5997 Cond = S.getCombinedDistCond();
5998
5999 // For static unchunked schedules generate:
6000 //
6001 // 1. For distribute alone, codegen
6002 // while (idx <= UB) {
6003 // BODY;
6004 // ++idx;
6005 // }
6006 //
6007 // 2. When combined with 'for' (e.g. as in 'distribute parallel for')
6008 // while (idx <= UB) {
6009 // <CodeGen rest of pragma>(LB, UB);
6010 // idx += ST;
6011 // }
6012 //
6013 // For static chunk one schedule generate:
6014 //
6015 // while (IV <= GlobalUB) {
6016 // <CodeGen rest of pragma>(LB, UB);
6017 // LB += ST;
6018 // UB += ST;
6019 // UB = min(UB, GlobalUB);
6020 // IV = LB;
6021 // }
6022 //
6023 emitCommonSimdLoop(
6024 CGF&: *this, S,
6025 SimdInitGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6026 if (isOpenMPSimdDirective(DKind: S.getDirectiveKind()))
6027 CGF.EmitOMPSimdInit(D: S);
6028 },
6029 BodyCodeGen: [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
6030 StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
6031 CGF.EmitOMPInnerLoop(
6032 S, RequiresCleanup: LoopScope.requiresCleanups(), LoopCond: Cond, IncExpr,
6033 BodyGen: [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
6034 CodeGenLoop(CGF, S, LoopExit);
6035 },
6036 PostIncGen: [&S, StaticChunked](CodeGenFunction &CGF) {
6037 if (StaticChunked) {
6038 CGF.EmitIgnoredExpr(E: S.getCombinedNextLowerBound());
6039 CGF.EmitIgnoredExpr(E: S.getCombinedNextUpperBound());
6040 CGF.EmitIgnoredExpr(E: S.getCombinedEnsureUpperBound());
6041 CGF.EmitIgnoredExpr(E: S.getCombinedInit());
6042 }
6043 });
6044 });
6045 EmitBlock(BB: LoopExit.getBlock());
6046 // Tell the runtime we are done.
6047 RT.emitForStaticFinish(CGF&: *this, Loc: S.getEndLoc(), DKind: OMPD_distribute);
6048 } else {
6049 // Emit the outer loop, which requests its work chunk [LB..UB] from
6050 // runtime and runs the inner loop to process it.
6051 const OMPLoopArguments LoopArguments = {
6052 LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
6053 Chunk};
6054 EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArgs: LoopArguments,
6055 CodeGenLoopContent: CodeGenLoop);
6056 }
6057 if (isOpenMPSimdDirective(DKind: S.getDirectiveKind())) {
6058 EmitOMPSimdFinal(D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
6059 return CGF.Builder.CreateIsNotNull(
6060 Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
6061 });
6062 }
6063 if (isOpenMPSimdDirective(DKind: S.getDirectiveKind()) &&
6064 !isOpenMPParallelDirective(DKind: S.getDirectiveKind()) &&
6065 !isOpenMPTeamsDirective(DKind: S.getDirectiveKind())) {
6066 EmitOMPReductionClauseFinal(D: S, ReductionKind: OMPD_simd);
6067 // Emit post-update of the reduction variables if IsLastIter != 0.
6068 emitPostUpdateForReductionClause(
6069 CGF&: *this, D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
6070 return CGF.Builder.CreateIsNotNull(
6071 Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
6072 });
6073 }
6074 // Emit final copy of the lastprivate variables if IsLastIter != 0.
6075 if (HasLastprivateClause) {
6076 EmitOMPLastprivateClauseFinal(
6077 D: S, /*NoFinals=*/false,
6078 IsLastIterCond: Builder.CreateIsNotNull(Arg: EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc())));
6079 }
6080 }
6081
6082 // We're now done with the loop, so jump to the continuation block.
6083 if (ContBlock) {
6084 EmitBranch(Block: ContBlock);
6085 EmitBlock(BB: ContBlock, IsFinished: true);
6086 }
6087 }
6088}
6089
6090// Pass OMPLoopDirective (instead of OMPDistributeDirective) to make this
6091// function available for "loop bind(teams)", which maps to "distribute".
6092static void emitOMPDistributeDirective(const OMPLoopDirective &S,
6093 CodeGenFunction &CGF,
6094 CodeGenModule &CGM) {
6095 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6096 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
6097 };
6098 OMPLexicalScope Scope(CGF, S, OMPD_unknown);
6099 CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute, CodeGen);
6100}
6101
6102void CodeGenFunction::EmitOMPDistributeDirective(
6103 const OMPDistributeDirective &S) {
6104 emitOMPDistributeDirective(S, CGF&: *this, CGM);
6105}
6106
6107static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
6108 const CapturedStmt *S,
6109 SourceLocation Loc) {
6110 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
6111 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
6112 CGF.CapturedStmtInfo = &CapStmtInfo;
6113 llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(S: *S, Loc);
6114 Fn->setDoesNotRecurse();
6115 return Fn;
6116}
6117
6118template <typename T>
6119static void emitRestoreIP(CodeGenFunction &CGF, const T *C,
6120 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
6121 llvm::OpenMPIRBuilder &OMPBuilder) {
6122
6123 unsigned NumLoops = C->getNumLoops();
6124 QualType Int64Ty = CGF.CGM.getContext().getIntTypeForBitwidth(
6125 /*DestWidth=*/64, /*Signed=*/1);
6126 llvm::SmallVector<llvm::Value *> StoreValues;
6127 for (unsigned I = 0; I < NumLoops; I++) {
6128 const Expr *CounterVal = C->getLoopData(I);
6129 assert(CounterVal);
6130 llvm::Value *StoreValue = CGF.EmitScalarConversion(
6131 Src: CGF.EmitScalarExpr(E: CounterVal), SrcTy: CounterVal->getType(), DstTy: Int64Ty,
6132 Loc: CounterVal->getExprLoc());
6133 StoreValues.emplace_back(Args&: StoreValue);
6134 }
6135 OMPDoacrossKind<T> ODK;
6136 bool IsDependSource = ODK.isSource(C);
6137 CGF.Builder.restoreIP(
6138 IP: OMPBuilder.createOrderedDepend(Loc: CGF.Builder, AllocaIP, NumLoops,
6139 StoreValues, Name: ".cnt.addr", IsDependSource));
6140}
6141
6142void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
6143 if (CGM.getLangOpts().OpenMPIRBuilder) {
6144 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
6145 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
6146
6147 if (S.hasClausesOfKind<OMPDependClause>() ||
6148 S.hasClausesOfKind<OMPDoacrossClause>()) {
6149 // The ordered directive with depend clause.
6150 assert(!S.hasAssociatedStmt() && "No associated statement must be in "
6151 "ordered depend|doacross construct.");
6152 InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
6153 AllocaInsertPt->getIterator());
6154 for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
6155 emitRestoreIP(CGF&: *this, C: DC, AllocaIP, OMPBuilder);
6156 for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
6157 emitRestoreIP(CGF&: *this, C: DC, AllocaIP, OMPBuilder);
6158 } else {
6159 // The ordered directive with threads or simd clause, or without clause.
6160 // Without clause, it behaves as if the threads clause is specified.
6161 const auto *C = S.getSingleClause<OMPSIMDClause>();
6162
6163 auto FiniCB = [this](InsertPointTy IP) {
6164 OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
6165 return llvm::Error::success();
6166 };
6167
6168 auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP,
6169 InsertPointTy CodeGenIP) {
6170 Builder.restoreIP(IP: CodeGenIP);
6171
6172 const CapturedStmt *CS = S.getInnermostCapturedStmt();
6173 if (C) {
6174 llvm::BasicBlock *FiniBB = splitBBWithSuffix(
6175 Builder, /*CreateBranch=*/false, Suffix: ".ordered.after");
6176 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
6177 GenerateOpenMPCapturedVars(S: *CS, CapturedVars);
6178 llvm::Function *OutlinedFn =
6179 emitOutlinedOrderedFunction(CGM, S: CS, Loc: S.getBeginLoc());
6180 assert(S.getBeginLoc().isValid() &&
6181 "Outlined function call location must be valid.");
6182 ApplyDebugLocation::CreateDefaultArtificial(CGF&: *this, TemporaryLocation: S.getBeginLoc());
6183 OMPBuilderCBHelpers::EmitCaptureStmt(CGF&: *this, CodeGenIP, FiniBB&: *FiniBB,
6184 Fn: OutlinedFn, Args: CapturedVars);
6185 } else {
6186 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
6187 CGF&: *this, RegionBodyStmt: CS->getCapturedStmt(), AllocaIP, CodeGenIP, RegionName: "ordered");
6188 }
6189 return llvm::Error::success();
6190 };
6191
6192 OMPLexicalScope Scope(*this, S, OMPD_unknown);
6193 llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(
6194 ValOrErr: OMPBuilder.createOrderedThreadsSimd(Loc: Builder, BodyGenCB, FiniCB, IsThreads: !C));
6195 Builder.restoreIP(IP: AfterIP);
6196 }
6197 return;
6198 }
6199
6200 if (S.hasClausesOfKind<OMPDependClause>()) {
6201 assert(!S.hasAssociatedStmt() &&
6202 "No associated statement must be in ordered depend construct.");
6203 for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
6204 CGM.getOpenMPRuntime().emitDoacrossOrdered(CGF&: *this, C: DC);
6205 return;
6206 }
6207 if (S.hasClausesOfKind<OMPDoacrossClause>()) {
6208 assert(!S.hasAssociatedStmt() &&
6209 "No associated statement must be in ordered doacross construct.");
6210 for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
6211 CGM.getOpenMPRuntime().emitDoacrossOrdered(CGF&: *this, C: DC);
6212 return;
6213 }
6214 const auto *C = S.getSingleClause<OMPSIMDClause>();
6215 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
6216 PrePostActionTy &Action) {
6217 const CapturedStmt *CS = S.getInnermostCapturedStmt();
6218 if (C) {
6219 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
6220 CGF.GenerateOpenMPCapturedVars(S: *CS, CapturedVars);
6221 llvm::Function *OutlinedFn =
6222 emitOutlinedOrderedFunction(CGM, S: CS, Loc: S.getBeginLoc());
6223 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc: S.getBeginLoc(),
6224 OutlinedFn, Args: CapturedVars);
6225 } else {
6226 Action.Enter(CGF);
6227 CGF.EmitStmt(S: CS->getCapturedStmt());
6228 }
6229 };
6230 OMPLexicalScope Scope(*this, S, OMPD_unknown);
6231 CGM.getOpenMPRuntime().emitOrderedRegion(CGF&: *this, OrderedOpGen: CodeGen, Loc: S.getBeginLoc(), IsThreads: !C);
6232}
6233
6234static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
6235 QualType SrcType, QualType DestType,
6236 SourceLocation Loc) {
6237 assert(CGF.hasScalarEvaluationKind(DestType) &&
6238 "DestType must have scalar evaluation kind.");
6239 assert(!Val.isAggregate() && "Must be a scalar or complex.");
6240 return Val.isScalar() ? CGF.EmitScalarConversion(Src: Val.getScalarVal(), SrcTy: SrcType,
6241 DstTy: DestType, Loc)
6242 : CGF.EmitComplexToScalarConversion(
6243 Src: Val.getComplexVal(), SrcTy: SrcType, DstTy: DestType, Loc);
6244}
6245
6246static CodeGenFunction::ComplexPairTy
6247convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
6248 QualType DestType, SourceLocation Loc) {
6249 assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
6250 "DestType must have complex evaluation kind.");
6251 CodeGenFunction::ComplexPairTy ComplexVal;
6252 if (Val.isScalar()) {
6253 // Convert the input element to the element type of the complex.
6254 QualType DestElementType =
6255 DestType->castAs<ComplexType>()->getElementType();
6256 llvm::Value *ScalarVal = CGF.EmitScalarConversion(
6257 Src: Val.getScalarVal(), SrcTy: SrcType, DstTy: DestElementType, Loc);
6258 ComplexVal = CodeGenFunction::ComplexPairTy(
6259 ScalarVal, llvm::Constant::getNullValue(Ty: ScalarVal->getType()));
6260 } else {
6261 assert(Val.isComplex() && "Must be a scalar or complex.");
6262 QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
6263 QualType DestElementType =
6264 DestType->castAs<ComplexType>()->getElementType();
6265 ComplexVal.first = CGF.EmitScalarConversion(
6266 Src: Val.getComplexVal().first, SrcTy: SrcElementType, DstTy: DestElementType, Loc);
6267 ComplexVal.second = CGF.EmitScalarConversion(
6268 Src: Val.getComplexVal().second, SrcTy: SrcElementType, DstTy: DestElementType, Loc);
6269 }
6270 return ComplexVal;
6271}
6272
6273static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
6274 LValue LVal, RValue RVal) {
6275 if (LVal.isGlobalReg())
6276 CGF.EmitStoreThroughGlobalRegLValue(Src: RVal, Dst: LVal);
6277 else
6278 CGF.EmitAtomicStore(rvalue: RVal, lvalue: LVal, AO, IsVolatile: LVal.isVolatile(), /*isInit=*/false);
6279}
6280
6281static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
6282 llvm::AtomicOrdering AO, LValue LVal,
6283 SourceLocation Loc) {
6284 if (LVal.isGlobalReg())
6285 return CGF.EmitLoadOfLValue(V: LVal, Loc);
6286 return CGF.EmitAtomicLoad(
6287 lvalue: LVal, loc: Loc, AO: llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering: AO),
6288 IsVolatile: LVal.isVolatile());
6289}
6290
6291void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
6292 QualType RValTy, SourceLocation Loc) {
6293 switch (getEvaluationKind(T: LVal.getType())) {
6294 case TEK_Scalar:
6295 EmitStoreThroughLValue(Src: RValue::get(V: convertToScalarValue(
6296 CGF&: *this, Val: RVal, SrcType: RValTy, DestType: LVal.getType(), Loc)),
6297 Dst: LVal);
6298 break;
6299 case TEK_Complex:
6300 EmitStoreOfComplex(
6301 V: convertToComplexValue(CGF&: *this, Val: RVal, SrcType: RValTy, DestType: LVal.getType(), Loc), dest: LVal,
6302 /*isInit=*/false);
6303 break;
6304 case TEK_Aggregate:
6305 llvm_unreachable("Must be a scalar or complex.");
6306 }
6307}
6308
6309static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
6310 const Expr *X, const Expr *V,
6311 SourceLocation Loc) {
6312 // v = x;
6313 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
6314 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
6315 LValue XLValue = CGF.EmitLValue(E: X);
6316 LValue VLValue = CGF.EmitLValue(E: V);
6317 RValue Res = emitSimpleAtomicLoad(CGF, AO, LVal: XLValue, Loc);
6318 // OpenMP, 2.17.7, atomic Construct
6319 // If the read or capture clause is specified and the acquire, acq_rel, or
6320 // seq_cst clause is specified then the strong flush on exit from the atomic
6321 // operation is also an acquire flush.
6322 switch (AO) {
6323 case llvm::AtomicOrdering::Acquire:
6324 case llvm::AtomicOrdering::AcquireRelease:
6325 case llvm::AtomicOrdering::SequentiallyConsistent:
6326 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: {}, Loc,
6327 AO: llvm::AtomicOrdering::Acquire);
6328 break;
6329 case llvm::AtomicOrdering::Monotonic:
6330 case llvm::AtomicOrdering::Release:
6331 break;
6332 case llvm::AtomicOrdering::NotAtomic:
6333 case llvm::AtomicOrdering::Unordered:
6334 llvm_unreachable("Unexpected ordering.");
6335 }
6336 CGF.emitOMPSimpleStore(LVal: VLValue, RVal: Res, RValTy: X->getType().getNonReferenceType(), Loc);
6337 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: V);
6338}
6339
6340static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
6341 llvm::AtomicOrdering AO, const Expr *X,
6342 const Expr *E, SourceLocation Loc) {
6343 // x = expr;
6344 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
6345 emitSimpleAtomicStore(CGF, AO, LVal: CGF.EmitLValue(E: X), RVal: CGF.EmitAnyExpr(E));
6346 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X);
6347 // OpenMP, 2.17.7, atomic Construct
6348 // If the write, update, or capture clause is specified and the release,
6349 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6350 // the atomic operation is also a release flush.
6351 switch (AO) {
6352 case llvm::AtomicOrdering::Release:
6353 case llvm::AtomicOrdering::AcquireRelease:
6354 case llvm::AtomicOrdering::SequentiallyConsistent:
6355 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: {}, Loc,
6356 AO: llvm::AtomicOrdering::Release);
6357 break;
6358 case llvm::AtomicOrdering::Acquire:
6359 case llvm::AtomicOrdering::Monotonic:
6360 break;
6361 case llvm::AtomicOrdering::NotAtomic:
6362 case llvm::AtomicOrdering::Unordered:
6363 llvm_unreachable("Unexpected ordering.");
6364 }
6365}
6366
6367static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
6368 RValue Update,
6369 BinaryOperatorKind BO,
6370 llvm::AtomicOrdering AO,
6371 bool IsXLHSInRHSPart) {
6372 ASTContext &Context = CGF.getContext();
6373 // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
6374 // expression is simple and atomic is allowed for the given type for the
6375 // target platform.
6376 if (BO == BO_Comma || !Update.isScalar() || !X.isSimple() ||
6377 (!isa<llvm::ConstantInt>(Val: Update.getScalarVal()) &&
6378 (Update.getScalarVal()->getType() != X.getAddress().getElementType())) ||
6379 !Context.getTargetInfo().hasBuiltinAtomic(
6380 AtomicSizeInBits: Context.getTypeSize(T: X.getType()), AlignmentInBits: Context.toBits(CharSize: X.getAlignment())))
6381 return std::make_pair(x: false, y: RValue::get(V: nullptr));
6382
6383 auto &&CheckAtomicSupport = [&CGF](llvm::Type *T, BinaryOperatorKind BO) {
6384 if (T->isIntegerTy())
6385 return true;
6386
6387 if (T->isFloatingPointTy() && (BO == BO_Add || BO == BO_Sub))
6388 return llvm::isPowerOf2_64(Value: CGF.CGM.getDataLayout().getTypeStoreSize(Ty: T));
6389
6390 return false;
6391 };
6392
6393 if (!CheckAtomicSupport(Update.getScalarVal()->getType(), BO) ||
6394 !CheckAtomicSupport(X.getAddress().getElementType(), BO))
6395 return std::make_pair(x: false, y: RValue::get(V: nullptr));
6396
6397 bool IsInteger = X.getAddress().getElementType()->isIntegerTy();
6398 llvm::AtomicRMWInst::BinOp RMWOp;
6399 switch (BO) {
6400 case BO_Add:
6401 RMWOp = IsInteger ? llvm::AtomicRMWInst::Add : llvm::AtomicRMWInst::FAdd;
6402 break;
6403 case BO_Sub:
6404 if (!IsXLHSInRHSPart)
6405 return std::make_pair(x: false, y: RValue::get(V: nullptr));
6406 RMWOp = IsInteger ? llvm::AtomicRMWInst::Sub : llvm::AtomicRMWInst::FSub;
6407 break;
6408 case BO_And:
6409 RMWOp = llvm::AtomicRMWInst::And;
6410 break;
6411 case BO_Or:
6412 RMWOp = llvm::AtomicRMWInst::Or;
6413 break;
6414 case BO_Xor:
6415 RMWOp = llvm::AtomicRMWInst::Xor;
6416 break;
6417 case BO_LT:
6418 if (IsInteger)
6419 RMWOp = X.getType()->hasSignedIntegerRepresentation()
6420 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
6421 : llvm::AtomicRMWInst::Max)
6422 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
6423 : llvm::AtomicRMWInst::UMax);
6424 else
6425 RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMin
6426 : llvm::AtomicRMWInst::FMax;
6427 break;
6428 case BO_GT:
6429 if (IsInteger)
6430 RMWOp = X.getType()->hasSignedIntegerRepresentation()
6431 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
6432 : llvm::AtomicRMWInst::Min)
6433 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
6434 : llvm::AtomicRMWInst::UMin);
6435 else
6436 RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMax
6437 : llvm::AtomicRMWInst::FMin;
6438 break;
6439 case BO_Assign:
6440 RMWOp = llvm::AtomicRMWInst::Xchg;
6441 break;
6442 case BO_Mul:
6443 case BO_Div:
6444 case BO_Rem:
6445 case BO_Shl:
6446 case BO_Shr:
6447 case BO_LAnd:
6448 case BO_LOr:
6449 return std::make_pair(x: false, y: RValue::get(V: nullptr));
6450 case BO_PtrMemD:
6451 case BO_PtrMemI:
6452 case BO_LE:
6453 case BO_GE:
6454 case BO_EQ:
6455 case BO_NE:
6456 case BO_Cmp:
6457 case BO_AddAssign:
6458 case BO_SubAssign:
6459 case BO_AndAssign:
6460 case BO_OrAssign:
6461 case BO_XorAssign:
6462 case BO_MulAssign:
6463 case BO_DivAssign:
6464 case BO_RemAssign:
6465 case BO_ShlAssign:
6466 case BO_ShrAssign:
6467 case BO_Comma:
6468 llvm_unreachable("Unsupported atomic update operation");
6469 }
6470 llvm::Value *UpdateVal = Update.getScalarVal();
6471 if (auto *IC = dyn_cast<llvm::ConstantInt>(Val: UpdateVal)) {
6472 if (IsInteger)
6473 UpdateVal = CGF.Builder.CreateIntCast(
6474 V: IC, DestTy: X.getAddress().getElementType(),
6475 isSigned: X.getType()->hasSignedIntegerRepresentation());
6476 else
6477 UpdateVal = CGF.Builder.CreateCast(Op: llvm::Instruction::CastOps::UIToFP, V: IC,
6478 DestTy: X.getAddress().getElementType());
6479 }
6480 llvm::AtomicRMWInst *Res =
6481 CGF.emitAtomicRMWInst(Op: RMWOp, Addr: X.getAddress(), Val: UpdateVal, Order: AO);
6482 return std::make_pair(x: true, y: RValue::get(V: Res));
6483}
6484
6485std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
6486 LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
6487 llvm::AtomicOrdering AO, SourceLocation Loc,
6488 const llvm::function_ref<RValue(RValue)> CommonGen) {
6489 // Update expressions are allowed to have the following forms:
6490 // x binop= expr; -> xrval + expr;
6491 // x++, ++x -> xrval + 1;
6492 // x--, --x -> xrval - 1;
6493 // x = x binop expr; -> xrval binop expr
6494 // x = expr Op x; - > expr binop xrval;
6495 auto Res = emitOMPAtomicRMW(CGF&: *this, X, Update: E, BO, AO, IsXLHSInRHSPart);
6496 if (!Res.first) {
6497 if (X.isGlobalReg()) {
6498 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
6499 // 'xrval'.
6500 EmitStoreThroughLValue(Src: CommonGen(EmitLoadOfLValue(V: X, Loc)), Dst: X);
6501 } else {
6502 // Perform compare-and-swap procedure.
6503 EmitAtomicUpdate(LVal: X, AO, UpdateOp: CommonGen, IsVolatile: X.getType().isVolatileQualified());
6504 }
6505 }
6506 return Res;
6507}
6508
6509static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
6510 llvm::AtomicOrdering AO, const Expr *X,
6511 const Expr *E, const Expr *UE,
6512 bool IsXLHSInRHSPart, SourceLocation Loc) {
6513 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
6514 "Update expr in 'atomic update' must be a binary operator.");
6515 const auto *BOUE = cast<BinaryOperator>(Val: UE->IgnoreImpCasts());
6516 // Update expressions are allowed to have the following forms:
6517 // x binop= expr; -> xrval + expr;
6518 // x++, ++x -> xrval + 1;
6519 // x--, --x -> xrval - 1;
6520 // x = x binop expr; -> xrval binop expr
6521 // x = expr Op x; - > expr binop xrval;
6522 assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
6523 LValue XLValue = CGF.EmitLValue(E: X);
6524 RValue ExprRValue = CGF.EmitAnyExpr(E);
6525 const auto *LHS = cast<OpaqueValueExpr>(Val: BOUE->getLHS()->IgnoreImpCasts());
6526 const auto *RHS = cast<OpaqueValueExpr>(Val: BOUE->getRHS()->IgnoreImpCasts());
6527 const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
6528 const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
6529 auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
6530 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6531 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
6532 return CGF.EmitAnyExpr(E: UE);
6533 };
6534 (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
6535 X: XLValue, E: ExprRValue, BO: BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, CommonGen: Gen);
6536 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X);
6537 // OpenMP, 2.17.7, atomic Construct
6538 // If the write, update, or capture clause is specified and the release,
6539 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6540 // the atomic operation is also a release flush.
6541 switch (AO) {
6542 case llvm::AtomicOrdering::Release:
6543 case llvm::AtomicOrdering::AcquireRelease:
6544 case llvm::AtomicOrdering::SequentiallyConsistent:
6545 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: {}, Loc,
6546 AO: llvm::AtomicOrdering::Release);
6547 break;
6548 case llvm::AtomicOrdering::Acquire:
6549 case llvm::AtomicOrdering::Monotonic:
6550 break;
6551 case llvm::AtomicOrdering::NotAtomic:
6552 case llvm::AtomicOrdering::Unordered:
6553 llvm_unreachable("Unexpected ordering.");
6554 }
6555}
6556
6557static RValue convertToType(CodeGenFunction &CGF, RValue Value,
6558 QualType SourceType, QualType ResType,
6559 SourceLocation Loc) {
6560 switch (CGF.getEvaluationKind(T: ResType)) {
6561 case TEK_Scalar:
6562 return RValue::get(
6563 V: convertToScalarValue(CGF, Val: Value, SrcType: SourceType, DestType: ResType, Loc));
6564 case TEK_Complex: {
6565 auto Res = convertToComplexValue(CGF, Val: Value, SrcType: SourceType, DestType: ResType, Loc);
6566 return RValue::getComplex(V1: Res.first, V2: Res.second);
6567 }
6568 case TEK_Aggregate:
6569 break;
6570 }
6571 llvm_unreachable("Must be a scalar or complex.");
6572}
6573
6574static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
6575 llvm::AtomicOrdering AO,
6576 bool IsPostfixUpdate, const Expr *V,
6577 const Expr *X, const Expr *E,
6578 const Expr *UE, bool IsXLHSInRHSPart,
6579 SourceLocation Loc) {
6580 assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
6581 assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
6582 RValue NewVVal;
6583 LValue VLValue = CGF.EmitLValue(E: V);
6584 LValue XLValue = CGF.EmitLValue(E: X);
6585 RValue ExprRValue = CGF.EmitAnyExpr(E);
6586 QualType NewVValType;
6587 if (UE) {
6588 // 'x' is updated with some additional value.
6589 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
6590 "Update expr in 'atomic capture' must be a binary operator.");
6591 const auto *BOUE = cast<BinaryOperator>(Val: UE->IgnoreImpCasts());
6592 // Update expressions are allowed to have the following forms:
6593 // x binop= expr; -> xrval + expr;
6594 // x++, ++x -> xrval + 1;
6595 // x--, --x -> xrval - 1;
6596 // x = x binop expr; -> xrval binop expr
6597 // x = expr Op x; - > expr binop xrval;
6598 const auto *LHS = cast<OpaqueValueExpr>(Val: BOUE->getLHS()->IgnoreImpCasts());
6599 const auto *RHS = cast<OpaqueValueExpr>(Val: BOUE->getRHS()->IgnoreImpCasts());
6600 const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
6601 NewVValType = XRValExpr->getType();
6602 const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
6603 auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
6604 IsPostfixUpdate](RValue XRValue) {
6605 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6606 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
6607 RValue Res = CGF.EmitAnyExpr(E: UE);
6608 NewVVal = IsPostfixUpdate ? XRValue : Res;
6609 return Res;
6610 };
6611 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
6612 X: XLValue, E: ExprRValue, BO: BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, CommonGen: Gen);
6613 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X);
6614 if (Res.first) {
6615 // 'atomicrmw' instruction was generated.
6616 if (IsPostfixUpdate) {
6617 // Use old value from 'atomicrmw'.
6618 NewVVal = Res.second;
6619 } else {
6620 // 'atomicrmw' does not provide new value, so evaluate it using old
6621 // value of 'x'.
6622 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6623 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
6624 NewVVal = CGF.EmitAnyExpr(E: UE);
6625 }
6626 }
6627 } else {
6628 // 'x' is simply rewritten with some 'expr'.
6629 NewVValType = X->getType().getNonReferenceType();
6630 ExprRValue = convertToType(CGF, Value: ExprRValue, SourceType: E->getType(),
6631 ResType: X->getType().getNonReferenceType(), Loc);
6632 auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
6633 NewVVal = XRValue;
6634 return ExprRValue;
6635 };
6636 // Try to perform atomicrmw xchg, otherwise simple exchange.
6637 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
6638 X: XLValue, E: ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
6639 Loc, CommonGen: Gen);
6640 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X);
6641 if (Res.first) {
6642 // 'atomicrmw' instruction was generated.
6643 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
6644 }
6645 }
6646 // Emit post-update store to 'v' of old/new 'x' value.
6647 CGF.emitOMPSimpleStore(LVal: VLValue, RVal: NewVVal, RValTy: NewVValType, Loc);
6648 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: V);
6649 // OpenMP 5.1 removes the required flush for capture clause.
6650 if (CGF.CGM.getLangOpts().OpenMP < 51) {
6651 // OpenMP, 2.17.7, atomic Construct
6652 // If the write, update, or capture clause is specified and the release,
6653 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6654 // the atomic operation is also a release flush.
6655 // If the read or capture clause is specified and the acquire, acq_rel, or
6656 // seq_cst clause is specified then the strong flush on exit from the atomic
6657 // operation is also an acquire flush.
6658 switch (AO) {
6659 case llvm::AtomicOrdering::Release:
6660 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: {}, Loc,
6661 AO: llvm::AtomicOrdering::Release);
6662 break;
6663 case llvm::AtomicOrdering::Acquire:
6664 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: {}, Loc,
6665 AO: llvm::AtomicOrdering::Acquire);
6666 break;
6667 case llvm::AtomicOrdering::AcquireRelease:
6668 case llvm::AtomicOrdering::SequentiallyConsistent:
6669 CGF.CGM.getOpenMPRuntime().emitFlush(
6670 CGF, Vars: {}, Loc, AO: llvm::AtomicOrdering::AcquireRelease);
6671 break;
6672 case llvm::AtomicOrdering::Monotonic:
6673 break;
6674 case llvm::AtomicOrdering::NotAtomic:
6675 case llvm::AtomicOrdering::Unordered:
6676 llvm_unreachable("Unexpected ordering.");
6677 }
6678 }
6679}
6680
6681static void emitOMPAtomicCompareExpr(
6682 CodeGenFunction &CGF, llvm::AtomicOrdering AO, llvm::AtomicOrdering FailAO,
6683 const Expr *X, const Expr *V, const Expr *R, const Expr *E, const Expr *D,
6684 const Expr *CE, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly,
6685 SourceLocation Loc) {
6686 llvm::OpenMPIRBuilder &OMPBuilder =
6687 CGF.CGM.getOpenMPRuntime().getOMPBuilder();
6688
6689 OMPAtomicCompareOp Op;
6690 assert(isa<BinaryOperator>(CE) && "CE is not a BinaryOperator");
6691 switch (cast<BinaryOperator>(Val: CE)->getOpcode()) {
6692 case BO_EQ:
6693 Op = OMPAtomicCompareOp::EQ;
6694 break;
6695 case BO_LT:
6696 Op = OMPAtomicCompareOp::MIN;
6697 break;
6698 case BO_GT:
6699 Op = OMPAtomicCompareOp::MAX;
6700 break;
6701 default:
6702 llvm_unreachable("unsupported atomic compare binary operator");
6703 }
6704
6705 LValue XLVal = CGF.EmitLValue(E: X);
6706 Address XAddr = XLVal.getAddress();
6707
6708 auto EmitRValueWithCastIfNeeded = [&CGF, Loc](const Expr *X, const Expr *E) {
6709 if (X->getType() == E->getType())
6710 return CGF.EmitScalarExpr(E);
6711 const Expr *NewE = E->IgnoreImplicitAsWritten();
6712 llvm::Value *V = CGF.EmitScalarExpr(E: NewE);
6713 if (NewE->getType() == X->getType())
6714 return V;
6715 return CGF.EmitScalarConversion(Src: V, SrcTy: NewE->getType(), DstTy: X->getType(), Loc);
6716 };
6717
6718 llvm::Value *EVal = EmitRValueWithCastIfNeeded(X, E);
6719 llvm::Value *DVal = D ? EmitRValueWithCastIfNeeded(X, D) : nullptr;
6720 if (auto *CI = dyn_cast<llvm::ConstantInt>(Val: EVal))
6721 EVal = CGF.Builder.CreateIntCast(
6722 V: CI, DestTy: XLVal.getAddress().getElementType(),
6723 isSigned: E->getType()->hasSignedIntegerRepresentation());
6724 if (DVal)
6725 if (auto *CI = dyn_cast<llvm::ConstantInt>(Val: DVal))
6726 DVal = CGF.Builder.CreateIntCast(
6727 V: CI, DestTy: XLVal.getAddress().getElementType(),
6728 isSigned: D->getType()->hasSignedIntegerRepresentation());
6729
6730 llvm::OpenMPIRBuilder::AtomicOpValue XOpVal{
6731 .Var: XAddr.emitRawPointer(CGF), .ElemTy: XAddr.getElementType(),
6732 .IsSigned: X->getType()->hasSignedIntegerRepresentation(),
6733 .IsVolatile: X->getType().isVolatileQualified()};
6734 llvm::OpenMPIRBuilder::AtomicOpValue VOpVal, ROpVal;
6735 if (V) {
6736 LValue LV = CGF.EmitLValue(E: V);
6737 Address Addr = LV.getAddress();
6738 VOpVal = {.Var: Addr.emitRawPointer(CGF), .ElemTy: Addr.getElementType(),
6739 .IsSigned: V->getType()->hasSignedIntegerRepresentation(),
6740 .IsVolatile: V->getType().isVolatileQualified()};
6741 }
6742 if (R) {
6743 LValue LV = CGF.EmitLValue(E: R);
6744 Address Addr = LV.getAddress();
6745 ROpVal = {.Var: Addr.emitRawPointer(CGF), .ElemTy: Addr.getElementType(),
6746 .IsSigned: R->getType()->hasSignedIntegerRepresentation(),
6747 .IsVolatile: R->getType().isVolatileQualified()};
6748 }
6749
6750 if (FailAO == llvm::AtomicOrdering::NotAtomic) {
6751 // fail clause was not mentioned on the
6752 // "#pragma omp atomic compare" construct.
6753 CGF.Builder.restoreIP(IP: OMPBuilder.createAtomicCompare(
6754 Loc: CGF.Builder, X&: XOpVal, V&: VOpVal, R&: ROpVal, E: EVal, D: DVal, AO, Op, IsXBinopExpr,
6755 IsPostfixUpdate, IsFailOnly));
6756 } else
6757 CGF.Builder.restoreIP(IP: OMPBuilder.createAtomicCompare(
6758 Loc: CGF.Builder, X&: XOpVal, V&: VOpVal, R&: ROpVal, E: EVal, D: DVal, AO, Op, IsXBinopExpr,
6759 IsPostfixUpdate, IsFailOnly, Failure: FailAO));
6760}
6761
6762static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
6763 llvm::AtomicOrdering AO,
6764 llvm::AtomicOrdering FailAO, bool IsPostfixUpdate,
6765 const Expr *X, const Expr *V, const Expr *R,
6766 const Expr *E, const Expr *UE, const Expr *D,
6767 const Expr *CE, bool IsXLHSInRHSPart,
6768 bool IsFailOnly, SourceLocation Loc) {
6769 switch (Kind) {
6770 case OMPC_read:
6771 emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
6772 break;
6773 case OMPC_write:
6774 emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
6775 break;
6776 case OMPC_unknown:
6777 case OMPC_update:
6778 emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
6779 break;
6780 case OMPC_capture:
6781 emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
6782 IsXLHSInRHSPart, Loc);
6783 break;
6784 case OMPC_compare: {
6785 emitOMPAtomicCompareExpr(CGF, AO, FailAO, X, V, R, E, D, CE,
6786 IsXBinopExpr: IsXLHSInRHSPart, IsPostfixUpdate, IsFailOnly, Loc);
6787 break;
6788 }
6789 default:
6790 llvm_unreachable("Clause is not allowed in 'omp atomic'.");
6791 }
6792}
6793
6794void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
6795 llvm::AtomicOrdering AO = CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
6796 // Fail Memory Clause Ordering.
6797 llvm::AtomicOrdering FailAO = llvm::AtomicOrdering::NotAtomic;
6798 bool MemOrderingSpecified = false;
6799 if (S.getSingleClause<OMPSeqCstClause>()) {
6800 AO = llvm::AtomicOrdering::SequentiallyConsistent;
6801 MemOrderingSpecified = true;
6802 } else if (S.getSingleClause<OMPAcqRelClause>()) {
6803 AO = llvm::AtomicOrdering::AcquireRelease;
6804 MemOrderingSpecified = true;
6805 } else if (S.getSingleClause<OMPAcquireClause>()) {
6806 AO = llvm::AtomicOrdering::Acquire;
6807 MemOrderingSpecified = true;
6808 } else if (S.getSingleClause<OMPReleaseClause>()) {
6809 AO = llvm::AtomicOrdering::Release;
6810 MemOrderingSpecified = true;
6811 } else if (S.getSingleClause<OMPRelaxedClause>()) {
6812 AO = llvm::AtomicOrdering::Monotonic;
6813 MemOrderingSpecified = true;
6814 }
6815 llvm::SmallSet<OpenMPClauseKind, 2> KindsEncountered;
6816 OpenMPClauseKind Kind = OMPC_unknown;
6817 for (const OMPClause *C : S.clauses()) {
6818 // Find first clause (skip seq_cst|acq_rel|aqcuire|release|relaxed clause,
6819 // if it is first).
6820 OpenMPClauseKind K = C->getClauseKind();
6821 // TBD
6822 if (K == OMPC_weak)
6823 return;
6824 if (K == OMPC_seq_cst || K == OMPC_acq_rel || K == OMPC_acquire ||
6825 K == OMPC_release || K == OMPC_relaxed || K == OMPC_hint)
6826 continue;
6827 Kind = K;
6828 KindsEncountered.insert(V: K);
6829 }
6830 // We just need to correct Kind here. No need to set a bool saying it is
6831 // actually compare capture because we can tell from whether V and R are
6832 // nullptr.
6833 if (KindsEncountered.contains(V: OMPC_compare) &&
6834 KindsEncountered.contains(V: OMPC_capture))
6835 Kind = OMPC_compare;
6836 if (!MemOrderingSpecified) {
6837 llvm::AtomicOrdering DefaultOrder =
6838 CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
6839 if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
6840 DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
6841 (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
6842 Kind == OMPC_capture)) {
6843 AO = DefaultOrder;
6844 } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
6845 if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
6846 AO = llvm::AtomicOrdering::Release;
6847 } else if (Kind == OMPC_read) {
6848 assert(Kind == OMPC_read && "Unexpected atomic kind.");
6849 AO = llvm::AtomicOrdering::Acquire;
6850 }
6851 }
6852 }
6853
6854 if (KindsEncountered.contains(V: OMPC_compare) &&
6855 KindsEncountered.contains(V: OMPC_fail)) {
6856 Kind = OMPC_compare;
6857 const auto *FailClause = S.getSingleClause<OMPFailClause>();
6858 if (FailClause) {
6859 OpenMPClauseKind FailParameter = FailClause->getFailParameter();
6860 if (FailParameter == llvm::omp::OMPC_relaxed)
6861 FailAO = llvm::AtomicOrdering::Monotonic;
6862 else if (FailParameter == llvm::omp::OMPC_acquire)
6863 FailAO = llvm::AtomicOrdering::Acquire;
6864 else if (FailParameter == llvm::omp::OMPC_seq_cst)
6865 FailAO = llvm::AtomicOrdering::SequentiallyConsistent;
6866 }
6867 }
6868
6869 LexicalScope Scope(*this, S.getSourceRange());
6870 EmitStopPoint(S: S.getAssociatedStmt());
6871 emitOMPAtomicExpr(CGF&: *this, Kind, AO, FailAO, IsPostfixUpdate: S.isPostfixUpdate(), X: S.getX(),
6872 V: S.getV(), R: S.getR(), E: S.getExpr(), UE: S.getUpdateExpr(),
6873 D: S.getD(), CE: S.getCondExpr(), IsXLHSInRHSPart: S.isXLHSInRHSPart(),
6874 IsFailOnly: S.isFailOnly(), Loc: S.getBeginLoc());
6875}
6876
6877static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
6878 const OMPExecutableDirective &S,
6879 const RegionCodeGenTy &CodeGen) {
6880 assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
6881 CodeGenModule &CGM = CGF.CGM;
6882
6883 // On device emit this construct as inlined code.
6884 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
6885 OMPLexicalScope Scope(CGF, S, OMPD_target);
6886 CGM.getOpenMPRuntime().emitInlinedDirective(
6887 CGF, InnermostKind: OMPD_target, CodeGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6888 CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
6889 });
6890 return;
6891 }
6892
6893 auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
6894 llvm::Function *Fn = nullptr;
6895 llvm::Constant *FnID = nullptr;
6896
6897 const Expr *IfCond = nullptr;
6898 // Check for the at most one if clause associated with the target region.
6899 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
6900 if (C->getNameModifier() == OMPD_unknown ||
6901 C->getNameModifier() == OMPD_target) {
6902 IfCond = C->getCondition();
6903 break;
6904 }
6905 }
6906
6907 // Check if we have any device clause associated with the directive.
6908 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
6909 nullptr, OMPC_DEVICE_unknown);
6910 if (auto *C = S.getSingleClause<OMPDeviceClause>())
6911 Device.setPointerAndInt(PtrVal: C->getDevice(), IntVal: C->getModifier());
6912
6913 // Check if we have an if clause whose conditional always evaluates to false
6914 // or if we do not have any targets specified. If so the target region is not
6915 // an offload entry point.
6916 bool IsOffloadEntry = true;
6917 if (IfCond) {
6918 bool Val;
6919 if (CGF.ConstantFoldsToSimpleInteger(Cond: IfCond, Result&: Val) && !Val)
6920 IsOffloadEntry = false;
6921 }
6922 if (CGM.getLangOpts().OMPTargetTriples.empty())
6923 IsOffloadEntry = false;
6924
6925 if (CGM.getLangOpts().OpenMPOffloadMandatory && !IsOffloadEntry) {
6926 unsigned DiagID = CGM.getDiags().getCustomDiagID(
6927 L: DiagnosticsEngine::Error,
6928 FormatString: "No offloading entry generated while offloading is mandatory.");
6929 CGM.getDiags().Report(DiagID);
6930 }
6931
6932 assert(CGF.CurFuncDecl && "No parent declaration for target region!");
6933 StringRef ParentName;
6934 // In case we have Ctors/Dtors we use the complete type variant to produce
6935 // the mangling of the device outlined kernel.
6936 if (const auto *D = dyn_cast<CXXConstructorDecl>(Val: CGF.CurFuncDecl))
6937 ParentName = CGM.getMangledName(GD: GlobalDecl(D, Ctor_Complete));
6938 else if (const auto *D = dyn_cast<CXXDestructorDecl>(Val: CGF.CurFuncDecl))
6939 ParentName = CGM.getMangledName(GD: GlobalDecl(D, Dtor_Complete));
6940 else
6941 ParentName =
6942 CGM.getMangledName(GD: GlobalDecl(cast<FunctionDecl>(Val: CGF.CurFuncDecl)));
6943
6944 // Emit target region as a standalone region.
6945 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: FnID,
6946 IsOffloadEntry, CodeGen);
6947 OMPLexicalScope Scope(CGF, S, OMPD_task);
6948 auto &&SizeEmitter =
6949 [IsOffloadEntry](CodeGenFunction &CGF,
6950 const OMPLoopDirective &D) -> llvm::Value * {
6951 if (IsOffloadEntry) {
6952 OMPLoopScope(CGF, D);
6953 // Emit calculation of the iterations count.
6954 llvm::Value *NumIterations = CGF.EmitScalarExpr(E: D.getNumIterations());
6955 NumIterations = CGF.Builder.CreateIntCast(V: NumIterations, DestTy: CGF.Int64Ty,
6956 /*isSigned=*/false);
6957 return NumIterations;
6958 }
6959 return nullptr;
6960 };
6961 CGM.getOpenMPRuntime().emitTargetCall(CGF, D: S, OutlinedFn: Fn, OutlinedFnID: FnID, IfCond, Device,
6962 SizeEmitter);
6963}
6964
6965static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
6966 PrePostActionTy &Action) {
6967 Action.Enter(CGF);
6968 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6969 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
6970 CGF.EmitOMPPrivateClause(D: S, PrivateScope);
6971 (void)PrivateScope.Privatize();
6972 if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()))
6973 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S);
6974
6975 CGF.EmitStmt(S: S.getCapturedStmt(RegionKind: OMPD_target)->getCapturedStmt());
6976 CGF.EnsureInsertPoint();
6977}
6978
6979void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
6980 StringRef ParentName,
6981 const OMPTargetDirective &S) {
6982 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6983 emitTargetRegion(CGF, S, Action);
6984 };
6985 llvm::Function *Fn;
6986 llvm::Constant *Addr;
6987 // Emit target region as a standalone region.
6988 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6989 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
6990 assert(Fn && Addr && "Target device function emission failed.");
6991}
6992
6993void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
6994 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6995 emitTargetRegion(CGF, S, Action);
6996 };
6997 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
6998}
6999
7000static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
7001 const OMPExecutableDirective &S,
7002 OpenMPDirectiveKind InnermostKind,
7003 const RegionCodeGenTy &CodeGen) {
7004 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_teams);
7005 llvm::Function *OutlinedFn =
7006 CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
7007 CGF, D: S, ThreadIDVar: *CS->getCapturedDecl()->param_begin(), InnermostKind,
7008 CodeGen);
7009
7010 const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
7011 const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
7012 if (NT || TL) {
7013 const Expr *NumTeams = NT ? NT->getNumTeams().front() : nullptr;
7014 const Expr *ThreadLimit = TL ? TL->getThreadLimit().front() : nullptr;
7015
7016 CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
7017 Loc: S.getBeginLoc());
7018 }
7019
7020 OMPTeamsScope Scope(CGF, S);
7021 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
7022 CGF.GenerateOpenMPCapturedVars(S: *CS, CapturedVars);
7023 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, D: S, Loc: S.getBeginLoc(), OutlinedFn,
7024 CapturedVars);
7025}
7026
7027void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
7028 // Emit teams region as a standalone region.
7029 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7030 Action.Enter(CGF);
7031 OMPPrivateScope PrivateScope(CGF);
7032 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
7033 CGF.EmitOMPPrivateClause(D: S, PrivateScope);
7034 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7035 (void)PrivateScope.Privatize();
7036 CGF.EmitStmt(S: S.getCapturedStmt(RegionKind: OMPD_teams)->getCapturedStmt());
7037 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7038 };
7039 emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute, CodeGen);
7040 emitPostUpdateForReductionClause(CGF&: *this, D: S,
7041 CondGen: [](CodeGenFunction &) { return nullptr; });
7042}
7043
7044static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
7045 const OMPTargetTeamsDirective &S) {
7046 auto *CS = S.getCapturedStmt(RegionKind: OMPD_teams);
7047 Action.Enter(CGF);
7048 // Emit teams region as a standalone region.
7049 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
7050 Action.Enter(CGF);
7051 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7052 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
7053 CGF.EmitOMPPrivateClause(D: S, PrivateScope);
7054 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7055 (void)PrivateScope.Privatize();
7056 if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()))
7057 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S);
7058 CGF.EmitStmt(S: CS->getCapturedStmt());
7059 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7060 };
7061 emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_teams, CodeGen);
7062 emitPostUpdateForReductionClause(CGF, D: S,
7063 CondGen: [](CodeGenFunction &) { return nullptr; });
7064}
7065
7066void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
7067 CodeGenModule &CGM, StringRef ParentName,
7068 const OMPTargetTeamsDirective &S) {
7069 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7070 emitTargetTeamsRegion(CGF, Action, S);
7071 };
7072 llvm::Function *Fn;
7073 llvm::Constant *Addr;
7074 // Emit target region as a standalone region.
7075 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7076 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7077 assert(Fn && Addr && "Target device function emission failed.");
7078}
7079
7080void CodeGenFunction::EmitOMPTargetTeamsDirective(
7081 const OMPTargetTeamsDirective &S) {
7082 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7083 emitTargetTeamsRegion(CGF, Action, S);
7084 };
7085 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7086}
7087
7088static void
7089emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
7090 const OMPTargetTeamsDistributeDirective &S) {
7091 Action.Enter(CGF);
7092 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7093 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
7094 };
7095
7096 // Emit teams region as a standalone region.
7097 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7098 PrePostActionTy &Action) {
7099 Action.Enter(CGF);
7100 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7101 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7102 (void)PrivateScope.Privatize();
7103 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute,
7104 CodeGen: CodeGenDistribute);
7105 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7106 };
7107 emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute, CodeGen);
7108 emitPostUpdateForReductionClause(CGF, D: S,
7109 CondGen: [](CodeGenFunction &) { return nullptr; });
7110}
7111
7112void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
7113 CodeGenModule &CGM, StringRef ParentName,
7114 const OMPTargetTeamsDistributeDirective &S) {
7115 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7116 emitTargetTeamsDistributeRegion(CGF, Action, S);
7117 };
7118 llvm::Function *Fn;
7119 llvm::Constant *Addr;
7120 // Emit target region as a standalone region.
7121 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7122 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7123 assert(Fn && Addr && "Target device function emission failed.");
7124}
7125
7126void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
7127 const OMPTargetTeamsDistributeDirective &S) {
7128 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7129 emitTargetTeamsDistributeRegion(CGF, Action, S);
7130 };
7131 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7132}
7133
7134static void emitTargetTeamsDistributeSimdRegion(
7135 CodeGenFunction &CGF, PrePostActionTy &Action,
7136 const OMPTargetTeamsDistributeSimdDirective &S) {
7137 Action.Enter(CGF);
7138 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7139 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
7140 };
7141
7142 // Emit teams region as a standalone region.
7143 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7144 PrePostActionTy &Action) {
7145 Action.Enter(CGF);
7146 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7147 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7148 (void)PrivateScope.Privatize();
7149 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute,
7150 CodeGen: CodeGenDistribute);
7151 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7152 };
7153 emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute_simd, CodeGen);
7154 emitPostUpdateForReductionClause(CGF, D: S,
7155 CondGen: [](CodeGenFunction &) { return nullptr; });
7156}
7157
7158void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
7159 CodeGenModule &CGM, StringRef ParentName,
7160 const OMPTargetTeamsDistributeSimdDirective &S) {
7161 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7162 emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
7163 };
7164 llvm::Function *Fn;
7165 llvm::Constant *Addr;
7166 // Emit target region as a standalone region.
7167 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7168 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7169 assert(Fn && Addr && "Target device function emission failed.");
7170}
7171
7172void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
7173 const OMPTargetTeamsDistributeSimdDirective &S) {
7174 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7175 emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
7176 };
7177 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7178}
7179
7180void CodeGenFunction::EmitOMPTeamsDistributeDirective(
7181 const OMPTeamsDistributeDirective &S) {
7182
7183 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7184 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
7185 };
7186
7187 // Emit teams region as a standalone region.
7188 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7189 PrePostActionTy &Action) {
7190 Action.Enter(CGF);
7191 OMPPrivateScope PrivateScope(CGF);
7192 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7193 (void)PrivateScope.Privatize();
7194 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute,
7195 CodeGen: CodeGenDistribute);
7196 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7197 };
7198 emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute, CodeGen);
7199 emitPostUpdateForReductionClause(CGF&: *this, D: S,
7200 CondGen: [](CodeGenFunction &) { return nullptr; });
7201}
7202
7203void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
7204 const OMPTeamsDistributeSimdDirective &S) {
7205 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7206 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
7207 };
7208
7209 // Emit teams region as a standalone region.
7210 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7211 PrePostActionTy &Action) {
7212 Action.Enter(CGF);
7213 OMPPrivateScope PrivateScope(CGF);
7214 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7215 (void)PrivateScope.Privatize();
7216 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_simd,
7217 CodeGen: CodeGenDistribute);
7218 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7219 };
7220 emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute_simd, CodeGen);
7221 emitPostUpdateForReductionClause(CGF&: *this, D: S,
7222 CondGen: [](CodeGenFunction &) { return nullptr; });
7223}
7224
7225void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
7226 const OMPTeamsDistributeParallelForDirective &S) {
7227 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7228 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
7229 IncExpr: S.getDistInc());
7230 };
7231
7232 // Emit teams region as a standalone region.
7233 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7234 PrePostActionTy &Action) {
7235 Action.Enter(CGF);
7236 OMPPrivateScope PrivateScope(CGF);
7237 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7238 (void)PrivateScope.Privatize();
7239 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute,
7240 CodeGen: CodeGenDistribute);
7241 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7242 };
7243 emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute_parallel_for, CodeGen);
7244 emitPostUpdateForReductionClause(CGF&: *this, D: S,
7245 CondGen: [](CodeGenFunction &) { return nullptr; });
7246}
7247
7248void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
7249 const OMPTeamsDistributeParallelForSimdDirective &S) {
7250 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7251 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
7252 IncExpr: S.getDistInc());
7253 };
7254
7255 // Emit teams region as a standalone region.
7256 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7257 PrePostActionTy &Action) {
7258 Action.Enter(CGF);
7259 OMPPrivateScope PrivateScope(CGF);
7260 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7261 (void)PrivateScope.Privatize();
7262 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
7263 CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false);
7264 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7265 };
7266 emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute_parallel_for_simd,
7267 CodeGen);
7268 emitPostUpdateForReductionClause(CGF&: *this, D: S,
7269 CondGen: [](CodeGenFunction &) { return nullptr; });
7270}
7271
7272void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective &S) {
7273 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
7274 llvm::Value *Device = nullptr;
7275 llvm::Value *NumDependences = nullptr;
7276 llvm::Value *DependenceList = nullptr;
7277
7278 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7279 Device = EmitScalarExpr(E: C->getDevice());
7280
7281 // Build list and emit dependences
7282 OMPTaskDataTy Data;
7283 buildDependences(S, Data);
7284 if (!Data.Dependences.empty()) {
7285 Address DependenciesArray = Address::invalid();
7286 std::tie(args&: NumDependences, args&: DependenciesArray) =
7287 CGM.getOpenMPRuntime().emitDependClause(CGF&: *this, Dependencies: Data.Dependences,
7288 Loc: S.getBeginLoc());
7289 DependenceList = DependenciesArray.emitRawPointer(CGF&: *this);
7290 }
7291 Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();
7292
7293 assert(!(Data.HasNowaitClause && !(S.getSingleClause<OMPInitClause>() ||
7294 S.getSingleClause<OMPDestroyClause>() ||
7295 S.getSingleClause<OMPUseClause>())) &&
7296 "OMPNowaitClause clause is used separately in OMPInteropDirective.");
7297
7298 auto ItOMPInitClause = S.getClausesOfKind<OMPInitClause>();
7299 if (!ItOMPInitClause.empty()) {
7300 // Look at the multiple init clauses
7301 for (const OMPInitClause *C : ItOMPInitClause) {
7302 llvm::Value *InteropvarPtr =
7303 EmitLValue(E: C->getInteropVar()).getPointer(CGF&: *this);
7304 llvm::omp::OMPInteropType InteropType =
7305 llvm::omp::OMPInteropType::Unknown;
7306 if (C->getIsTarget()) {
7307 InteropType = llvm::omp::OMPInteropType::Target;
7308 } else {
7309 assert(C->getIsTargetSync() &&
7310 "Expected interop-type target/targetsync");
7311 InteropType = llvm::omp::OMPInteropType::TargetSync;
7312 }
7313 OMPBuilder.createOMPInteropInit(Loc: Builder, InteropVar: InteropvarPtr, InteropType,
7314 Device, NumDependences, DependenceAddress: DependenceList,
7315 HaveNowaitClause: Data.HasNowaitClause);
7316 }
7317 }
7318 auto ItOMPDestroyClause = S.getClausesOfKind<OMPDestroyClause>();
7319 if (!ItOMPDestroyClause.empty()) {
7320 // Look at the multiple destroy clauses
7321 for (const OMPDestroyClause *C : ItOMPDestroyClause) {
7322 llvm::Value *InteropvarPtr =
7323 EmitLValue(E: C->getInteropVar()).getPointer(CGF&: *this);
7324 OMPBuilder.createOMPInteropDestroy(Loc: Builder, InteropVar: InteropvarPtr, Device,
7325 NumDependences, DependenceAddress: DependenceList,
7326 HaveNowaitClause: Data.HasNowaitClause);
7327 }
7328 }
7329 auto ItOMPUseClause = S.getClausesOfKind<OMPUseClause>();
7330 if (!ItOMPUseClause.empty()) {
7331 // Look at the multiple use clauses
7332 for (const OMPUseClause *C : ItOMPUseClause) {
7333 llvm::Value *InteropvarPtr =
7334 EmitLValue(E: C->getInteropVar()).getPointer(CGF&: *this);
7335 OMPBuilder.createOMPInteropUse(Loc: Builder, InteropVar: InteropvarPtr, Device,
7336 NumDependences, DependenceAddress: DependenceList,
7337 HaveNowaitClause: Data.HasNowaitClause);
7338 }
7339 }
7340}
7341
7342static void emitTargetTeamsDistributeParallelForRegion(
7343 CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
7344 PrePostActionTy &Action) {
7345 Action.Enter(CGF);
7346 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7347 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
7348 IncExpr: S.getDistInc());
7349 };
7350
7351 // Emit teams region as a standalone region.
7352 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7353 PrePostActionTy &Action) {
7354 Action.Enter(CGF);
7355 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7356 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7357 (void)PrivateScope.Privatize();
7358 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
7359 CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false);
7360 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7361 };
7362
7363 emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute_parallel_for,
7364 CodeGen: CodeGenTeams);
7365 emitPostUpdateForReductionClause(CGF, D: S,
7366 CondGen: [](CodeGenFunction &) { return nullptr; });
7367}
7368
7369void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
7370 CodeGenModule &CGM, StringRef ParentName,
7371 const OMPTargetTeamsDistributeParallelForDirective &S) {
7372 // Emit SPMD target teams distribute parallel for region as a standalone
7373 // region.
7374 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7375 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
7376 };
7377 llvm::Function *Fn;
7378 llvm::Constant *Addr;
7379 // Emit target region as a standalone region.
7380 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7381 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7382 assert(Fn && Addr && "Target device function emission failed.");
7383}
7384
7385void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
7386 const OMPTargetTeamsDistributeParallelForDirective &S) {
7387 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7388 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
7389 };
7390 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7391}
7392
7393static void emitTargetTeamsDistributeParallelForSimdRegion(
7394 CodeGenFunction &CGF,
7395 const OMPTargetTeamsDistributeParallelForSimdDirective &S,
7396 PrePostActionTy &Action) {
7397 Action.Enter(CGF);
7398 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7399 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
7400 IncExpr: S.getDistInc());
7401 };
7402
7403 // Emit teams region as a standalone region.
7404 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7405 PrePostActionTy &Action) {
7406 Action.Enter(CGF);
7407 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7408 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7409 (void)PrivateScope.Privatize();
7410 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
7411 CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false);
7412 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7413 };
7414
7415 emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute_parallel_for_simd,
7416 CodeGen: CodeGenTeams);
7417 emitPostUpdateForReductionClause(CGF, D: S,
7418 CondGen: [](CodeGenFunction &) { return nullptr; });
7419}
7420
7421void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
7422 CodeGenModule &CGM, StringRef ParentName,
7423 const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
7424 // Emit SPMD target teams distribute parallel for simd region as a standalone
7425 // region.
7426 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7427 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
7428 };
7429 llvm::Function *Fn;
7430 llvm::Constant *Addr;
7431 // Emit target region as a standalone region.
7432 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7433 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7434 assert(Fn && Addr && "Target device function emission failed.");
7435}
7436
7437void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
7438 const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
7439 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7440 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
7441 };
7442 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7443}
7444
7445void CodeGenFunction::EmitOMPCancellationPointDirective(
7446 const OMPCancellationPointDirective &S) {
7447 CGM.getOpenMPRuntime().emitCancellationPointCall(CGF&: *this, Loc: S.getBeginLoc(),
7448 CancelRegion: S.getCancelRegion());
7449}
7450
7451void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
7452 const Expr *IfCond = nullptr;
7453 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
7454 if (C->getNameModifier() == OMPD_unknown ||
7455 C->getNameModifier() == OMPD_cancel) {
7456 IfCond = C->getCondition();
7457 break;
7458 }
7459 }
7460 if (CGM.getLangOpts().OpenMPIRBuilder) {
7461 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
7462 // TODO: This check is necessary as we only generate `omp parallel` through
7463 // the OpenMPIRBuilder for now.
7464 if (S.getCancelRegion() == OMPD_parallel ||
7465 S.getCancelRegion() == OMPD_sections ||
7466 S.getCancelRegion() == OMPD_section) {
7467 llvm::Value *IfCondition = nullptr;
7468 if (IfCond)
7469 IfCondition = EmitScalarExpr(E: IfCond,
7470 /*IgnoreResultAssign=*/true);
7471 llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(
7472 ValOrErr: OMPBuilder.createCancel(Loc: Builder, IfCondition, CanceledDirective: S.getCancelRegion()));
7473 return Builder.restoreIP(IP: AfterIP);
7474 }
7475 }
7476
7477 CGM.getOpenMPRuntime().emitCancelCall(CGF&: *this, Loc: S.getBeginLoc(), IfCond,
7478 CancelRegion: S.getCancelRegion());
7479}
7480
7481CodeGenFunction::JumpDest
7482CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
7483 if (Kind == OMPD_parallel || Kind == OMPD_task ||
7484 Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
7485 Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
7486 return ReturnBlock;
7487 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
7488 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
7489 Kind == OMPD_distribute_parallel_for ||
7490 Kind == OMPD_target_parallel_for ||
7491 Kind == OMPD_teams_distribute_parallel_for ||
7492 Kind == OMPD_target_teams_distribute_parallel_for);
7493 return OMPCancelStack.getExitBlock();
7494}
7495
7496void CodeGenFunction::EmitOMPUseDevicePtrClause(
7497 const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
7498 const llvm::DenseMap<const ValueDecl *, llvm::Value *>
7499 CaptureDeviceAddrMap) {
7500 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
7501 for (const Expr *OrigVarIt : C.varlist()) {
7502 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: OrigVarIt)->getDecl());
7503 if (!Processed.insert(V: OrigVD).second)
7504 continue;
7505
7506 // In order to identify the right initializer we need to match the
7507 // declaration used by the mapping logic. In some cases we may get
7508 // OMPCapturedExprDecl that refers to the original declaration.
7509 const ValueDecl *MatchingVD = OrigVD;
7510 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: MatchingVD)) {
7511 // OMPCapturedExprDecl are used to privative fields of the current
7512 // structure.
7513 const auto *ME = cast<MemberExpr>(Val: OED->getInit());
7514 assert(isa<CXXThisExpr>(ME->getBase()->IgnoreImpCasts()) &&
7515 "Base should be the current struct!");
7516 MatchingVD = ME->getMemberDecl();
7517 }
7518
7519 // If we don't have information about the current list item, move on to
7520 // the next one.
7521 auto InitAddrIt = CaptureDeviceAddrMap.find(Val: MatchingVD);
7522 if (InitAddrIt == CaptureDeviceAddrMap.end())
7523 continue;
7524
7525 llvm::Type *Ty = ConvertTypeForMem(T: OrigVD->getType().getNonReferenceType());
7526
7527 // Return the address of the private variable.
7528 bool IsRegistered = PrivateScope.addPrivate(
7529 LocalVD: OrigVD,
7530 Addr: Address(InitAddrIt->second, Ty,
7531 getContext().getTypeAlignInChars(T: getContext().VoidPtrTy)));
7532 assert(IsRegistered && "firstprivate var already registered as private");
7533 // Silence the warning about unused variable.
7534 (void)IsRegistered;
7535 }
7536}
7537
7538static const VarDecl *getBaseDecl(const Expr *Ref) {
7539 const Expr *Base = Ref->IgnoreParenImpCasts();
7540 while (const auto *OASE = dyn_cast<ArraySectionExpr>(Val: Base))
7541 Base = OASE->getBase()->IgnoreParenImpCasts();
7542 while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Val: Base))
7543 Base = ASE->getBase()->IgnoreParenImpCasts();
7544 return cast<VarDecl>(Val: cast<DeclRefExpr>(Val: Base)->getDecl());
7545}
7546
7547void CodeGenFunction::EmitOMPUseDeviceAddrClause(
7548 const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
7549 const llvm::DenseMap<const ValueDecl *, llvm::Value *>
7550 CaptureDeviceAddrMap) {
7551 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
7552 for (const Expr *Ref : C.varlist()) {
7553 const VarDecl *OrigVD = getBaseDecl(Ref);
7554 if (!Processed.insert(V: OrigVD).second)
7555 continue;
7556 // In order to identify the right initializer we need to match the
7557 // declaration used by the mapping logic. In some cases we may get
7558 // OMPCapturedExprDecl that refers to the original declaration.
7559 const ValueDecl *MatchingVD = OrigVD;
7560 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: MatchingVD)) {
7561 // OMPCapturedExprDecl are used to privative fields of the current
7562 // structure.
7563 const auto *ME = cast<MemberExpr>(Val: OED->getInit());
7564 assert(isa<CXXThisExpr>(ME->getBase()) &&
7565 "Base should be the current struct!");
7566 MatchingVD = ME->getMemberDecl();
7567 }
7568
7569 // If we don't have information about the current list item, move on to
7570 // the next one.
7571 auto InitAddrIt = CaptureDeviceAddrMap.find(Val: MatchingVD);
7572 if (InitAddrIt == CaptureDeviceAddrMap.end())
7573 continue;
7574
7575 llvm::Type *Ty = ConvertTypeForMem(T: OrigVD->getType().getNonReferenceType());
7576
7577 Address PrivAddr =
7578 Address(InitAddrIt->second, Ty,
7579 getContext().getTypeAlignInChars(T: getContext().VoidPtrTy));
7580 // For declrefs and variable length array need to load the pointer for
7581 // correct mapping, since the pointer to the data was passed to the runtime.
7582 if (isa<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts()) ||
7583 MatchingVD->getType()->isArrayType()) {
7584 QualType PtrTy = getContext().getPointerType(
7585 T: OrigVD->getType().getNonReferenceType());
7586 PrivAddr =
7587 EmitLoadOfPointer(Ptr: PrivAddr.withElementType(ElemTy: ConvertTypeForMem(T: PtrTy)),
7588 PtrTy: PtrTy->castAs<PointerType>());
7589 }
7590
7591 (void)PrivateScope.addPrivate(LocalVD: OrigVD, Addr: PrivAddr);
7592 }
7593}
7594
7595// Generate the instructions for '#pragma omp target data' directive.
7596void CodeGenFunction::EmitOMPTargetDataDirective(
7597 const OMPTargetDataDirective &S) {
7598 CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
7599 /*SeparateBeginEndCalls=*/true);
7600
7601 // Create a pre/post action to signal the privatization of the device pointer.
7602 // This action can be replaced by the OpenMP runtime code generation to
7603 // deactivate privatization.
7604 bool PrivatizeDevicePointers = false;
7605 class DevicePointerPrivActionTy : public PrePostActionTy {
7606 bool &PrivatizeDevicePointers;
7607
7608 public:
7609 explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
7610 : PrivatizeDevicePointers(PrivatizeDevicePointers) {}
7611 void Enter(CodeGenFunction &CGF) override {
7612 PrivatizeDevicePointers = true;
7613 }
7614 };
7615 DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
7616
7617 auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
7618 auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7619 CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
7620 };
7621
7622 // Codegen that selects whether to generate the privatization code or not.
7623 auto &&PrivCodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
7624 RegionCodeGenTy RCG(InnermostCodeGen);
7625 PrivatizeDevicePointers = false;
7626
7627 // Call the pre-action to change the status of PrivatizeDevicePointers if
7628 // needed.
7629 Action.Enter(CGF);
7630
7631 if (PrivatizeDevicePointers) {
7632 OMPPrivateScope PrivateScope(CGF);
7633 // Emit all instances of the use_device_ptr clause.
7634 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
7635 CGF.EmitOMPUseDevicePtrClause(C: *C, PrivateScope,
7636 CaptureDeviceAddrMap: Info.CaptureDeviceAddrMap);
7637 for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
7638 CGF.EmitOMPUseDeviceAddrClause(C: *C, PrivateScope,
7639 CaptureDeviceAddrMap: Info.CaptureDeviceAddrMap);
7640 (void)PrivateScope.Privatize();
7641 RCG(CGF);
7642 } else {
7643 // If we don't have target devices, don't bother emitting the data
7644 // mapping code.
7645 std::optional<OpenMPDirectiveKind> CaptureRegion;
7646 if (CGM.getLangOpts().OMPTargetTriples.empty()) {
7647 // Emit helper decls of the use_device_ptr/use_device_addr clauses.
7648 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
7649 for (const Expr *E : C->varlist()) {
7650 const Decl *D = cast<DeclRefExpr>(Val: E)->getDecl();
7651 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: D))
7652 CGF.EmitVarDecl(D: *OED);
7653 }
7654 for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
7655 for (const Expr *E : C->varlist()) {
7656 const Decl *D = getBaseDecl(Ref: E);
7657 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: D))
7658 CGF.EmitVarDecl(D: *OED);
7659 }
7660 } else {
7661 CaptureRegion = OMPD_unknown;
7662 }
7663
7664 OMPLexicalScope Scope(CGF, S, CaptureRegion);
7665 RCG(CGF);
7666 }
7667 };
7668
7669 // Forward the provided action to the privatization codegen.
7670 RegionCodeGenTy PrivRCG(PrivCodeGen);
7671 PrivRCG.setAction(Action);
7672
7673 // Notwithstanding the body of the region is emitted as inlined directive,
7674 // we don't use an inline scope as changes in the references inside the
7675 // region are expected to be visible outside, so we do not privative them.
7676 OMPLexicalScope Scope(CGF, S);
7677 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_target_data,
7678 CodeGen: PrivRCG);
7679 };
7680
7681 RegionCodeGenTy RCG(CodeGen);
7682
7683 // If we don't have target devices, don't bother emitting the data mapping
7684 // code.
7685 if (CGM.getLangOpts().OMPTargetTriples.empty()) {
7686 RCG(*this);
7687 return;
7688 }
7689
7690 // Check if we have any if clause associated with the directive.
7691 const Expr *IfCond = nullptr;
7692 if (const auto *C = S.getSingleClause<OMPIfClause>())
7693 IfCond = C->getCondition();
7694
7695 // Check if we have any device clause associated with the directive.
7696 const Expr *Device = nullptr;
7697 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7698 Device = C->getDevice();
7699
7700 // Set the action to signal privatization of device pointers.
7701 RCG.setAction(PrivAction);
7702
7703 // Emit region code.
7704 CGM.getOpenMPRuntime().emitTargetDataCalls(CGF&: *this, D: S, IfCond, Device, CodeGen: RCG,
7705 Info);
7706}
7707
7708void CodeGenFunction::EmitOMPTargetEnterDataDirective(
7709 const OMPTargetEnterDataDirective &S) {
7710 // If we don't have target devices, don't bother emitting the data mapping
7711 // code.
7712 if (CGM.getLangOpts().OMPTargetTriples.empty())
7713 return;
7714
7715 // Check if we have any if clause associated with the directive.
7716 const Expr *IfCond = nullptr;
7717 if (const auto *C = S.getSingleClause<OMPIfClause>())
7718 IfCond = C->getCondition();
7719
7720 // Check if we have any device clause associated with the directive.
7721 const Expr *Device = nullptr;
7722 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7723 Device = C->getDevice();
7724
7725 OMPLexicalScope Scope(*this, S, OMPD_task);
7726 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF&: *this, D: S, IfCond, Device);
7727}
7728
7729void CodeGenFunction::EmitOMPTargetExitDataDirective(
7730 const OMPTargetExitDataDirective &S) {
7731 // If we don't have target devices, don't bother emitting the data mapping
7732 // code.
7733 if (CGM.getLangOpts().OMPTargetTriples.empty())
7734 return;
7735
7736 // Check if we have any if clause associated with the directive.
7737 const Expr *IfCond = nullptr;
7738 if (const auto *C = S.getSingleClause<OMPIfClause>())
7739 IfCond = C->getCondition();
7740
7741 // Check if we have any device clause associated with the directive.
7742 const Expr *Device = nullptr;
7743 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7744 Device = C->getDevice();
7745
7746 OMPLexicalScope Scope(*this, S, OMPD_task);
7747 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF&: *this, D: S, IfCond, Device);
7748}
7749
7750static void emitTargetParallelRegion(CodeGenFunction &CGF,
7751 const OMPTargetParallelDirective &S,
7752 PrePostActionTy &Action) {
7753 // Get the captured statement associated with the 'parallel' region.
7754 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_parallel);
7755 Action.Enter(CGF);
7756 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
7757 Action.Enter(CGF);
7758 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7759 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
7760 CGF.EmitOMPPrivateClause(D: S, PrivateScope);
7761 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7762 (void)PrivateScope.Privatize();
7763 if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()))
7764 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S);
7765 // TODO: Add support for clauses.
7766 CGF.EmitStmt(S: CS->getCapturedStmt());
7767 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
7768 };
7769 emitCommonOMPParallelDirective(CGF, S, InnermostKind: OMPD_parallel, CodeGen,
7770 CodeGenBoundParameters: emitEmptyBoundParameters);
7771 emitPostUpdateForReductionClause(CGF, D: S,
7772 CondGen: [](CodeGenFunction &) { return nullptr; });
7773}
7774
7775void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
7776 CodeGenModule &CGM, StringRef ParentName,
7777 const OMPTargetParallelDirective &S) {
7778 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7779 emitTargetParallelRegion(CGF, S, Action);
7780 };
7781 llvm::Function *Fn;
7782 llvm::Constant *Addr;
7783 // Emit target region as a standalone region.
7784 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7785 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7786 assert(Fn && Addr && "Target device function emission failed.");
7787}
7788
7789void CodeGenFunction::EmitOMPTargetParallelDirective(
7790 const OMPTargetParallelDirective &S) {
7791 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7792 emitTargetParallelRegion(CGF, S, Action);
7793 };
7794 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7795}
7796
7797static void emitTargetParallelForRegion(CodeGenFunction &CGF,
7798 const OMPTargetParallelForDirective &S,
7799 PrePostActionTy &Action) {
7800 Action.Enter(CGF);
7801 // Emit directive as a combined directive that consists of two implicit
7802 // directives: 'parallel' with 'for' directive.
7803 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7804 Action.Enter(CGF);
7805 CodeGenFunction::OMPCancelStackRAII CancelRegion(
7806 CGF, OMPD_target_parallel_for, S.hasCancel());
7807 CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), CodeGenLoopBounds: emitForLoopBounds,
7808 CGDispatchBounds: emitDispatchForLoopBounds);
7809 };
7810 emitCommonOMPParallelDirective(CGF, S, InnermostKind: OMPD_for, CodeGen,
7811 CodeGenBoundParameters: emitEmptyBoundParameters);
7812}
7813
7814void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
7815 CodeGenModule &CGM, StringRef ParentName,
7816 const OMPTargetParallelForDirective &S) {
7817 // Emit SPMD target parallel for region as a standalone region.
7818 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7819 emitTargetParallelForRegion(CGF, S, Action);
7820 };
7821 llvm::Function *Fn;
7822 llvm::Constant *Addr;
7823 // Emit target region as a standalone region.
7824 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7825 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7826 assert(Fn && Addr && "Target device function emission failed.");
7827}
7828
7829void CodeGenFunction::EmitOMPTargetParallelForDirective(
7830 const OMPTargetParallelForDirective &S) {
7831 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7832 emitTargetParallelForRegion(CGF, S, Action);
7833 };
7834 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7835}
7836
7837static void
7838emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
7839 const OMPTargetParallelForSimdDirective &S,
7840 PrePostActionTy &Action) {
7841 Action.Enter(CGF);
7842 // Emit directive as a combined directive that consists of two implicit
7843 // directives: 'parallel' with 'for' directive.
7844 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7845 Action.Enter(CGF);
7846 CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), CodeGenLoopBounds: emitForLoopBounds,
7847 CGDispatchBounds: emitDispatchForLoopBounds);
7848 };
7849 emitCommonOMPParallelDirective(CGF, S, InnermostKind: OMPD_simd, CodeGen,
7850 CodeGenBoundParameters: emitEmptyBoundParameters);
7851}
7852
7853void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
7854 CodeGenModule &CGM, StringRef ParentName,
7855 const OMPTargetParallelForSimdDirective &S) {
7856 // Emit SPMD target parallel for region as a standalone region.
7857 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7858 emitTargetParallelForSimdRegion(CGF, S, Action);
7859 };
7860 llvm::Function *Fn;
7861 llvm::Constant *Addr;
7862 // Emit target region as a standalone region.
7863 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7864 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7865 assert(Fn && Addr && "Target device function emission failed.");
7866}
7867
7868void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
7869 const OMPTargetParallelForSimdDirective &S) {
7870 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7871 emitTargetParallelForSimdRegion(CGF, S, Action);
7872 };
7873 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7874}
7875
7876/// Emit a helper variable and return corresponding lvalue.
7877static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
7878 const ImplicitParamDecl *PVD,
7879 CodeGenFunction::OMPPrivateScope &Privates) {
7880 const auto *VDecl = cast<VarDecl>(Val: Helper->getDecl());
7881 Privates.addPrivate(LocalVD: VDecl, Addr: CGF.GetAddrOfLocalVar(VD: PVD));
7882}
7883
7884void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
7885 assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
7886 // Emit outlined function for task construct.
7887 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_taskloop);
7888 Address CapturedStruct = Address::invalid();
7889 {
7890 OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
7891 CapturedStruct = GenerateCapturedStmtArgument(S: *CS);
7892 }
7893 QualType SharedsTy = getContext().getRecordType(Decl: CS->getCapturedRecordDecl());
7894 const Expr *IfCond = nullptr;
7895 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
7896 if (C->getNameModifier() == OMPD_unknown ||
7897 C->getNameModifier() == OMPD_taskloop) {
7898 IfCond = C->getCondition();
7899 break;
7900 }
7901 }
7902
7903 OMPTaskDataTy Data;
7904 // Check if taskloop must be emitted without taskgroup.
7905 Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
7906 // TODO: Check if we should emit tied or untied task.
7907 Data.Tied = true;
7908 // Set scheduling for taskloop
7909 if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
7910 // grainsize clause
7911 Data.Schedule.setInt(/*IntVal=*/false);
7912 Data.Schedule.setPointer(EmitScalarExpr(E: Clause->getGrainsize()));
7913 Data.HasModifier =
7914 (Clause->getModifier() == OMPC_GRAINSIZE_strict) ? true : false;
7915 } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
7916 // num_tasks clause
7917 Data.Schedule.setInt(/*IntVal=*/true);
7918 Data.Schedule.setPointer(EmitScalarExpr(E: Clause->getNumTasks()));
7919 Data.HasModifier =
7920 (Clause->getModifier() == OMPC_NUMTASKS_strict) ? true : false;
7921 }
7922
7923 auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
7924 // if (PreCond) {
7925 // for (IV in 0..LastIteration) BODY;
7926 // <Final counter/linear vars updates>;
7927 // }
7928 //
7929
7930 // Emit: if (PreCond) - begin.
7931 // If the condition constant folds and can be elided, avoid emitting the
7932 // whole loop.
7933 bool CondConstant;
7934 llvm::BasicBlock *ContBlock = nullptr;
7935 OMPLoopScope PreInitScope(CGF, S);
7936 if (CGF.ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) {
7937 if (!CondConstant)
7938 return;
7939 } else {
7940 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock(name: "taskloop.if.then");
7941 ContBlock = CGF.createBasicBlock(name: "taskloop.if.end");
7942 emitPreCond(CGF, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock,
7943 TrueCount: CGF.getProfileCount(S: &S));
7944 CGF.EmitBlock(BB: ThenBlock);
7945 CGF.incrementProfileCounter(S: &S);
7946 }
7947
7948 (void)CGF.EmitOMPLinearClauseInit(D: S);
7949
7950 OMPPrivateScope LoopScope(CGF);
7951 // Emit helper vars inits.
7952 enum { LowerBound = 5, UpperBound, Stride, LastIter };
7953 auto *I = CS->getCapturedDecl()->param_begin();
7954 auto *LBP = std::next(x: I, n: LowerBound);
7955 auto *UBP = std::next(x: I, n: UpperBound);
7956 auto *STP = std::next(x: I, n: Stride);
7957 auto *LIP = std::next(x: I, n: LastIter);
7958 mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getLowerBoundVariable()), PVD: *LBP,
7959 Privates&: LoopScope);
7960 mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getUpperBoundVariable()), PVD: *UBP,
7961 Privates&: LoopScope);
7962 mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getStrideVariable()), PVD: *STP, Privates&: LoopScope);
7963 mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getIsLastIterVariable()), PVD: *LIP,
7964 Privates&: LoopScope);
7965 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
7966 CGF.EmitOMPLinearClause(D: S, PrivateScope&: LoopScope);
7967 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope);
7968 (void)LoopScope.Privatize();
7969 // Emit the loop iteration variable.
7970 const Expr *IVExpr = S.getIterationVariable();
7971 const auto *IVDecl = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IVExpr)->getDecl());
7972 CGF.EmitVarDecl(D: *IVDecl);
7973 CGF.EmitIgnoredExpr(E: S.getInit());
7974
7975 // Emit the iterations count variable.
7976 // If it is not a variable, Sema decided to calculate iterations count on
7977 // each iteration (e.g., it is foldable into a constant).
7978 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) {
7979 CGF.EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl()));
7980 // Emit calculation of the iterations count.
7981 CGF.EmitIgnoredExpr(E: S.getCalcLastIteration());
7982 }
7983
7984 {
7985 OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
7986 emitCommonSimdLoop(
7987 CGF, S,
7988 SimdInitGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7989 if (isOpenMPSimdDirective(DKind: S.getDirectiveKind()))
7990 CGF.EmitOMPSimdInit(D: S);
7991 },
7992 BodyCodeGen: [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
7993 CGF.EmitOMPInnerLoop(
7994 S, RequiresCleanup: LoopScope.requiresCleanups(), LoopCond: S.getCond(), IncExpr: S.getInc(),
7995 BodyGen: [&S](CodeGenFunction &CGF) {
7996 emitOMPLoopBodyWithStopPoint(CGF, S,
7997 LoopExit: CodeGenFunction::JumpDest());
7998 },
7999 PostIncGen: [](CodeGenFunction &) {});
8000 });
8001 }
8002 // Emit: if (PreCond) - end.
8003 if (ContBlock) {
8004 CGF.EmitBranch(Block: ContBlock);
8005 CGF.EmitBlock(BB: ContBlock, IsFinished: true);
8006 }
8007 // Emit final copy of the lastprivate variables if IsLastIter != 0.
8008 if (HasLastprivateClause) {
8009 CGF.EmitOMPLastprivateClauseFinal(
8010 D: S, NoFinals: isOpenMPSimdDirective(DKind: S.getDirectiveKind()),
8011 IsLastIterCond: CGF.Builder.CreateIsNotNull(Arg: CGF.EmitLoadOfScalar(
8012 Addr: CGF.GetAddrOfLocalVar(VD: *LIP), /*Volatile=*/false,
8013 Ty: (*LIP)->getType(), Loc: S.getBeginLoc())));
8014 }
8015 LoopScope.restoreMap();
8016 CGF.EmitOMPLinearClauseFinal(D: S, CondGen: [LIP, &S](CodeGenFunction &CGF) {
8017 return CGF.Builder.CreateIsNotNull(
8018 Arg: CGF.EmitLoadOfScalar(Addr: CGF.GetAddrOfLocalVar(VD: *LIP), /*Volatile=*/false,
8019 Ty: (*LIP)->getType(), Loc: S.getBeginLoc()));
8020 });
8021 };
8022 auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
8023 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
8024 const OMPTaskDataTy &Data) {
8025 auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
8026 &Data](CodeGenFunction &CGF, PrePostActionTy &) {
8027 OMPLoopScope PreInitScope(CGF, S);
8028 CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, Loc: S.getBeginLoc(), D: S,
8029 TaskFunction: OutlinedFn, SharedsTy,
8030 Shareds: CapturedStruct, IfCond, Data);
8031 };
8032 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_taskloop,
8033 CodeGen);
8034 };
8035 if (Data.Nogroup) {
8036 EmitOMPTaskBasedDirective(S, CapturedRegion: OMPD_taskloop, BodyGen, TaskGen, Data);
8037 } else {
8038 CGM.getOpenMPRuntime().emitTaskgroupRegion(
8039 CGF&: *this,
8040 TaskgroupOpGen: [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
8041 PrePostActionTy &Action) {
8042 Action.Enter(CGF);
8043 CGF.EmitOMPTaskBasedDirective(S, CapturedRegion: OMPD_taskloop, BodyGen, TaskGen,
8044 Data);
8045 },
8046 Loc: S.getBeginLoc());
8047 }
8048}
8049
8050void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
8051 auto LPCRegion =
8052 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8053 EmitOMPTaskLoopBasedDirective(S);
8054}
8055
8056void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
8057 const OMPTaskLoopSimdDirective &S) {
8058 auto LPCRegion =
8059 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8060 OMPLexicalScope Scope(*this, S);
8061 EmitOMPTaskLoopBasedDirective(S);
8062}
8063
8064void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
8065 const OMPMasterTaskLoopDirective &S) {
8066 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8067 Action.Enter(CGF);
8068 EmitOMPTaskLoopBasedDirective(S);
8069 };
8070 auto LPCRegion =
8071 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8072 OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false);
8073 CGM.getOpenMPRuntime().emitMasterRegion(CGF&: *this, MasterOpGen: CodeGen, Loc: S.getBeginLoc());
8074}
8075
8076void CodeGenFunction::EmitOMPMaskedTaskLoopDirective(
8077 const OMPMaskedTaskLoopDirective &S) {
8078 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8079 Action.Enter(CGF);
8080 EmitOMPTaskLoopBasedDirective(S);
8081 };
8082 auto LPCRegion =
8083 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8084 OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false);
8085 CGM.getOpenMPRuntime().emitMaskedRegion(CGF&: *this, MaskedOpGen: CodeGen, Loc: S.getBeginLoc());
8086}
8087
8088void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
8089 const OMPMasterTaskLoopSimdDirective &S) {
8090 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8091 Action.Enter(CGF);
8092 EmitOMPTaskLoopBasedDirective(S);
8093 };
8094 auto LPCRegion =
8095 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8096 OMPLexicalScope Scope(*this, S);
8097 CGM.getOpenMPRuntime().emitMasterRegion(CGF&: *this, MasterOpGen: CodeGen, Loc: S.getBeginLoc());
8098}
8099
8100void CodeGenFunction::EmitOMPMaskedTaskLoopSimdDirective(
8101 const OMPMaskedTaskLoopSimdDirective &S) {
8102 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8103 Action.Enter(CGF);
8104 EmitOMPTaskLoopBasedDirective(S);
8105 };
8106 auto LPCRegion =
8107 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8108 OMPLexicalScope Scope(*this, S);
8109 CGM.getOpenMPRuntime().emitMaskedRegion(CGF&: *this, MaskedOpGen: CodeGen, Loc: S.getBeginLoc());
8110}
8111
8112void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
8113 const OMPParallelMasterTaskLoopDirective &S) {
8114 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8115 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
8116 PrePostActionTy &Action) {
8117 Action.Enter(CGF);
8118 CGF.EmitOMPTaskLoopBasedDirective(S);
8119 };
8120 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
8121 CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: TaskLoopCodeGen,
8122 Loc: S.getBeginLoc());
8123 };
8124 auto LPCRegion =
8125 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8126 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_master_taskloop, CodeGen,
8127 CodeGenBoundParameters: emitEmptyBoundParameters);
8128}
8129
8130void CodeGenFunction::EmitOMPParallelMaskedTaskLoopDirective(
8131 const OMPParallelMaskedTaskLoopDirective &S) {
8132 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8133 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
8134 PrePostActionTy &Action) {
8135 Action.Enter(CGF);
8136 CGF.EmitOMPTaskLoopBasedDirective(S);
8137 };
8138 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
8139 CGM.getOpenMPRuntime().emitMaskedRegion(CGF, MaskedOpGen: TaskLoopCodeGen,
8140 Loc: S.getBeginLoc());
8141 };
8142 auto LPCRegion =
8143 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8144 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_masked_taskloop, CodeGen,
8145 CodeGenBoundParameters: emitEmptyBoundParameters);
8146}
8147
8148void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
8149 const OMPParallelMasterTaskLoopSimdDirective &S) {
8150 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8151 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
8152 PrePostActionTy &Action) {
8153 Action.Enter(CGF);
8154 CGF.EmitOMPTaskLoopBasedDirective(S);
8155 };
8156 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
8157 CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: TaskLoopCodeGen,
8158 Loc: S.getBeginLoc());
8159 };
8160 auto LPCRegion =
8161 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8162 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_master_taskloop_simd, CodeGen,
8163 CodeGenBoundParameters: emitEmptyBoundParameters);
8164}
8165
8166void CodeGenFunction::EmitOMPParallelMaskedTaskLoopSimdDirective(
8167 const OMPParallelMaskedTaskLoopSimdDirective &S) {
8168 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8169 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
8170 PrePostActionTy &Action) {
8171 Action.Enter(CGF);
8172 CGF.EmitOMPTaskLoopBasedDirective(S);
8173 };
8174 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
8175 CGM.getOpenMPRuntime().emitMaskedRegion(CGF, MaskedOpGen: TaskLoopCodeGen,
8176 Loc: S.getBeginLoc());
8177 };
8178 auto LPCRegion =
8179 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8180 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_masked_taskloop_simd, CodeGen,
8181 CodeGenBoundParameters: emitEmptyBoundParameters);
8182}
8183
8184// Generate the instructions for '#pragma omp target update' directive.
8185void CodeGenFunction::EmitOMPTargetUpdateDirective(
8186 const OMPTargetUpdateDirective &S) {
8187 // If we don't have target devices, don't bother emitting the data mapping
8188 // code.
8189 if (CGM.getLangOpts().OMPTargetTriples.empty())
8190 return;
8191
8192 // Check if we have any if clause associated with the directive.
8193 const Expr *IfCond = nullptr;
8194 if (const auto *C = S.getSingleClause<OMPIfClause>())
8195 IfCond = C->getCondition();
8196
8197 // Check if we have any device clause associated with the directive.
8198 const Expr *Device = nullptr;
8199 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
8200 Device = C->getDevice();
8201
8202 OMPLexicalScope Scope(*this, S, OMPD_task);
8203 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF&: *this, D: S, IfCond, Device);
8204}
8205
8206void CodeGenFunction::EmitOMPGenericLoopDirective(
8207 const OMPGenericLoopDirective &S) {
8208 // Always expect a bind clause on the loop directive. It it wasn't
8209 // in the source, it should have been added in sema.
8210
8211 OpenMPBindClauseKind BindKind = OMPC_BIND_unknown;
8212 if (const auto *C = S.getSingleClause<OMPBindClause>())
8213 BindKind = C->getBindKind();
8214
8215 switch (BindKind) {
8216 case OMPC_BIND_parallel: // for
8217 return emitOMPForDirective(S, CGF&: *this, CGM, /*HasCancel=*/false);
8218 case OMPC_BIND_teams: // distribute
8219 return emitOMPDistributeDirective(S, CGF&: *this, CGM);
8220 case OMPC_BIND_thread: // simd
8221 return emitOMPSimdDirective(S, CGF&: *this, CGM);
8222 case OMPC_BIND_unknown:
8223 break;
8224 }
8225
8226 // Unimplemented, just inline the underlying statement for now.
8227 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8228 // Emit the loop iteration variable.
8229 const Stmt *CS =
8230 cast<CapturedStmt>(Val: S.getAssociatedStmt())->getCapturedStmt();
8231 const auto *ForS = dyn_cast<ForStmt>(Val: CS);
8232 if (ForS && !isa<DeclStmt>(Val: ForS->getInit())) {
8233 OMPPrivateScope LoopScope(CGF);
8234 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
8235 (void)LoopScope.Privatize();
8236 CGF.EmitStmt(S: CS);
8237 LoopScope.restoreMap();
8238 } else {
8239 CGF.EmitStmt(S: CS);
8240 }
8241 };
8242 OMPLexicalScope Scope(*this, S, OMPD_unknown);
8243 CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_loop, CodeGen);
8244}
8245
8246void CodeGenFunction::EmitOMPParallelGenericLoopDirective(
8247 const OMPLoopDirective &S) {
8248 // Emit combined directive as if its constituent constructs are 'parallel'
8249 // and 'for'.
8250 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8251 Action.Enter(CGF);
8252 emitOMPCopyinClause(CGF, S);
8253 (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
8254 };
8255 {
8256 auto LPCRegion =
8257 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8258 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_for, CodeGen,
8259 CodeGenBoundParameters: emitEmptyBoundParameters);
8260 }
8261 // Check for outer lastprivate conditional update.
8262 checkForLastprivateConditionalUpdate(CGF&: *this, S);
8263}
8264
8265void CodeGenFunction::EmitOMPTeamsGenericLoopDirective(
8266 const OMPTeamsGenericLoopDirective &S) {
8267 // To be consistent with current behavior of 'target teams loop', emit
8268 // 'teams loop' as if its constituent constructs are 'teams' and 'distribute'.
8269 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
8270 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
8271 };
8272
8273 // Emit teams region as a standalone region.
8274 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
8275 PrePostActionTy &Action) {
8276 Action.Enter(CGF);
8277 OMPPrivateScope PrivateScope(CGF);
8278 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
8279 (void)PrivateScope.Privatize();
8280 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute,
8281 CodeGen: CodeGenDistribute);
8282 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
8283 };
8284 emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute, CodeGen);
8285 emitPostUpdateForReductionClause(CGF&: *this, D: S,
8286 CondGen: [](CodeGenFunction &) { return nullptr; });
8287}
8288
8289#ifndef NDEBUG
8290static void emitTargetTeamsLoopCodegenStatus(CodeGenFunction &CGF,
8291 std::string StatusMsg,
8292 const OMPExecutableDirective &D) {
8293 bool IsDevice = CGF.CGM.getLangOpts().OpenMPIsTargetDevice;
8294 if (IsDevice)
8295 StatusMsg += ": DEVICE";
8296 else
8297 StatusMsg += ": HOST";
8298 SourceLocation L = D.getBeginLoc();
8299 auto &SM = CGF.getContext().getSourceManager();
8300 PresumedLoc PLoc = SM.getPresumedLoc(L);
8301 const char *FileName = PLoc.isValid() ? PLoc.getFilename() : nullptr;
8302 unsigned LineNo =
8303 PLoc.isValid() ? PLoc.getLine() : SM.getExpansionLineNumber(L);
8304 llvm::dbgs() << StatusMsg << ": " << FileName << ": " << LineNo << "\n";
8305}
8306#endif
8307
8308static void emitTargetTeamsGenericLoopRegionAsParallel(
8309 CodeGenFunction &CGF, PrePostActionTy &Action,
8310 const OMPTargetTeamsGenericLoopDirective &S) {
8311 Action.Enter(CGF);
8312 // Emit 'teams loop' as if its constituent constructs are 'distribute,
8313 // 'parallel, and 'for'.
8314 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
8315 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
8316 IncExpr: S.getDistInc());
8317 };
8318
8319 // Emit teams region as a standalone region.
8320 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
8321 PrePostActionTy &Action) {
8322 Action.Enter(CGF);
8323 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
8324 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
8325 (void)PrivateScope.Privatize();
8326 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
8327 CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false);
8328 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
8329 };
8330 DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
8331 emitTargetTeamsLoopCodegenStatus(
8332 CGF, TTL_CODEGEN_TYPE " as parallel for", S));
8333 emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute_parallel_for,
8334 CodeGen: CodeGenTeams);
8335 emitPostUpdateForReductionClause(CGF, D: S,
8336 CondGen: [](CodeGenFunction &) { return nullptr; });
8337}
8338
8339static void emitTargetTeamsGenericLoopRegionAsDistribute(
8340 CodeGenFunction &CGF, PrePostActionTy &Action,
8341 const OMPTargetTeamsGenericLoopDirective &S) {
8342 Action.Enter(CGF);
8343 // Emit 'teams loop' as if its constituent construct is 'distribute'.
8344 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
8345 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
8346 };
8347
8348 // Emit teams region as a standalone region.
8349 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
8350 PrePostActionTy &Action) {
8351 Action.Enter(CGF);
8352 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
8353 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
8354 (void)PrivateScope.Privatize();
8355 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
8356 CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false);
8357 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
8358 };
8359 DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
8360 emitTargetTeamsLoopCodegenStatus(
8361 CGF, TTL_CODEGEN_TYPE " as distribute", S));
8362 emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute, CodeGen);
8363 emitPostUpdateForReductionClause(CGF, D: S,
8364 CondGen: [](CodeGenFunction &) { return nullptr; });
8365}
8366
8367void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective(
8368 const OMPTargetTeamsGenericLoopDirective &S) {
8369 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8370 if (S.canBeParallelFor())
8371 emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
8372 else
8373 emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
8374 };
8375 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
8376}
8377
8378void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
8379 CodeGenModule &CGM, StringRef ParentName,
8380 const OMPTargetTeamsGenericLoopDirective &S) {
8381 // Emit SPMD target parallel loop region as a standalone region.
8382 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8383 if (S.canBeParallelFor())
8384 emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
8385 else
8386 emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
8387 };
8388 llvm::Function *Fn;
8389 llvm::Constant *Addr;
8390 // Emit target region as a standalone region.
8391 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
8392 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
8393 assert(Fn && Addr &&
8394 "Target device function emission failed for 'target teams loop'.");
8395}
8396
8397static void emitTargetParallelGenericLoopRegion(
8398 CodeGenFunction &CGF, const OMPTargetParallelGenericLoopDirective &S,
8399 PrePostActionTy &Action) {
8400 Action.Enter(CGF);
8401 // Emit as 'parallel for'.
8402 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8403 Action.Enter(CGF);
8404 CodeGenFunction::OMPCancelStackRAII CancelRegion(
8405 CGF, OMPD_target_parallel_loop, /*hasCancel=*/false);
8406 CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), CodeGenLoopBounds: emitForLoopBounds,
8407 CGDispatchBounds: emitDispatchForLoopBounds);
8408 };
8409 emitCommonOMPParallelDirective(CGF, S, InnermostKind: OMPD_for, CodeGen,
8410 CodeGenBoundParameters: emitEmptyBoundParameters);
8411}
8412
8413void CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
8414 CodeGenModule &CGM, StringRef ParentName,
8415 const OMPTargetParallelGenericLoopDirective &S) {
8416 // Emit target parallel loop region as a standalone region.
8417 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8418 emitTargetParallelGenericLoopRegion(CGF, S, Action);
8419 };
8420 llvm::Function *Fn;
8421 llvm::Constant *Addr;
8422 // Emit target region as a standalone region.
8423 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
8424 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
8425 assert(Fn && Addr && "Target device function emission failed.");
8426}
8427
8428/// Emit combined directive 'target parallel loop' as if its constituent
8429/// constructs are 'target', 'parallel', and 'for'.
8430void CodeGenFunction::EmitOMPTargetParallelGenericLoopDirective(
8431 const OMPTargetParallelGenericLoopDirective &S) {
8432 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8433 emitTargetParallelGenericLoopRegion(CGF, S, Action);
8434 };
8435 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
8436}
8437
8438void CodeGenFunction::EmitSimpleOMPExecutableDirective(
8439 const OMPExecutableDirective &D) {
8440 if (const auto *SD = dyn_cast<OMPScanDirective>(Val: &D)) {
8441 EmitOMPScanDirective(S: *SD);
8442 return;
8443 }
8444 if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
8445 return;
8446 auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
8447 OMPPrivateScope GlobalsScope(CGF);
8448 if (isOpenMPTaskingDirective(Kind: D.getDirectiveKind())) {
8449 // Capture global firstprivates to avoid crash.
8450 for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
8451 for (const Expr *Ref : C->varlist()) {
8452 const auto *DRE = cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
8453 if (!DRE)
8454 continue;
8455 const auto *VD = dyn_cast<VarDecl>(Val: DRE->getDecl());
8456 if (!VD || VD->hasLocalStorage())
8457 continue;
8458 if (!CGF.LocalDeclMap.count(Val: VD)) {
8459 LValue GlobLVal = CGF.EmitLValue(E: Ref);
8460 GlobalsScope.addPrivate(LocalVD: VD, Addr: GlobLVal.getAddress());
8461 }
8462 }
8463 }
8464 }
8465 if (isOpenMPSimdDirective(DKind: D.getDirectiveKind())) {
8466 (void)GlobalsScope.Privatize();
8467 ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
8468 emitOMPSimdRegion(CGF, S: cast<OMPLoopDirective>(Val: D), Action);
8469 } else {
8470 if (const auto *LD = dyn_cast<OMPLoopDirective>(Val: &D)) {
8471 for (const Expr *E : LD->counters()) {
8472 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
8473 if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(Val: VD)) {
8474 LValue GlobLVal = CGF.EmitLValue(E);
8475 GlobalsScope.addPrivate(LocalVD: VD, Addr: GlobLVal.getAddress());
8476 }
8477 if (isa<OMPCapturedExprDecl>(Val: VD)) {
8478 // Emit only those that were not explicitly referenced in clauses.
8479 if (!CGF.LocalDeclMap.count(Val: VD))
8480 CGF.EmitVarDecl(D: *VD);
8481 }
8482 }
8483 for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
8484 if (!C->getNumForLoops())
8485 continue;
8486 for (unsigned I = LD->getLoopsNumber(),
8487 E = C->getLoopNumIterations().size();
8488 I < E; ++I) {
8489 if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
8490 Val: cast<DeclRefExpr>(Val: C->getLoopCounter(NumLoop: I))->getDecl())) {
8491 // Emit only those that were not explicitly referenced in clauses.
8492 if (!CGF.LocalDeclMap.count(Val: VD))
8493 CGF.EmitVarDecl(D: *VD);
8494 }
8495 }
8496 }
8497 }
8498 (void)GlobalsScope.Privatize();
8499 CGF.EmitStmt(S: D.getInnermostCapturedStmt()->getCapturedStmt());
8500 }
8501 };
8502 if (D.getDirectiveKind() == OMPD_atomic ||
8503 D.getDirectiveKind() == OMPD_critical ||
8504 D.getDirectiveKind() == OMPD_section ||
8505 D.getDirectiveKind() == OMPD_master ||
8506 D.getDirectiveKind() == OMPD_masked ||
8507 D.getDirectiveKind() == OMPD_unroll ||
8508 D.getDirectiveKind() == OMPD_assume) {
8509 EmitStmt(S: D.getAssociatedStmt());
8510 } else {
8511 auto LPCRegion =
8512 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S: D);
8513 OMPSimdLexicalScope Scope(*this, D);
8514 CGM.getOpenMPRuntime().emitInlinedDirective(
8515 CGF&: *this,
8516 InnermostKind: isOpenMPSimdDirective(DKind: D.getDirectiveKind()) ? OMPD_simd
8517 : D.getDirectiveKind(),
8518 CodeGen);
8519 }
8520 // Check for outer lastprivate conditional update.
8521 checkForLastprivateConditionalUpdate(CGF&: *this, S: D);
8522}
8523
8524void CodeGenFunction::EmitOMPAssumeDirective(const OMPAssumeDirective &S) {
8525 EmitStmt(S: S.getAssociatedStmt());
8526}
8527