//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGDebugInfo.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "CodeGenPGO.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/DiagnosticFrontend.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Debug.h"
#include <optional>
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

#define TTL_CODEGEN_TYPE "target-teams-loop-codegen"

static const VarDecl *getBaseDecl(const Expr *Ref);
static OpenMPDirectiveKind
getEffectiveDirectiveKind(const OMPExecutableDirective &S);

namespace {
/// Lexical scope for OpenMP executable constructs that handles correct
/// codegen for captured expressions.
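/// For example (illustrative, not tied to any one directive), in
///   #pragma omp parallel if(x + y > 0)
/// the condition may be precomputed into a clause pre-init declaration (an
/// OMPCapturedExprDecl) whose initializer is emitted by this scope before
/// the construct's body.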
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const std::optional<OpenMPDirectiveKind> CapturedRegion = std::nullopt,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion)
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
      }
    }
    (void)InlinedShareds.Privatize();
  }
};

/// Lexical scope for the OpenMP parallel construct that handles correct
/// codegen for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
    return !(isOpenMPTargetExecutionDirective(EKind) ||
             isOpenMPLoopBoundSharingDirective(EKind)) &&
           isOpenMPParallelDirective(EKind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
                        EmitPreInitStmt(S)) {}
};

/// Lexical scope for the OpenMP teams construct that handles correct
/// codegen for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
    return !isOpenMPTargetExecutionDirective(EKind) &&
           isOpenMPTeamsDirective(EKind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
                        EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives that supports capturing
/// of used expressions from the loop statement.
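/// For loop transformations such as '#pragma omp tile sizes(4)' (example
/// only), the helper variables of the generated loop nest arrive as pre-init
/// statements and are emitted by this scope before the loop itself.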
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
    const Stmt *PreInits;
    CodeGenFunction::OMPMapVars PreCondVars;
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
      for (const auto *E : LD->counters()) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        EmittedAsPrivate.insert(VD->getCanonicalDecl());
        (void)PreCondVars.setVarAddr(
            CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
      }
      // Mark private vars as undefs.
      for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
        for (const Expr *IRef : C->varlist()) {
          const auto *OrigVD =
              cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
          if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
            QualType OrigVDTy = OrigVD->getType().getNonReferenceType();
            (void)PreCondVars.setVarAddr(
                CGF, OrigVD,
                Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
                            CGF.getContext().getPointerType(OrigVDTy))),
                        CGF.ConvertTypeForMem(OrigVDTy),
                        CGF.getContext().getDeclAlign(OrigVD)));
          }
        }
      }
      (void)PreCondVars.apply(CGF);
      // Emit init, __range and __end variables for C++ range loops.
      (void)OMPLoopBasedDirective::doForAllLoops(
          LD->getInnermostCapturedStmt()->getCapturedStmt(),
          /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
          [&CGF](unsigned Cnt, const Stmt *CurStmt) {
            if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
              if (const Stmt *Init = CXXFor->getInit())
                CGF.EmitStmt(Init);
              CGF.EmitStmt(CXXFor->getRangeStmt());
              CGF.EmitStmt(CXXFor->getEndStmt());
            }
            return false;
          });
      PreInits = LD->getPreInits();
    } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
      PreInits = Tile->getPreInits();
    } else if (const auto *Stripe = dyn_cast<OMPStripeDirective>(&S)) {
      PreInits = Stripe->getPreInits();
    } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) {
      PreInits = Unroll->getPreInits();
    } else if (const auto *Reverse = dyn_cast<OMPReverseDirective>(&S)) {
      PreInits = Reverse->getPreInits();
    } else if (const auto *Interchange =
                   dyn_cast<OMPInterchangeDirective>(&S)) {
      PreInits = Interchange->getPreInits();
    } else {
      llvm_unreachable("Unknown loop-based directive kind.");
    }
    doEmitPreinits(PreInits);
    PreCondVars.restore(CGF);
  }

  void
  emitPreInitStmt(CodeGenFunction &CGF,
                  const OMPCanonicalLoopSequenceTransformationDirective &S) {
    const Stmt *PreInits;
    if (const auto *Fuse = dyn_cast<OMPFuseDirective>(&S)) {
      PreInits = Fuse->getPreInits();
    } else {
      llvm_unreachable(
          "Unknown canonical loop sequence transform directive kind.");
    }
    doEmitPreinits(PreInits);
  }

  void doEmitPreinits(const Stmt *PreInits) {
    if (PreInits) {
      // CompoundStmts and DeclStmts are used as lists of PreInit statements
      // and declarations. Since declarations must be visible in the following
      // statements that they initialize, unpack the CompoundStmt they are
      // nested in.
      SmallVector<const Stmt *> PreInitStmts;
      if (auto *PreInitCompound = dyn_cast<CompoundStmt>(PreInits))
        llvm::append_range(PreInitStmts, PreInitCompound->body());
      else
        PreInitStmts.push_back(PreInits);

      for (const Stmt *S : PreInitStmts) {
        // EmitStmt skips any OMPCapturedExprDecls, but they still need to be
        // emitted here.
        if (auto *PreInitDecl = dyn_cast<DeclStmt>(S)) {
          for (Decl *I : PreInitDecl->decls())
            CGF.EmitVarDecl(cast<VarDecl>(*I));
          continue;
        }
        CGF.EmitStmt(S);
      }
    }
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
  OMPLoopScope(CodeGenFunction &CGF,
               const OMPCanonicalLoopSequenceTransformationDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        for (const Expr *E : UDP->varlist()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
        for (const Expr *E : UDP->varlist()) {
          const Decl *D = getBaseDecl(E);
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(getEffectiveDirectiveKind(S)))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    // Temp copy arrays for inscan reductions should not be emitted as they are
    // not used in simd only mode.
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
    for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
      if (C->getModifier() != OMPC_REDUCTION_inscan)
        continue;
      for (const Expr *E : C->copy_array_temps())
        CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
    }
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          if (CopyArrayTemps.contains(VD))
            continue;
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};

} // namespace

// The loop directive with a bind clause will be mapped to a different
// directive with corresponding semantics.
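// For example, '#pragma omp loop bind(parallel)' is emitted with the
// semantics of 'omp for', 'bind(teams)' with those of 'distribute', and
// 'bind(thread)' with those of 'simd'; otherwise the directive stays
// OMPD_loop.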
static OpenMPDirectiveKind
getEffectiveDirectiveKind(const OMPExecutableDirective &S) {
  OpenMPDirectiveKind Kind = S.getDirectiveKind();
  if (Kind != OMPD_loop)
    return Kind;

  OpenMPBindClauseKind BindKind = OMPC_BIND_unknown;
  if (const auto *C = S.getSingleClause<OMPBindClause>())
    BindKind = C->getBindKind();

  switch (BindKind) {
  case OMPC_BIND_parallel:
    return OMPD_for;
  case OMPC_BIND_teams:
    return OMPD_distribute;
  case OMPC_BIND_thread:
    return OMPD_simd;
  default:
    return OMPD_loop;
  }
}

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);

LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (isa_and_nonnull<BlockDecl>(CurCodeDecl));
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}

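// For a VLA such as 'int a[n][m]' (illustrative), getTypeSizeInChars()
// returns zero, so the size below is built at run time as n * m *
// sizeof(int) from the captured VLA dimensions, using NUW multiplies.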
llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  ASTContext &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
    while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
      VlaSizePair VlaSize = getVLASize(VAT);
      Ty = VlaSize.Type;
      Size =
          Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts) : VlaSize.NumElts;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  }
  return CGM.getSize(SizeInChars);
}

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.emitRawPointer(*this),
            Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(
          EmitLValue(*I).getAddress().emitRawPointer(*this));
    }
  }
}

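// Inverse of the cast performed in GenerateOpenMPCapturedVars: a scalar that
// was captured by copy (e.g. a 'float', to give one example) travels through
// the runtime as a pointer-sized integer and is reinterpreted here through a
// pointer to its original type inside the outlined function.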
static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                    QualType DstType, StringRef Name,
                                    LValue AddrLV) {
  ASTContext &Ctx = CGF.getContext();

  llvm::Value *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress().emitRawPointer(CGF), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), Loc);
  // FIXME: should the pointee type (DstType) be passed?
  Address TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, DstType).getAddress();
  return TmpAddr;
}

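// For instance (illustrative), a captured VLA field of type 'int (*)[n]' is
// canonicalized below to 'int *', stripping the variably modified part so
// that the outlined function's signature does not depend on 'n'.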
static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType())
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
    if (const auto *VLA = dyn_cast<VariableArrayType>(A))
      return getCanonicalParamType(C, VLA->getElementType());
    if (!A->isVariablyModifiedType())
      return C.getCanonicalType(T);
  }
  return C.getCanonicalParamType(T);
}

namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  /// Location of the non-debug version of the outlined function.
  SourceLocation Loc;
  const bool IsDeviceKernel = false;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
                           SourceLocation Loc, bool IsDeviceKernel)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName), Loc(Loc), IsDeviceKernel(IsDeviceKernel) {}
};
} // namespace

static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, {}, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), FunctionTy,
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
        /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
        /*hasWrittenPrototype=*/false);
  }
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the outlined
    // function argument type should be uintptr and the value properly casted
    // to uintptr. This is necessary given that the runtime library is only
    // able to deal with pointers. We can pass in the same way the VLA type
    // sizes to the outlined function.
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (CapVar && (CapVar->getTLSKind() != clang::VarDecl::TLS_None)) {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType,
                                      ImplicitParamKind::ThreadPrivateVar);
    } else if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamKind::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
              CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      FO.IsDeviceKernel
          ? CGM.getTypes().arrangeDeviceKernelCallerDeclaration(Ctx.VoidTy,
                                                                TargetArgs)
          : CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy,
                                                             TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Always inline the outlined function if optimizations are enabled.
  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
    F->removeFnAttr(llvm::Attribute::NoInline);
    F->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
    F->addFnAttr("sample-profile-suffix-elision-policy", "selected");

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
                    FO.UIntPtrCastRequired ? FO.Loc
                                           : CD->getBody()->getBeginLoc());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything, just
    // use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress();
      if (ArgLVal.getType()->isLValueReferenceType()) {
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
        assert(ArgLVal.getType()->isPointerType());
        ArgAddr = CGF.EmitLoadOfPointer(
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt], {Var, ArgAddr.withAlignment(Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      LocalAddrs.insert({Args[Cnt],
                         {Var, FO.UIntPtrCastRequired
                                   ? castValueFromUintptr(
                                         CGF, I->getLocation(), FD->getType(),
                                         Args[Cnt]->getName(), ArgLVal)
                                   : ArgLVal.getAddress()}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}

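// When debug info is requested, two functions are produced below: a "debug"
// variant that keeps the original parameter types (its name gets a
// "_debug__" suffix) and a wrapper with the uintptr-cast signature expected
// by the runtime; the wrapper loads its arguments and forwards them to the
// debug variant.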
llvm::Function *CodeGenFunction::GenerateOpenMPCapturedStmtFunction(
    const CapturedStmt &S, const OMPExecutableDirective &D) {
  SourceLocation Loc = D.getBeginLoc();
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
  FunctionArgList Args, WrapperArgs;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs,
      WrapperLocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes,
      WrapperVLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(D);
  bool IsDeviceKernel = CGM.getOpenMPRuntime().isGPU() &&
                        isOpenMPTargetExecutionDirective(EKind) &&
                        D.getCapturedStmt(OMPD_target) == &S;
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  llvm::Function *WrapperF = nullptr;
  if (NeedWrapperFunction) {
    // Emit the final kernel early to allow attributes to be added by the
    // OpenMP IR builder.
    FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                              /*RegisterCastedArgsOnly=*/true,
                              CapturedStmtInfo->getHelperName(), Loc,
                              IsDeviceKernel);
    WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
    WrapperF =
        emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                     WrapperCGF.CXXThisValue, WrapperFO);
    Out << "_debug__";
  }
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str(), Loc, !NeedWrapperFunction && IsDeviceKernel);
  llvm::Function *F = emitOutlinedFunctionPrologue(
      *this, WrapperArgs, WrapperLocalAddrs, WrapperVLASizes, CXXThisValue, FO);
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : WrapperLocalAddrs) {
    if (LocalAddrPair.second.first) {
      LocalScope.addPrivate(LocalAddrPair.second.first,
                            LocalAddrPair.second.second);
    }
  }
  (void)LocalScope.Privatize();
  for (const auto &VLASizePair : WrapperVLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO->assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  LocalScope.ForceCleanup();
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  // Reverse the order.
  WrapperF->removeFromParent();
  F->getParent()->getFunctionList().insertAfter(F->getIterator(), WrapperF);

  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  auto *PI = F->arg_begin();
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      if (LV.getType()->isAnyComplexType())
        LV.setAddress(LV.getAddress().withElementType(PI->getType()));
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        CallArg = EI->second.second;
      } else {
        LValue LV =
            WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                      Arg->getType(), AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
    ++PI;
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}

//===----------------------------------------------------------------------===//
// OpenMP Directive Emission
//===----------------------------------------------------------------------===//
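// A rough C-level sketch of the copy loop emitted below (assuming 'n'
// elements of type T; CopyGen supplies the per-element copy):
//   T *d = dst, *s = src, *end = dst + n;
//   if (d != end)
//     do { CopyGen(d, s); ++d; ++s; } while (d != end);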
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.emitRawPointer(*this);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(*this);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = Builder.CreateInBoundsGEP(DestAddr.getElementType(),
                                                   DestBegin, NumElements);

  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI, SrcAddr.getElementType(),
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext =
      Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext =
      Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    const auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform simple memcpy for simple copying.
      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
      EmitAggregateAssign(Dest, Src, OriginalType);
    } else {
      // For arrays with complex element types perform element by element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with the single array element, so have to remap
            // destination and source variables to corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, DestElement);
            Remap.addPrivate(SrcVD, SrcElement);
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, SrcAddr);
    Remap.addPrivate(DestVD, DestAddr);
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

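// As an illustration (not tied to a specific test), for
//   #pragma omp parallel firstprivate(a)
// with 'int a[10]' the private copy below is initialized with a plain
// aggregate copy; for arrays of class type each element is instead
// initialized from the original via the clause's recorded copy expression.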
bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(D);
  bool DeviceConstTarget = getLangOpts().OpenMPIsTargetDevice &&
                           isOpenMPTargetExecutionDirective(EKind);
  bool FirstprivateIsLastprivate = false;
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlist())
      Lastprivates.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
          C->getKind());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, EKind);
  // Force emission of the firstprivate copy if the directive does not emit
  // outlined function, like omp for, omp simd, omp distribute etc.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    const auto *IRef = C->varlist_begin();
    const auto *InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit copy for firstprivate constant variables in target
      // regions, captured by reference.
      if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
          ConstantEmission CE = tryEmitAsConstant(&DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(&DRE);
          }
        } else {
          OriginalLVal = EmitLValue(&DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in current
          // captured region.
          AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
          const Expr *Init = VD->getInit();
          if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            LValue Dest = MakeAddrLValue(Emission.getAllocatedAddress(), Type);
            EmitAggregateAssign(Dest, OriginalLVal, Type);
          } else {
            EmitOMPAggregateAssign(
                Emission.getAllocatedAddress(), OriginalLVal.getAddress(), Type,
                [this, VDInit, Init](Address DestElement, Address SrcElement) {
                  // Clean up any temporaries needed by the
                  // initialization.
                  RunCleanupsScope InitScope(*this);
                  // Emit initialization for single element.
                  setAddrOfLocalVar(VDInit, SrcElement);
                  EmitAnyExprToMem(Init, DestElement,
                                   Init->getType().getQualifiers(),
                                   /*IsInitializer*/ false);
                  LocalDeclMap.erase(VDInit);
                });
          }
          EmitAutoVarCleanups(Emission);
          IsRegistered =
              PrivateScope.addPrivate(OrigVD, Emission.getAllocatedAddress());
        } else {
          Address OriginalAddr = OriginalLVal.getAddress();
          // Emit private VarDecl with copy init.
          // Remap temp VDInit variable to the address of the original
          // variable (for proper handling of captured global variables).
          setAddrOfLocalVar(VDInit, OriginalAddr);
          EmitDecl(*VD);
          LocalDeclMap.erase(VDInit);
          Address VDAddr = GetAddrOfLocalVar(VD);
          if (ThisFirstprivateIsLastprivate &&
              Lastprivates[OrigVD->getCanonicalDecl()] ==
                  OMPC_LASTPRIVATE_conditional) {
            // Create/init special variable for lastprivate conditionals.
            llvm::Value *V =
                EmitLoadOfScalar(MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                AlignmentSource::Decl),
                                 (*IRef)->getExprLoc());
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                *this, OrigVD);
            EmitStoreOfScalar(V, MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                AlignmentSource::Decl));
            LocalDeclMap.erase(VD);
            setAddrOfLocalVar(VD, VDAddr);
          }
          IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        EmitDecl(*VD);
        // Emit private VarDecl with copy init.
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, GetAddrOfLocalVar(VD));
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code with
        // TLS support, the address is passed from the master as a field in the
        // captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress();
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      CGM.getTypes().ConvertTypeForMem(VD->getType()),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress();
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread; if it
          // is, there is no need to copy the data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          // TODO: Avoid ptrtoint conversion.
          auto *MasterAddrInt = Builder.CreatePtrToInt(
              MasterAddr.emitRawPointer(*this), CGM.IntPtrTy);
          auto *PrivateAddrInt = Builder.CreatePtrToInt(
              PrivateAddr.emitRawPointer(*this), CGM.IntPtrTy);
          Builder.CreateCondBr(
              Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
              CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of copying procedure for non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(D);
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(EKind)) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(EKind) && !getLangOpts().OpenMPSimd)
      break;
    const auto *IRef = C->varlist_begin();
    const auto *IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for future update at the end
      // of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization; it is done in the
      // runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        PrivateScope.addPrivate(DestVD, EmitLValue(&DRE).getAddress());
        // Check if the variable is also a firstprivate: in this case IInit is
        // not generated. Initialization of this variable will happen in the
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          Address VDAddr = Address::invalid();
          if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                *this, OrigVD);
            setAddrOfLocalVar(VD, VDAddr);
          } else {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            VDAddr = GetAddrOfLocalVar(VD);
          }
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    // Emit implicit barrier if at least one lastprivate conditional is found
    // and this is not simd mode.
    if (!getLangOpts().OpenMPSimd &&
        llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
                     [](const OMPLastprivateClause *C) {
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
                     })) {
      CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
                                             OMPD_unknown,
                                             /*EmitChecks=*/false,
                                             /*ForceSimpleCall=*/true);
    }
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable for a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr = Address(
              Builder.CreateLoad(PrivateAddr),
              CGM.getTypes().ConvertTypeForMem(RefTy->getPointeeType()),
              CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
        // Store the last value to the private copy in the last iteration.
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
              *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
              (*IRef)->getExprLoc());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}

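// Inscan reductions (e.g. '#pragma omp for reduction(inscan, +:x)') are
// handled in a separate pass over the clauses, selected by the ForInscan
// flag below; the 'task' reduction modifier additionally initializes a
// task-reduction descriptor through the runtime.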
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  OMPTaskDataTy Data;
  SmallVector<const Expr *, 4> TaskLHSs;
  SmallVector<const Expr *, 4> TaskRHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
      continue;
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    if (C->getModifier() == OMPC_REDUCTION_task) {
      Data.ReductionVars.append(C->privates().begin(), C->privates().end());
      Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
      Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
      Data.ReductionOps.append(C->reduction_ops().begin(),
                               C->reduction_ops().end());
      TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
      TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    }
  }
  ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto *ILHS = LHSs.begin();
  auto *IRHS = RHSs.begin();
  auto *IPriv = Privates.begin();
  for (const Expr *IRef : Shareds) {
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedOrigLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count).getAddress(),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered =
        PrivateScope.addPrivate(RedCG.getBaseDecl(Count), BaseAddr);
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<ArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD,
                              RedCG.getSharedLValue(Count).getAddress());
      PrivateScope.addPrivate(RHSVD, GetAddrOfLocalVar(PrivateVD));
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD,
                              RedCG.getSharedLValue(Count).getAddress());
      PrivateScope.addPrivate(RHSVD,
                              GetAddrOfLocalVar(PrivateVD).withElementType(
                                  ConvertTypeForMem(RHSVD->getType())));
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr =
            OriginalAddr.withElementType(ConvertTypeForMem(LHSVD->getType()));
      }
      PrivateScope.addPrivate(LHSVD, OriginalAddr);
      PrivateScope.addPrivate(
          RHSVD, IsArray ? GetAddrOfLocalVar(PrivateVD).withElementType(
                               ConvertTypeForMem(RHSVD->getType()))
                         : GetAddrOfLocalVar(PrivateVD));
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
  if (!Data.ReductionVars.empty()) {
    OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(D);
    Data.IsReductionWithTaskMod = true;
    Data.IsWorksharingReduction = isOpenMPWorksharingDirective(EKind);
    llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
        *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
    const Expr *TaskRedRef = nullptr;
    switch (EKind) {
    case OMPD_parallel:
      TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_for:
      TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_sections:
      TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_for:
      TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_master:
      TaskRedRef =
          cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_parallel_sections:
      TaskRedRef =
          cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
      break;
    case OMPD_target_parallel:
      TaskRedRef =
          cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
1417 break;
1418 case OMPD_target_parallel_for:
1419 TaskRedRef =
1420 cast<OMPTargetParallelForDirective>(Val: D).getTaskReductionRefExpr();
1421 break;
1422 case OMPD_distribute_parallel_for:
1423 TaskRedRef =
1424 cast<OMPDistributeParallelForDirective>(Val: D).getTaskReductionRefExpr();
1425 break;
1426 case OMPD_teams_distribute_parallel_for:
1427 TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(Val: D)
1428 .getTaskReductionRefExpr();
1429 break;
1430 case OMPD_target_teams_distribute_parallel_for:
1431 TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(Val: D)
1432 .getTaskReductionRefExpr();
1433 break;
1434 case OMPD_simd:
1435 case OMPD_for_simd:
1436 case OMPD_section:
1437 case OMPD_single:
1438 case OMPD_master:
1439 case OMPD_critical:
1440 case OMPD_parallel_for_simd:
1441 case OMPD_task:
1442 case OMPD_taskyield:
1443 case OMPD_error:
1444 case OMPD_barrier:
1445 case OMPD_taskwait:
1446 case OMPD_taskgroup:
1447 case OMPD_flush:
1448 case OMPD_depobj:
1449 case OMPD_scan:
1450 case OMPD_ordered:
1451 case OMPD_atomic:
1452 case OMPD_teams:
1453 case OMPD_target:
1454 case OMPD_cancellation_point:
1455 case OMPD_cancel:
1456 case OMPD_target_data:
1457 case OMPD_target_enter_data:
1458 case OMPD_target_exit_data:
1459 case OMPD_taskloop:
1460 case OMPD_taskloop_simd:
1461 case OMPD_master_taskloop:
1462 case OMPD_master_taskloop_simd:
1463 case OMPD_parallel_master_taskloop:
1464 case OMPD_parallel_master_taskloop_simd:
1465 case OMPD_distribute:
1466 case OMPD_target_update:
1467 case OMPD_distribute_parallel_for_simd:
1468 case OMPD_distribute_simd:
1469 case OMPD_target_parallel_for_simd:
1470 case OMPD_target_simd:
1471 case OMPD_teams_distribute:
1472 case OMPD_teams_distribute_simd:
1473 case OMPD_teams_distribute_parallel_for_simd:
1474 case OMPD_target_teams:
1475 case OMPD_target_teams_distribute:
1476 case OMPD_target_teams_distribute_parallel_for_simd:
1477 case OMPD_target_teams_distribute_simd:
1478 case OMPD_declare_target:
1479 case OMPD_end_declare_target:
1480 case OMPD_threadprivate:
1481 case OMPD_allocate:
1482 case OMPD_declare_reduction:
1483 case OMPD_declare_mapper:
1484 case OMPD_declare_simd:
1485 case OMPD_requires:
1486 case OMPD_declare_variant:
1487 case OMPD_begin_declare_variant:
1488 case OMPD_end_declare_variant:
1489 case OMPD_unknown:
1490 default:
1491 llvm_unreachable("Unexpected directive with task reductions.");
1492 }
1493
1494 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: TaskRedRef)->getDecl());
1495 EmitVarDecl(D: *VD);
1496 EmitStoreOfScalar(Value: ReductionDesc, Addr: GetAddrOfLocalVar(VD),
1497 /*Volatile=*/false, Ty: TaskRedRef->getType());
1498 }
1499}
1500
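// Finalization combines the per-thread private copies back into the original
// variables. A sketch of when the nowait form of the runtime reduction is
// used:
// ```
// #pragma omp for reduction(+ : Sum) nowait
// ```
// With 'nowait' (and likewise for parallel directives, which always end with
// an implicit barrier) the reduction is emitted without an extra barrier.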
1501void CodeGenFunction::EmitOMPReductionClauseFinal(
1502 const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1503 if (!HaveInsertPoint())
1504 return;
1505 llvm::SmallVector<const Expr *, 8> Privates;
1506 llvm::SmallVector<const Expr *, 8> LHSExprs;
1507 llvm::SmallVector<const Expr *, 8> RHSExprs;
1508 llvm::SmallVector<const Expr *, 8> ReductionOps;
1509 llvm::SmallVector<bool, 8> IsPrivateVarReduction;
1510 bool HasAtLeastOneReduction = false;
1511 bool IsReductionWithTaskMod = false;
1512 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1513 // Do not emit for inscan reductions.
1514 if (C->getModifier() == OMPC_REDUCTION_inscan)
1515 continue;
1516 HasAtLeastOneReduction = true;
1517 Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
1518 LHSExprs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
1519 RHSExprs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
1520 IsPrivateVarReduction.append(in_start: C->private_var_reduction_flags().begin(),
1521 in_end: C->private_var_reduction_flags().end());
1522 ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
1523 IsReductionWithTaskMod =
1524 IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
1525 }
1526 if (HasAtLeastOneReduction) {
1527 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
1528 if (IsReductionWithTaskMod) {
1529 CGM.getOpenMPRuntime().emitTaskReductionFini(
1530 CGF&: *this, Loc: D.getBeginLoc(), IsWorksharingReduction: isOpenMPWorksharingDirective(DKind: EKind));
1531 }
1532 bool TeamsLoopCanBeParallel = false;
1533 if (auto *TTLD = dyn_cast<OMPTargetTeamsGenericLoopDirective>(Val: &D))
1534 TeamsLoopCanBeParallel = TTLD->canBeParallelFor();
1535 bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1536 isOpenMPParallelDirective(DKind: EKind) ||
1537 TeamsLoopCanBeParallel || ReductionKind == OMPD_simd;
1538 bool SimpleReduction = ReductionKind == OMPD_simd;
    // Emit a nowait reduction if the nowait clause is present or the
    // directive is a parallel directive (it always has an implicit barrier).
1541 CGM.getOpenMPRuntime().emitReduction(
1542 CGF&: *this, Loc: D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
1543 Options: {.WithNowait: WithNowait, .SimpleReduction: SimpleReduction, .IsPrivateVarReduction: IsPrivateVarReduction, .ReductionKind: ReductionKind});
1544 }
1545}
1546
1547static void emitPostUpdateForReductionClause(
1548 CodeGenFunction &CGF, const OMPExecutableDirective &D,
1549 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1550 if (!CGF.HaveInsertPoint())
1551 return;
1552 llvm::BasicBlock *DoneBB = nullptr;
1553 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1554 if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
1555 if (!DoneBB) {
1556 if (llvm::Value *Cond = CondGen(CGF)) {
          // When the first post-update expression is found, emit the
          // conditional block if it was requested.
1559 llvm::BasicBlock *ThenBB = CGF.createBasicBlock(name: ".omp.reduction.pu");
1560 DoneBB = CGF.createBasicBlock(name: ".omp.reduction.pu.done");
1561 CGF.Builder.CreateCondBr(Cond, True: ThenBB, False: DoneBB);
1562 CGF.EmitBlock(BB: ThenBB);
1563 }
1564 }
1565 CGF.EmitIgnoredExpr(E: PostUpdate);
1566 }
1567 }
1568 if (DoneBB)
1569 CGF.EmitBlock(BB: DoneBB, /*IsFinished=*/true);
1570}
1571
1572namespace {
/// Codegen lambda for appending distribute lower and upper bounds to the
/// outlined parallel function. This is necessary for combined constructs
/// such as 'distribute parallel for'.
1576typedef llvm::function_ref<void(CodeGenFunction &,
1577 const OMPExecutableDirective &,
1578 llvm::SmallVectorImpl<llvm::Value *> &)>
1579 CodeGenBoundParametersTy;
1580} // anonymous namespace
1581
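// Lastprivate conditional tracking (OpenMP >= 5.0) applies to directives
// such as
// ```
// #pragma omp parallel for lastprivate(conditional : X)
// for (int I = 0; I < N; ++I)
//   if (A[I])
//     X = I;
// ```
// where 'X' must get its value from the lexically last iteration that
// actually assigned it. The helper below scans the clauses for scalar
// variables whose updates may have to be tracked in the enclosing region.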
1582static void
1583checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
1584 const OMPExecutableDirective &S) {
1585 if (CGF.getLangOpts().OpenMP < 50)
1586 return;
1587 llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
1588 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
1589 for (const Expr *Ref : C->varlist()) {
1590 if (!Ref->getType()->isScalarType())
1591 continue;
1592 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
1593 if (!DRE)
1594 continue;
1595 PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl()));
1596 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: Ref);
1597 }
1598 }
1599 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
1600 for (const Expr *Ref : C->varlist()) {
1601 if (!Ref->getType()->isScalarType())
1602 continue;
1603 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
1604 if (!DRE)
1605 continue;
1606 PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl()));
1607 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: Ref);
1608 }
1609 }
1610 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
1611 for (const Expr *Ref : C->varlist()) {
1612 if (!Ref->getType()->isScalarType())
1613 continue;
1614 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
1615 if (!DRE)
1616 continue;
1617 PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl()));
1618 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: Ref);
1619 }
1620 }
  // Privates should not be analyzed since they are not captured at all.
  // Task reductions may be skipped - tasks are ignored.
  // Firstprivates do not return a value but may be passed by reference - no
  // need to check for an updated lastprivate conditional.
1625 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
1626 for (const Expr *Ref : C->varlist()) {
1627 if (!Ref->getType()->isScalarType())
1628 continue;
1629 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
1630 if (!DRE)
1631 continue;
1632 PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl()));
1633 }
1634 }
1635 CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
1636 CGF, D: S, IgnoredDecls: PrivateDecls);
1637}
1638
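// Common lowering for 'parallel' and the parallel part of combined
// directives. An illustrative (hypothetical) source-level shape of the
// clauses handled below:
// ```
// #pragma omp parallel if(parallel : Cond) num_threads(4) proc_bind(close)
// ```
// The num_threads value (with its optional OpenMP 6.0 modifier and
// severity/message clauses), the proc_bind kind, and the matching if-clause
// condition are all forwarded to the runtime call that forks the region.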
1639static void emitCommonOMPParallelDirective(
1640 CodeGenFunction &CGF, const OMPExecutableDirective &S,
1641 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1642 const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1643 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_parallel);
1644 llvm::Value *NumThreads = nullptr;
1645 OpenMPNumThreadsClauseModifier Modifier = OMPC_NUMTHREADS_unknown;
1646 // OpenMP 6.0, 10.4: "If no severity clause is specified then the effect is as
1647 // if sev-level is fatal."
1648 OpenMPSeverityClauseKind Severity = OMPC_SEVERITY_fatal;
1649 clang::Expr *Message = nullptr;
1650 SourceLocation SeverityLoc = SourceLocation();
1651 SourceLocation MessageLoc = SourceLocation();
1652
1653 llvm::Function *OutlinedFn =
1654 CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1655 CGF, D: S, ThreadIDVar: *CS->getCapturedDecl()->param_begin(), InnermostKind,
1656 CodeGen);
1657
1658 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1659 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1660 NumThreads = CGF.EmitScalarExpr(E: NumThreadsClause->getNumThreads(),
1661 /*IgnoreResultAssign=*/true);
1662 Modifier = NumThreadsClause->getModifier();
1663 if (const auto *MessageClause = S.getSingleClause<OMPMessageClause>()) {
1664 Message = MessageClause->getMessageString();
1665 MessageLoc = MessageClause->getBeginLoc();
1666 }
1667 if (const auto *SeverityClause = S.getSingleClause<OMPSeverityClause>()) {
1668 Severity = SeverityClause->getSeverityKind();
1669 SeverityLoc = SeverityClause->getBeginLoc();
1670 }
1671 CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1672 CGF, NumThreads, Loc: NumThreadsClause->getBeginLoc(), Modifier, Severity,
1673 SeverityLoc, Message, MessageLoc);
1674 }
1675 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1676 CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1677 CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1678 CGF, ProcBind: ProcBindClause->getProcBindKind(), Loc: ProcBindClause->getBeginLoc());
1679 }
1680 const Expr *IfCond = nullptr;
1681 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1682 if (C->getNameModifier() == OMPD_unknown ||
1683 C->getNameModifier() == OMPD_parallel) {
1684 IfCond = C->getCondition();
1685 break;
1686 }
1687 }
1688
1689 OMPParallelScope Scope(CGF, S);
1690 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  // Combining 'distribute' with 'for' requires sharing each 'distribute'
  // chunk's lower and upper bounds with the pragma 'for' chunking mechanism.
  // The following lambda takes care of appending the lower and upper bound
  // parameters when necessary.
1695 CodeGenBoundParameters(CGF, S, CapturedVars);
1696 CGF.GenerateOpenMPCapturedVars(S: *CS, CapturedVars);
1697 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, Loc: S.getBeginLoc(), OutlinedFn,
1698 CapturedVars, IfCond, NumThreads,
1699 NumThreadsModifier: Modifier, Severity, Message);
1700}
1701
1702static bool isAllocatableDecl(const VarDecl *VD) {
1703 const VarDecl *CVD = VD->getCanonicalDecl();
1704 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
1705 return false;
1706 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1707 // Use the default allocation.
1708 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
1709 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
1710 !AA->getAllocator());
1711}
1712
1713static void emitEmptyBoundParameters(CodeGenFunction &,
1714 const OMPExecutableDirective &,
1715 llvm::SmallVectorImpl<llvm::Value *> &) {}
1716
1717static void emitOMPCopyinClause(CodeGenFunction &CGF,
1718 const OMPExecutableDirective &S) {
1719 bool Copyins = CGF.EmitOMPCopyinClause(D: S);
1720 if (Copyins) {
    // Emit an implicit barrier to synchronize threads and avoid data races
    // while propagating the master thread's values of threadprivate
    // variables to the local instances in all other implicit threads.
1724 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1725 CGF, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false,
1726 /*ForceSimpleCall=*/true);
1727 }
1728}
1729
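// Allocation through the OMPIRBuilder for locals marked with the 'allocate'
// directive and a non-default allocator, e.g.
// ```
// int Buf[64];
// #pragma omp allocate(Buf) allocator(omp_high_bw_mem_alloc)
// ```
// As a worked example of the size rounding below, assuming a size of 10
// bytes and an alignment of 8: ((10 + 8 - 1) / 8) * 8 == 16.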
1730Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
1731 CodeGenFunction &CGF, const VarDecl *VD) {
1732 CodeGenModule &CGM = CGF.CGM;
1733 auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1734
1735 if (!VD)
1736 return Address::invalid();
1737 const VarDecl *CVD = VD->getCanonicalDecl();
1738 if (!isAllocatableDecl(VD: CVD))
1739 return Address::invalid();
1740 llvm::Value *Size;
1741 CharUnits Align = CGM.getContext().getDeclAlign(D: CVD);
1742 if (CVD->getType()->isVariablyModifiedType()) {
1743 Size = CGF.getTypeSize(Ty: CVD->getType());
1744 // Align the size: ((size + align - 1) / align) * align
1745 Size = CGF.Builder.CreateNUWAdd(
1746 LHS: Size, RHS: CGM.getSize(numChars: Align - CharUnits::fromQuantity(Quantity: 1)));
1747 Size = CGF.Builder.CreateUDiv(LHS: Size, RHS: CGM.getSize(numChars: Align));
1748 Size = CGF.Builder.CreateNUWMul(LHS: Size, RHS: CGM.getSize(numChars: Align));
1749 } else {
1750 CharUnits Sz = CGM.getContext().getTypeSizeInChars(T: CVD->getType());
1751 Size = CGM.getSize(numChars: Sz.alignTo(Align));
1752 }
1753
1754 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1755 assert(AA->getAllocator() &&
1756 "Expected allocator expression for non-default allocator.");
1757 llvm::Value *Allocator = CGF.EmitScalarExpr(E: AA->getAllocator());
  // According to the standard, the original allocator type is an enum
  // (integer). Convert it to a pointer type, if required.
1760 if (Allocator->getType()->isIntegerTy())
1761 Allocator = CGF.Builder.CreateIntToPtr(V: Allocator, DestTy: CGM.VoidPtrTy);
1762 else if (Allocator->getType()->isPointerTy())
1763 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(V: Allocator,
1764 DestTy: CGM.VoidPtrTy);
1765
1766 llvm::Value *Addr = OMPBuilder.createOMPAlloc(
1767 Loc: CGF.Builder, Size, Allocator,
1768 Name: getNameWithSeparators(Parts: {CVD->getName(), ".void.addr"}, FirstSeparator: ".", Separator: "."));
1769 llvm::CallInst *FreeCI =
1770 OMPBuilder.createOMPFree(Loc: CGF.Builder, Addr, Allocator);
1771
1772 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(Kind: NormalAndEHCleanup, A: FreeCI);
1773 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1774 V: Addr,
1775 DestTy: CGF.ConvertTypeForMem(T: CGM.getContext().getPointerType(T: CVD->getType())),
1776 Name: getNameWithSeparators(Parts: {CVD->getName(), ".addr"}, FirstSeparator: ".", Separator: "."));
1777 return Address(Addr, CGF.ConvertTypeForMem(T: CVD->getType()), Align);
1778}
1779
1780Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
1781 CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
1782 SourceLocation Loc) {
1783 CodeGenModule &CGM = CGF.CGM;
1784 if (CGM.getLangOpts().OpenMPUseTLS &&
1785 CGM.getContext().getTargetInfo().isTLSSupported())
1786 return VDAddr;
1787
1788 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1789
1790 llvm::Type *VarTy = VDAddr.getElementType();
1791 llvm::Value *Data =
1792 CGF.Builder.CreatePointerCast(V: VDAddr.emitRawPointer(CGF), DestTy: CGM.Int8PtrTy);
1793 llvm::ConstantInt *Size = CGM.getSize(numChars: CGM.GetTargetTypeStoreSize(Ty: VarTy));
1794 std::string Suffix = getNameWithSeparators(Parts: {"cache", ""});
1795 llvm::Twine CacheName = Twine(CGM.getMangledName(GD: VD)).concat(Suffix);
1796
1797 llvm::CallInst *ThreadPrivateCacheCall =
1798 OMPBuilder.createCachedThreadPrivate(Loc: CGF.Builder, Pointer: Data, Size, Name: CacheName);
1799
1800 return Address(ThreadPrivateCacheCall, CGM.Int8Ty, VDAddr.getAlignment());
1801}
1802
1803std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
1804 ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
1805 SmallString<128> Buffer;
1806 llvm::raw_svector_ostream OS(Buffer);
1807 StringRef Sep = FirstSeparator;
1808 for (StringRef Part : Parts) {
1809 OS << Sep << Part;
1810 Sep = Separator;
1811 }
1812 return OS.str().str();
1813}
1814
1815void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
1816 CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
1817 InsertPointTy CodeGenIP, Twine RegionName) {
1818 CGBuilderTy &Builder = CGF.Builder;
1819 Builder.restoreIP(IP: CodeGenIP);
1820 llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
1821 Suffix: "." + RegionName + ".after");
1822
1823 {
1824 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
1825 CGF.EmitStmt(S: RegionBodyStmt);
1826 }
1827
1828 if (Builder.saveIP().isSet())
1829 Builder.CreateBr(Dest: FiniBB);
1830}
1831
1832void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
1833 CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
1834 InsertPointTy CodeGenIP, Twine RegionName) {
1835 CGBuilderTy &Builder = CGF.Builder;
1836 Builder.restoreIP(IP: CodeGenIP);
1837 llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
1838 Suffix: "." + RegionName + ".after");
1839
1840 {
1841 OMPBuilderCBHelpers::OutlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
1842 CGF.EmitStmt(S: RegionBodyStmt);
1843 }
1844
1845 if (Builder.saveIP().isSet())
1846 Builder.CreateBr(Dest: FiniBB);
1847}
1848
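// Two lowering paths for '#pragma omp parallel': the OpenMPIRBuilder path
// (when the OpenMPIRBuilder language option is enabled) and the classic
// path, which outlines the region and emits a fork call. A rough sketch of
// the classic lowering (not the exact emitted IR):
// ```
// // void .omp_outlined.(i32 *GTid, i32 *BTid, <captured vars>...)
// // call __kmpc_fork_call(loc, <num captures>, .omp_outlined., <vars>...)
// ```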
1849void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1850 if (CGM.getLangOpts().OpenMPIRBuilder) {
1851 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1852 // Check if we have any if clause associated with the directive.
1853 llvm::Value *IfCond = nullptr;
1854 if (const auto *C = S.getSingleClause<OMPIfClause>())
1855 IfCond = EmitScalarExpr(E: C->getCondition(),
1856 /*IgnoreResultAssign=*/true);
1857
1858 llvm::Value *NumThreads = nullptr;
1859 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
1860 NumThreads = EmitScalarExpr(E: NumThreadsClause->getNumThreads(),
1861 /*IgnoreResultAssign=*/true);
1862
1863 ProcBindKind ProcBind = OMP_PROC_BIND_default;
1864 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
1865 ProcBind = ProcBindClause->getProcBindKind();
1866
1867 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1868
    // The cleanup callback that finalizes all variables at the given
    // location and thus calls destructors, etc.
1871 auto FiniCB = [this](InsertPointTy IP) {
1872 OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
1873 return llvm::Error::success();
1874 };
1875
1876 // Privatization callback that performs appropriate action for
1877 // shared/private/firstprivate/lastprivate/copyin/... variables.
1878 //
1879 // TODO: This defaults to shared right now.
1880 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1881 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
1882 // The next line is appropriate only for variables (Val) with the
1883 // data-sharing attribute "shared".
1884 ReplVal = &Val;
1885
1886 return CodeGenIP;
1887 };
1888
1889 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_parallel);
1890 const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();
1891
1892 auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
1893 InsertPointTy CodeGenIP) {
1894 OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
1895 CGF&: *this, RegionBodyStmt: ParallelRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "parallel");
1896 return llvm::Error::success();
1897 };
1898
1899 CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
1900 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
1901 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
1902 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
1903 llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(
1904 ValOrErr: OMPBuilder.createParallel(Loc: Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
1905 IfCondition: IfCond, NumThreads, ProcBind, IsCancellable: S.hasCancel()));
1906 Builder.restoreIP(IP: AfterIP);
1907 return;
1908 }
1909
1910 // Emit parallel region as a standalone region.
1911 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
1912 Action.Enter(CGF);
1913 OMPPrivateScope PrivateScope(CGF);
1914 emitOMPCopyinClause(CGF, S);
1915 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
1916 CGF.EmitOMPPrivateClause(D: S, PrivateScope);
1917 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
1918 (void)PrivateScope.Privatize();
1919 CGF.EmitStmt(S: S.getCapturedStmt(RegionKind: OMPD_parallel)->getCapturedStmt());
1920 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
1921 };
1922 {
1923 auto LPCRegion =
1924 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
1925 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_parallel, CodeGen,
1926 CodeGenBoundParameters: emitEmptyBoundParameters);
1927 emitPostUpdateForReductionClause(CGF&: *this, D: S,
1928 CondGen: [](CodeGenFunction &) { return nullptr; });
1929 }
1930 // Check for outer lastprivate conditional update.
1931 checkForLastprivateConditionalUpdate(CGF&: *this, S);
1932}
1933
1934void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) {
1935 EmitStmt(S: S.getIfStmt());
1936}
1937
1938namespace {
1939/// RAII to handle scopes for loop transformation directives.
1940class OMPTransformDirectiveScopeRAII {
1941 OMPLoopScope *Scope = nullptr;
1942 CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr;
1943 CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;
1944
1945 OMPTransformDirectiveScopeRAII(const OMPTransformDirectiveScopeRAII &) =
1946 delete;
1947 OMPTransformDirectiveScopeRAII &
1948 operator=(const OMPTransformDirectiveScopeRAII &) = delete;
1949
1950public:
1951 OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
1952 if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(Val: S)) {
1953 Scope = new OMPLoopScope(CGF, *Dir);
1954 CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
1955 CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
1956 } else if (const auto *Dir =
1957 dyn_cast<OMPCanonicalLoopSequenceTransformationDirective>(
1958 Val: S)) {
      // For simplicity we reuse the loop scope, similarly to what
      // OMPCanonicalLoopNestTransformationDirective does by being a subclass
      // of OMPLoopBasedDirective.
1962 Scope = new OMPLoopScope(CGF, *Dir);
1963 CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
1964 CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
1965 }
1966 }
1967 ~OMPTransformDirectiveScopeRAII() {
1968 if (!Scope)
1969 return;
1970 delete CapInfoRAII;
1971 delete CGSI;
1972 delete Scope;
1973 }
1974};
1975} // namespace
1976
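// emitBody descends through the loop nest associated with a directive and
// emits the innermost body. An example nest for MaxLevel == 2:
// ```
// #pragma omp for collapse(2)
// for (int I = 0; I < N; ++I)   // Level 0
//   for (int J = 0; J < M; ++J) // Level 1
//     Body(I, J);               // emitted by the final EmitStmt
// ```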
1977static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
1978 int MaxLevel, int Level = 0) {
1979 assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
1980 const Stmt *SimplifiedS = S->IgnoreContainers();
1981 if (const auto *CS = dyn_cast<CompoundStmt>(Val: SimplifiedS)) {
1982 PrettyStackTraceLoc CrashInfo(
1983 CGF.getContext().getSourceManager(), CS->getLBracLoc(),
1984 "LLVM IR generation of compound statement ('{}')");
1985
1986 // Keep track of the current cleanup stack depth, including debug scopes.
1987 CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
1988 for (const Stmt *CurStmt : CS->body())
1989 emitBody(CGF, S: CurStmt, NextLoop, MaxLevel, Level);
1990 return;
1991 }
1992 if (SimplifiedS == NextLoop) {
1993 if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(Val: SimplifiedS))
1994 SimplifiedS = Dir->getTransformedStmt();
1995 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(Val: SimplifiedS))
1996 SimplifiedS = CanonLoop->getLoopStmt();
1997 if (const auto *For = dyn_cast<ForStmt>(Val: SimplifiedS)) {
1998 S = For->getBody();
1999 } else {
2000 assert(isa<CXXForRangeStmt>(SimplifiedS) &&
2001 "Expected canonical for loop or range-based for loop.");
2002 const auto *CXXFor = cast<CXXForRangeStmt>(Val: SimplifiedS);
2003 CGF.EmitStmt(S: CXXFor->getLoopVarStmt());
2004 S = CXXFor->getBody();
2005 }
2006 if (Level + 1 < MaxLevel) {
2007 NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
2008 CurStmt: S, /*TryImperfectlyNestedLoops=*/true);
2009 emitBody(CGF, S, NextLoop, MaxLevel, Level: Level + 1);
2010 return;
2011 }
2012 }
2013 CGF.EmitStmt(S);
2014}
2015
2016void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
2017 JumpDest LoopExit) {
2018 RunCleanupsScope BodyScope(*this);
2019 // Update counters values on current iteration.
2020 for (const Expr *UE : D.updates())
2021 EmitIgnoredExpr(E: UE);
2022 // Update the linear variables.
  // In distribute directives only loop counters may be marked as linear;
  // there is no need to generate code for them.
2025 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
2026 if (!isOpenMPDistributeDirective(DKind: EKind)) {
2027 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2028 for (const Expr *UE : C->updates())
2029 EmitIgnoredExpr(E: UE);
2030 }
2031 }
2032
2033 // On a continue in the body, jump to the end.
2034 JumpDest Continue = getJumpDestInCurrentScope(Name: "omp.body.continue");
2035 BreakContinueStack.push_back(Elt: BreakContinue(D, LoopExit, Continue));
2036 for (const Expr *E : D.finals_conditions()) {
2037 if (!E)
2038 continue;
2039 // Check that loop counter in non-rectangular nest fits into the iteration
2040 // space.
2041 llvm::BasicBlock *NextBB = createBasicBlock(name: "omp.body.next");
2042 EmitBranchOnBoolExpr(Cond: E, TrueBlock: NextBB, FalseBlock: Continue.getBlock(),
2043 TrueCount: getProfileCount(S: D.getBody()));
2044 EmitBlock(BB: NextBB);
2045 }
2046
2047 OMPPrivateScope InscanScope(*this);
2048 EmitOMPReductionClauseInit(D, PrivateScope&: InscanScope, /*ForInscan=*/true);
2049 bool IsInscanRegion = InscanScope.Privatize();
2050 if (IsInscanRegion) {
    // Need to remember the blocks before and after the scan directive
    // to dispatch them correctly depending on the clause used in
    // this directive, inclusive or exclusive. For the inclusive scan the
    // natural order of the blocks is used; for the exclusive clause the
    // blocks must be executed in reverse order.
2056 OMPBeforeScanBlock = createBasicBlock(name: "omp.before.scan.bb");
2057 OMPAfterScanBlock = createBasicBlock(name: "omp.after.scan.bb");
    // No need to allocate the inscan exit block; in simd mode it is selected
    // in the codegen for the scan directive.
2060 if (EKind != OMPD_simd && !getLangOpts().OpenMPSimd)
2061 OMPScanExitBlock = createBasicBlock(name: "omp.exit.inscan.bb");
2062 OMPScanDispatch = createBasicBlock(name: "omp.inscan.dispatch");
2063 EmitBranch(Block: OMPScanDispatch);
2064 EmitBlock(BB: OMPBeforeScanBlock);
2065 }
2066
2067 // Emit loop variables for C++ range loops.
2068 const Stmt *Body =
2069 D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
2070 // Emit loop body.
2071 emitBody(CGF&: *this, S: Body,
2072 NextLoop: OMPLoopBasedDirective::tryToFindNextInnerLoop(
2073 CurStmt: Body, /*TryImperfectlyNestedLoops=*/true),
2074 MaxLevel: D.getLoopsNumber());
2075
2076 // Jump to the dispatcher at the end of the loop body.
2077 if (IsInscanRegion)
2078 EmitBranch(Block: OMPScanExitBlock);
2079
2080 // The end (updates/cleanups).
2081 EmitBlock(BB: Continue.getBlock());
2082 BreakContinueStack.pop_back();
2083}
2084
2085using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;
2086
2087/// Emit a captured statement and return the function as well as its captured
2088/// closure context.
2089static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF,
2090 const CapturedStmt *S) {
2091 LValue CapStruct = ParentCGF.InitCapturedStruct(S: *S);
2092 CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true);
2093 std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI =
2094 std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(args: *S);
2095 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get());
2096 llvm::Function *F = CGF.GenerateCapturedStmtFunction(S: *S);
2097
2098 return {F, CapStruct.getPointer(CGF&: ParentCGF)};
2099}
2100
2101/// Emit a call to a previously captured closure.
2102static llvm::CallInst *
2103emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap,
2104 llvm::ArrayRef<llvm::Value *> Args) {
2105 // Append the closure context to the argument.
2106 SmallVector<llvm::Value *> EffectiveArgs;
2107 EffectiveArgs.reserve(N: Args.size() + 1);
2108 llvm::append_range(C&: EffectiveArgs, R&: Args);
2109 EffectiveArgs.push_back(Elt: Cap.second);
2110
2111 return ParentCGF.Builder.CreateCall(Callee: Cap.first, Args: EffectiveArgs);
2112}
2113
2114llvm::CanonicalLoopInfo *
2115CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
2116 assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");
2117
  // The caller is processing the loop-associated directive that contains the
  // \p Depth loops nested in \p S. Put the previous pending loop-associated
  // directive onto the stack. If the current loop-associated directive is a
  // loop transformation directive, it will push its generated loops onto the
  // stack such that, together with the loops left here, they form the
  // combined loop nest for the parent loop-associated directive.
2124 int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth;
2125 ExpectedOMPLoopDepth = Depth;
2126
2127 EmitStmt(S);
2128 assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");
2129
2130 // The last added loop is the outermost one.
2131 llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back();
2132
2133 // Pop the \p Depth loops requested by the call from that stack and restore
2134 // the previous context.
2135 OMPLoopNestStack.pop_back_n(NumItems: Depth);
2136 ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth;
2137
2138 return Result;
2139}
2140
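// For a canonical loop such as
// ```
// for (int I = Lo; I < Hi; I += Step)
//   Body(I);
// ```
// Sema provides two captured helper functions: a distance function that
// computes the trip count (conceptually (Hi - Lo + Step - 1) / Step for this
// shape) and a loop-variable function that maps a logical iteration number V
// back to the user's variable (conceptually I = Lo + V * Step). The codegen
// below calls the former once and the latter on every iteration.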
2141void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
2142 const Stmt *SyntacticalLoop = S->getLoopStmt();
2143 if (!getLangOpts().OpenMPIRBuilder) {
2144 // Ignore if OpenMPIRBuilder is not enabled.
2145 EmitStmt(S: SyntacticalLoop);
2146 return;
2147 }
2148
2149 LexicalScope ForScope(*this, S->getSourceRange());
2150
2151 // Emit init statements. The Distance/LoopVar funcs may reference variable
2152 // declarations they contain.
2153 const Stmt *BodyStmt;
2154 if (const auto *For = dyn_cast<ForStmt>(Val: SyntacticalLoop)) {
2155 if (const Stmt *InitStmt = For->getInit())
2156 EmitStmt(S: InitStmt);
2157 BodyStmt = For->getBody();
2158 } else if (const auto *RangeFor =
2159 dyn_cast<CXXForRangeStmt>(Val: SyntacticalLoop)) {
2160 if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt())
2161 EmitStmt(S: RangeStmt);
2162 if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt())
2163 EmitStmt(S: BeginStmt);
2164 if (const DeclStmt *EndStmt = RangeFor->getEndStmt())
2165 EmitStmt(S: EndStmt);
2166 if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt())
2167 EmitStmt(S: LoopVarStmt);
2168 BodyStmt = RangeFor->getBody();
2169 } else
2170 llvm_unreachable("Expected for-stmt or range-based for-stmt");
2171
2172 // Emit closure for later use. By-value captures will be captured here.
2173 const CapturedStmt *DistanceFunc = S->getDistanceFunc();
2174 EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(ParentCGF&: *this, S: DistanceFunc);
2175 const CapturedStmt *LoopVarFunc = S->getLoopVarFunc();
2176 EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(ParentCGF&: *this, S: LoopVarFunc);
2177
2178 // Call the distance function to get the number of iterations of the loop to
2179 // come.
2180 QualType LogicalTy = DistanceFunc->getCapturedDecl()
2181 ->getParam(i: 0)
2182 ->getType()
2183 .getNonReferenceType();
2184 RawAddress CountAddr = CreateMemTemp(T: LogicalTy, Name: ".count.addr");
2185 emitCapturedStmtCall(ParentCGF&: *this, Cap: DistanceClosure, Args: {CountAddr.getPointer()});
2186 llvm::Value *DistVal = Builder.CreateLoad(Addr: CountAddr, Name: ".count");
2187
2188 // Emit the loop structure.
2189 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
2190 auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
2191 llvm::Value *IndVar) {
2192 Builder.restoreIP(IP: CodeGenIP);
2193
2194 // Emit the loop body: Convert the logical iteration number to the loop
2195 // variable and emit the body.
2196 const DeclRefExpr *LoopVarRef = S->getLoopVarRef();
2197 LValue LCVal = EmitLValue(E: LoopVarRef);
2198 Address LoopVarAddress = LCVal.getAddress();
2199 emitCapturedStmtCall(ParentCGF&: *this, Cap: LoopVarClosure,
2200 Args: {LoopVarAddress.emitRawPointer(CGF&: *this), IndVar});
2201
2202 RunCleanupsScope BodyScope(*this);
2203 EmitStmt(S: BodyStmt);
2204 return llvm::Error::success();
2205 };
2206
2207 llvm::CanonicalLoopInfo *CL =
2208 cantFail(ValOrErr: OMPBuilder.createCanonicalLoop(Loc: Builder, BodyGenCB: BodyGen, TripCount: DistVal));
2209
2210 // Finish up the loop.
2211 Builder.restoreIP(IP: CL->getAfterIP());
2212 ForScope.ForceCleanup();
2213
2214 // Remember the CanonicalLoopInfo for parent AST nodes consuming it.
2215 OMPLoopNestStack.push_back(Elt: CL);
2216}
2217
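// Basic-block skeleton produced for the inner loop; the names match the
// createBasicBlock calls below:
// ```
// omp.inner.for.cond: branch on LoopCond to body or exit
// omp.inner.for.body: BodyGen(*this)
// omp.inner.for.inc:  IncExpr; back-edge to omp.inner.for.cond
// omp.inner.for.end:  fall-through block
// ```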
2218void CodeGenFunction::EmitOMPInnerLoop(
2219 const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
2220 const Expr *IncExpr,
2221 const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
2222 const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
2223 auto LoopExit = getJumpDestInCurrentScope(Name: "omp.inner.for.end");
2224
2225 // Start the loop with a block that tests the condition.
2226 auto CondBlock = createBasicBlock(name: "omp.inner.for.cond");
2227 EmitBlock(BB: CondBlock);
2228 const SourceRange R = S.getSourceRange();
2229
2230 // If attributes are attached, push to the basic block with them.
2231 const auto &OMPED = cast<OMPExecutableDirective>(Val: S);
2232 const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
2233 const Stmt *SS = ICS->getCapturedStmt();
2234 const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(Val: SS);
2235 OMPLoopNestStack.clear();
2236 if (AS)
2237 LoopStack.push(Header: CondBlock, Ctx&: CGM.getContext(), CGOpts: CGM.getCodeGenOpts(),
2238 Attrs: AS->getAttrs(), StartLoc: SourceLocToDebugLoc(Location: R.getBegin()),
2239 EndLoc: SourceLocToDebugLoc(Location: R.getEnd()));
2240 else
2241 LoopStack.push(Header: CondBlock, StartLoc: SourceLocToDebugLoc(Location: R.getBegin()),
2242 EndLoc: SourceLocToDebugLoc(Location: R.getEnd()));
2243
2244 // If there are any cleanups between here and the loop-exit scope,
2245 // create a block to stage a loop exit along.
2246 llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2247 if (RequiresCleanup)
2248 ExitBlock = createBasicBlock(name: "omp.inner.for.cond.cleanup");
2249
2250 llvm::BasicBlock *LoopBody = createBasicBlock(name: "omp.inner.for.body");
2251
2252 // Emit condition.
2253 EmitBranchOnBoolExpr(Cond: LoopCond, TrueBlock: LoopBody, FalseBlock: ExitBlock, TrueCount: getProfileCount(S: &S));
2254 if (ExitBlock != LoopExit.getBlock()) {
2255 EmitBlock(BB: ExitBlock);
2256 EmitBranchThroughCleanup(Dest: LoopExit);
2257 }
2258
2259 EmitBlock(BB: LoopBody);
2260 incrementProfileCounter(S: &S);
2261
2262 // Create a block for the increment.
2263 JumpDest Continue = getJumpDestInCurrentScope(Name: "omp.inner.for.inc");
2264 BreakContinueStack.push_back(Elt: BreakContinue(S, LoopExit, Continue));
2265
2266 BodyGen(*this);
2267
2268 // Emit "IV = IV + 1" and a back-edge to the condition block.
2269 EmitBlock(BB: Continue.getBlock());
2270 EmitIgnoredExpr(E: IncExpr);
2271 PostIncGen(*this);
2272 BreakContinueStack.pop_back();
2273 EmitBranch(Block: CondBlock);
2274 LoopStack.pop();
2275 // Emit the fall-through block.
2276 EmitBlock(BB: LoopExit.getBlock());
2277}
2278
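// Linear clause handling, e.g.
// ```
// #pragma omp simd linear(X : 2)
// for (int I = 0; I < N; ++I)
//   use(X);
// ```
// gives the loop a private 'X' whose value on iteration I is the original
// value plus I * 2; the inits are emitted here and the final value is
// written back in EmitOMPLinearClauseFinal.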
2279bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
2280 if (!HaveInsertPoint())
2281 return false;
2282 // Emit inits for the linear variables.
2283 bool HasLinears = false;
2284 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2285 for (const Expr *Init : C->inits()) {
2286 HasLinears = true;
2287 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: Init)->getDecl());
2288 if (const auto *Ref =
2289 dyn_cast<DeclRefExpr>(Val: VD->getInit()->IgnoreImpCasts())) {
2290 AutoVarEmission Emission = EmitAutoVarAlloca(var: *VD);
2291 const auto *OrigVD = cast<VarDecl>(Val: Ref->getDecl());
2292 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
2293 CapturedStmtInfo->lookup(VD: OrigVD) != nullptr,
2294 VD->getInit()->getType(), VK_LValue,
2295 VD->getInit()->getExprLoc());
2296 EmitExprAsInit(
2297 init: &DRE, D: VD,
2298 lvalue: MakeAddrLValue(Addr: Emission.getAllocatedAddress(), T: VD->getType()),
2299 /*capturedByInit=*/false);
2300 EmitAutoVarCleanups(emission: Emission);
2301 } else {
2302 EmitVarDecl(D: *VD);
2303 }
2304 }
2305 // Emit the linear steps for the linear clauses.
2306 // If a step is not constant, it is pre-calculated before the loop.
2307 if (const auto *CS = cast_or_null<BinaryOperator>(Val: C->getCalcStep()))
2308 if (const auto *SaveRef = cast<DeclRefExpr>(Val: CS->getLHS())) {
2309 EmitVarDecl(D: *cast<VarDecl>(Val: SaveRef->getDecl()));
2310 // Emit calculation of the linear step.
2311 EmitIgnoredExpr(E: CS);
2312 }
2313 }
2314 return HasLinears;
2315}
2316
2317void CodeGenFunction::EmitOMPLinearClauseFinal(
2318 const OMPLoopDirective &D,
2319 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2320 if (!HaveInsertPoint())
2321 return;
2322 llvm::BasicBlock *DoneBB = nullptr;
2323 // Emit the final values of the linear variables.
2324 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2325 auto IC = C->varlist_begin();
2326 for (const Expr *F : C->finals()) {
2327 if (!DoneBB) {
2328 if (llvm::Value *Cond = CondGen(*this)) {
          // When the first post-update expression is found, emit the
          // conditional block if it was requested.
2331 llvm::BasicBlock *ThenBB = createBasicBlock(name: ".omp.linear.pu");
2332 DoneBB = createBasicBlock(name: ".omp.linear.pu.done");
2333 Builder.CreateCondBr(Cond, True: ThenBB, False: DoneBB);
2334 EmitBlock(BB: ThenBB);
2335 }
2336 }
2337 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IC)->getDecl());
2338 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
2339 CapturedStmtInfo->lookup(VD: OrigVD) != nullptr,
2340 (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
2341 Address OrigAddr = EmitLValue(E: &DRE).getAddress();
2342 CodeGenFunction::OMPPrivateScope VarScope(*this);
2343 VarScope.addPrivate(LocalVD: OrigVD, Addr: OrigAddr);
2344 (void)VarScope.Privatize();
2345 EmitIgnoredExpr(E: F);
2346 ++IC;
2347 }
2348 if (const Expr *PostUpdate = C->getPostUpdateExpr())
2349 EmitIgnoredExpr(E: PostUpdate);
2350 }
2351 if (DoneBB)
2352 EmitBlock(BB: DoneBB, /*IsFinished=*/true);
2353}
2354
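// Aligned clause, e.g.
// ```
// #pragma omp simd aligned(Ptr : 64)
// ```
// becomes an alignment assumption on 'Ptr' so the vectorizer may use aligned
// accesses; when no explicit alignment is given, the target's default SIMD
// alignment for the pointee type is assumed.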
2355static void emitAlignedClause(CodeGenFunction &CGF,
2356 const OMPExecutableDirective &D) {
2357 if (!CGF.HaveInsertPoint())
2358 return;
2359 for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
2360 llvm::APInt ClauseAlignment(64, 0);
2361 if (const Expr *AlignmentExpr = Clause->getAlignment()) {
2362 auto *AlignmentCI =
2363 cast<llvm::ConstantInt>(Val: CGF.EmitScalarExpr(E: AlignmentExpr));
2364 ClauseAlignment = AlignmentCI->getValue();
2365 }
2366 for (const Expr *E : Clause->varlist()) {
2367 llvm::APInt Alignment(ClauseAlignment);
2368 if (Alignment == 0) {
2369 // OpenMP [2.8.1, Description]
2370 // If no optional parameter is specified, implementation-defined default
2371 // alignments for SIMD instructions on the target platforms are assumed.
2372 Alignment =
2373 CGF.getContext()
2374 .toCharUnitsFromBits(BitSize: CGF.getContext().getOpenMPDefaultSimdAlign(
2375 T: E->getType()->getPointeeType()))
2376 .getQuantity();
2377 }
2378 assert((Alignment == 0 || Alignment.isPowerOf2()) &&
2379 "alignment is not power of 2");
2380 if (Alignment != 0) {
2381 llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
2382 CGF.emitAlignmentAssumption(
2383 PtrValue, E, /*No second loc needed*/ AssumptionLoc: SourceLocation(),
2384 Alignment: llvm::ConstantInt::get(Context&: CGF.getLLVMContext(), V: Alignment));
2385 }
2386 }
2387 }
2388}
2389
2390void CodeGenFunction::EmitOMPPrivateLoopCounters(
2391 const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
2392 if (!HaveInsertPoint())
2393 return;
2394 auto I = S.private_counters().begin();
2395 for (const Expr *E : S.counters()) {
2396 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
2397 const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *I)->getDecl());
2398 // Emit var without initialization.
2399 AutoVarEmission VarEmission = EmitAutoVarAlloca(var: *PrivateVD);
2400 EmitAutoVarCleanups(emission: VarEmission);
2401 LocalDeclMap.erase(Val: PrivateVD);
2402 (void)LoopScope.addPrivate(LocalVD: VD, Addr: VarEmission.getAllocatedAddress());
2403 if (LocalDeclMap.count(Val: VD) || CapturedStmtInfo->lookup(VD) ||
2404 VD->hasGlobalStorage()) {
2405 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
2406 LocalDeclMap.count(Val: VD) || CapturedStmtInfo->lookup(VD),
2407 E->getType(), VK_LValue, E->getExprLoc());
2408 (void)LoopScope.addPrivate(LocalVD: PrivateVD, Addr: EmitLValue(E: &DRE).getAddress());
2409 } else {
2410 (void)LoopScope.addPrivate(LocalVD: PrivateVD, Addr: VarEmission.getAllocatedAddress());
2411 }
2412 ++I;
2413 }
2414 // Privatize extra loop counters used in loops for ordered(n) clauses.
2415 for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
2416 if (!C->getNumForLoops())
2417 continue;
2418 for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size();
2419 I < E; ++I) {
2420 const auto *DRE = cast<DeclRefExpr>(Val: C->getLoopCounter(NumLoop: I));
2421 const auto *VD = cast<VarDecl>(Val: DRE->getDecl());
2422 // Override only those variables that can be captured to avoid re-emission
2423 // of the variables declared within the loops.
2424 if (DRE->refersToEnclosingVariableOrCapture()) {
2425 (void)LoopScope.addPrivate(
2426 LocalVD: VD, Addr: CreateMemTemp(T: DRE->getType(), Name: VD->getName()));
2427 }
2428 }
2429 }
2430}
2431
2432static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
2433 const Expr *Cond, llvm::BasicBlock *TrueBlock,
2434 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
2435 if (!CGF.HaveInsertPoint())
2436 return;
2437 {
2438 CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
2439 CGF.EmitOMPPrivateLoopCounters(S, LoopScope&: PreCondScope);
2440 (void)PreCondScope.Privatize();
2441 // Get initial values of real counters.
2442 for (const Expr *I : S.inits()) {
2443 CGF.EmitIgnoredExpr(E: I);
2444 }
2445 }
2446 // Create temp loop control variables with their init values to support
2447 // non-rectangular loops.
2448 CodeGenFunction::OMPMapVars PreCondVars;
2449 for (const Expr *E : S.dependent_counters()) {
2450 if (!E)
2451 continue;
2452 assert(!E->getType().getNonReferenceType()->isRecordType() &&
2453 "dependent counter must not be an iterator.");
2454 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
2455 Address CounterAddr =
2456 CGF.CreateMemTemp(T: VD->getType().getNonReferenceType());
2457 (void)PreCondVars.setVarAddr(CGF, LocalVD: VD, TempAddr: CounterAddr);
2458 }
2459 (void)PreCondVars.apply(CGF);
2460 for (const Expr *E : S.dependent_inits()) {
2461 if (!E)
2462 continue;
2463 CGF.EmitIgnoredExpr(E);
2464 }
2465 // Check that loop is executed at least one time.
2466 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
2467 PreCondVars.restore(CGF);
2468}
2469
2470void CodeGenFunction::EmitOMPLinearClause(
2471 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
2472 if (!HaveInsertPoint())
2473 return;
2474 llvm::DenseSet<const VarDecl *> SIMDLCVs;
2475 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
2476 if (isOpenMPSimdDirective(DKind: EKind)) {
2477 const auto *LoopDirective = cast<OMPLoopDirective>(Val: &D);
2478 for (const Expr *C : LoopDirective->counters()) {
2479 SIMDLCVs.insert(
2480 V: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: C)->getDecl())->getCanonicalDecl());
2481 }
2482 }
2483 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2484 auto CurPrivate = C->privates().begin();
2485 for (const Expr *E : C->varlist()) {
2486 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
2487 const auto *PrivateVD =
2488 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *CurPrivate)->getDecl());
2489 if (!SIMDLCVs.count(V: VD->getCanonicalDecl())) {
2490 // Emit private VarDecl with copy init.
2491 EmitVarDecl(D: *PrivateVD);
2492 bool IsRegistered =
2493 PrivateScope.addPrivate(LocalVD: VD, Addr: GetAddrOfLocalVar(VD: PrivateVD));
2494 assert(IsRegistered && "linear var already registered as private");
2495 // Silence the warning about unused variable.
2496 (void)IsRegistered;
2497 } else {
2498 EmitVarDecl(D: *PrivateVD);
2499 }
2500 ++CurPrivate;
2501 }
2502 }
2503}
2504
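// simdlen vs. safelen, e.g.
// ```
// #pragma omp simd simdlen(8) // preferred vector width; still parallel
// #pragma omp simd safelen(8) // at most 8 concurrent iterations are safe
// ```
// Both set the vectorize width on the loop metadata; only the presence of a
// 'safelen' clause forces LoopStack.setParallel(false).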
2505static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
2506 const OMPExecutableDirective &D) {
2507 if (!CGF.HaveInsertPoint())
2508 return;
2509 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
2510 RValue Len = CGF.EmitAnyExpr(E: C->getSimdlen(), aggSlot: AggValueSlot::ignored(),
2511 /*ignoreResult=*/true);
2512 auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal());
2513 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried dependences
    // at a distance of 'safelen' iterations are possible.
2517 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
2518 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
2519 RValue Len = CGF.EmitAnyExpr(E: C->getSafelen(), aggSlot: AggValueSlot::ignored(),
2520 /*ignoreResult=*/true);
2521 auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal());
2522 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
    // In the presence of a finite 'safelen', it may be unsafe to mark all
    // the memory instructions parallel, because loop-carried dependences
    // at a distance of 'safelen' iterations are possible.
2526 CGF.LoopStack.setParallel(/*Enable=*/false);
2527 }
2528}
2529
2530// Check for the presence of an `OMPOrderedDirective`,
2531// i.e., `ordered` in `#pragma omp ordered simd`.
2532//
2533// Consider the following source code:
2534// ```
2535// __attribute__((noinline)) void omp_simd_loop(float X[ARRAY_SIZE][ARRAY_SIZE])
2536// {
2537// for (int r = 1; r < ARRAY_SIZE; ++r) {
2538// for (int c = 1; c < ARRAY_SIZE; ++c) {
2539// #pragma omp simd
2540// for (int k = 2; k < ARRAY_SIZE; ++k) {
2541// #pragma omp ordered simd
2542// X[r][k] = X[r][k - 2] + sinf((float)(r / c));
2543// }
2544// }
2545// }
2546// }
2547// ```
2548//
2549// Suppose we are in `CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective
2550// &D)`. By examining `D.dump()` we have the following AST containing
2551// `OMPOrderedDirective`:
2552//
2553// ```
2554// OMPSimdDirective 0x1c32950
2555// `-CapturedStmt 0x1c32028
2556// |-CapturedDecl 0x1c310e8
2557// | |-ForStmt 0x1c31e30
2558// | | |-DeclStmt 0x1c31298
2559// | | | `-VarDecl 0x1c31208 used k 'int' cinit
2560// | | | `-IntegerLiteral 0x1c31278 'int' 2
2561// | | |-<<<NULL>>>
2562// | | |-BinaryOperator 0x1c31308 'int' '<'
2563// | | | |-ImplicitCastExpr 0x1c312f0 'int' <LValueToRValue>
2564// | | | | `-DeclRefExpr 0x1c312b0 'int' lvalue Var 0x1c31208 'k' 'int'
2565// | | | `-IntegerLiteral 0x1c312d0 'int' 256
2566// | | |-UnaryOperator 0x1c31348 'int' prefix '++'
2567// | | | `-DeclRefExpr 0x1c31328 'int' lvalue Var 0x1c31208 'k' 'int'
2568// | | `-CompoundStmt 0x1c31e18
2569// | | `-OMPOrderedDirective 0x1c31dd8
2570// | | |-OMPSimdClause 0x1c31380
2571// | | `-CapturedStmt 0x1c31cd0
2572// ```
2573//
2574// Note the presence of `OMPOrderedDirective` above:
2575// It's (transitively) nested in a `CapturedStmt` representing the pragma
2576// annotated compound statement. Thus, we need to consider this nesting and
2577// include checking the `getCapturedStmt` in this case.
2578static bool hasOrderedDirective(const Stmt *S) {
2579 if (isa<OMPOrderedDirective>(Val: S))
2580 return true;
2581
2582 if (const auto *CS = dyn_cast<CapturedStmt>(Val: S))
2583 return hasOrderedDirective(S: CS->getCapturedStmt());
2584
2585 for (const Stmt *Child : S->children()) {
2586 if (Child && hasOrderedDirective(S: Child))
2587 return true;
2588 }
2589
2590 return false;
2591}
2592
2593static void applyConservativeSimdOrderedDirective(const Stmt &AssociatedStmt,
2594 LoopInfoStack &LoopStack) {
2595 // Check for the presence of an `OMPOrderedDirective`
2596 // i.e., `ordered` in `#pragma omp ordered simd`
2597 bool HasOrderedDirective = hasOrderedDirective(S: &AssociatedStmt);
  // If present, conservatively disable loop vectorization, analogously to
  // what `emitSimdlenSafelenClause` does.
2600 if (HasOrderedDirective)
2601 LoopStack.setParallel(/*Enable=*/false);
2602}
2603
2604void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
  // Walk clauses and process safelen/simdlen and other clauses affecting
  // vectorization.
2606 LoopStack.setParallel(/*Enable=*/true);
2607 LoopStack.setVectorizeEnable();
2608 const Stmt *AssociatedStmt = D.getAssociatedStmt();
2609 applyConservativeSimdOrderedDirective(AssociatedStmt: *AssociatedStmt, LoopStack);
2610 emitSimdlenSafelenClause(CGF&: *this, D);
2611 if (const auto *C = D.getSingleClause<OMPOrderClause>())
2612 if (C->getKind() == OMPC_ORDER_concurrent)
2613 LoopStack.setParallel(/*Enable=*/true);
2614 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
2615 if ((EKind == OMPD_simd ||
2616 (getLangOpts().OpenMPSimd && isOpenMPSimdDirective(DKind: EKind))) &&
2617 llvm::any_of(Range: D.getClausesOfKind<OMPReductionClause>(),
2618 P: [](const OMPReductionClause *C) {
2619 return C->getModifier() == OMPC_REDUCTION_inscan;
2620 }))
2621 // Disable parallel access in case of prefix sum.
2622 LoopStack.setParallel(/*Enable=*/false);
2623}
2624
2625void CodeGenFunction::EmitOMPSimdFinal(
2626 const OMPLoopDirective &D,
2627 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2628 if (!HaveInsertPoint())
2629 return;
2630 llvm::BasicBlock *DoneBB = nullptr;
2631 auto IC = D.counters().begin();
2632 auto IPC = D.private_counters().begin();
2633 for (const Expr *F : D.finals()) {
2634 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: (*IC))->getDecl());
2635 const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: (*IPC))->getDecl());
2636 const auto *CED = dyn_cast<OMPCapturedExprDecl>(Val: OrigVD);
2637 if (LocalDeclMap.count(Val: OrigVD) || CapturedStmtInfo->lookup(VD: OrigVD) ||
2638 OrigVD->hasGlobalStorage() || CED) {
2639 if (!DoneBB) {
2640 if (llvm::Value *Cond = CondGen(*this)) {
          // When the first post-update expression is found, emit the
          // conditional block if it was requested.
2643 llvm::BasicBlock *ThenBB = createBasicBlock(name: ".omp.final.then");
2644 DoneBB = createBasicBlock(name: ".omp.final.done");
2645 Builder.CreateCondBr(Cond, True: ThenBB, False: DoneBB);
2646 EmitBlock(BB: ThenBB);
2647 }
2648 }
2649 Address OrigAddr = Address::invalid();
2650 if (CED) {
2651 OrigAddr = EmitLValue(E: CED->getInit()->IgnoreImpCasts()).getAddress();
2652 } else {
2653 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
2654 /*RefersToEnclosingVariableOrCapture=*/false,
2655 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
2656 OrigAddr = EmitLValue(E: &DRE).getAddress();
2657 }
2658 OMPPrivateScope VarScope(*this);
2659 VarScope.addPrivate(LocalVD: OrigVD, Addr: OrigAddr);
2660 (void)VarScope.Privatize();
2661 EmitIgnoredExpr(E: F);
2662 }
2663 ++IC;
2664 ++IPC;
2665 }
2666 if (DoneBB)
2667 EmitBlock(BB: DoneBB, /*IsFinished=*/true);
2668}

static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
                                         const OMPLoopDirective &S,
                                         CodeGenFunction::JumpDest LoopExit) {
  CGF.EmitOMPLoopBody(S, LoopExit);
  CGF.EmitStopPoint(&S);
}

/// Emit a helper variable and return corresponding lvalue.
static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
                               const DeclRefExpr *Helper) {
  auto VDecl = cast<VarDecl>(Helper->getDecl());
  CGF.EmitVarDecl(*VDecl);
  return CGF.EmitLValue(Helper);
}

static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
                               const RegionCodeGenTy &SimdInitGen,
                               const RegionCodeGenTy &BodyCodeGen) {
  auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
                                                    PrePostActionTy &) {
    CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    SimdInitGen(CGF);

    BodyCodeGen(CGF);
  };
  auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
    CGF.LoopStack.setVectorizeEnable(/*Enable=*/false);

    BodyCodeGen(CGF);
  };
  const Expr *IfCond = nullptr;
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
  if (isOpenMPSimdDirective(EKind)) {
    for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
      if (CGF.getLangOpts().OpenMP >= 50 &&
          (C->getNameModifier() == OMPD_unknown ||
           C->getNameModifier() == OMPD_simd)) {
        IfCond = C->getCondition();
        break;
      }
    }
  }
  if (IfCond) {
    CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
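
// Illustrative example: under OpenMP 5.0, a directive such as
//
//   #pragma omp simd if(simd: Cond)
//
// is lowered to two versions of the loop body, roughly
//
//   if (Cond) { <loop with simd metadata> }
//   else      { <same loop, vectorization disabled> }
//
// which is why both a ThenGen and an ElseGen are built above.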

static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
                              PrePostActionTy &Action) {
  Action.Enter(CGF);
  OMPLoopScope PreInitScope(CGF, S);
  // if (PreCond) {
  //   for (IV in 0..LastIteration) BODY;
  //   <Final counter/linear vars updates>;
  // }

  // The presence of lower/upper bound variables depends on the actual
  // directive kind in the AST node. The variables must be emitted because
  // some of the expressions associated with the loop will use them.
  OpenMPDirectiveKind DKind = S.getDirectiveKind();
  if (isOpenMPDistributeDirective(DKind) ||
      isOpenMPWorksharingDirective(DKind) || isOpenMPTaskLoopDirective(DKind) ||
      isOpenMPGenericLoopDirective(DKind)) {
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
  }

  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
  // Emit: if (PreCond) - begin.
  // If the condition constant folds and can be elided, avoid emitting the
  // whole loop.
  bool CondConstant;
  llvm::BasicBlock *ContBlock = nullptr;
  if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
    if (!CondConstant)
      return;
  } else {
    llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("simd.if.then");
    ContBlock = CGF.createBasicBlock("simd.if.end");
    emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                CGF.getProfileCount(&S));
    CGF.EmitBlock(ThenBlock);
    CGF.incrementProfileCounter(&S);
  }

  // Emit the loop iteration variable.
  const Expr *IVExpr = S.getIterationVariable();
  const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
  CGF.EmitVarDecl(*IVDecl);
  CGF.EmitIgnoredExpr(S.getInit());

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count
  // on each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    CGF.EmitIgnoredExpr(S.getCalcLastIteration());
  }

  emitAlignedClause(CGF, S);
  (void)CGF.EmitOMPLinearClauseInit(S);
  {
    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
        CGF, S, CGF.EmitLValue(S.getIterationVariable()));
    bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(EKind))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

    emitCommonSimdLoop(
        CGF, S,
        [&S](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitOMPSimdInit(S);
        },
        [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
          CGF.EmitOMPInnerLoop(
              S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
              [&S](CodeGenFunction &CGF) {
                emitOMPLoopBodyWithStopPoint(CGF, S,
                                             CodeGenFunction::JumpDest());
              },
              [](CodeGenFunction &) {});
        });
    CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
    // Emit final copy of the lastprivate variables at the end of loops.
    if (HasLastprivateClause)
      CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
    emitPostUpdateForReductionClause(CGF, S,
                                     [](CodeGenFunction &) { return nullptr; });
    LoopScope.restoreMap();
    CGF.EmitOMPLinearClauseFinal(S, [](CodeGenFunction &) { return nullptr; });
  }
  // Emit: if (PreCond) - end.
  if (ContBlock) {
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
}

// Pass OMPLoopDirective (instead of OMPSimdDirective) to make this function
// available for "loop bind(thread)", which maps to "simd".
static bool isSimdSupportedByOpenMPIRBuilder(const OMPLoopDirective &S) {
  // Check for unsupported clauses.
  for (OMPClause *C : S.clauses()) {
    // Currently only the order, simdlen, safelen and aligned clauses are
    // supported.
    if (!(isa<OMPSimdlenClause>(C) || isa<OMPSafelenClause>(C) ||
          isa<OMPOrderClause>(C) || isa<OMPAlignedClause>(C)))
      return false;
  }

  // Check if we have a statement with the ordered directive.
  // Visit the statement hierarchy to find a compound statement
  // with an ordered directive in it.
  if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(S.getRawStmt())) {
    if (const Stmt *SyntacticalLoop = CanonLoop->getLoopStmt()) {
      for (const Stmt *SubStmt : SyntacticalLoop->children()) {
        if (!SubStmt)
          continue;
        if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(SubStmt)) {
          for (const Stmt *CSSubStmt : CS->children()) {
            if (!CSSubStmt)
              continue;
            if (isa<OMPOrderedDirective>(CSSubStmt)) {
              return false;
            }
          }
        }
      }
    }
  }
  return true;
}
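
// Example of a loop this check declines (illustrative): the nested 'ordered'
// construct below forces the classic codegen path even when the
// OpenMPIRBuilder is enabled (e.g. via -fopenmp-enable-irbuilder):
//
//   #pragma omp simd
//   for (int I = 0; I < N; ++I) {
//   #pragma omp ordered simd
//     Body(I);
//   }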

static llvm::MapVector<llvm::Value *, llvm::Value *>
GetAlignedMapping(const OMPLoopDirective &S, CodeGenFunction &CGF) {
  llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars;
  for (const auto *Clause : S.getClausesOfKind<OMPAlignedClause>()) {
    llvm::APInt ClauseAlignment(64, 0);
    if (const Expr *AlignmentExpr = Clause->getAlignment()) {
      auto *AlignmentCI =
          cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
      ClauseAlignment = AlignmentCI->getValue();
    }
    for (const Expr *E : Clause->varlist()) {
      llvm::APInt Alignment(ClauseAlignment);
      if (Alignment == 0) {
        // OpenMP [2.8.1, Description]
        // If no optional parameter is specified, implementation-defined
        // default alignments for SIMD instructions on the target platforms
        // are assumed.
        Alignment =
            CGF.getContext()
                .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
                    E->getType()->getPointeeType()))
                .getQuantity();
      }
      assert((Alignment == 0 || Alignment.isPowerOf2()) &&
             "alignment is not power of 2");
      llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
      AlignedVars[PtrValue] = CGF.Builder.getInt64(Alignment.getSExtValue());
    }
  }
  return AlignedVars;
}
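
// Illustrative mapping produced above: for 'aligned(Ptr: 64)' the emitted
// pointer value of 'Ptr' maps to an i64 constant 64; for 'aligned(Ptr)' with
// no explicit alignment, the target's default SIMD alignment for the pointee
// type is used instead, per OpenMP [2.8.1, Description].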

// Pass OMPLoopDirective (instead of OMPSimdDirective) to make this function
// available for "loop bind(thread)", which maps to "simd".
static void emitOMPSimdDirective(const OMPLoopDirective &S,
                                 CodeGenFunction &CGF, CodeGenModule &CGM) {
  bool UseOMPIRBuilder =
      CGM.getLangOpts().OpenMPIRBuilder && isSimdSupportedByOpenMPIRBuilder(S);
  if (UseOMPIRBuilder) {
    auto &&CodeGenIRBuilder = [&S, &CGM, UseOMPIRBuilder](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
      // Use the OpenMPIRBuilder if enabled.
      if (UseOMPIRBuilder) {
        llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars =
            GetAlignedMapping(S, CGF);
        // Emit the associated statement and get its loop representation.
        const Stmt *Inner = S.getRawStmt();
        llvm::CanonicalLoopInfo *CLI =
            CGF.EmitOMPCollapsedCanonicalLoopNest(Inner, 1);

        llvm::OpenMPIRBuilder &OMPBuilder =
            CGM.getOpenMPRuntime().getOMPBuilder();
        // Add SIMD specific metadata.
        llvm::ConstantInt *Simdlen = nullptr;
        if (const auto *C = S.getSingleClause<OMPSimdlenClause>()) {
          RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
                                       /*ignoreResult=*/true);
          auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
          Simdlen = Val;
        }
        llvm::ConstantInt *Safelen = nullptr;
        if (const auto *C = S.getSingleClause<OMPSafelenClause>()) {
          RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
                                       /*ignoreResult=*/true);
          auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
          Safelen = Val;
        }
        llvm::omp::OrderKind Order = llvm::omp::OrderKind::OMP_ORDER_unknown;
        if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
          if (C->getKind() == OpenMPOrderClauseKind::OMPC_ORDER_concurrent) {
            Order = llvm::omp::OrderKind::OMP_ORDER_concurrent;
          }
        }
        // Add simd metadata to the collapsed loop. Do not generate
        // another loop for the if clause; support for the if clause is
        // handled earlier.
        OMPBuilder.applySimd(CLI, AlignedVars,
                             /*IfCond*/ nullptr, Order, Simdlen, Safelen);
        return;
      }
    };
    {
      auto LPCRegion =
          CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
      OMPLexicalScope Scope(CGF, S, OMPD_unknown);
      CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
                                                  CodeGenIRBuilder);
    }
    return;
  }

  CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
  CGF.OMPFirstScanLoop = true;
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
    OMPLexicalScope Scope(CGF, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd, CodeGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(CGF, S);
}

void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  emitOMPSimdDirective(S, *this, CGM);
}

void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
  // Emit the de-sugared statement.
  OMPTransformDirectiveScopeRAII TileScope(*this, &S);
  EmitStmt(S.getTransformedStmt());
}
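
// Illustrative de-sugaring (performed by Sema, not here): under a directive
// like
//
//   #pragma omp tile sizes(4)
//   for (int I = 0; I < N; ++I) Body(I);
//
// getTransformedStmt() already holds roughly
//
//   for (int I1 = 0; I1 < N; I1 += 4)
//     for (int I2 = I1; I2 < std::min(I1 + 4, N); ++I2) Body(I2);
//
// so codegen only has to emit the transformed statement. The same pattern
// applies to the stripe/reverse/interchange/fuse directives below.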

void CodeGenFunction::EmitOMPStripeDirective(const OMPStripeDirective &S) {
  // Emit the de-sugared statement.
  OMPTransformDirectiveScopeRAII StripeScope(*this, &S);
  EmitStmt(S.getTransformedStmt());
}

void CodeGenFunction::EmitOMPReverseDirective(const OMPReverseDirective &S) {
  // Emit the de-sugared statement.
  OMPTransformDirectiveScopeRAII ReverseScope(*this, &S);
  EmitStmt(S.getTransformedStmt());
}

void CodeGenFunction::EmitOMPInterchangeDirective(
    const OMPInterchangeDirective &S) {
  // Emit the de-sugared statement.
  OMPTransformDirectiveScopeRAII InterchangeScope(*this, &S);
  EmitStmt(S.getTransformedStmt());
}

void CodeGenFunction::EmitOMPFuseDirective(const OMPFuseDirective &S) {
  // Emit the de-sugared statement.
  OMPTransformDirectiveScopeRAII FuseScope(*this, &S);
  EmitStmt(S.getTransformedStmt());
}

void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
  bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder;

  if (UseOMPIRBuilder) {
    auto DL = SourceLocToDebugLoc(S.getBeginLoc());
    const Stmt *Inner = S.getRawStmt();

    // Consume nested loop. Clear the entire remaining loop stack because a
    // fully unrolled loop is non-transformable. For partial unrolling the
    // generated outer loop is pushed back to the stack.
    llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
    OMPLoopNestStack.clear();

    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();

    bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1;
    llvm::CanonicalLoopInfo *UnrolledCLI = nullptr;

    if (S.hasClausesOfKind<OMPFullClause>()) {
      assert(ExpectedOMPLoopDepth == 0);
      OMPBuilder.unrollLoopFull(DL, CLI);
    } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
      uint64_t Factor = 0;
      if (Expr *FactorExpr = PartialClause->getFactor()) {
        Factor = FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
        assert(Factor >= 1 && "Only positive factors are valid");
      }
      OMPBuilder.unrollLoopPartial(DL, CLI, Factor,
                                   NeedsUnrolledCLI ? &UnrolledCLI : nullptr);
    } else {
      OMPBuilder.unrollLoopHeuristic(DL, CLI);
    }

    assert((!NeedsUnrolledCLI || UnrolledCLI) &&
           "NeedsUnrolledCLI implies UnrolledCLI to be set");
    if (UnrolledCLI)
      OMPLoopNestStack.push_back(UnrolledCLI);

    return;
  }

  // This function is only called if the unrolled loop is not consumed by any
  // other loop-associated construct. Such a loop-associated construct will have
  // used the transformed AST.

  // Set the unroll metadata for the next emitted loop.
  LoopStack.setUnrollState(LoopAttributes::Enable);

  if (S.hasClausesOfKind<OMPFullClause>()) {
    LoopStack.setUnrollState(LoopAttributes::Full);
  } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
    if (Expr *FactorExpr = PartialClause->getFactor()) {
      uint64_t Factor =
          FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
      assert(Factor >= 1 && "Only positive factors are valid");
      LoopStack.setUnrollCount(Factor);
    }
  }

  EmitStmt(S.getAssociatedStmt());
}
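
// Illustrative effect of the classic (non-IRBuilder) path above: for
//
//   #pragma omp unroll partial(4)
//   for (int I = 0; I < N; ++I) Body(I);
//
// the next emitted loop carries unroll metadata roughly equivalent to
// '#pragma clang loop unroll_count(4)' (llvm.loop.unroll.count = 4), and the
// actual unrolling is left to LLVM's loop unroller.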

void CodeGenFunction::EmitOMPOuterLoop(
    bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
    CodeGenFunction::OMPPrivateScope &LoopScope,
    const CodeGenFunction::OMPLoopArguments &LoopArgs,
    const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
    const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  JumpDest LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  const SourceRange R = S.getSourceRange();
  OMPLoopNestStack.clear();
  LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  llvm::Value *BoolCondVal = nullptr;
  if (!DynamicOrOrdered) {
    // UB = min(UB, GlobalUB) or
    // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
    // 'distribute parallel for').
    EmitIgnoredExpr(LoopArgs.EUB);
    // IV = LB
    EmitIgnoredExpr(LoopArgs.Init);
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
  } else {
    BoolCondVal =
        RT.emitForNext(*this, S.getBeginLoc(), IVSize, IVSigned, LoopArgs.IL,
                       LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  llvm::BasicBlock *LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (DynamicOrOrdered)
    EmitIgnoredExpr(LoopArgs.Init);

  // Create a block for the increment.
  JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(S, LoopExit, Continue));

  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
  emitCommonSimdLoop(
      *this, S,
      [&S, IsMonotonic, EKind](CodeGenFunction &CGF, PrePostActionTy &) {
        // Generate !llvm.loop.parallel metadata for loads and stores for loops
        // with dynamic/guided scheduling and without an ordered clause.
        if (!isOpenMPSimdDirective(EKind)) {
          CGF.LoopStack.setParallel(!IsMonotonic);
          if (const auto *C = S.getSingleClause<OMPOrderClause>())
            if (C->getKind() == OMPC_ORDER_concurrent)
              CGF.LoopStack.setParallel(/*Enable=*/true);
        } else {
          CGF.EmitOMPSimdInit(S);
        }
      },
      [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
       &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
        SourceLocation Loc = S.getBeginLoc();
        // When 'distribute' is not combined with a 'for':
        //   while (idx <= UB) { BODY; ++idx; }
        // When 'distribute' is combined with a 'for'
        // (e.g. 'distribute parallel for'):
        //   while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
        CGF.EmitOMPInnerLoop(
            S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
            [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
              CodeGenLoop(CGF, S, LoopExit);
            },
            [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
              CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
            });
      });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!DynamicOrOrdered) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(LoopArgs.NextLB);
    EmitIgnoredExpr(LoopArgs.NextUB);
  }

  EmitBranch(CondBlock);
  OMPLoopNestStack.clear();
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  auto &&CodeGen = [DynamicOrOrdered, &S, &LoopArgs](CodeGenFunction &CGF) {
    if (!DynamicOrOrdered)
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                     LoopArgs.DKind);
  };
  OMPCancelStack.emitExit(*this, EKind, CodeGen);
}

void CodeGenFunction::EmitOMPForOuterLoop(
    const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
    const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
    const OMPLoopArguments &LoopArgs,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind.Schedule);

  assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule,
                                            LoopArgs.Chunk != nullptr)) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from
  // the run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // __kmpc_dispatch_init();
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx;
  //     __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
  //   } // inner loop
  // }
  // __kmpc_dispatch_deinit();
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided
  // into chunks of size chunk_size, and the chunks are assigned to the threads
  // in the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  if (DynamicOrOrdered) {
    const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
        CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
    llvm::Value *LBVal = DispatchBounds.first;
    llvm::Value *UBVal = DispatchBounds.second;
    CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
                                                              LoopArgs.Chunk};
    RT.emitForDispatchInit(*this, S.getBeginLoc(), ScheduleKind, IVSize,
                           IVSigned, Ordered, DispatchRTInputValues);
  } else {
    CGOpenMPRuntime::StaticRTInput StaticInit(
        IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
        LoopArgs.ST, LoopArgs.Chunk);
    OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
    RT.emitForStaticInit(*this, S.getBeginLoc(), EKind, ScheduleKind,
                         StaticInit);
  }

  auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
                                    const unsigned IVSize,
                                    const bool IVSigned) {
    if (Ordered) {
      CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
                                                            IVSigned);
    }
  };

  OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
                                 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
  OuterLoopArgs.IncExpr = S.getInc();
  OuterLoopArgs.Init = S.getInit();
  OuterLoopArgs.Cond = S.getCond();
  OuterLoopArgs.NextLB = S.getNextLowerBound();
  OuterLoopArgs.NextUB = S.getNextUpperBound();
  OuterLoopArgs.DKind = LoopArgs.DKind;
  EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
                   emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
  if (DynamicOrOrdered) {
    RT.emitForDispatchDeinit(*this, S.getBeginLoc());
  }
}

static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
                             const unsigned IVSize, const bool IVSigned) {}

void CodeGenFunction::EmitOMPDistributeOuterLoop(
    OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
    OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
    const CodeGenLoopTy &CodeGenLoopContent) {

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  // Emit outer loop.
  // Same behavior as an OMPForOuterLoop, except that the schedule cannot be
  // dynamic.

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);

  CGOpenMPRuntime::StaticRTInput StaticInit(
      IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
      LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
  RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);

  // For combined 'distribute' and 'for', the increment expression of
  // distribute is stored in DistInc. For 'distribute' alone, it is in Inc.
  Expr *IncExpr;
  if (isOpenMPLoopBoundSharingDirective(EKind))
    IncExpr = S.getDistInc();
  else
    IncExpr = S.getInc();

  // This routine is shared by 'omp distribute parallel for' and
  // 'omp distribute': select the right EUB expression depending on the
  // directive.
  OMPLoopArguments OuterLoopArgs;
  OuterLoopArgs.LB = LoopArgs.LB;
  OuterLoopArgs.UB = LoopArgs.UB;
  OuterLoopArgs.ST = LoopArgs.ST;
  OuterLoopArgs.IL = LoopArgs.IL;
  OuterLoopArgs.Chunk = LoopArgs.Chunk;
  OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(EKind)
                          ? S.getCombinedEnsureUpperBound()
                          : S.getEnsureUpperBound();
  OuterLoopArgs.IncExpr = IncExpr;
  OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(EKind)
                           ? S.getCombinedInit()
                           : S.getInit();
  OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(EKind)
                           ? S.getCombinedCond()
                           : S.getCond();
  OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(EKind)
                             ? S.getCombinedNextLowerBound()
                             : S.getNextLowerBound();
  OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(EKind)
                             ? S.getCombinedNextUpperBound()
                             : S.getNextUpperBound();
  OuterLoopArgs.DKind = OMPD_distribute;

  EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
                   LoopScope, OuterLoopArgs, CodeGenLoopContent,
                   emitEmptyOrdered);
}
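
// Schematic summary of the bound selection above (illustrative):
//
//   'distribute' alone:             IV advances by Inc, and the plain
//                                   EUB/Init/Cond/Next* expressions are used.
//   'distribute parallel for' etc.: IV advances by DistInc, and the Combined*
//                                   expression forms are used so each team
//                                   iterates over its own distribute chunk.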

static std::pair<LValue, LValue>
emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &S) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));

  // When composing 'distribute' with 'for' (e.g. as in 'distribute
  // parallel for') we need to use the 'distribute'
  // chunk lower and upper bounds rather than the whole loop iteration
  // space. These are parameters to the outlined function for 'parallel'
  // and we copy the bounds of the previous schedule into the current ones.
  LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
  LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
  llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
      PrevLB, LS.getPrevLowerBoundVariable()->getExprLoc());
  PrevLBVal = CGF.EmitScalarConversion(
      PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevLowerBoundVariable()->getExprLoc());
  llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
      PrevUB, LS.getPrevUpperBoundVariable()->getExprLoc());
  PrevUBVal = CGF.EmitScalarConversion(
      PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
      LS.getIterationVariable()->getType(),
      LS.getPrevUpperBoundVariable()->getExprLoc());

  CGF.EmitStoreOfScalar(PrevLBVal, LB);
  CGF.EmitStoreOfScalar(PrevUBVal, UB);

  return {LB, UB};
}

/// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
/// we need to use the LB and UB expressions generated by the worksharing
/// code generation support, whereas in non-combined situations we would
/// just emit 0 and the LastIteration expression.
/// This function is necessary due to the difference of the LB and UB
/// types for the RT emission routines for 'for_static_init' and
/// 'for_dispatch_init'.
static std::pair<llvm::Value *, llvm::Value *>
emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &S,
                                        Address LB, Address UB) {
  const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  // When implementing a dynamic schedule for a 'for' combined with a
  // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
  // is not normalized as each team only executes its own assigned
  // distribute chunk.
  QualType IteratorTy = IVExpr->getType();
  llvm::Value *LBVal =
      CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  llvm::Value *UBVal =
      CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, S.getBeginLoc());
  return {LBVal, UBVal};
}

static void emitDistributeParallelForDistributeInnerBoundParams(
    CodeGenFunction &CGF, const OMPExecutableDirective &S,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const auto &Dir = cast<OMPLoopDirective>(S);
  LValue LB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
  llvm::Value *LBCast = CGF.Builder.CreateIntCast(
      CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(LBCast);
  LValue UB =
      CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));

  llvm::Value *UBCast = CGF.Builder.CreateIntCast(
      CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
  CapturedVars.push_back(UBCast);
}

static void
emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
                                 const OMPLoopDirective &S,
                                 CodeGenFunction::JumpDest LoopExit) {
  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
  auto &&CGInlinedWorksharingLoop = [&S, EKind](CodeGenFunction &CGF,
                                                PrePostActionTy &Action) {
    Action.Enter(CGF);
    bool HasCancel = false;
    if (!isOpenMPSimdDirective(EKind)) {
      if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
      else if (const auto *D =
                   dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
        HasCancel = D->hasCancel();
    }
    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
    CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
                               emitDistributeParallelForInnerBounds,
                               emitDistributeParallelForDispatchBounds);
  };

  emitCommonOMPParallelDirective(
      CGF, S, isOpenMPSimdDirective(EKind) ? OMPD_for_simd : OMPD_for,
      CGInlinedWorksharingLoop,
      emitDistributeParallelForDistributeInnerBoundParams);
}

void CodeGenFunction::EmitOMPDistributeParallelForDirective(
    const OMPDistributeParallelForDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
    const OMPDistributeParallelForSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_parallel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
}

void CodeGenFunction::EmitOMPDistributeSimdDirective(
    const OMPDistributeSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };
  OMPLexicalScope Scope(*this, S, OMPD_unknown);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}

void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
  // Emit SPMD target simd region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

void CodeGenFunction::EmitOMPTargetSimdDirective(
    const OMPTargetSimdDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitOMPSimdRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

namespace {
struct ScheduleKindModifiersTy {
  OpenMPScheduleClauseKind Kind;
  OpenMPScheduleClauseModifier M1;
  OpenMPScheduleClauseModifier M2;
  ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
                          OpenMPScheduleClauseModifier M1,
                          OpenMPScheduleClauseModifier M2)
      : Kind(Kind), M1(M1), M2(M2) {}
};
} // namespace

bool CodeGenFunction::EmitOMPWorksharingLoop(
    const OMPLoopDirective &S, Expr *EUB,
    const CodeGenLoopBoundsTy &CodeGenLoopBounds,
    const CodeGenDispatchBoundsTy &CGDispatchBounds) {
  // Emit the loop iteration variable.
  const auto *IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  const auto *IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate the iterations count
  // on each iteration (e.g., it is foldable into a constant).
  if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();

  bool HasLastprivateClause;
  // Check pre-condition.
  {
    OMPLoopScope PreInitScope(*this, S);
    // Skip the entire loop if we don't meet the precondition.
    // If the condition constant folds and can be elided, avoid emitting the
    // whole loop.
    bool CondConstant;
    llvm::BasicBlock *ContBlock = nullptr;
    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
      if (!CondConstant)
        return false;
    } else {
      llvm::BasicBlock *ThenBlock = createBasicBlock("omp.precond.then");
      ContBlock = createBasicBlock("omp.precond.end");
      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
                  getProfileCount(&S));
      EmitBlock(ThenBlock);
      incrementProfileCounter(&S);
    }

    RunCleanupsScope DoacrossCleanupScope(*this);
    bool Ordered = false;
    if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
      if (OrderedClause->getNumForLoops())
        RT.emitDoacrossInit(*this, S, OrderedClause->getLoopNumIterations());
      else
        Ordered = true;
    }

    emitAlignedClause(*this, S);
    bool HasLinears = EmitOMPLinearClauseInit(S);
    // Emit helper vars inits.

    std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
    LValue LB = Bounds.first;
    LValue UB = Bounds.second;
    LValue ST =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
    LValue IL =
        EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

    // Emit 'then' code.
    {
      OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
      OMPPrivateScope LoopScope(*this);
      if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
        // Emit implicit barrier to synchronize threads and avoid data races
        // on initialization of firstprivate variables and post-update of
        // lastprivate variables.
        CGM.getOpenMPRuntime().emitBarrierCall(
            *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
            /*ForceSimpleCall=*/true);
      }
      EmitOMPPrivateClause(S, LoopScope);
      CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
          *this, S, EmitLValue(S.getIterationVariable()));
      HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
      EmitOMPReductionClauseInit(S, LoopScope);
      EmitOMPPrivateLoopCounters(S, LoopScope);
      EmitOMPLinearClause(S, LoopScope);
      (void)LoopScope.Privatize();
      if (isOpenMPTargetExecutionDirective(EKind))
        CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);

      // Detect the loop schedule kind and chunk.
      const Expr *ChunkExpr = nullptr;
      OpenMPScheduleTy ScheduleKind;
      if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
        ScheduleKind.Schedule = C->getScheduleKind();
        ScheduleKind.M1 = C->getFirstScheduleModifier();
        ScheduleKind.M2 = C->getSecondScheduleModifier();
        ChunkExpr = C->getChunkSize();
      } else {
        // Default behaviour for schedule clause.
        CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
            *this, S, ScheduleKind.Schedule, ChunkExpr);
      }
      bool HasChunkSizeOne = false;
      llvm::Value *Chunk = nullptr;
      if (ChunkExpr) {
        Chunk = EmitScalarExpr(ChunkExpr);
        Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
                                     S.getIterationVariable()->getType(),
                                     S.getBeginLoc());
        Expr::EvalResult Result;
        if (ChunkExpr->EvaluateAsInt(Result, getContext())) {
          llvm::APSInt EvaluatedChunk = Result.Val.getInt();
          HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      // OpenMP 4.5, 2.7.1 Loop Construct, Description.
      // If the static schedule kind is specified or if the ordered clause is
      // specified, and if no monotonic modifier is specified, the effect will
      // be as if the monotonic modifier was specified.
      bool StaticChunkedOne =
          RT.isStaticChunked(ScheduleKind.Schedule,
                             /* Chunked */ Chunk != nullptr) &&
          HasChunkSizeOne && isOpenMPLoopBoundSharingDirective(EKind);
      bool IsMonotonic =
          Ordered ||
          (ScheduleKind.Schedule == OMPC_SCHEDULE_static &&
           !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
             ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
          ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
          ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
      if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
                                 /* Chunked */ Chunk != nullptr) ||
           StaticChunkedOne) &&
          !Ordered) {
        JumpDest LoopExit =
            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
        emitCommonSimdLoop(
            *this, S,
            [&S, EKind](CodeGenFunction &CGF, PrePostActionTy &) {
              if (isOpenMPSimdDirective(EKind)) {
                CGF.EmitOMPSimdInit(S);
              } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
                if (C->getKind() == OMPC_ORDER_concurrent)
                  CGF.LoopStack.setParallel(/*Enable=*/true);
              }
            },
            [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
             &S, ScheduleKind, LoopExit, EKind,
             &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
              // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
              // When no chunk_size is specified, the iteration space is
              // divided into chunks that are approximately equal in size,
              // and at most one chunk is distributed to each thread. Note
              // that the size of the chunks is unspecified in this case.
              CGOpenMPRuntime::StaticRTInput StaticInit(
                  IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
                  UB.getAddress(), ST.getAddress(),
                  StaticChunkedOne ? Chunk : nullptr);
              CGF.CGM.getOpenMPRuntime().emitForStaticInit(
                  CGF, S.getBeginLoc(), EKind, ScheduleKind, StaticInit);
              // UB = min(UB, GlobalUB);
              if (!StaticChunkedOne)
                CGF.EmitIgnoredExpr(S.getEnsureUpperBound());
              // IV = LB;
              CGF.EmitIgnoredExpr(S.getInit());
              // For unchunked static schedule generate:
              //
              // while (idx <= UB) {
              //   BODY;
              //   ++idx;
              // }
              //
              // For static schedule with chunk one:
              //
              // while (IV <= PrevUB) {
              //   BODY;
              //   IV += ST;
              // }
              CGF.EmitOMPInnerLoop(
                  S, LoopScope.requiresCleanups(),
                  StaticChunkedOne ? S.getCombinedParForInDistCond()
                                   : S.getCond(),
                  StaticChunkedOne ? S.getDistInc() : S.getInc(),
                  [&S, LoopExit](CodeGenFunction &CGF) {
                    emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
                  },
                  [](CodeGenFunction &) {});
            });
        EmitBlock(LoopExit.getBlock());
        // Tell the runtime we are done.
        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                         OMPD_for);
        };
        OMPCancelStack.emitExit(*this, EKind, CodeGen);
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
                                       ST.getAddress(), IL.getAddress(), Chunk,
                                       EUB);
        LoopArguments.DKind = OMPD_for;
        EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                            LoopArguments, CGDispatchBounds);
      }
      if (isOpenMPSimdDirective(EKind)) {
        EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
          return CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
        });
      }
      EmitOMPReductionClauseFinal(
          S, /*ReductionKind=*/isOpenMPSimdDirective(EKind)
                 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
                 : /*Parallel only*/ OMPD_parallel);
      // Emit post-update of the reduction variables if IsLastIter != 0.
      emitPostUpdateForReductionClause(
          *this, S, [IL, &S](CodeGenFunction &CGF) {
            return CGF.Builder.CreateIsNotNull(
                CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
          });
      // Emit final copy of the lastprivate variables if IsLastIter != 0.
      if (HasLastprivateClause)
        EmitOMPLastprivateClauseFinal(
            S, isOpenMPSimdDirective(EKind),
            Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
      LoopScope.restoreMap();
      EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
        return CGF.Builder.CreateIsNotNull(
            CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
      });
    }
    DoacrossCleanupScope.ForceCleanup();
    // We're now done with the loop, so jump to the continuation block.
    if (ContBlock) {
      EmitBranch(ContBlock);
      EmitBlock(ContBlock, /*IsFinished=*/true);
    }
  }
  return HasLastprivateClause;
}
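
// Examples of the monotonicity rule applied above (illustrative):
//
//   schedule(static)               -> monotonic (no modifier specified)
//   schedule(nonmonotonic: static) -> not monotonic
//   schedule(monotonic: dynamic)   -> monotonic
//   'ordered' clause present       -> monotonic, and dispatch codegen is used
//                                     even for static schedules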

/// The following two functions generate expressions for the loop lower
/// and upper bounds in case of static and dynamic (dispatch) schedule
/// of the associated 'for' or 'distribute' loop.
static std::pair<LValue, LValue>
emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  const auto &LS = cast<OMPLoopDirective>(S);
  LValue LB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
  LValue UB =
      EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
  return {LB, UB};
}

/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
/// consider the lower and upper bound expressions generated by the
/// worksharing loop support, but we use 0 and the iteration space size as
/// constants.
static std::pair<llvm::Value *, llvm::Value *>
emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
                          Address LB, Address UB) {
  const auto &LS = cast<OMPLoopDirective>(S);
  const Expr *IVExpr = LS.getIterationVariable();
  const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
  llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
  llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
  return {LBVal, UBVal};
}
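
// Illustrative contrast with emitDistributeParallelForDispatchBounds above:
// for a standalone '#pragma omp for schedule(dynamic)' over 100 iterations,
// the dispatch is initialized with LB = 0 and UB = LastIteration (99), since
// the loop is normalized; in the combined 'distribute parallel for' case the
// team's previously computed chunk bounds are loaded instead.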

/// Emits internal temp array declarations for the directive with inscan
/// reductions.
/// The code is the following:
/// \code
/// size num_iters = <num_iters>;
/// <type> buffer[num_iters];
/// \endcode
static void emitScanBasedDirectiveDecls(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> CopyArrayTemps;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    CopyArrayTemps.append(C->copy_array_temps().begin(),
                          C->copy_array_temps().end());
  }
  {
    // Emit buffers for each reduction variable.
    // ReductionCodeGen is required to emit the code for array reductions
    // correctly.
    ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
    unsigned Count = 0;
    auto *ITA = CopyArrayTemps.begin();
    for (const Expr *IRef : Privates) {
      const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
      // Emit variably modified arrays, used for arrays/array sections
      // reductions.
      if (PrivateVD->getType()->isVariablyModifiedType()) {
        RedCG.emitSharedOrigLValue(CGF, Count);
        RedCG.emitAggregateType(CGF, Count);
      }
      CodeGenFunction::OpaqueValueMapping DimMapping(
          CGF,
          cast<OpaqueValueExpr>(
              cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
                  ->getSizeExpr()),
          RValue::get(OMPScanNumIterations));
      // Emit temp buffer.
      CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
      ++ITA;
      ++Count;
    }
  }
}

/// Copies final inscan reduction values to the original variables.
/// The code is the following:
/// \code
/// <orig_var> = buffer[num_iters-1];
/// \endcode
static void emitScanBasedDirectiveFinals(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> CopyOps;
  SmallVector<const Expr *, 4> CopyArrayElems;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
           "Only inscan reductions are expected.");
    Shareds.append(C->varlist_begin(), C->varlist_end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    Privates.append(C->privates().begin(), C->privates().end());
    CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
    CopyArrayElems.append(C->copy_array_elems().begin(),
                          C->copy_array_elems().end());
  }
  // Create temp var and copy LHS value to this temp value.
  // LHS = TMP[LastIter];
  llvm::Value *OMPLast = CGF.Builder.CreateNSWSub(
      OMPScanNumIterations,
      llvm::ConstantInt::get(CGF.SizeTy, 1, /*isSigned=*/false));
  for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
    const Expr *PrivateExpr = Privates[I];
    const Expr *OrigExpr = Shareds[I];
    const Expr *CopyArrayElem = CopyArrayElems[I];
    CodeGenFunction::OpaqueValueMapping IdxMapping(
        CGF,
        cast<OpaqueValueExpr>(
            cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
        RValue::get(OMPLast));
    LValue DestLVal = CGF.EmitLValue(OrigExpr);
    LValue SrcLVal = CGF.EmitLValue(CopyArrayElem);
    CGF.EmitOMPCopy(
        PrivateExpr->getType(), DestLVal.getAddress(), SrcLVal.getAddress(),
        cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), CopyOps[I]);
  }
}
3892
3893/// Emits the code for the directive with inscan reductions.
3894/// The code is the following:
3895/// \code
3896/// #pragma omp ...
3897/// for (i: 0..<num_iters>) {
3898/// <input phase>;
3899/// buffer[i] = red;
3900/// }
3901/// #pragma omp master // in parallel region
3902/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
3903/// for (size cnt = last_iter; cnt >= pow(2, k); --k)
3904/// buffer[i] op= buffer[i-pow(2,k)];
3905/// #pragma omp barrier // in parallel region
3906/// #pragma omp ...
3907/// for (0..<num_iters>) {
3908/// red = InclusiveScan ? buffer[i] : buffer[i-1];
3909/// <scan phase>;
3910/// }
3911/// \endcode
3912static void emitScanBasedDirective(
3913 CodeGenFunction &CGF, const OMPLoopDirective &S,
3914 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
3915 llvm::function_ref<void(CodeGenFunction &)> FirstGen,
3916 llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
3917 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3918 V: NumIteratorsGen(CGF), DestTy: CGF.SizeTy, /*isSigned=*/false);
3919 SmallVector<const Expr *, 4> Privates;
3920 SmallVector<const Expr *, 4> ReductionOps;
3921 SmallVector<const Expr *, 4> LHSs;
3922 SmallVector<const Expr *, 4> RHSs;
3923 SmallVector<const Expr *, 4> CopyArrayElems;
3924 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3925 assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3926 "Only inscan reductions are expected.");
3927 Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
3928 ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
3929 LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
3930 RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
3931 CopyArrayElems.append(in_start: C->copy_array_elems().begin(),
3932 in_end: C->copy_array_elems().end());
3933 }
3934 CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
3935 {
3936 // Emit loop with input phase:
3937 // #pragma omp ...
3938 // for (i: 0..<num_iters>) {
3939 // <input phase>;
3940 // buffer[i] = red;
3941 // }
3942 CGF.OMPFirstScanLoop = true;
3943 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3944 FirstGen(CGF);
3945 }
3946 // #pragma omp barrier // in parallel region
3947 auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems,
3948 &ReductionOps,
3949 &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) {
3950 Action.Enter(CGF);
3951 // Emit prefix reduction:
3952 // #pragma omp master // in parallel region
3953 // for (int k = 0; k <= ceil(log2(n)); ++k)
3954 llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
3955 llvm::BasicBlock *LoopBB = CGF.createBasicBlock(name: "omp.outer.log.scan.body");
3956 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: "omp.outer.log.scan.exit");
3957 llvm::Function *F =
3958 CGF.CGM.getIntrinsic(IID: llvm::Intrinsic::log2, Tys: CGF.DoubleTy);
3959 llvm::Value *Arg =
3960 CGF.Builder.CreateUIToFP(V: OMPScanNumIterations, DestTy: CGF.DoubleTy);
3961 llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(callee: F, args: Arg);
3962 F = CGF.CGM.getIntrinsic(IID: llvm::Intrinsic::ceil, Tys: CGF.DoubleTy);
3963 LogVal = CGF.EmitNounwindRuntimeCall(callee: F, args: LogVal);
3964 LogVal = CGF.Builder.CreateFPToUI(V: LogVal, DestTy: CGF.IntTy);
3965 llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
3966 LHS: OMPScanNumIterations, RHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1));
3967 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: S.getBeginLoc());
3968 CGF.EmitBlock(BB: LoopBB);
3969 auto *Counter = CGF.Builder.CreatePHI(Ty: CGF.IntTy, NumReservedValues: 2);
3970 // size pow2k = 1;
3971 auto *Pow2K = CGF.Builder.CreatePHI(Ty: CGF.SizeTy, NumReservedValues: 2);
3972 Counter->addIncoming(V: llvm::ConstantInt::get(Ty: CGF.IntTy, V: 0), BB: InputBB);
3973 Pow2K->addIncoming(V: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1), BB: InputBB);
3974 // for (size i = n - 1; i >= pow2k; --i)
3975 //   tmp[i] op= tmp[i-pow2k];
3976 llvm::BasicBlock *InnerLoopBB =
3977 CGF.createBasicBlock(name: "omp.inner.log.scan.body");
3978 llvm::BasicBlock *InnerExitBB =
3979 CGF.createBasicBlock(name: "omp.inner.log.scan.exit");
3980 llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(LHS: NMin1, RHS: Pow2K);
3981 CGF.Builder.CreateCondBr(Cond: CmpI, True: InnerLoopBB, False: InnerExitBB);
3982 CGF.EmitBlock(BB: InnerLoopBB);
3983 auto *IVal = CGF.Builder.CreatePHI(Ty: CGF.SizeTy, NumReservedValues: 2);
3984 IVal->addIncoming(V: NMin1, BB: LoopBB);
3985 {
3986 CodeGenFunction::OMPPrivateScope PrivScope(CGF);
3987 auto *ILHS = LHSs.begin();
3988 auto *IRHS = RHSs.begin();
3989 for (const Expr *CopyArrayElem : CopyArrayElems) {
3990 const auto *LHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl());
3991 const auto *RHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl());
3992 Address LHSAddr = Address::invalid();
3993 {
3994 CodeGenFunction::OpaqueValueMapping IdxMapping(
3995 CGF,
3996 cast<OpaqueValueExpr>(
3997 Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
3998 RValue::get(V: IVal));
3999 LHSAddr = CGF.EmitLValue(E: CopyArrayElem).getAddress();
4000 }
4001 PrivScope.addPrivate(LocalVD: LHSVD, Addr: LHSAddr);
4002 Address RHSAddr = Address::invalid();
4003 {
4004 llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(LHS: IVal, RHS: Pow2K);
4005 CodeGenFunction::OpaqueValueMapping IdxMapping(
4006 CGF,
4007 cast<OpaqueValueExpr>(
4008 Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
4009 RValue::get(V: OffsetIVal));
4010 RHSAddr = CGF.EmitLValue(E: CopyArrayElem).getAddress();
4011 }
4012 PrivScope.addPrivate(LocalVD: RHSVD, Addr: RHSAddr);
4013 ++ILHS;
4014 ++IRHS;
4015 }
4016 PrivScope.Privatize();
4017 CGF.CGM.getOpenMPRuntime().emitReduction(
4018 CGF, Loc: S.getEndLoc(), Privates, LHSExprs: LHSs, RHSExprs: RHSs, ReductionOps,
4019 Options: {/*WithNowait=*/true, /*SimpleReduction=*/true,
4020 /*IsPrivateVarReduction*/ {}, .ReductionKind: OMPD_unknown});
4021 }
4022 llvm::Value *NextIVal =
4023 CGF.Builder.CreateNUWSub(LHS: IVal, RHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1));
4024 IVal->addIncoming(V: NextIVal, BB: CGF.Builder.GetInsertBlock());
4025 CmpI = CGF.Builder.CreateICmpUGE(LHS: NextIVal, RHS: Pow2K);
4026 CGF.Builder.CreateCondBr(Cond: CmpI, True: InnerLoopBB, False: InnerExitBB);
4027 CGF.EmitBlock(BB: InnerExitBB);
4028 llvm::Value *Next =
4029 CGF.Builder.CreateNUWAdd(LHS: Counter, RHS: llvm::ConstantInt::get(Ty: CGF.IntTy, V: 1));
4030 Counter->addIncoming(V: Next, BB: CGF.Builder.GetInsertBlock());
4031 // pow2k <<= 1;
4032 llvm::Value *NextPow2K =
4033 CGF.Builder.CreateShl(LHS: Pow2K, RHS: 1, Name: "", /*HasNUW=*/true);
4034 Pow2K->addIncoming(V: NextPow2K, BB: CGF.Builder.GetInsertBlock());
4035 llvm::Value *Cmp = CGF.Builder.CreateICmpNE(LHS: Next, RHS: LogVal);
4036 CGF.Builder.CreateCondBr(Cond: Cmp, True: LoopBB, False: ExitBB);
4037 auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: S.getEndLoc());
4038 CGF.EmitBlock(BB: ExitBB);
4039 };
4040 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
4041 if (isOpenMPParallelDirective(DKind: EKind)) {
4042 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: CodeGen, Loc: S.getBeginLoc());
4043 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
4044 CGF, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false,
4045 /*ForceSimpleCall=*/true);
4046 } else {
4047 RegionCodeGenTy RCG(CodeGen);
4048 RCG(CGF);
4049 }
4050
4051 CGF.OMPFirstScanLoop = false;
4052 SecondGen(CGF);
4053}
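
// Illustrative source lowered by emitScanBasedDirective above (a sketch, not
// from this file): an inclusive '+' scan; 'in', 'out' and 'n' are
// placeholder names:
//   int red = 0;
//   #pragma omp for reduction(inscan, +: red)
//   for (int i = 0; i < n; ++i) {
//     red += in[i];                  // <input phase>
//     #pragma omp scan inclusive(red)
//     out[i] = red;                  // <scan phase>
//   }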
4054
4055static bool emitWorksharingDirective(CodeGenFunction &CGF,
4056 const OMPLoopDirective &S,
4057 bool HasCancel) {
4058 bool HasLastprivates;
4059 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
4060 if (llvm::any_of(Range: S.getClausesOfKind<OMPReductionClause>(),
4061 P: [](const OMPReductionClause *C) {
4062 return C->getModifier() == OMPC_REDUCTION_inscan;
4063 })) {
4064 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4065 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4066 OMPLoopScope LoopScope(CGF, S);
4067 return CGF.EmitScalarExpr(E: S.getNumIterations());
4068 };
4069 const auto &&FirstGen = [&S, HasCancel, EKind](CodeGenFunction &CGF) {
4070 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
4071 (void)CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(),
4072 CodeGenLoopBounds: emitForLoopBounds,
4073 CGDispatchBounds: emitDispatchForLoopBounds);
4074 // Emit an implicit barrier at the end.
4075 CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc: S.getBeginLoc(),
4076 Kind: OMPD_for);
4077 };
4078 const auto &&SecondGen = [&S, HasCancel, EKind,
4079 &HasLastprivates](CodeGenFunction &CGF) {
4080 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
4081 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(),
4082 CodeGenLoopBounds: emitForLoopBounds,
4083 CGDispatchBounds: emitDispatchForLoopBounds);
4084 };
4085 if (!isOpenMPParallelDirective(DKind: EKind))
4086 emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
4087 emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
4088 if (!isOpenMPParallelDirective(DKind: EKind))
4089 emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen);
4090 } else {
4091 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
4092 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(),
4093 CodeGenLoopBounds: emitForLoopBounds,
4094 CGDispatchBounds: emitDispatchForLoopBounds);
4095 }
4096 return HasLastprivates;
4097}
4098
4099// Pass OMPLoopDirective (instead of OMPForDirective) to make this check
4100// available for "loop bind(parallel)", which maps to "for".
4101static bool isForSupportedByOpenMPIRBuilder(const OMPLoopDirective &S,
4102 bool HasCancel) {
4103 if (HasCancel)
4104 return false;
4105 for (OMPClause *C : S.clauses()) {
4106 if (isa<OMPNowaitClause, OMPBindClause>(Val: C))
4107 continue;
4108
4109 if (auto *SC = dyn_cast<OMPScheduleClause>(Val: C)) {
4110 if (SC->getFirstScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
4111 return false;
4112 if (SC->getSecondScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
4113 return false;
4114 switch (SC->getScheduleKind()) {
4115 case OMPC_SCHEDULE_auto:
4116 case OMPC_SCHEDULE_dynamic:
4117 case OMPC_SCHEDULE_runtime:
4118 case OMPC_SCHEDULE_guided:
4119 case OMPC_SCHEDULE_static:
4120 continue;
4121 case OMPC_SCHEDULE_unknown:
4122 return false;
4123 }
4124 }
4125
4126 return false;
4127 }
4128
4129 return true;
4130}
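
// For reference, a sketch of how isForSupportedByOpenMPIRBuilder classifies
// a few directives (illustrative, not exhaustive):
//   #pragma omp for nowait schedule(static, 4)    // supported
//   #pragma omp for schedule(monotonic: dynamic)  // rejected: sched modifier
//   #pragma omp for lastprivate(x)                // rejected: other clause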
4131
4132static llvm::omp::ScheduleKind
4133convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind) {
4134 switch (ScheduleClauseKind) {
4135 case OMPC_SCHEDULE_unknown:
4136 return llvm::omp::OMP_SCHEDULE_Default;
4137 case OMPC_SCHEDULE_auto:
4138 return llvm::omp::OMP_SCHEDULE_Auto;
4139 case OMPC_SCHEDULE_dynamic:
4140 return llvm::omp::OMP_SCHEDULE_Dynamic;
4141 case OMPC_SCHEDULE_guided:
4142 return llvm::omp::OMP_SCHEDULE_Guided;
4143 case OMPC_SCHEDULE_runtime:
4144 return llvm::omp::OMP_SCHEDULE_Runtime;
4145 case OMPC_SCHEDULE_static:
4146 return llvm::omp::OMP_SCHEDULE_Static;
4147 }
4148 llvm_unreachable("Unhandled schedule kind");
4149}
4150
4151// Pass OMPLoopDirective (instead of OMPForDirective) to make this function
4152// available for "loop bind(parallel)", which maps to "for".
4153static void emitOMPForDirective(const OMPLoopDirective &S, CodeGenFunction &CGF,
4154 CodeGenModule &CGM, bool HasCancel) {
4155 bool HasLastprivates = false;
4156 bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder &&
4157 isForSupportedByOpenMPIRBuilder(S, HasCancel);
4158 auto &&CodeGen = [&S, &CGM, HasCancel, &HasLastprivates,
4159 UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) {
4160 // Use the OpenMPIRBuilder if enabled.
4161 if (UseOMPIRBuilder) {
4162 bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>();
4163
4164 llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default;
4165 llvm::Value *ChunkSize = nullptr;
4166 if (auto *SchedClause = S.getSingleClause<OMPScheduleClause>()) {
4167 SchedKind =
4168 convertClauseKindToSchedKind(ScheduleClauseKind: SchedClause->getScheduleKind());
4169 if (const Expr *ChunkSizeExpr = SchedClause->getChunkSize())
4170 ChunkSize = CGF.EmitScalarExpr(E: ChunkSizeExpr);
4171 }
4172
4173 // Emit the associated statement and get its loop representation.
4174 const Stmt *Inner = S.getRawStmt();
4175 llvm::CanonicalLoopInfo *CLI =
4176 CGF.EmitOMPCollapsedCanonicalLoopNest(S: Inner, Depth: 1);
4177
4178 llvm::OpenMPIRBuilder &OMPBuilder =
4179 CGM.getOpenMPRuntime().getOMPBuilder();
4180 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
4181 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
4182 cantFail(ValOrErr: OMPBuilder.applyWorkshareLoop(
4183 DL: CGF.Builder.getCurrentDebugLocation(), CLI, AllocaIP, NeedsBarrier,
4184 SchedKind, ChunkSize, /*HasSimdModifier=*/false,
4185 /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false,
4186 /*HasOrderedClause=*/false));
4187 return;
4188 }
4189
4190 HasLastprivates = emitWorksharingDirective(CGF, S, HasCancel);
4191 };
4192 {
4193 auto LPCRegion =
4194 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
4195 OMPLexicalScope Scope(CGF, S, OMPD_unknown);
4196 CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_for, CodeGen,
4197 HasCancel);
4198 }
4199
4200 if (!UseOMPIRBuilder) {
4201 // Emit an implicit barrier at the end.
4202 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
4203 CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc: S.getBeginLoc(), Kind: OMPD_for);
4204 }
4205 // Check for outer lastprivate conditional update.
4206 checkForLastprivateConditionalUpdate(CGF, S);
4207}
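
// Illustrative directive that takes the OpenMPIRBuilder path in
// emitOMPForDirective when LangOpts.OpenMPIRBuilder is set (a sketch; 'n'
// and 'body' are placeholders). The schedule kind is translated via
// convertClauseKindToSchedKind and the chunk expression becomes ChunkSize:
//   #pragma omp for schedule(dynamic, 8) nowait
//   for (int i = 0; i < n; ++i)
//     body(i);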
4208
4209void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
4210 return emitOMPForDirective(S, CGF&: *this, CGM, HasCancel: S.hasCancel());
4211}
4212
4213void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
4214 bool HasLastprivates = false;
4215 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
4216 PrePostActionTy &) {
4217 HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
4218 };
4219 {
4220 auto LPCRegion =
4221 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
4222 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4223 CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_simd, CodeGen);
4224 }
4225
4226 // Emit an implicit barrier at the end.
4227 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
4228 CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_for);
4229 // Check for outer lastprivate conditional update.
4230 checkForLastprivateConditionalUpdate(CGF&: *this, S);
4231}
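
// Illustrative 'for simd' handled above (a sketch; 'a', 'b' and 'n' are
// placeholders). Cancellation cannot apply to 'for simd', hence the
// hard-coded HasCancel=false:
//   #pragma omp for simd
//   for (int i = 0; i < n; ++i)
//     a[i] += b[i];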
4232
4233static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
4234 const Twine &Name,
4235 llvm::Value *Init = nullptr) {
4236 LValue LVal = CGF.MakeAddrLValue(Addr: CGF.CreateMemTemp(T: Ty, Name), T: Ty);
4237 if (Init)
4238 CGF.EmitStoreThroughLValue(Src: RValue::get(V: Init), Dst: LVal, /*isInit*/ true);
4239 return LVal;
4240}
4241
4242void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
4243 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
4244 const auto *CS = dyn_cast<CompoundStmt>(Val: CapturedStmt);
4245 bool HasLastprivates = false;
4246 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
4247 auto &&CodeGen = [&S, CapturedStmt, CS, EKind,
4248 &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
4249 const ASTContext &C = CGF.getContext();
4250 QualType KmpInt32Ty =
4251 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4252 // Emit helper vars inits.
4253 LValue LB = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.lb.",
4254 Init: CGF.Builder.getInt32(C: 0));
4255 llvm::ConstantInt *GlobalUBVal = CS != nullptr
4256 ? CGF.Builder.getInt32(C: CS->size() - 1)
4257 : CGF.Builder.getInt32(C: 0);
4258 LValue UB =
4259 createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.ub.", Init: GlobalUBVal);
4260 LValue ST = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.st.",
4261 Init: CGF.Builder.getInt32(C: 1));
4262 LValue IL = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.il.",
4263 Init: CGF.Builder.getInt32(C: 0));
4264 // Loop counter.
4265 LValue IV = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.iv.");
4266 OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
4267 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
4268 OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
4269 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
4270 // Generate condition for loop.
4271 BinaryOperator *Cond = BinaryOperator::Create(
4272 C, lhs: &IVRefExpr, rhs: &UBRefExpr, opc: BO_LE, ResTy: C.BoolTy, VK: VK_PRValue, OK: OK_Ordinary,
4273 opLoc: S.getBeginLoc(), FPFeatures: FPOptionsOverride());
4274 // Increment for loop counter.
4275 UnaryOperator *Inc = UnaryOperator::Create(
4276 C, input: &IVRefExpr, opc: UO_PreInc, type: KmpInt32Ty, VK: VK_PRValue, OK: OK_Ordinary,
4277 l: S.getBeginLoc(), CanOverflow: true, FPFeatures: FPOptionsOverride());
4278 auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
4279 // Iterate through all sections and emit a switch construct:
4280 // switch (IV) {
4281 // case 0:
4282 // <SectionStmt[0]>;
4283 // break;
4284 // ...
4285 // case <NumSection> - 1:
4286 // <SectionStmt[<NumSection> - 1]>;
4287 // break;
4288 // }
4289 // .omp.sections.exit:
4290 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: ".omp.sections.exit");
4291 llvm::SwitchInst *SwitchStmt =
4292 CGF.Builder.CreateSwitch(V: CGF.EmitLoadOfScalar(lvalue: IV, Loc: S.getBeginLoc()),
4293 Dest: ExitBB, NumCases: CS == nullptr ? 1 : CS->size());
4294 if (CS) {
4295 unsigned CaseNumber = 0;
4296 for (const Stmt *SubStmt : CS->children()) {
4297 auto CaseBB = CGF.createBasicBlock(name: ".omp.sections.case");
4298 CGF.EmitBlock(BB: CaseBB);
4299 SwitchStmt->addCase(OnVal: CGF.Builder.getInt32(C: CaseNumber), Dest: CaseBB);
4300 CGF.EmitStmt(S: SubStmt);
4301 CGF.EmitBranch(Block: ExitBB);
4302 ++CaseNumber;
4303 }
4304 } else {
4305 llvm::BasicBlock *CaseBB = CGF.createBasicBlock(name: ".omp.sections.case");
4306 CGF.EmitBlock(BB: CaseBB);
4307 SwitchStmt->addCase(OnVal: CGF.Builder.getInt32(C: 0), Dest: CaseBB);
4308 CGF.EmitStmt(S: CapturedStmt);
4309 CGF.EmitBranch(Block: ExitBB);
4310 }
4311 CGF.EmitBlock(BB: ExitBB, /*IsFinished=*/true);
4312 };
4313
4314 CodeGenFunction::OMPPrivateScope LoopScope(CGF);
4315 if (CGF.EmitOMPFirstprivateClause(D: S, PrivateScope&: LoopScope)) {
4316 // Emit implicit barrier to synchronize threads and avoid data races on
4317 // initialization of firstprivate variables and post-update of lastprivate
4318 // variables.
4319 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
4320 CGF, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false,
4321 /*ForceSimpleCall=*/true);
4322 }
4323 CGF.EmitOMPPrivateClause(D: S, PrivateScope&: LoopScope);
4324 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
4325 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope);
4326 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope&: LoopScope);
4327 (void)LoopScope.Privatize();
4328 if (isOpenMPTargetExecutionDirective(DKind: EKind))
4329 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S);
4330
4331 // Emit static non-chunked loop.
4332 OpenMPScheduleTy ScheduleKind;
4333 ScheduleKind.Schedule = OMPC_SCHEDULE_static;
4334 CGOpenMPRuntime::StaticRTInput StaticInit(
4335 /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
4336 LB.getAddress(), UB.getAddress(), ST.getAddress());
4337 CGF.CGM.getOpenMPRuntime().emitForStaticInit(CGF, Loc: S.getBeginLoc(), DKind: EKind,
4338 ScheduleKind, Values: StaticInit);
4339 // UB = min(UB, GlobalUB);
4340 llvm::Value *UBVal = CGF.EmitLoadOfScalar(lvalue: UB, Loc: S.getBeginLoc());
4341 llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
4342 C: CGF.Builder.CreateICmpSLT(LHS: UBVal, RHS: GlobalUBVal), True: UBVal, False: GlobalUBVal);
4343 CGF.EmitStoreOfScalar(value: MinUBGlobalUB, lvalue: UB);
4344 // IV = LB;
4345 CGF.EmitStoreOfScalar(value: CGF.EmitLoadOfScalar(lvalue: LB, Loc: S.getBeginLoc()), lvalue: IV);
4346 // while (idx <= UB) { BODY; ++idx; }
4347 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, LoopCond: Cond, IncExpr: Inc, BodyGen,
4348 PostIncGen: [](CodeGenFunction &) {});
4349 // Tell the runtime we are done.
4350 auto &&CodeGen = [&S](CodeGenFunction &CGF) {
4351 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, Loc: S.getEndLoc(),
4352 DKind: OMPD_sections);
4353 };
4354 CGF.OMPCancelStack.emitExit(CGF, Kind: EKind, CodeGen);
4355 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
4356 // Emit post-update of the reduction variables if IsLastIter != 0.
4357 emitPostUpdateForReductionClause(CGF, D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
4358 return CGF.Builder.CreateIsNotNull(
4359 Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
4360 });
4361
4362 // Emit final copy of the lastprivate variables if IsLastIter != 0.
4363 if (HasLastprivates)
4364 CGF.EmitOMPLastprivateClauseFinal(
4365 D: S, /*NoFinals=*/false,
4366 IsLastIterCond: CGF.Builder.CreateIsNotNull(
4367 Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc())));
4368 };
4369
4370 bool HasCancel = false;
4371 if (auto *OSD = dyn_cast<OMPSectionsDirective>(Val: &S))
4372 HasCancel = OSD->hasCancel();
4373 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(Val: &S))
4374 HasCancel = OPSD->hasCancel();
4375 OMPCancelStackRAII CancelRegion(*this, EKind, HasCancel);
4376 CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_sections, CodeGen,
4377 HasCancel);
4378 // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
4379 // clause. Otherwise the barrier will be generated by the codegen for the
4380 // directive.
4381 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
4382 // Emit implicit barrier to synchronize threads and avoid data races on
4383 // initialization of firstprivate variables.
4384 CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(),
4385 Kind: OMPD_unknown);
4386 }
4387}
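
// Illustrative input for EmitSections (a sketch; 'foo' and 'bar' are
// placeholders). Each '#pragma omp section' becomes one case of the switch
// emitted in BodyGen, and the static worksharing loop distributes the case
// indices across the threads:
//   #pragma omp sections
//   {
//     #pragma omp section
//     foo();
//     #pragma omp section
//     bar();
//   }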
4388
4389void CodeGenFunction::EmitOMPScopeDirective(const OMPScopeDirective &S) {
4390 {
4391 // Emit code for 'scope' region
4392 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4393 Action.Enter(CGF);
4394 OMPPrivateScope PrivateScope(CGF);
4395 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
4396 CGF.EmitOMPPrivateClause(D: S, PrivateScope);
4397 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
4398 (void)PrivateScope.Privatize();
4399 CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
4400 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
4401 };
4402 auto LPCRegion =
4403 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
4404 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4405 CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_scope, CodeGen);
4406 }
4407 // Emit an implicit barrier at the end.
4408 if (!S.getSingleClause<OMPNowaitClause>()) {
4409 CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_scope);
4410 }
4411 // Check for outer lastprivate conditional update.
4412 checkForLastprivateConditionalUpdate(CGF&: *this, S);
4413}
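
// Illustrative 'scope' construct handled above (a sketch; 'work' and 'sum'
// are placeholders). Privatization and reductions apply, and the implicit
// barrier is emitted unless 'nowait' is present:
//   int t;
//   #pragma omp scope private(t) reduction(+: sum)
//   {
//     t = work();
//     sum += t;
//   }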
4414
4415void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
4416 if (CGM.getLangOpts().OpenMPIRBuilder) {
4417 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4418 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4419 using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
4420
4421 auto FiniCB = [](InsertPointTy IP) {
4422 // Don't call FinalizeOMPRegion here; for sections this is done inside
4423 // the OMPIRBuilder.
4424 return llvm::Error::success();
4425 };
4426
4427 const CapturedStmt *ICS = S.getInnermostCapturedStmt();
4428 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
4429 const auto *CS = dyn_cast<CompoundStmt>(Val: CapturedStmt);
4430 llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
4431 if (CS) {
4432 for (const Stmt *SubStmt : CS->children()) {
4433 auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP,
4434 InsertPointTy CodeGenIP) {
4435 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4436 CGF&: *this, RegionBodyStmt: SubStmt, AllocaIP, CodeGenIP, RegionName: "section");
4437 return llvm::Error::success();
4438 };
4439 SectionCBVector.push_back(Elt: SectionCB);
4440 }
4441 } else {
4442 auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP,
4443 InsertPointTy CodeGenIP) {
4444 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4445 CGF&: *this, RegionBodyStmt: CapturedStmt, AllocaIP, CodeGenIP, RegionName: "section");
4446 return llvm::Error::success();
4447 };
4448 SectionCBVector.push_back(Elt: SectionCB);
4449 }
4450
4451 // Privatization callback that performs appropriate action for
4452 // shared/private/firstprivate/lastprivate/copyin/... variables.
4453 //
4454 // TODO: This defaults to shared right now.
4455 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
4456 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
4457 // The next line is appropriate only for variables (Val) with the
4458 // data-sharing attribute "shared".
4459 ReplVal = &Val;
4460
4461 return CodeGenIP;
4462 };
4463
4464 CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP);
4465 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
4466 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
4467 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
4468 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
4469 cantFail(ValOrErr: OMPBuilder.createSections(
4470 Loc: Builder, AllocaIP, SectionCBs: SectionCBVector, PrivCB, FiniCB, IsCancellable: S.hasCancel(),
4471 IsNowait: S.getSingleClause<OMPNowaitClause>()));
4472 Builder.restoreIP(IP: AfterIP);
4473 return;
4474 }
4475 {
4476 auto LPCRegion =
4477 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
4478 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4479 EmitSections(S);
4480 }
4481 // Emit an implicit barrier at the end.
4482 if (!S.getSingleClause<OMPNowaitClause>()) {
4483 CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(),
4484 Kind: OMPD_sections);
4485 }
4486 // Check for outer lastprivate conditional update.
4487 checkForLastprivateConditionalUpdate(CGF&: *this, S);
4488}
4489
4490void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
4491 if (CGM.getLangOpts().OpenMPIRBuilder) {
4492 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4493 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4494
4495 const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
4496 auto FiniCB = [this](InsertPointTy IP) {
4497 OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
4498 return llvm::Error::success();
4499 };
4500
4501 auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
4502 InsertPointTy CodeGenIP) {
4503 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4504 CGF&: *this, RegionBodyStmt: SectionRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "section");
4505 return llvm::Error::success();
4506 };
4507
4508 LexicalScope Scope(*this, S.getSourceRange());
4509 EmitStopPoint(S: &S);
4510 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
4511 cantFail(ValOrErr: OMPBuilder.createSection(Loc: Builder, BodyGenCB, FiniCB));
4512 Builder.restoreIP(IP: AfterIP);
4513
4514 return;
4515 }
4516 LexicalScope Scope(*this, S.getSourceRange());
4517 EmitStopPoint(S: &S);
4518 EmitStmt(S: S.getAssociatedStmt());
4519}
4520
4521void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
4522 llvm::SmallVector<const Expr *, 8> CopyprivateVars;
4523 llvm::SmallVector<const Expr *, 8> DestExprs;
4524 llvm::SmallVector<const Expr *, 8> SrcExprs;
4525 llvm::SmallVector<const Expr *, 8> AssignmentOps;
4526 // Check if there are any 'copyprivate' clauses associated with this
4527 // 'single' construct.
4528 // Build a list of copyprivate variables along with helper expressions:
4529 // <source>, <destination>, and <destination> = <source> assignments.
4530 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
4531 CopyprivateVars.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
4532 DestExprs.append(in_start: C->destination_exprs().begin(),
4533 in_end: C->destination_exprs().end());
4534 SrcExprs.append(in_start: C->source_exprs().begin(), in_end: C->source_exprs().end());
4535 AssignmentOps.append(in_start: C->assignment_ops().begin(),
4536 in_end: C->assignment_ops().end());
4537 }
4538 // Emit code for 'single' region along with 'copyprivate' clauses
4539 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4540 Action.Enter(CGF);
4541 OMPPrivateScope SingleScope(CGF);
4542 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope&: SingleScope);
4543 CGF.EmitOMPPrivateClause(D: S, PrivateScope&: SingleScope);
4544 (void)SingleScope.Privatize();
4545 CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
4546 };
4547 {
4548 auto LPCRegion =
4549 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
4550 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4551 CGM.getOpenMPRuntime().emitSingleRegion(CGF&: *this, SingleOpGen: CodeGen, Loc: S.getBeginLoc(),
4552 CopyprivateVars, DestExprs,
4553 SrcExprs, AssignmentOps);
4554 }
4555 // Emit an implicit barrier at the end (needed to avoid data races on
4556 // firstprivate init) unless 'nowait' was specified or 'copyprivate' is used.
4557 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
4558 CGM.getOpenMPRuntime().emitBarrierCall(
4559 CGF&: *this, Loc: S.getBeginLoc(),
4560 Kind: S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
4561 }
4562 // Check for outer lastprivate conditional update.
4563 checkForLastprivateConditionalUpdate(CGF&: *this, S);
4564}
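
// Illustrative 'single' with 'copyprivate' handled above (a sketch;
// 'read_input' is a placeholder). One thread executes the region and the
// runtime broadcasts 'a' to the private copies of the other threads, which
// is why no extra barrier is emitted when 'copyprivate' is present:
//   #pragma omp parallel
//   {
//     int a; // private to each thread
//     #pragma omp single copyprivate(a)
//     a = read_input();
//     // every thread now sees the same value of 'a'
//   }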
4565
4566static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
4567 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4568 Action.Enter(CGF);
4569 CGF.EmitStmt(S: S.getRawStmt());
4570 };
4571 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: CodeGen, Loc: S.getBeginLoc());
4572}
4573
4574void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
4575 if (CGM.getLangOpts().OpenMPIRBuilder) {
4576 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4577 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4578
4579 const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt();
4580
4581 auto FiniCB = [this](InsertPointTy IP) {
4582 OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
4583 return llvm::Error::success();
4584 };
4585
4586 auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
4587 InsertPointTy CodeGenIP) {
4588 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4589 CGF&: *this, RegionBodyStmt: MasterRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "master");
4590 return llvm::Error::success();
4591 };
4592
4593 LexicalScope Scope(*this, S.getSourceRange());
4594 EmitStopPoint(S: &S);
4595 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
4596 cantFail(ValOrErr: OMPBuilder.createMaster(Loc: Builder, BodyGenCB, FiniCB));
4597 Builder.restoreIP(IP: AfterIP);
4598
4599 return;
4600 }
4601 LexicalScope Scope(*this, S.getSourceRange());
4602 EmitStopPoint(S: &S);
4603 emitMaster(CGF&: *this, S);
4604}
4605
4606static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
4607 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4608 Action.Enter(CGF);
4609 CGF.EmitStmt(S: S.getRawStmt());
4610 };
4611 Expr *Filter = nullptr;
4612 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
4613 Filter = FilterClause->getThreadID();
4614 CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, MaskedOpGen: CodeGen, Loc: S.getBeginLoc(),
4615 Filter);
4616}
4617
4618void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) {
4619 if (CGM.getLangOpts().OpenMPIRBuilder) {
4620 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4621 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4622
4623 const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt();
4624 const Expr *Filter = nullptr;
4625 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
4626 Filter = FilterClause->getThreadID();
4627 llvm::Value *FilterVal = Filter
4628 ? EmitScalarExpr(E: Filter, IgnoreResultAssign: CGM.Int32Ty)
4629 : llvm::ConstantInt::get(Ty: CGM.Int32Ty, /*V=*/0);
4630
4631 auto FiniCB = [this](InsertPointTy IP) {
4632 OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
4633 return llvm::Error::success();
4634 };
4635
4636 auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP,
4637 InsertPointTy CodeGenIP) {
4638 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4639 CGF&: *this, RegionBodyStmt: MaskedRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "masked");
4640 return llvm::Error::success();
4641 };
4642
4643 LexicalScope Scope(*this, S.getSourceRange());
4644 EmitStopPoint(S: &S);
4645 llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(
4646 ValOrErr: OMPBuilder.createMasked(Loc: Builder, BodyGenCB, FiniCB, Filter: FilterVal));
4647 Builder.restoreIP(IP: AfterIP);
4648
4649 return;
4650 }
4651 LexicalScope Scope(*this, S.getSourceRange());
4652 EmitStopPoint(S: &S);
4653 emitMasked(CGF&: *this, S);
4654}
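
// Illustrative 'masked' construct handled above (a sketch; 'log_step' is a
// placeholder). Only the thread whose id matches the 'filter' expression
// executes the region; without a 'filter' clause the value defaults to 0,
// i.e. the primary thread:
//   #pragma omp masked filter(2)
//   log_step();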
4655
4656void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
4657 if (CGM.getLangOpts().OpenMPIRBuilder) {
4658 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4659 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4660
4661 const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt();
4662 const Expr *Hint = nullptr;
4663 if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
4664 Hint = HintClause->getHint();
4665
4666 // TODO: This is slightly different from what's currently being done in
4667 // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
4668 // about typing is final.
4669 llvm::Value *HintInst = nullptr;
4670 if (Hint)
4671 HintInst =
4672 Builder.CreateIntCast(V: EmitScalarExpr(E: Hint), DestTy: CGM.Int32Ty, isSigned: false);
4673
4674 auto FiniCB = [this](InsertPointTy IP) {
4675 OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
4676 return llvm::Error::success();
4677 };
4678
4679 auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
4680 InsertPointTy CodeGenIP) {
4681 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4682 CGF&: *this, RegionBodyStmt: CriticalRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "critical");
4683 return llvm::Error::success();
4684 };
4685
4686 LexicalScope Scope(*this, S.getSourceRange());
4687 EmitStopPoint(S: &S);
4688 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
4689 cantFail(ValOrErr: OMPBuilder.createCritical(Loc: Builder, BodyGenCB, FiniCB,
4690 CriticalName: S.getDirectiveName().getAsString(),
4691 HintInst));
4692 Builder.restoreIP(IP: AfterIP);
4693
4694 return;
4695 }
4696
4697 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4698 Action.Enter(CGF);
4699 CGF.EmitStmt(S: S.getAssociatedStmt());
4700 };
4701 const Expr *Hint = nullptr;
4702 if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
4703 Hint = HintClause->getHint();
4704 LexicalScope Scope(*this, S.getSourceRange());
4705 EmitStopPoint(S: &S);
4706 CGM.getOpenMPRuntime().emitCriticalRegion(CGF&: *this,
4707 CriticalName: S.getDirectiveName().getAsString(),
4708 CriticalOpGen: CodeGen, Loc: S.getBeginLoc(), Hint);
4709}
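
// Illustrative named 'critical' with a 'hint' handled above (a sketch;
// 'counter' is a placeholder). As the TODO above notes, the hint value is
// currently truncated to 32 bits:
//   #pragma omp critical(update) hint(omp_sync_hint_contended)
//   ++counter;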
4710
4711void CodeGenFunction::EmitOMPParallelForDirective(
4712 const OMPParallelForDirective &S) {
4713 // Emit the directive as a combined directive consisting of two implicit
4714 // directives: 'parallel' and 'for'.
4715 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4716 Action.Enter(CGF);
4717 emitOMPCopyinClause(CGF, S);
4718 (void)emitWorksharingDirective(CGF, S, HasCancel: S.hasCancel());
4719 };
4720 {
4721 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4722 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4723 CGCapturedStmtInfo CGSI(CR_OpenMP);
4724 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4725 OMPLoopScope LoopScope(CGF, S);
4726 return CGF.EmitScalarExpr(E: S.getNumIterations());
4727 };
4728 bool IsInscan = llvm::any_of(Range: S.getClausesOfKind<OMPReductionClause>(),
4729 P: [](const OMPReductionClause *C) {
4730 return C->getModifier() == OMPC_REDUCTION_inscan;
4731 });
4732 if (IsInscan)
4733 emitScanBasedDirectiveDecls(CGF&: *this, S, NumIteratorsGen);
4734 auto LPCRegion =
4735 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
4736 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_for, CodeGen,
4737 CodeGenBoundParameters: emitEmptyBoundParameters);
4738 if (IsInscan)
4739 emitScanBasedDirectiveFinals(CGF&: *this, S, NumIteratorsGen);
4740 }
4741 // Check for outer lastprivate conditional update.
4742 checkForLastprivateConditionalUpdate(CGF&: *this, S);
4743}
4744
4745void CodeGenFunction::EmitOMPParallelForSimdDirective(
4746 const OMPParallelForSimdDirective &S) {
4747 // Emit the directive as a combined directive consisting of two implicit
4748 // directives: 'parallel' and 'for simd'.
4749 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4750 Action.Enter(CGF);
4751 emitOMPCopyinClause(CGF, S);
4752 (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
4753 };
4754 {
4755 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4756 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4757 CGCapturedStmtInfo CGSI(CR_OpenMP);
4758 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4759 OMPLoopScope LoopScope(CGF, S);
4760 return CGF.EmitScalarExpr(E: S.getNumIterations());
4761 };
4762 bool IsInscan = llvm::any_of(Range: S.getClausesOfKind<OMPReductionClause>(),
4763 P: [](const OMPReductionClause *C) {
4764 return C->getModifier() == OMPC_REDUCTION_inscan;
4765 });
4766 if (IsInscan)
4767 emitScanBasedDirectiveDecls(CGF&: *this, S, NumIteratorsGen);
4768 auto LPCRegion =
4769 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
4770 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_for_simd, CodeGen,
4771 CodeGenBoundParameters: emitEmptyBoundParameters);
4772 if (IsInscan)
4773 emitScanBasedDirectiveFinals(CGF&: *this, S, NumIteratorsGen);
4774 }
4775 // Check for outer lastprivate conditional update.
4776 checkForLastprivateConditionalUpdate(CGF&: *this, S);
4777}
4778
4779void CodeGenFunction::EmitOMPParallelMasterDirective(
4780 const OMPParallelMasterDirective &S) {
4781 // Emit the directive as a combined directive consisting of two implicit
4782 // directives: 'parallel' and 'master'.
4783 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4784 Action.Enter(CGF);
4785 OMPPrivateScope PrivateScope(CGF);
4786 emitOMPCopyinClause(CGF, S);
4787 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
4788 CGF.EmitOMPPrivateClause(D: S, PrivateScope);
4789 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
4790 (void)PrivateScope.Privatize();
4791 emitMaster(CGF, S);
4792 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
4793 };
4794 {
4795 auto LPCRegion =
4796 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
4797 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_master, CodeGen,
4798 CodeGenBoundParameters: emitEmptyBoundParameters);
4799 emitPostUpdateForReductionClause(CGF&: *this, D: S,
4800 CondGen: [](CodeGenFunction &) { return nullptr; });
4801 }
4802 // Check for outer lastprivate conditional update.
4803 checkForLastprivateConditionalUpdate(CGF&: *this, S);
4804}
4805
4806void CodeGenFunction::EmitOMPParallelMaskedDirective(
4807 const OMPParallelMaskedDirective &S) {
4808 // Emit the directive as a combined directive consisting of two implicit
4809 // directives: 'parallel' and 'masked'.
4810 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4811 Action.Enter(CGF);
4812 OMPPrivateScope PrivateScope(CGF);
4813 emitOMPCopyinClause(CGF, S);
4814 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
4815 CGF.EmitOMPPrivateClause(D: S, PrivateScope);
4816 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
4817 (void)PrivateScope.Privatize();
4818 emitMasked(CGF, S);
4819 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
4820 };
4821 {
4822 auto LPCRegion =
4823 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
4824 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_masked, CodeGen,
4825 CodeGenBoundParameters: emitEmptyBoundParameters);
4826 emitPostUpdateForReductionClause(CGF&: *this, D: S,
4827 CondGen: [](CodeGenFunction &) { return nullptr; });
4828 }
4829 // Check for outer lastprivate conditional update.
4830 checkForLastprivateConditionalUpdate(CGF&: *this, S);
4831}
4832
4833void CodeGenFunction::EmitOMPParallelSectionsDirective(
4834 const OMPParallelSectionsDirective &S) {
4835 // Emit the directive as a combined directive consisting of two implicit
4836 // directives: 'parallel' and 'sections'.
4837 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4838 Action.Enter(CGF);
4839 emitOMPCopyinClause(CGF, S);
4840 CGF.EmitSections(S);
4841 };
4842 {
4843 auto LPCRegion =
4844 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
4845 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_sections, CodeGen,
4846 CodeGenBoundParameters: emitEmptyBoundParameters);
4847 }
4848 // Check for outer lastprivate conditional update.
4849 checkForLastprivateConditionalUpdate(CGF&: *this, S);
4850}
4851
4852namespace {
4853/// Get the list of variables declared in the context of the untied tasks.
4854class CheckVarsEscapingUntiedTaskDeclContext final
4855 : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> {
4856 llvm::SmallVector<const VarDecl *, 4> PrivateDecls;
4857
4858public:
4859 explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
4860 ~CheckVarsEscapingUntiedTaskDeclContext() = default;
4861 void VisitDeclStmt(const DeclStmt *S) {
4862 if (!S)
4863 return;
4864 // Only local vars need privatizing; static locals can be processed as is.
4865 for (const Decl *D : S->decls()) {
4866 if (const auto *VD = dyn_cast_or_null<VarDecl>(Val: D))
4867 if (VD->hasLocalStorage())
4868 PrivateDecls.push_back(Elt: VD);
4869 }
4870 }
4871 void VisitOMPExecutableDirective(const OMPExecutableDirective *) {}
4872 void VisitCapturedStmt(const CapturedStmt *) {}
4873 void VisitLambdaExpr(const LambdaExpr *) {}
4874 void VisitBlockExpr(const BlockExpr *) {}
4875 void VisitStmt(const Stmt *S) {
4876 if (!S)
4877 return;
4878 for (const Stmt *Child : S->children())
4879 if (Child)
4880 Visit(S: Child);
4881 }
4882
4883 /// Returns the list of collected private variables.
4884 ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
4885};
4886} // anonymous namespace
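
// Illustrative untied task for which the visitor above collects 'tmp' (a
// sketch; 'compute' and 'use' are placeholders). An untied task may resume
// on a different thread after a task scheduling point, so 'tmp' cannot live
// in the starting thread's stack frame and must be privatized:
//   #pragma omp task untied
//   {
//     int tmp = compute();
//     #pragma omp taskyield
//     use(tmp); // may run on a different thread
//   }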
4887
4888static void buildDependences(const OMPExecutableDirective &S,
4889 OMPTaskDataTy &Data) {
4890
4891 // Look for 'omp_all_memory'; if present, it must be added first.
4892 bool OmpAllMemory = false;
4893 if (llvm::any_of(
4894 Range: S.getClausesOfKind<OMPDependClause>(), P: [](const OMPDependClause *C) {
4895 return C->getDependencyKind() == OMPC_DEPEND_outallmemory ||
4896 C->getDependencyKind() == OMPC_DEPEND_inoutallmemory;
4897 })) {
4898 OmpAllMemory = true;
4899 // Since OMPC_DEPEND_outallmemory and OMPC_DEPEND_inoutallmemory are
4900 // equivalent as far as the runtime is concerned, always use
4901 // OMPC_DEPEND_outallmemory to simplify the codegen.
4902 OMPTaskDataTy::DependData &DD =
4903 Data.Dependences.emplace_back(Args: OMPC_DEPEND_outallmemory,
4904 /*IteratorExpr=*/Args: nullptr);
4905 // Add a nullptr Expr to simplify the codegen in emitDependData.
4906 DD.DepExprs.push_back(Elt: nullptr);
4907 }
4908 // Add the remaining dependences, skipping any 'out' or 'inout' dependences
4909 // that are overridden by 'omp_all_memory'.
4910 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
4911 OpenMPDependClauseKind Kind = C->getDependencyKind();
4912 if (Kind == OMPC_DEPEND_outallmemory || Kind == OMPC_DEPEND_inoutallmemory)
4913 continue;
4914 if (OmpAllMemory && (Kind == OMPC_DEPEND_out || Kind == OMPC_DEPEND_inout))
4915 continue;
4916 OMPTaskDataTy::DependData &DD =
4917 Data.Dependences.emplace_back(Args: C->getDependencyKind(), Args: C->getModifier());
4918 DD.DepExprs.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
4919 }
4920}
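
// Illustrative depend clauses and how buildDependences records them (a
// sketch; 'x' and 'y' are placeholders):
//   #pragma omp task depend(inout: omp_all_memory) depend(in: x) \
//                    depend(out: y)
// 'omp_all_memory' is recorded first as a single outallmemory dependence,
// 'in: x' is kept, and 'out: y' is dropped because it is subsumed by
// 'omp_all_memory'.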
4921
4922void CodeGenFunction::EmitOMPTaskBasedDirective(
4923 const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
4924 const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
4925 OMPTaskDataTy &Data) {
4926 // Emit outlined function for task construct.
4927 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: CapturedRegion);
4928 auto I = CS->getCapturedDecl()->param_begin();
4929 auto PartId = std::next(x: I);
4930 auto TaskT = std::next(x: I, n: 4);
4931 // Check if the task is final
4932 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
4933 // If the condition constant folds and can be elided, try to avoid emitting
4934 // the condition and the dead arm of the if/else.
4935 const Expr *Cond = Clause->getCondition();
4936 bool CondConstant;
4937 if (ConstantFoldsToSimpleInteger(Cond, Result&: CondConstant))
4938 Data.Final.setInt(CondConstant);
4939 else
4940 Data.Final.setPointer(EvaluateExprAsBool(E: Cond));
4941 } else {
4942 // By default the task is not final.
4943 Data.Final.setInt(/*IntVal=*/false);
4944 }
4945 // Check if the task has 'priority' clause.
4946 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
4947 const Expr *Prio = Clause->getPriority();
4948 Data.Priority.setInt(/*IntVal=*/true);
4949 Data.Priority.setPointer(EmitScalarConversion(
4950 Src: EmitScalarExpr(E: Prio), SrcTy: Prio->getType(),
4951 DstTy: getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
4952 Loc: Prio->getExprLoc()));
4953 }
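
 // Illustrative clauses handled by the two blocks above (a sketch; 'depth'
 // is a placeholder):
 //   #pragma omp task final(depth > 16) priority(2)
 // 'final' is constant-folded when possible; 'priority' is converted to a
 // 32-bit signed int.
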
4954 // The first function argument for tasks is a thread id, the second one is a
4955 // part id (0 for tied tasks, >=0 for untied tasks).
4956 llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
4957 // Get list of private variables.
4958 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
4959 auto IRef = C->varlist_begin();
4960 for (const Expr *IInit : C->private_copies()) {
4961 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
4962 if (EmittedAsPrivate.insert(V: OrigVD->getCanonicalDecl()).second) {
4963 Data.PrivateVars.push_back(Elt: *IRef);
4964 Data.PrivateCopies.push_back(Elt: IInit);
4965 }
4966 ++IRef;
4967 }
4968 }
4969 EmittedAsPrivate.clear();
4970 // Get list of firstprivate variables.
4971 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
4972 auto IRef = C->varlist_begin();
4973 auto IElemInitRef = C->inits().begin();
4974 for (const Expr *IInit : C->private_copies()) {
4975 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
4976 if (EmittedAsPrivate.insert(V: OrigVD->getCanonicalDecl()).second) {
4977 Data.FirstprivateVars.push_back(Elt: *IRef);
4978 Data.FirstprivateCopies.push_back(Elt: IInit);
4979 Data.FirstprivateInits.push_back(Elt: *IElemInitRef);
4980 }
4981 ++IRef;
4982 ++IElemInitRef;
4983 }
4984 }
4985 // Get list of lastprivate variables (for taskloops).
4986 llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
4987 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
4988 auto IRef = C->varlist_begin();
4989 auto ID = C->destination_exprs().begin();
4990 for (const Expr *IInit : C->private_copies()) {
4991 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
4992 if (EmittedAsPrivate.insert(V: OrigVD->getCanonicalDecl()).second) {
4993 Data.LastprivateVars.push_back(Elt: *IRef);
4994 Data.LastprivateCopies.push_back(Elt: IInit);
4995 }
4996 LastprivateDstsOrigs.insert(
4997 KV: std::make_pair(x: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ID)->getDecl()),
4998 y: cast<DeclRefExpr>(Val: *IRef)));
4999 ++IRef;
5000 ++ID;
5001 }
5002 }
5003 SmallVector<const Expr *, 4> LHSs;
5004 SmallVector<const Expr *, 4> RHSs;
5005 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
5006 Data.ReductionVars.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
5007 Data.ReductionOrigs.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
5008 Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end());
5009 Data.ReductionOps.append(in_start: C->reduction_ops().begin(),
5010 in_end: C->reduction_ops().end());
5011 LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
5012 RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
5013 }
5014 Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
5015 CGF&: *this, Loc: S.getBeginLoc(), LHSExprs: LHSs, RHSExprs: RHSs, Data);
5016 // Build list of dependences.
5017 buildDependences(S, Data);
5018 // Get list of local vars for untied tasks.
5019 if (!Data.Tied) {
5020 CheckVarsEscapingUntiedTaskDeclContext Checker;
5021 Checker.Visit(S: S.getInnermostCapturedStmt()->getCapturedStmt());
5022 Data.PrivateLocals.append(in_start: Checker.getPrivateDecls().begin(),
5023 in_end: Checker.getPrivateDecls().end());
5024 }
5025 auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
5026 CapturedRegion](CodeGenFunction &CGF,
5027 PrePostActionTy &Action) {
5028 llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
5029 std::pair<Address, Address>>
5030 UntiedLocalVars;
5031 // Set proper addresses for generated private copies.
5032 OMPPrivateScope Scope(CGF);
5033 // Generate debug info for variables present in shared clause.
5034 if (auto *DI = CGF.getDebugInfo()) {
5035 llvm::SmallDenseMap<const VarDecl *, FieldDecl *> CaptureFields =
5036 CGF.CapturedStmtInfo->getCaptureFields();
5037 llvm::Value *ContextValue = CGF.CapturedStmtInfo->getContextValue();
5038 if (CaptureFields.size() && ContextValue) {
5039 unsigned CharWidth = CGF.getContext().getCharWidth();
5040 // The shared variables are packed together as members of a structure, so
5041 // the address of each shared variable can be computed by adding its
5042 // offset within the record to the base address of the record. For each
5043 // shared variable, a debug intrinsic llvm.dbg.declare is generated with
5044 // an appropriate expression (DIExpression).
5045 // Ex:
5046 //   %12 = load %struct.anon*, %struct.anon** %__context.addr.i
5047 //   call void @llvm.dbg.declare(metadata %struct.anon* %12,
5048 //     metadata !svar1,
5049 //     metadata !DIExpression(DW_OP_deref))
5050 //   call void @llvm.dbg.declare(metadata %struct.anon* %12,
5051 //     metadata !svar2,
5052 //     metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref))
5053 for (auto It = CaptureFields.begin(); It != CaptureFields.end(); ++It) {
5054 const VarDecl *SharedVar = It->first;
5055 RecordDecl *CaptureRecord = It->second->getParent();
5056 const ASTRecordLayout &Layout =
5057 CGF.getContext().getASTRecordLayout(D: CaptureRecord);
5058 unsigned Offset =
5059 Layout.getFieldOffset(FieldNo: It->second->getFieldIndex()) / CharWidth;
5060 if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
5061 (void)DI->EmitDeclareOfAutoVariable(Decl: SharedVar, AI: ContextValue,
5062 Builder&: CGF.Builder, UsePointerValue: false);
5063 // Get the call dbg.declare instruction we just created and update
5064 // its DIExpression to add offset to base address.
5065 auto UpdateExpr = [](llvm::LLVMContext &Ctx, auto *Declare,
5066 unsigned Offset) {
5067 SmallVector<uint64_t, 8> Ops;
5068 // Add the offset to the base address if it is non-zero.
5069 if (Offset) {
5070 Ops.push_back(Elt: llvm::dwarf::DW_OP_plus_uconst);
5071 Ops.push_back(Elt: Offset);
5072 }
5073 Ops.push_back(Elt: llvm::dwarf::DW_OP_deref);
5074 Declare->setExpression(llvm::DIExpression::get(Context&: Ctx, Elements: Ops));
5075 };
5076 llvm::Instruction &Last = CGF.Builder.GetInsertBlock()->back();
5077 if (auto DDI = dyn_cast<llvm::DbgVariableIntrinsic>(Val: &Last))
5078 UpdateExpr(DDI->getContext(), DDI, Offset);
5079 // If we're emitting using the new debug info format into a block
5080 // without a terminator, the record will be "trailing".
5081 assert(!Last.isTerminator() && "unexpected terminator");
5082 if (auto *Marker =
5083 CGF.Builder.GetInsertBlock()->getTrailingDbgRecords()) {
5084 for (llvm::DbgVariableRecord &DVR : llvm::reverse(
5085 C: llvm::filterDbgVars(R: Marker->getDbgRecordRange()))) {
5086 UpdateExpr(Last.getContext(), &DVR, Offset);
5087 break;
5088 }
5089 }
5090 }
5091 }
5092 }
5093 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
5094 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
5095 !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) {
5096 enum { PrivatesParam = 2, CopyFnParam = 3 };
5097 llvm::Value *CopyFn = CGF.Builder.CreateLoad(
5098 Addr: CGF.GetAddrOfLocalVar(VD: CS->getCapturedDecl()->getParam(i: CopyFnParam)));
5099 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(
5100 VD: CS->getCapturedDecl()->getParam(i: PrivatesParam)));
5101 // Map privates.
5102 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
5103 llvm::SmallVector<llvm::Value *, 16> CallArgs;
5104 llvm::SmallVector<llvm::Type *, 4> ParamTypes;
5105 CallArgs.push_back(Elt: PrivatesPtr);
5106 ParamTypes.push_back(Elt: PrivatesPtr->getType());
5107 for (const Expr *E : Data.PrivateVars) {
5108 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
5109 RawAddress PrivatePtr = CGF.CreateMemTemp(
5110 T: CGF.getContext().getPointerType(T: E->getType()), Name: ".priv.ptr.addr");
5111 PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr);
5112 CallArgs.push_back(Elt: PrivatePtr.getPointer());
5113 ParamTypes.push_back(Elt: PrivatePtr.getType());
5114 }
5115 for (const Expr *E : Data.FirstprivateVars) {
5116 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
5117 RawAddress PrivatePtr =
5118 CGF.CreateMemTemp(T: CGF.getContext().getPointerType(T: E->getType()),
5119 Name: ".firstpriv.ptr.addr");
5120 PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr);
5121 FirstprivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr);
5122 CallArgs.push_back(Elt: PrivatePtr.getPointer());
5123 ParamTypes.push_back(Elt: PrivatePtr.getType());
5124 }
5125 for (const Expr *E : Data.LastprivateVars) {
5126 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
5127 RawAddress PrivatePtr =
5128 CGF.CreateMemTemp(T: CGF.getContext().getPointerType(T: E->getType()),
5129 Name: ".lastpriv.ptr.addr");
5130 PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr);
5131 CallArgs.push_back(Elt: PrivatePtr.getPointer());
5132 ParamTypes.push_back(Elt: PrivatePtr.getType());
5133 }
5134 for (const VarDecl *VD : Data.PrivateLocals) {
5135 QualType Ty = VD->getType().getNonReferenceType();
5136 if (VD->getType()->isLValueReferenceType())
5137 Ty = CGF.getContext().getPointerType(T: Ty);
5138 if (isAllocatableDecl(VD))
5139 Ty = CGF.getContext().getPointerType(T: Ty);
5140 RawAddress PrivatePtr = CGF.CreateMemTemp(
5141 T: CGF.getContext().getPointerType(T: Ty), Name: ".local.ptr.addr");
5142 auto Result = UntiedLocalVars.insert(
5143 KV: std::make_pair(x&: VD, y: std::make_pair(x&: PrivatePtr, y: Address::invalid())));
5144 // If the key already exists, update the entry in place.
5145 if (!Result.second)
5146 *Result.first = std::make_pair(
5147 x&: VD, y: std::make_pair(x&: PrivatePtr, y: Address::invalid()));
5148 CallArgs.push_back(Elt: PrivatePtr.getPointer());
5149 ParamTypes.push_back(Elt: PrivatePtr.getType());
5150 }
5151 auto *CopyFnTy = llvm::FunctionType::get(Result: CGF.Builder.getVoidTy(),
5152 Params: ParamTypes, /*isVarArg=*/false);
5153 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
5154 CGF, Loc: S.getBeginLoc(), OutlinedFn: {CopyFnTy, CopyFn}, Args: CallArgs);
5155 for (const auto &Pair : LastprivateDstsOrigs) {
5156 const auto *OrigVD = cast<VarDecl>(Val: Pair.second->getDecl());
5157 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
5158 /*RefersToEnclosingVariableOrCapture=*/
5159 CGF.CapturedStmtInfo->lookup(VD: OrigVD) != nullptr,
5160 Pair.second->getType(), VK_LValue,
5161 Pair.second->getExprLoc());
5162 Scope.addPrivate(LocalVD: Pair.first, Addr: CGF.EmitLValue(E: &DRE).getAddress());
5163 }
5164 for (const auto &Pair : PrivatePtrs) {
5165 Address Replacement = Address(
5166 CGF.Builder.CreateLoad(Addr: Pair.second),
5167 CGF.ConvertTypeForMem(T: Pair.first->getType().getNonReferenceType()),
5168 CGF.getContext().getDeclAlign(D: Pair.first));
5169 Scope.addPrivate(LocalVD: Pair.first, Addr: Replacement);
5170 if (auto *DI = CGF.getDebugInfo())
5171 if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
5172 (void)DI->EmitDeclareOfAutoVariable(
5173 Decl: Pair.first, AI: Pair.second.getBasePointer(), Builder&: CGF.Builder,
5174 /*UsePointerValue*/ true);
5175 }
5176 // Adjust mapping for internal locals by mapping actual memory instead of
5177 // a pointer to this memory.
5178 for (auto &Pair : UntiedLocalVars) {
5179 QualType VDType = Pair.first->getType().getNonReferenceType();
5180 if (Pair.first->getType()->isLValueReferenceType())
5181 VDType = CGF.getContext().getPointerType(T: VDType);
5182 if (isAllocatableDecl(VD: Pair.first)) {
5183 llvm::Value *Ptr = CGF.Builder.CreateLoad(Addr: Pair.second.first);
5184 Address Replacement(
5185 Ptr,
5186 CGF.ConvertTypeForMem(T: CGF.getContext().getPointerType(T: VDType)),
5187 CGF.getPointerAlign());
5188 Pair.second.first = Replacement;
5189 Ptr = CGF.Builder.CreateLoad(Addr: Replacement);
5190 Replacement = Address(Ptr, CGF.ConvertTypeForMem(T: VDType),
5191 CGF.getContext().getDeclAlign(D: Pair.first));
5192 Pair.second.second = Replacement;
5193 } else {
5194 llvm::Value *Ptr = CGF.Builder.CreateLoad(Addr: Pair.second.first);
5195 Address Replacement(Ptr, CGF.ConvertTypeForMem(T: VDType),
5196 CGF.getContext().getDeclAlign(D: Pair.first));
5197 Pair.second.first = Replacement;
5198 }
5199 }
5200 }
5201 if (Data.Reductions) {
5202 OMPPrivateScope FirstprivateScope(CGF);
5203 for (const auto &Pair : FirstprivatePtrs) {
5204 Address Replacement(
5205 CGF.Builder.CreateLoad(Addr: Pair.second),
5206 CGF.ConvertTypeForMem(T: Pair.first->getType().getNonReferenceType()),
5207 CGF.getContext().getDeclAlign(D: Pair.first));
5208 FirstprivateScope.addPrivate(LocalVD: Pair.first, Addr: Replacement);
5209 }
5210 (void)FirstprivateScope.Privatize();
5211 OMPLexicalScope LexScope(CGF, S, CapturedRegion);
5212 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
5213 Data.ReductionCopies, Data.ReductionOps);
5214 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
5215 Addr: CGF.GetAddrOfLocalVar(VD: CS->getCapturedDecl()->getParam(i: 9)));
5216 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
5217 RedCG.emitSharedOrigLValue(CGF, N: Cnt);
5218 RedCG.emitAggregateType(CGF, N: Cnt);
5219 // FIXME: This must be removed once the runtime library is fixed.
5220 // Emit required threadprivate variables for
5221 // initializer/combiner/finalizer.
5222 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(),
5223 RCG&: RedCG, N: Cnt);
5224 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5225 CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt));
5226 Replacement = Address(
5227 CGF.EmitScalarConversion(Src: Replacement.emitRawPointer(CGF),
5228 SrcTy: CGF.getContext().VoidPtrTy,
5229 DstTy: CGF.getContext().getPointerType(
5230 T: Data.ReductionCopies[Cnt]->getType()),
5231 Loc: Data.ReductionCopies[Cnt]->getExprLoc()),
5232 CGF.ConvertTypeForMem(T: Data.ReductionCopies[Cnt]->getType()),
5233 Replacement.getAlignment());
5234 Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement);
5235 Scope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement);
5236 }
5237 }
5238 // Privatize all private variables except for in_reduction items.
5239 (void)Scope.Privatize();
5240 SmallVector<const Expr *, 4> InRedVars;
5241 SmallVector<const Expr *, 4> InRedPrivs;
5242 SmallVector<const Expr *, 4> InRedOps;
5243 SmallVector<const Expr *, 4> TaskgroupDescriptors;
5244 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
5245 auto IPriv = C->privates().begin();
5246 auto IRed = C->reduction_ops().begin();
5247 auto ITD = C->taskgroup_descriptors().begin();
5248 for (const Expr *Ref : C->varlist()) {
5249 InRedVars.emplace_back(Args&: Ref);
5250 InRedPrivs.emplace_back(Args: *IPriv);
5251 InRedOps.emplace_back(Args: *IRed);
5252 TaskgroupDescriptors.emplace_back(Args: *ITD);
5253 std::advance(i&: IPriv, n: 1);
5254 std::advance(i&: IRed, n: 1);
5255 std::advance(i&: ITD, n: 1);
5256 }
5257 }
5258 // Privatize in_reduction items here, because taskgroup descriptors must be
5259 // privatized earlier.
5260 OMPPrivateScope InRedScope(CGF);
5261 if (!InRedVars.empty()) {
5262 ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
5263 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
5264 RedCG.emitSharedOrigLValue(CGF, N: Cnt);
5265 RedCG.emitAggregateType(CGF, N: Cnt);
      // The taskgroup descriptor variable is always implicitly firstprivate
      // and has already been privatized during the processing of the
      // firstprivates.
      // FIXME: This must be removed once the runtime library is fixed.
5269 // Emit required threadprivate variables for
5270 // initializer/combiner/finalizer.
5271 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(),
5272 RCG&: RedCG, N: Cnt);
5273 llvm::Value *ReductionsPtr;
5274 if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
5275 ReductionsPtr = CGF.EmitLoadOfScalar(lvalue: CGF.EmitLValue(E: TRExpr),
5276 Loc: TRExpr->getExprLoc());
5277 } else {
5278 ReductionsPtr = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
5279 }
5280 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5281 CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt));
5282 Replacement = Address(
5283 CGF.EmitScalarConversion(
5284 Src: Replacement.emitRawPointer(CGF), SrcTy: CGF.getContext().VoidPtrTy,
5285 DstTy: CGF.getContext().getPointerType(T: InRedPrivs[Cnt]->getType()),
5286 Loc: InRedPrivs[Cnt]->getExprLoc()),
5287 CGF.ConvertTypeForMem(T: InRedPrivs[Cnt]->getType()),
5288 Replacement.getAlignment());
5289 Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement);
5290 InRedScope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement);
5291 }
5292 }
5293 (void)InRedScope.Privatize();
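    // A minimal illustration (not the only accepted form) of the source
    // pattern driving the in_reduction handling above:
    //   #pragma omp taskgroup task_reduction(+: x)
    //   {
    //   #pragma omp task in_reduction(+: x)
    //     x += f();
    //   }
    // Each task resolves its private copy of 'x' through the taskgroup
    // descriptor via getTaskReductionItem.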
5294
5295 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF,
5296 UntiedLocalVars);
5297 Action.Enter(CGF);
5298 BodyGen(CGF);
5299 };
5300 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
5301 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
5302 D: S, ThreadIDVar: *I, PartIDVar: *PartId, TaskTVar: *TaskT, InnermostKind: EKind, CodeGen, Tied: Data.Tied, NumberOfParts&: Data.NumberOfParts);
5303 OMPLexicalScope Scope(*this, S, std::nullopt,
5304 !isOpenMPParallelDirective(DKind: EKind) &&
5305 !isOpenMPSimdDirective(DKind: EKind));
5306 TaskGen(*this, OutlinedFn, Data);
5307}
5308
5309static ImplicitParamDecl *
5310createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
5311 QualType Ty, CapturedDecl *CD,
5312 SourceLocation Loc) {
5313 auto *OrigVD = ImplicitParamDecl::Create(C, DC: CD, IdLoc: Loc, /*Id=*/nullptr, T: Ty,
5314 ParamKind: ImplicitParamKind::Other);
5315 auto *OrigRef = DeclRefExpr::Create(
5316 Context: C, QualifierLoc: NestedNameSpecifierLoc(), TemplateKWLoc: SourceLocation(), D: OrigVD,
5317 /*RefersToEnclosingVariableOrCapture=*/false, NameLoc: Loc, T: Ty, VK: VK_LValue);
5318 auto *PrivateVD = ImplicitParamDecl::Create(C, DC: CD, IdLoc: Loc, /*Id=*/nullptr, T: Ty,
5319 ParamKind: ImplicitParamKind::Other);
5320 auto *PrivateRef = DeclRefExpr::Create(
5321 Context: C, QualifierLoc: NestedNameSpecifierLoc(), TemplateKWLoc: SourceLocation(), D: PrivateVD,
5322 /*RefersToEnclosingVariableOrCapture=*/false, NameLoc: Loc, T: Ty, VK: VK_LValue);
5323 QualType ElemType = C.getBaseElementType(QT: Ty);
5324 auto *InitVD = ImplicitParamDecl::Create(C, DC: CD, IdLoc: Loc, /*Id=*/nullptr, T: ElemType,
5325 ParamKind: ImplicitParamKind::Other);
5326 auto *InitRef = DeclRefExpr::Create(
5327 Context: C, QualifierLoc: NestedNameSpecifierLoc(), TemplateKWLoc: SourceLocation(), D: InitVD,
5328 /*RefersToEnclosingVariableOrCapture=*/false, NameLoc: Loc, T: ElemType, VK: VK_LValue);
5329 PrivateVD->setInitStyle(VarDecl::CInit);
5330 PrivateVD->setInit(ImplicitCastExpr::Create(Context: C, T: ElemType, Kind: CK_LValueToRValue,
5331 Operand: InitRef, /*BasePath=*/nullptr,
5332 Cat: VK_PRValue, FPO: FPOptionsOverride()));
5333 Data.FirstprivateVars.emplace_back(Args&: OrigRef);
5334 Data.FirstprivateCopies.emplace_back(Args&: PrivateRef);
5335 Data.FirstprivateInits.emplace_back(Args&: InitRef);
5336 return OrigVD;
5337}
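// A rough illustration of what the helper above synthesizes: for a target
// task with N map entries, the base-pointer array is modeled approximately as
// if the user had written
//   void *bp[N];
//   #pragma omp task firstprivate(bp)
// with OrigVD naming the captured array and PrivateVD/InitVD driving the
// element-wise firstprivate initialization.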
5338
5339void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
5340 const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
5341 OMPTargetDataInfo &InputInfo) {
5342 // Emit outlined function for task construct.
5343 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_task);
5344 Address CapturedStruct = GenerateCapturedStmtArgument(S: *CS);
5345 CanQualType SharedsTy =
5346 getContext().getCanonicalTagType(TD: CS->getCapturedRecordDecl());
5347 auto I = CS->getCapturedDecl()->param_begin();
5348 auto PartId = std::next(x: I);
5349 auto TaskT = std::next(x: I, n: 4);
5350 OMPTaskDataTy Data;
5351 // The task is not final.
5352 Data.Final.setInt(/*IntVal=*/false);
5353 // Get list of firstprivate variables.
5354 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
5355 auto IRef = C->varlist_begin();
5356 auto IElemInitRef = C->inits().begin();
5357 for (auto *IInit : C->private_copies()) {
5358 Data.FirstprivateVars.push_back(Elt: *IRef);
5359 Data.FirstprivateCopies.push_back(Elt: IInit);
5360 Data.FirstprivateInits.push_back(Elt: *IElemInitRef);
5361 ++IRef;
5362 ++IElemInitRef;
5363 }
5364 }
5365 SmallVector<const Expr *, 4> LHSs;
5366 SmallVector<const Expr *, 4> RHSs;
5367 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
5368 Data.ReductionVars.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
5369 Data.ReductionOrigs.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
5370 Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end());
5371 Data.ReductionOps.append(in_start: C->reduction_ops().begin(),
5372 in_end: C->reduction_ops().end());
5373 LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
5374 RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
5375 }
5376 OMPPrivateScope TargetScope(*this);
5377 VarDecl *BPVD = nullptr;
5378 VarDecl *PVD = nullptr;
5379 VarDecl *SVD = nullptr;
5380 VarDecl *MVD = nullptr;
5381 if (InputInfo.NumberOfTargetItems > 0) {
5382 auto *CD = CapturedDecl::Create(
5383 C&: getContext(), DC: getContext().getTranslationUnitDecl(), /*NumParams=*/0);
5384 llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
5385 QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
5386 EltTy: getContext().VoidPtrTy, ArySize: ArrSize, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal,
5387 /*IndexTypeQuals=*/0);
5388 BPVD = createImplicitFirstprivateForType(
5389 C&: getContext(), Data, Ty: BaseAndPointerAndMapperType, CD, Loc: S.getBeginLoc());
5390 PVD = createImplicitFirstprivateForType(
5391 C&: getContext(), Data, Ty: BaseAndPointerAndMapperType, CD, Loc: S.getBeginLoc());
5392 QualType SizesType = getContext().getConstantArrayType(
5393 EltTy: getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
5394 ArySize: ArrSize, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal,
5395 /*IndexTypeQuals=*/0);
5396 SVD = createImplicitFirstprivateForType(C&: getContext(), Data, Ty: SizesType, CD,
5397 Loc: S.getBeginLoc());
5398 TargetScope.addPrivate(LocalVD: BPVD, Addr: InputInfo.BasePointersArray);
5399 TargetScope.addPrivate(LocalVD: PVD, Addr: InputInfo.PointersArray);
5400 TargetScope.addPrivate(LocalVD: SVD, Addr: InputInfo.SizesArray);
5401 // If there is no user-defined mapper, the mapper array will be nullptr. In
5402 // this case, we don't need to privatize it.
5403 if (!isa_and_nonnull<llvm::ConstantPointerNull>(
5404 Val: InputInfo.MappersArray.emitRawPointer(CGF&: *this))) {
5405 MVD = createImplicitFirstprivateForType(
5406 C&: getContext(), Data, Ty: BaseAndPointerAndMapperType, CD, Loc: S.getBeginLoc());
5407 TargetScope.addPrivate(LocalVD: MVD, Addr: InputInfo.MappersArray);
5408 }
5409 }
5410 (void)TargetScope.Privatize();
5411 buildDependences(S, Data);
5412 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
5413 auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD, EKind,
5414 &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
5415 // Set proper addresses for generated private copies.
5416 OMPPrivateScope Scope(CGF);
5417 if (!Data.FirstprivateVars.empty()) {
5418 enum { PrivatesParam = 2, CopyFnParam = 3 };
5419 llvm::Value *CopyFn = CGF.Builder.CreateLoad(
5420 Addr: CGF.GetAddrOfLocalVar(VD: CS->getCapturedDecl()->getParam(i: CopyFnParam)));
5421 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(
5422 VD: CS->getCapturedDecl()->getParam(i: PrivatesParam)));
5423 // Map privates.
5424 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
5425 llvm::SmallVector<llvm::Value *, 16> CallArgs;
5426 llvm::SmallVector<llvm::Type *, 4> ParamTypes;
5427 CallArgs.push_back(Elt: PrivatesPtr);
5428 ParamTypes.push_back(Elt: PrivatesPtr->getType());
5429 for (const Expr *E : Data.FirstprivateVars) {
5430 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
5431 RawAddress PrivatePtr =
5432 CGF.CreateMemTemp(T: CGF.getContext().getPointerType(T: E->getType()),
5433 Name: ".firstpriv.ptr.addr");
5434 PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr);
5435 CallArgs.push_back(Elt: PrivatePtr.getPointer());
5436 ParamTypes.push_back(Elt: PrivatePtr.getType());
5437 }
5438 auto *CopyFnTy = llvm::FunctionType::get(Result: CGF.Builder.getVoidTy(),
5439 Params: ParamTypes, /*isVarArg=*/false);
5440 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
5441 CGF, Loc: S.getBeginLoc(), OutlinedFn: {CopyFnTy, CopyFn}, Args: CallArgs);
5442 for (const auto &Pair : PrivatePtrs) {
5443 Address Replacement(
5444 CGF.Builder.CreateLoad(Addr: Pair.second),
5445 CGF.ConvertTypeForMem(T: Pair.first->getType().getNonReferenceType()),
5446 CGF.getContext().getDeclAlign(D: Pair.first));
5447 Scope.addPrivate(LocalVD: Pair.first, Addr: Replacement);
5448 }
5449 }
5450 CGF.processInReduction(S, Data, CGF, CS, Scope);
5451 if (InputInfo.NumberOfTargetItems > 0) {
5452 InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
5453 Addr: CGF.GetAddrOfLocalVar(VD: BPVD), /*Index=*/0);
5454 InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
5455 Addr: CGF.GetAddrOfLocalVar(VD: PVD), /*Index=*/0);
5456 InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
5457 Addr: CGF.GetAddrOfLocalVar(VD: SVD), /*Index=*/0);
      // If MVD is nullptr, the mapper array is not privatized.
5459 if (MVD)
5460 InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP(
5461 Addr: CGF.GetAddrOfLocalVar(VD: MVD), /*Index=*/0);
5462 }
5463
5464 Action.Enter(CGF);
5465 OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
5466 auto *TL = S.getSingleClause<OMPThreadLimitClause>();
5467 if (CGF.CGM.getLangOpts().OpenMP >= 51 &&
5468 needsTaskBasedThreadLimit(DKind: EKind) && TL) {
      // Emit __kmpc_set_thread_limit() to set the thread_limit for the task
      // enclosing this target region. This will indirectly set the
      // thread_limit for every applicable construct within the target region.
5472 CGF.CGM.getOpenMPRuntime().emitThreadLimitClause(
5473 CGF, ThreadLimit: TL->getThreadLimit().front(), Loc: S.getBeginLoc());
5474 }
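    // For example (illustrative only), given
    //   #pragma omp target nowait thread_limit(4)
    // the block above emits __kmpc_set_thread_limit(...) inside the generated
    // task before the target region body runs.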
5475 BodyGen(CGF);
5476 };
5477 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
5478 D: S, ThreadIDVar: *I, PartIDVar: *PartId, TaskTVar: *TaskT, InnermostKind: EKind, CodeGen, /*Tied=*/true,
5479 NumberOfParts&: Data.NumberOfParts);
5480 llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
5481 IntegerLiteral IfCond(getContext(), TrueOrFalse,
5482 getContext().getIntTypeForBitwidth(DestWidth: 32, /*Signed=*/0),
5483 SourceLocation());
5484 CGM.getOpenMPRuntime().emitTaskCall(CGF&: *this, Loc: S.getBeginLoc(), D: S, TaskFunction: OutlinedFn,
5485 SharedsTy, Shareds: CapturedStruct, IfCond: &IfCond, Data);
5486}
5487
5488void CodeGenFunction::processInReduction(const OMPExecutableDirective &S,
5489 OMPTaskDataTy &Data,
5490 CodeGenFunction &CGF,
5491 const CapturedStmt *CS,
5492 OMPPrivateScope &Scope) {
5493 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
5494 if (Data.Reductions) {
5495 OpenMPDirectiveKind CapturedRegion = EKind;
5496 OMPLexicalScope LexScope(CGF, S, CapturedRegion);
5497 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
5498 Data.ReductionCopies, Data.ReductionOps);
5499 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
5500 Addr: CGF.GetAddrOfLocalVar(VD: CS->getCapturedDecl()->getParam(i: 4)));
5501 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
5502 RedCG.emitSharedOrigLValue(CGF, N: Cnt);
5503 RedCG.emitAggregateType(CGF, N: Cnt);
      // FIXME: This must be removed once the runtime library is fixed.
5505 // Emit required threadprivate variables for
5506 // initializer/combiner/finalizer.
5507 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(),
5508 RCG&: RedCG, N: Cnt);
5509 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5510 CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt));
5511 Replacement = Address(
5512 CGF.EmitScalarConversion(Src: Replacement.emitRawPointer(CGF),
5513 SrcTy: CGF.getContext().VoidPtrTy,
5514 DstTy: CGF.getContext().getPointerType(
5515 T: Data.ReductionCopies[Cnt]->getType()),
5516 Loc: Data.ReductionCopies[Cnt]->getExprLoc()),
5517 CGF.ConvertTypeForMem(T: Data.ReductionCopies[Cnt]->getType()),
5518 Replacement.getAlignment());
5519 Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement);
5520 Scope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement);
5521 }
5522 }
5523 (void)Scope.Privatize();
5524 SmallVector<const Expr *, 4> InRedVars;
5525 SmallVector<const Expr *, 4> InRedPrivs;
5526 SmallVector<const Expr *, 4> InRedOps;
5527 SmallVector<const Expr *, 4> TaskgroupDescriptors;
5528 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
5529 auto IPriv = C->privates().begin();
5530 auto IRed = C->reduction_ops().begin();
5531 auto ITD = C->taskgroup_descriptors().begin();
5532 for (const Expr *Ref : C->varlist()) {
5533 InRedVars.emplace_back(Args&: Ref);
5534 InRedPrivs.emplace_back(Args: *IPriv);
5535 InRedOps.emplace_back(Args: *IRed);
5536 TaskgroupDescriptors.emplace_back(Args: *ITD);
5537 std::advance(i&: IPriv, n: 1);
5538 std::advance(i&: IRed, n: 1);
5539 std::advance(i&: ITD, n: 1);
5540 }
5541 }
5542 OMPPrivateScope InRedScope(CGF);
5543 if (!InRedVars.empty()) {
5544 ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
5545 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
5546 RedCG.emitSharedOrigLValue(CGF, N: Cnt);
5547 RedCG.emitAggregateType(CGF, N: Cnt);
      // FIXME: This must be removed once the runtime library is fixed.
5549 // Emit required threadprivate variables for
5550 // initializer/combiner/finalizer.
5551 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(),
5552 RCG&: RedCG, N: Cnt);
5553 llvm::Value *ReductionsPtr;
5554 if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
5555 ReductionsPtr =
5556 CGF.EmitLoadOfScalar(lvalue: CGF.EmitLValue(E: TRExpr), Loc: TRExpr->getExprLoc());
5557 } else {
5558 ReductionsPtr = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
5559 }
5560 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5561 CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt));
5562 Replacement = Address(
5563 CGF.EmitScalarConversion(
5564 Src: Replacement.emitRawPointer(CGF), SrcTy: CGF.getContext().VoidPtrTy,
5565 DstTy: CGF.getContext().getPointerType(T: InRedPrivs[Cnt]->getType()),
5566 Loc: InRedPrivs[Cnt]->getExprLoc()),
5567 CGF.ConvertTypeForMem(T: InRedPrivs[Cnt]->getType()),
5568 Replacement.getAlignment());
5569 Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement);
5570 InRedScope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement);
5571 }
5572 }
5573 (void)InRedScope.Privatize();
5574}
5575
5576void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
5577 // Emit outlined function for task construct.
5578 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_task);
5579 Address CapturedStruct = GenerateCapturedStmtArgument(S: *CS);
5580 CanQualType SharedsTy =
5581 getContext().getCanonicalTagType(TD: CS->getCapturedRecordDecl());
5582 const Expr *IfCond = nullptr;
5583 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
5584 if (C->getNameModifier() == OMPD_unknown ||
5585 C->getNameModifier() == OMPD_task) {
5586 IfCond = C->getCondition();
5587 break;
5588 }
5589 }
5590
5591 OMPTaskDataTy Data;
  // Check whether we should emit a tied or an untied task.
5593 Data.Tied = !S.getSingleClause<OMPUntiedClause>();
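  // Illustrative: for '#pragma omp task untied if(task: cond)', Data.Tied is
  // false here and IfCond below binds to 'cond'.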
5594 auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
5595 CGF.EmitStmt(S: CS->getCapturedStmt());
5596 };
5597 auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
5598 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
5599 const OMPTaskDataTy &Data) {
5600 CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, Loc: S.getBeginLoc(), D: S, TaskFunction: OutlinedFn,
5601 SharedsTy, Shareds: CapturedStruct, IfCond,
5602 Data);
5603 };
5604 auto LPCRegion =
5605 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
5606 EmitOMPTaskBasedDirective(S, CapturedRegion: OMPD_task, BodyGen, TaskGen, Data);
5607}
5608
5609void CodeGenFunction::EmitOMPTaskyieldDirective(
5610 const OMPTaskyieldDirective &S) {
5611 CGM.getOpenMPRuntime().emitTaskyieldCall(CGF&: *this, Loc: S.getBeginLoc());
5612}
5613
5614void CodeGenFunction::EmitOMPErrorDirective(const OMPErrorDirective &S) {
5615 const OMPMessageClause *MC = S.getSingleClause<OMPMessageClause>();
5616 Expr *ME = MC ? MC->getMessageString() : nullptr;
5617 const OMPSeverityClause *SC = S.getSingleClause<OMPSeverityClause>();
5618 bool IsFatal = false;
5619 if (!SC || SC->getSeverityKind() == OMPC_SEVERITY_fatal)
5620 IsFatal = true;
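  // E.g. '#pragma omp error severity(warning) message("m")' emits a non-fatal
  // runtime error call; with no severity clause the error defaults to fatal.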
5621 CGM.getOpenMPRuntime().emitErrorCall(CGF&: *this, Loc: S.getBeginLoc(), ME, IsFatal);
5622}
5623
5624void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
5625 CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_barrier);
5626}
5627
5628void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
5629 OMPTaskDataTy Data;
  // Build the list of dependences.
5631 buildDependences(S, Data);
5632 Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();
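  // E.g. '#pragma omp taskwait depend(in: x) nowait' forwards the dependence
  // list and the nowait flag to the runtime taskwait call below.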
5633 CGM.getOpenMPRuntime().emitTaskwaitCall(CGF&: *this, Loc: S.getBeginLoc(), Data);
5634}
5635
5636static bool isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective &T) {
5637 return T.clauses().empty();
5638}
5639
5640void CodeGenFunction::EmitOMPTaskgroupDirective(
5641 const OMPTaskgroupDirective &S) {
5642 OMPLexicalScope Scope(*this, S, OMPD_unknown);
5643 if (CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(T: S)) {
5644 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
5645 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
5646 InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
5647 AllocaInsertPt->getIterator());
5648
5649 auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
5650 InsertPointTy CodeGenIP) {
5651 Builder.restoreIP(IP: CodeGenIP);
5652 EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
5653 return llvm::Error::success();
5654 };
5655 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
5656 if (!CapturedStmtInfo)
5657 CapturedStmtInfo = &CapStmtInfo;
5658 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
5659 cantFail(ValOrErr: OMPBuilder.createTaskgroup(Loc: Builder, AllocaIP, BodyGenCB));
5660 Builder.restoreIP(IP: AfterIP);
5661 return;
5662 }
5663 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5664 Action.Enter(CGF);
5665 if (const Expr *E = S.getReductionRef()) {
5666 SmallVector<const Expr *, 4> LHSs;
5667 SmallVector<const Expr *, 4> RHSs;
5668 OMPTaskDataTy Data;
5669 for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
5670 Data.ReductionVars.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
5671 Data.ReductionOrigs.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
5672 Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end());
5673 Data.ReductionOps.append(in_start: C->reduction_ops().begin(),
5674 in_end: C->reduction_ops().end());
5675 LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
5676 RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
5677 }
5678 llvm::Value *ReductionDesc =
5679 CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, Loc: S.getBeginLoc(),
5680 LHSExprs: LHSs, RHSExprs: RHSs, Data);
5681 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
5682 CGF.EmitVarDecl(D: *VD);
5683 CGF.EmitStoreOfScalar(Value: ReductionDesc, Addr: CGF.GetAddrOfLocalVar(VD),
5684 /*Volatile=*/false, Ty: E->getType());
5685 }
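    // Illustrative: for '#pragma omp taskgroup task_reduction(+: x)', the
    // descriptor produced by emitTaskReductionInit above is stored into the
    // implicit variable named by S.getReductionRef(), where child tasks with
    // 'in_reduction(+: x)' can later find it.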
5686 CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
5687 };
5688 CGM.getOpenMPRuntime().emitTaskgroupRegion(CGF&: *this, TaskgroupOpGen: CodeGen, Loc: S.getBeginLoc());
5689}
5690
5691void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
5692 llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
5693 ? llvm::AtomicOrdering::NotAtomic
5694 : llvm::AtomicOrdering::AcquireRelease;
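  // E.g. a bare '#pragma omp flush' lowers with acquire-release ordering,
  // while '#pragma omp flush(a, b)' passes the listed variables to emitFlush
  // with non-atomic ordering.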
5695 CGM.getOpenMPRuntime().emitFlush(
5696 CGF&: *this,
5697 Vars: [&S]() -> ArrayRef<const Expr *> {
5698 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
5699 return llvm::ArrayRef(FlushClause->varlist_begin(),
5700 FlushClause->varlist_end());
5701 return {};
5702 }(),
5703 Loc: S.getBeginLoc(), AO);
5704}
5705
5706void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
5707 const auto *DO = S.getSingleClause<OMPDepobjClause>();
5708 LValue DOLVal = EmitLValue(E: DO->getDepobj());
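  // Illustrative forms handled below:
  //   #pragma omp depobj(o) depend(in: x)  // build and store the dependence
  //   #pragma omp depobj(o) destroy        // destroy the depend object
  //   #pragma omp depobj(o) update(inout)  // change the dependence kind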
5709 if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
    // Build the dependence list and emit the dependences.
5711 OMPTaskDataTy Data;
5712 buildDependences(S, Data);
5713 for (auto &Dep : Data.Dependences) {
5714 Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
5715 CGF&: *this, Dependencies: Dep, Loc: DC->getBeginLoc());
5716 EmitStoreOfScalar(value: DepAddr.emitRawPointer(CGF&: *this), lvalue: DOLVal);
5717 }
5718 return;
5719 }
5720 if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
5721 CGM.getOpenMPRuntime().emitDestroyClause(CGF&: *this, DepobjLVal: DOLVal, Loc: DC->getBeginLoc());
5722 return;
5723 }
5724 if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
5725 CGM.getOpenMPRuntime().emitUpdateClause(
5726 CGF&: *this, DepobjLVal: DOLVal, NewDepKind: UC->getDependencyKind(), Loc: UC->getBeginLoc());
5727 return;
5728 }
5729}
5730
5731void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
5732 if (!OMPParentLoopDirectiveForScan)
5733 return;
5734 const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
5735 bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
5736 SmallVector<const Expr *, 4> Shareds;
5737 SmallVector<const Expr *, 4> Privates;
5738 SmallVector<const Expr *, 4> LHSs;
5739 SmallVector<const Expr *, 4> RHSs;
5740 SmallVector<const Expr *, 4> ReductionOps;
5741 SmallVector<const Expr *, 4> CopyOps;
5742 SmallVector<const Expr *, 4> CopyArrayTemps;
5743 SmallVector<const Expr *, 4> CopyArrayElems;
5744 for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
5745 if (C->getModifier() != OMPC_REDUCTION_inscan)
5746 continue;
5747 Shareds.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
5748 Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
5749 LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
5750 RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
5751 ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
5752 CopyOps.append(in_start: C->copy_ops().begin(), in_end: C->copy_ops().end());
5753 CopyArrayTemps.append(in_start: C->copy_array_temps().begin(),
5754 in_end: C->copy_array_temps().end());
5755 CopyArrayElems.append(in_start: C->copy_array_elems().begin(),
5756 in_end: C->copy_array_elems().end());
5757 }
5758 if (ParentDir.getDirectiveKind() == OMPD_simd ||
5759 (getLangOpts().OpenMPSimd &&
5760 isOpenMPSimdDirective(DKind: ParentDir.getDirectiveKind()))) {
    // For the simd directive, and for simd-based directives in simd-only
    // mode, use the following codegen:
5763 // int x = 0;
5764 // #pragma omp simd reduction(inscan, +: x)
5765 // for (..) {
5766 // <first part>
5767 // #pragma omp scan inclusive(x)
5768 // <second part>
5769 // }
5770 // is transformed to:
5771 // int x = 0;
5772 // for (..) {
5773 // int x_priv = 0;
5774 // <first part>
5775 // x = x_priv + x;
5776 // x_priv = x;
5777 // <second part>
5778 // }
5779 // and
5780 // int x = 0;
5781 // #pragma omp simd reduction(inscan, +: x)
5782 // for (..) {
5783 // <first part>
5784 // #pragma omp scan exclusive(x)
5785 // <second part>
5786 // }
5787 // to
5788 // int x = 0;
5789 // for (..) {
5790 // int x_priv = 0;
5791 // <second part>
5792 // int temp = x;
5793 // x = x_priv + x;
5794 // x_priv = temp;
5795 // <first part>
5796 // }
5797 llvm::BasicBlock *OMPScanReduce = createBasicBlock(name: "omp.inscan.reduce");
5798 EmitBranch(Block: IsInclusive
5799 ? OMPScanReduce
5800 : BreakContinueStack.back().ContinueBlock.getBlock());
5801 EmitBlock(BB: OMPScanDispatch);
5802 {
5803 // New scope for correct construction/destruction of temp variables for
5804 // exclusive scan.
5805 LexicalScope Scope(*this, S.getSourceRange());
5806 EmitBranch(Block: IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
5807 EmitBlock(BB: OMPScanReduce);
5808 if (!IsInclusive) {
        // Create a temp var and copy the LHS value into it:
        // TMP = LHS;
5811 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5812 const Expr *PrivateExpr = Privates[I];
5813 const Expr *TempExpr = CopyArrayTemps[I];
5814 EmitAutoVarDecl(
5815 D: *cast<VarDecl>(Val: cast<DeclRefExpr>(Val: TempExpr)->getDecl()));
5816 LValue DestLVal = EmitLValue(E: TempExpr);
5817 LValue SrcLVal = EmitLValue(E: LHSs[I]);
5818 EmitOMPCopy(OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(),
5819 SrcAddr: SrcLVal.getAddress(),
5820 DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
5821 SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()),
5822 Copy: CopyOps[I]);
5823 }
5824 }
5825 CGM.getOpenMPRuntime().emitReduction(
5826 CGF&: *this, Loc: ParentDir.getEndLoc(), Privates, LHSExprs: LHSs, RHSExprs: RHSs, ReductionOps,
5827 Options: {/*WithNowait=*/true, /*SimpleReduction=*/true,
5828 /*IsPrivateVarReduction*/ {}, .ReductionKind: OMPD_simd});
5829 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5830 const Expr *PrivateExpr = Privates[I];
5831 LValue DestLVal;
5832 LValue SrcLVal;
5833 if (IsInclusive) {
5834 DestLVal = EmitLValue(E: RHSs[I]);
5835 SrcLVal = EmitLValue(E: LHSs[I]);
5836 } else {
5837 const Expr *TempExpr = CopyArrayTemps[I];
5838 DestLVal = EmitLValue(E: RHSs[I]);
5839 SrcLVal = EmitLValue(E: TempExpr);
5840 }
5841 EmitOMPCopy(
5842 OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), SrcAddr: SrcLVal.getAddress(),
5843 DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
5844 SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), Copy: CopyOps[I]);
5845 }
5846 }
5847 EmitBranch(Block: IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
5848 OMPScanExitBlock = IsInclusive
5849 ? BreakContinueStack.back().ContinueBlock.getBlock()
5850 : OMPScanReduce;
5851 EmitBlock(BB: OMPAfterScanBlock);
5852 return;
5853 }
5854 if (!IsInclusive) {
5855 EmitBranch(Block: BreakContinueStack.back().ContinueBlock.getBlock());
5856 EmitBlock(BB: OMPScanExitBlock);
5857 }
5858 if (OMPFirstScanLoop) {
5859 // Emit buffer[i] = red; at the end of the input phase.
5860 const auto *IVExpr = cast<OMPLoopDirective>(Val: ParentDir)
5861 .getIterationVariable()
5862 ->IgnoreParenImpCasts();
5863 LValue IdxLVal = EmitLValue(E: IVExpr);
5864 llvm::Value *IdxVal = EmitLoadOfScalar(lvalue: IdxLVal, Loc: IVExpr->getExprLoc());
5865 IdxVal = Builder.CreateIntCast(V: IdxVal, DestTy: SizeTy, /*isSigned=*/false);
5866 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5867 const Expr *PrivateExpr = Privates[I];
5868 const Expr *OrigExpr = Shareds[I];
5869 const Expr *CopyArrayElem = CopyArrayElems[I];
5870 OpaqueValueMapping IdxMapping(
5871 *this,
5872 cast<OpaqueValueExpr>(
5873 Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
5874 RValue::get(V: IdxVal));
5875 LValue DestLVal = EmitLValue(E: CopyArrayElem);
5876 LValue SrcLVal = EmitLValue(E: OrigExpr);
5877 EmitOMPCopy(
5878 OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), SrcAddr: SrcLVal.getAddress(),
5879 DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
5880 SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), Copy: CopyOps[I]);
5881 }
5882 }
5883 EmitBranch(Block: BreakContinueStack.back().ContinueBlock.getBlock());
5884 if (IsInclusive) {
5885 EmitBlock(BB: OMPScanExitBlock);
5886 EmitBranch(Block: BreakContinueStack.back().ContinueBlock.getBlock());
5887 }
5888 EmitBlock(BB: OMPScanDispatch);
5889 if (!OMPFirstScanLoop) {
5890 // Emit red = buffer[i]; at the entrance to the scan phase.
5891 const auto *IVExpr = cast<OMPLoopDirective>(Val: ParentDir)
5892 .getIterationVariable()
5893 ->IgnoreParenImpCasts();
5894 LValue IdxLVal = EmitLValue(E: IVExpr);
5895 llvm::Value *IdxVal = EmitLoadOfScalar(lvalue: IdxLVal, Loc: IVExpr->getExprLoc());
5896 IdxVal = Builder.CreateIntCast(V: IdxVal, DestTy: SizeTy, /*isSigned=*/false);
5897 llvm::BasicBlock *ExclusiveExitBB = nullptr;
5898 if (!IsInclusive) {
5899 llvm::BasicBlock *ContBB = createBasicBlock(name: "omp.exclusive.dec");
5900 ExclusiveExitBB = createBasicBlock(name: "omp.exclusive.copy.exit");
5901 llvm::Value *Cmp = Builder.CreateIsNull(Arg: IdxVal);
5902 Builder.CreateCondBr(Cond: Cmp, True: ExclusiveExitBB, False: ContBB);
5903 EmitBlock(BB: ContBB);
5904 // Use idx - 1 iteration for exclusive scan.
5905 IdxVal = Builder.CreateNUWSub(LHS: IdxVal, RHS: llvm::ConstantInt::get(Ty: SizeTy, V: 1));
5906 }
5907 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5908 const Expr *PrivateExpr = Privates[I];
5909 const Expr *OrigExpr = Shareds[I];
5910 const Expr *CopyArrayElem = CopyArrayElems[I];
5911 OpaqueValueMapping IdxMapping(
5912 *this,
5913 cast<OpaqueValueExpr>(
5914 Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
5915 RValue::get(V: IdxVal));
5916 LValue SrcLVal = EmitLValue(E: CopyArrayElem);
5917 LValue DestLVal = EmitLValue(E: OrigExpr);
5918 EmitOMPCopy(
5919 OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), SrcAddr: SrcLVal.getAddress(),
5920 DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
5921 SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), Copy: CopyOps[I]);
5922 }
5923 if (!IsInclusive) {
5924 EmitBlock(BB: ExclusiveExitBB);
5925 }
5926 }
5927 EmitBranch(Block: (OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
5928 : OMPAfterScanBlock);
5929 EmitBlock(BB: OMPAfterScanBlock);
5930}
5931
5932void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
5933 const CodeGenLoopTy &CodeGenLoop,
5934 Expr *IncExpr) {
5935 // Emit the loop iteration variable.
5936 const auto *IVExpr = cast<DeclRefExpr>(Val: S.getIterationVariable());
5937 const auto *IVDecl = cast<VarDecl>(Val: IVExpr->getDecl());
5938 EmitVarDecl(D: *IVDecl);
5939
  // Emit the iteration count variable.
  // If it is not a variable, Sema decided to calculate the iteration count on
  // each iteration (e.g., it is foldable into a constant).
5943 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) {
5944 EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl()));
5945 // Emit calculation of the iterations count.
5946 EmitIgnoredExpr(E: S.getCalcLastIteration());
5947 }
5948
5949 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
5950
5951 bool HasLastprivateClause = false;
5952 // Check pre-condition.
5953 {
5954 OMPLoopScope PreInitScope(*this, S);
5955 // Skip the entire loop if we don't meet the precondition.
5956 // If the condition constant folds and can be elided, avoid emitting the
5957 // whole loop.
5958 bool CondConstant;
5959 llvm::BasicBlock *ContBlock = nullptr;
5960 if (ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) {
5961 if (!CondConstant)
5962 return;
5963 } else {
5964 llvm::BasicBlock *ThenBlock = createBasicBlock(name: "omp.precond.then");
5965 ContBlock = createBasicBlock(name: "omp.precond.end");
5966 emitPreCond(CGF&: *this, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock,
5967 TrueCount: getProfileCount(S: &S));
5968 EmitBlock(BB: ThenBlock);
5969 incrementProfileCounter(S: &S);
5970 }
5971
5972 emitAlignedClause(CGF&: *this, D: S);
5973 // Emit 'then' code.
5974 {
5975 // Emit helper vars inits.
5976
5977 LValue LB = EmitOMPHelperVar(
5978 CGF&: *this, Helper: cast<DeclRefExpr>(
5979 Val: (isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
5980 ? S.getCombinedLowerBoundVariable()
5981 : S.getLowerBoundVariable())));
5982 LValue UB = EmitOMPHelperVar(
5983 CGF&: *this, Helper: cast<DeclRefExpr>(
5984 Val: (isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
5985 ? S.getCombinedUpperBoundVariable()
5986 : S.getUpperBoundVariable())));
5987 LValue ST =
5988 EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getStrideVariable()));
5989 LValue IL =
5990 EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getIsLastIterVariable()));
5991
5992 OMPPrivateScope LoopScope(*this);
5993 if (EmitOMPFirstprivateClause(D: S, PrivateScope&: LoopScope)) {
5994 // Emit implicit barrier to synchronize threads and avoid data races
5995 // on initialization of firstprivate variables and post-update of
5996 // lastprivate variables.
5997 CGM.getOpenMPRuntime().emitBarrierCall(
5998 CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false,
5999 /*ForceSimpleCall=*/true);
6000 }
6001 EmitOMPPrivateClause(D: S, PrivateScope&: LoopScope);
6002 if (isOpenMPSimdDirective(DKind: S.getDirectiveKind()) &&
6003 !isOpenMPParallelDirective(DKind: S.getDirectiveKind()) &&
6004 !isOpenMPTeamsDirective(DKind: S.getDirectiveKind()))
6005 EmitOMPReductionClauseInit(D: S, PrivateScope&: LoopScope);
6006 HasLastprivateClause = EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope);
6007 EmitOMPPrivateLoopCounters(S, LoopScope);
6008 (void)LoopScope.Privatize();
6009 if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()))
6010 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF&: *this, D: S);
6011
6012 // Detect the distribute schedule kind and chunk.
6013 llvm::Value *Chunk = nullptr;
6014 OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
6015 if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
6016 ScheduleKind = C->getDistScheduleKind();
6017 if (const Expr *Ch = C->getChunkSize()) {
6018 Chunk = EmitScalarExpr(E: Ch);
6019 Chunk = EmitScalarConversion(Src: Chunk, SrcTy: Ch->getType(),
6020 DstTy: S.getIterationVariable()->getType(),
6021 Loc: S.getBeginLoc());
6022 }
6023 } else {
        // Default behavior when no dist_schedule clause is specified.
6025 CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
6026 CGF&: *this, S, ScheduleKind, Chunk);
6027 }
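      // E.g. '#pragma omp distribute dist_schedule(static, 4)' takes the
      // first branch above, emitting the chunk expression and converting it
      // to the iteration-variable type; without the clause the target's
      // default schedule and chunk are used.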
6028 const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType());
6029 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
6030
6031 // OpenMP [2.10.8, distribute Construct, Description]
6032 // If dist_schedule is specified, kind must be static. If specified,
6033 // iterations are divided into chunks of size chunk_size, chunks are
6034 // assigned to the teams of the league in a round-robin fashion in the
6035 // order of the team number. When no chunk_size is specified, the
6036 // iteration space is divided into chunks that are approximately equal
6037 // in size, and at most one chunk is distributed to each team of the
6038 // league. The size of the chunks is unspecified in this case.
6039 bool StaticChunked =
6040 RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
6041 isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind());
6042 if (RT.isStaticNonchunked(ScheduleKind,
6043 /* Chunked */ Chunk != nullptr) ||
6044 StaticChunked) {
6045 CGOpenMPRuntime::StaticRTInput StaticInit(
6046 IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
6047 LB.getAddress(), UB.getAddress(), ST.getAddress(),
6048 StaticChunked ? Chunk : nullptr);
6049 RT.emitDistributeStaticInit(CGF&: *this, Loc: S.getBeginLoc(), SchedKind: ScheduleKind,
6050 Values: StaticInit);
6051 JumpDest LoopExit =
6052 getJumpDestInCurrentScope(Target: createBasicBlock(name: "omp.loop.exit"));
6053 // UB = min(UB, GlobalUB);
6054 EmitIgnoredExpr(E: isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
6055 ? S.getCombinedEnsureUpperBound()
6056 : S.getEnsureUpperBound());
6057 // IV = LB;
6058 EmitIgnoredExpr(E: isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
6059 ? S.getCombinedInit()
6060 : S.getInit());
6061
6062 const Expr *Cond =
6063 isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
6064 ? S.getCombinedCond()
6065 : S.getCond();
6066
6067 if (StaticChunked)
6068 Cond = S.getCombinedDistCond();
6069
6070 // For static unchunked schedules generate:
6071 //
6072 // 1. For distribute alone, codegen
6073 // while (idx <= UB) {
6074 // BODY;
6075 // ++idx;
6076 // }
6077 //
6078 // 2. When combined with 'for' (e.g. as in 'distribute parallel for')
6079 // while (idx <= UB) {
6080 // <CodeGen rest of pragma>(LB, UB);
6081 // idx += ST;
6082 // }
6083 //
        // For a static chunked schedule generate:
6085 //
6086 // while (IV <= GlobalUB) {
6087 // <CodeGen rest of pragma>(LB, UB);
6088 // LB += ST;
6089 // UB += ST;
6090 // UB = min(UB, GlobalUB);
6091 // IV = LB;
6092 // }
6093 //
6094 emitCommonSimdLoop(
6095 CGF&: *this, S,
6096 SimdInitGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6097 if (isOpenMPSimdDirective(DKind: S.getDirectiveKind()))
6098 CGF.EmitOMPSimdInit(D: S);
6099 },
6100 BodyCodeGen: [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
6101 StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
6102 CGF.EmitOMPInnerLoop(
6103 S, RequiresCleanup: LoopScope.requiresCleanups(), LoopCond: Cond, IncExpr,
6104 BodyGen: [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
6105 CodeGenLoop(CGF, S, LoopExit);
6106 },
6107 PostIncGen: [&S, StaticChunked](CodeGenFunction &CGF) {
6108 if (StaticChunked) {
6109 CGF.EmitIgnoredExpr(E: S.getCombinedNextLowerBound());
6110 CGF.EmitIgnoredExpr(E: S.getCombinedNextUpperBound());
6111 CGF.EmitIgnoredExpr(E: S.getCombinedEnsureUpperBound());
6112 CGF.EmitIgnoredExpr(E: S.getCombinedInit());
6113 }
6114 });
6115 });
6116 EmitBlock(BB: LoopExit.getBlock());
6117 // Tell the runtime we are done.
6118 RT.emitForStaticFinish(CGF&: *this, Loc: S.getEndLoc(), DKind: OMPD_distribute);
6119 } else {
      // Emit the outer loop, which requests its work chunk [LB..UB] from the
      // runtime and runs the inner loop to process it.
6122 const OMPLoopArguments LoopArguments = {
6123 LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
6124 Chunk};
6125 EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArgs: LoopArguments,
6126 CodeGenLoopContent: CodeGenLoop);
6127 }
6128 if (isOpenMPSimdDirective(DKind: S.getDirectiveKind())) {
6129 EmitOMPSimdFinal(D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
6130 return CGF.Builder.CreateIsNotNull(
6131 Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
6132 });
6133 }
6134 if (isOpenMPSimdDirective(DKind: S.getDirectiveKind()) &&
6135 !isOpenMPParallelDirective(DKind: S.getDirectiveKind()) &&
6136 !isOpenMPTeamsDirective(DKind: S.getDirectiveKind())) {
6137 EmitOMPReductionClauseFinal(D: S, ReductionKind: OMPD_simd);
6138 // Emit post-update of the reduction variables if IsLastIter != 0.
6139 emitPostUpdateForReductionClause(
6140 CGF&: *this, D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
6141 return CGF.Builder.CreateIsNotNull(
6142 Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
6143 });
6144 }
6145 // Emit final copy of the lastprivate variables if IsLastIter != 0.
6146 if (HasLastprivateClause) {
6147 EmitOMPLastprivateClauseFinal(
6148 D: S, /*NoFinals=*/false,
6149 IsLastIterCond: Builder.CreateIsNotNull(Arg: EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc())));
6150 }
6151 }
6152
6153 // We're now done with the loop, so jump to the continuation block.
6154 if (ContBlock) {
6155 EmitBranch(Block: ContBlock);
6156 EmitBlock(BB: ContBlock, IsFinished: true);
6157 }
6158 }
6159}
6160
6161// Pass OMPLoopDirective (instead of OMPDistributeDirective) to make this
6162// function available for "loop bind(teams)", which maps to "distribute".
6163static void emitOMPDistributeDirective(const OMPLoopDirective &S,
6164 CodeGenFunction &CGF,
6165 CodeGenModule &CGM) {
6166 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6167 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
6168 };
6169 OMPLexicalScope Scope(CGF, S, OMPD_unknown);
6170 CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute, CodeGen);
6171}
6172
6173void CodeGenFunction::EmitOMPDistributeDirective(
6174 const OMPDistributeDirective &S) {
6175 emitOMPDistributeDirective(S, CGF&: *this, CGM);
6176}
6177
6178static llvm::Function *
6179emitOutlinedOrderedFunction(CodeGenModule &CGM, const CapturedStmt *S,
6180 const OMPExecutableDirective &D) {
6181 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
6182 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
6183 CGF.CapturedStmtInfo = &CapStmtInfo;
6184 llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(S: *S, D);
6185 Fn->setDoesNotRecurse();
6186 return Fn;
6187}
6188
6189template <typename T>
6190static void emitRestoreIP(CodeGenFunction &CGF, const T *C,
6191 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
6192 llvm::OpenMPIRBuilder &OMPBuilder) {
6193
6194 unsigned NumLoops = C->getNumLoops();
6195 QualType Int64Ty = CGF.CGM.getContext().getIntTypeForBitwidth(
6196 /*DestWidth=*/64, /*Signed=*/1);
6197 llvm::SmallVector<llvm::Value *> StoreValues;
6198 for (unsigned I = 0; I < NumLoops; I++) {
6199 const Expr *CounterVal = C->getLoopData(I);
6200 assert(CounterVal);
6201 llvm::Value *StoreValue = CGF.EmitScalarConversion(
6202 Src: CGF.EmitScalarExpr(E: CounterVal), SrcTy: CounterVal->getType(), DstTy: Int64Ty,
6203 Loc: CounterVal->getExprLoc());
6204 StoreValues.emplace_back(Args&: StoreValue);
6205 }
6206 OMPDoacrossKind<T> ODK;
6207 bool IsDependSource = ODK.isSource(C);
6208 CGF.Builder.restoreIP(
6209 IP: OMPBuilder.createOrderedDepend(Loc: CGF.Builder, AllocaIP, NumLoops,
6210 StoreValues, Name: ".cnt.addr", IsDependSource));
6211}
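// Illustrative doacross forms lowered through emitRestoreIP:
//   #pragma omp ordered depend(source)      // IsDependSource is true
//   #pragma omp ordered doacross(sink: i-1) // IsDependSource is false
// Each loop-dependence value is first converted to a signed 64-bit integer,
// then handed to createOrderedDepend.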
6212
6213void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
6214 if (CGM.getLangOpts().OpenMPIRBuilder) {
6215 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
6216 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
6217
6218 if (S.hasClausesOfKind<OMPDependClause>() ||
6219 S.hasClausesOfKind<OMPDoacrossClause>()) {
6220 // The ordered directive with depend clause.
6221 assert(!S.hasAssociatedStmt() && "No associated statement must be in "
6222 "ordered depend|doacross construct.");
6223 InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
6224 AllocaInsertPt->getIterator());
6225 for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
6226 emitRestoreIP(CGF&: *this, C: DC, AllocaIP, OMPBuilder);
6227 for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
6228 emitRestoreIP(CGF&: *this, C: DC, AllocaIP, OMPBuilder);
6229 } else {
      // The ordered directive with a threads or simd clause, or with no
      // clause. With no clause, it behaves as if the threads clause were
      // specified.
6232 const auto *C = S.getSingleClause<OMPSIMDClause>();
6233
6234 auto FiniCB = [this](InsertPointTy IP) {
6235 OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
6236 return llvm::Error::success();
6237 };
6238
6239 auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP,
6240 InsertPointTy CodeGenIP) {
6241 Builder.restoreIP(IP: CodeGenIP);
6242
6243 const CapturedStmt *CS = S.getInnermostCapturedStmt();
6244 if (C) {
6245 llvm::BasicBlock *FiniBB = splitBBWithSuffix(
6246 Builder, /*CreateBranch=*/false, Suffix: ".ordered.after");
6247 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
6248 GenerateOpenMPCapturedVars(S: *CS, CapturedVars);
6249 llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, S: CS, D: S);
6250 assert(S.getBeginLoc().isValid() &&
6251 "Outlined function call location must be valid.");
6252 ApplyDebugLocation::CreateDefaultArtificial(CGF&: *this, TemporaryLocation: S.getBeginLoc());
6253 OMPBuilderCBHelpers::EmitCaptureStmt(CGF&: *this, CodeGenIP, FiniBB&: *FiniBB,
6254 Fn: OutlinedFn, Args: CapturedVars);
6255 } else {
6256 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
6257 CGF&: *this, RegionBodyStmt: CS->getCapturedStmt(), AllocaIP, CodeGenIP, RegionName: "ordered");
6258 }
6259 return llvm::Error::success();
6260 };
6261
6262 OMPLexicalScope Scope(*this, S, OMPD_unknown);
6263 llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(
6264 ValOrErr: OMPBuilder.createOrderedThreadsSimd(Loc: Builder, BodyGenCB, FiniCB, IsThreads: !C));
6265 Builder.restoreIP(IP: AfterIP);
6266 }
6267 return;
6268 }
6269
6270 if (S.hasClausesOfKind<OMPDependClause>()) {
6271 assert(!S.hasAssociatedStmt() &&
6272 "No associated statement must be in ordered depend construct.");
6273 for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
6274 CGM.getOpenMPRuntime().emitDoacrossOrdered(CGF&: *this, C: DC);
6275 return;
6276 }
6277 if (S.hasClausesOfKind<OMPDoacrossClause>()) {
6278 assert(!S.hasAssociatedStmt() &&
6279 "No associated statement must be in ordered doacross construct.");
6280 for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
6281 CGM.getOpenMPRuntime().emitDoacrossOrdered(CGF&: *this, C: DC);
6282 return;
6283 }
6284 const auto *C = S.getSingleClause<OMPSIMDClause>();
6285 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
6286 PrePostActionTy &Action) {
6287 const CapturedStmt *CS = S.getInnermostCapturedStmt();
6288 if (C) {
6289 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
6290 CGF.GenerateOpenMPCapturedVars(S: *CS, CapturedVars);
6291 llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, S: CS, D: S);
6292 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc: S.getBeginLoc(),
6293 OutlinedFn, Args: CapturedVars);
6294 } else {
6295 Action.Enter(CGF);
6296 CGF.EmitStmt(S: CS->getCapturedStmt());
6297 }
6298 };
6299 OMPLexicalScope Scope(*this, S, OMPD_unknown);
6300 CGM.getOpenMPRuntime().emitOrderedRegion(CGF&: *this, OrderedOpGen: CodeGen, Loc: S.getBeginLoc(), IsThreads: !C);
6301}
6302
6303static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
6304 QualType SrcType, QualType DestType,
6305 SourceLocation Loc) {
6306 assert(CGF.hasScalarEvaluationKind(DestType) &&
6307 "DestType must have scalar evaluation kind.");
6308 assert(!Val.isAggregate() && "Must be a scalar or complex.");
6309 return Val.isScalar() ? CGF.EmitScalarConversion(Src: Val.getScalarVal(), SrcTy: SrcType,
6310 DstTy: DestType, Loc)
6311 : CGF.EmitComplexToScalarConversion(
6312 Src: Val.getComplexVal(), SrcTy: SrcType, DstTy: DestType, Loc);
6313}
6314
6315static CodeGenFunction::ComplexPairTy
6316convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
6317 QualType DestType, SourceLocation Loc) {
6318 assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
6319 "DestType must have complex evaluation kind.");
6320 CodeGenFunction::ComplexPairTy ComplexVal;
6321 if (Val.isScalar()) {
6322 // Convert the input element to the element type of the complex.
6323 QualType DestElementType =
6324 DestType->castAs<ComplexType>()->getElementType();
6325 llvm::Value *ScalarVal = CGF.EmitScalarConversion(
6326 Src: Val.getScalarVal(), SrcTy: SrcType, DstTy: DestElementType, Loc);
6327 ComplexVal = CodeGenFunction::ComplexPairTy(
6328 ScalarVal, llvm::Constant::getNullValue(Ty: ScalarVal->getType()));
6329 } else {
6330 assert(Val.isComplex() && "Must be a scalar or complex.");
6331 QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
6332 QualType DestElementType =
6333 DestType->castAs<ComplexType>()->getElementType();
6334 ComplexVal.first = CGF.EmitScalarConversion(
6335 Src: Val.getComplexVal().first, SrcTy: SrcElementType, DstTy: DestElementType, Loc);
6336 ComplexVal.second = CGF.EmitScalarConversion(
6337 Src: Val.getComplexVal().second, SrcTy: SrcElementType, DstTy: DestElementType, Loc);
6338 }
6339 return ComplexVal;
6340}
6341
6342static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
6343 LValue LVal, RValue RVal) {
6344 if (LVal.isGlobalReg())
6345 CGF.EmitStoreThroughGlobalRegLValue(Src: RVal, Dst: LVal);
6346 else
6347 CGF.EmitAtomicStore(rvalue: RVal, lvalue: LVal, AO, IsVolatile: LVal.isVolatile(), /*isInit=*/false);
6348}
6349
6350static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
6351 llvm::AtomicOrdering AO, LValue LVal,
6352 SourceLocation Loc) {
6353 if (LVal.isGlobalReg())
6354 return CGF.EmitLoadOfLValue(V: LVal, Loc);
6355 return CGF.EmitAtomicLoad(
6356 lvalue: LVal, loc: Loc, AO: llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering: AO),
6357 IsVolatile: LVal.isVolatile());
6358}
6359
6360void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
6361 QualType RValTy, SourceLocation Loc) {
6362 switch (getEvaluationKind(T: LVal.getType())) {
6363 case TEK_Scalar:
6364 EmitStoreThroughLValue(Src: RValue::get(V: convertToScalarValue(
6365 CGF&: *this, Val: RVal, SrcType: RValTy, DestType: LVal.getType(), Loc)),
6366 Dst: LVal);
6367 break;
6368 case TEK_Complex:
6369 EmitStoreOfComplex(
6370 V: convertToComplexValue(CGF&: *this, Val: RVal, SrcType: RValTy, DestType: LVal.getType(), Loc), dest: LVal,
6371 /*isInit=*/false);
6372 break;
6373 case TEK_Aggregate:
6374 llvm_unreachable("Must be a scalar or complex.");
6375 }
6376}
6377
6378static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
6379 const Expr *X, const Expr *V,
6380 SourceLocation Loc) {
6381 // v = x;
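  // E.g. for '#pragma omp atomic read acquire' over 'v = x;', 'x' is loaded
  // atomically with acquire ordering and the result is stored to 'v'
  // non-atomically.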
6382 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
6383 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
6384 LValue XLValue = CGF.EmitLValue(E: X);
6385 LValue VLValue = CGF.EmitLValue(E: V);
6386 RValue Res = emitSimpleAtomicLoad(CGF, AO, LVal: XLValue, Loc);
6387 // OpenMP, 2.17.7, atomic Construct
6388 // If the read or capture clause is specified and the acquire, acq_rel, or
6389 // seq_cst clause is specified then the strong flush on exit from the atomic
6390 // operation is also an acquire flush.
6391 switch (AO) {
6392 case llvm::AtomicOrdering::Acquire:
6393 case llvm::AtomicOrdering::AcquireRelease:
6394 case llvm::AtomicOrdering::SequentiallyConsistent:
6395 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: {}, Loc,
6396 AO: llvm::AtomicOrdering::Acquire);
6397 break;
6398 case llvm::AtomicOrdering::Monotonic:
6399 case llvm::AtomicOrdering::Release:
6400 break;
6401 case llvm::AtomicOrdering::NotAtomic:
6402 case llvm::AtomicOrdering::Unordered:
6403 llvm_unreachable("Unexpected ordering.");
6404 }
6405 CGF.emitOMPSimpleStore(LVal: VLValue, RVal: Res, RValTy: X->getType().getNonReferenceType(), Loc);
6406 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: V);
6407}
6408
6409static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
6410 llvm::AtomicOrdering AO, const Expr *X,
6411 const Expr *E, SourceLocation Loc) {
6412 // x = expr;
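  // E.g. for '#pragma omp atomic write release' over 'x = expr;', 'expr' is
  // evaluated non-atomically and stored to 'x' with release ordering.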
6413 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
6414 emitSimpleAtomicStore(CGF, AO, LVal: CGF.EmitLValue(E: X), RVal: CGF.EmitAnyExpr(E));
6415 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X);
6416 // OpenMP, 2.17.7, atomic Construct
6417 // If the write, update, or capture clause is specified and the release,
6418 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6419 // the atomic operation is also a release flush.
6420 switch (AO) {
6421 case llvm::AtomicOrdering::Release:
6422 case llvm::AtomicOrdering::AcquireRelease:
6423 case llvm::AtomicOrdering::SequentiallyConsistent:
6424 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: {}, Loc,
6425 AO: llvm::AtomicOrdering::Release);
6426 break;
6427 case llvm::AtomicOrdering::Acquire:
6428 case llvm::AtomicOrdering::Monotonic:
6429 break;
6430 case llvm::AtomicOrdering::NotAtomic:
6431 case llvm::AtomicOrdering::Unordered:
6432 llvm_unreachable("Unexpected ordering.");
6433 }
6434}
6435
6436static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
6437 RValue Update,
6438 BinaryOperatorKind BO,
6439 llvm::AtomicOrdering AO,
6440 bool IsXLHSInRHSPart) {
6441 ASTContext &Context = CGF.getContext();
  // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
  // for the 'x' expression is simple, and atomics are supported for the given
  // type on the target platform.
6445 if (BO == BO_Comma || !Update.isScalar() || !X.isSimple() ||
6446 (!isa<llvm::ConstantInt>(Val: Update.getScalarVal()) &&
6447 (Update.getScalarVal()->getType() != X.getAddress().getElementType())) ||
6448 !Context.getTargetInfo().hasBuiltinAtomic(
6449 AtomicSizeInBits: Context.getTypeSize(T: X.getType()), AlignmentInBits: Context.toBits(CharSize: X.getAlignment())))
6450 return std::make_pair(x: false, y: RValue::get(V: nullptr));
6451
6452 auto &&CheckAtomicSupport = [&CGF](llvm::Type *T, BinaryOperatorKind BO) {
6453 if (T->isIntegerTy())
6454 return true;
6455
6456 if (T->isFloatingPointTy() && (BO == BO_Add || BO == BO_Sub))
6457 return llvm::isPowerOf2_64(Value: CGF.CGM.getDataLayout().getTypeStoreSize(Ty: T));
6458
6459 return false;
6460 };
6461
6462 if (!CheckAtomicSupport(Update.getScalarVal()->getType(), BO) ||
6463 !CheckAtomicSupport(X.getAddress().getElementType(), BO))
6464 return std::make_pair(x: false, y: RValue::get(V: nullptr));
6465
6466 bool IsInteger = X.getAddress().getElementType()->isIntegerTy();
6467 llvm::AtomicRMWInst::BinOp RMWOp;
6468 switch (BO) {
6469 case BO_Add:
6470 RMWOp = IsInteger ? llvm::AtomicRMWInst::Add : llvm::AtomicRMWInst::FAdd;
6471 break;
6472 case BO_Sub:
6473 if (!IsXLHSInRHSPart)
6474 return std::make_pair(x: false, y: RValue::get(V: nullptr));
6475 RMWOp = IsInteger ? llvm::AtomicRMWInst::Sub : llvm::AtomicRMWInst::FSub;
6476 break;
6477 case BO_And:
6478 RMWOp = llvm::AtomicRMWInst::And;
6479 break;
6480 case BO_Or:
6481 RMWOp = llvm::AtomicRMWInst::Or;
6482 break;
6483 case BO_Xor:
6484 RMWOp = llvm::AtomicRMWInst::Xor;
6485 break;
6486 case BO_LT:
6487 if (IsInteger)
6488 RMWOp = X.getType()->hasSignedIntegerRepresentation()
6489 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
6490 : llvm::AtomicRMWInst::Max)
6491 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
6492 : llvm::AtomicRMWInst::UMax);
6493 else
6494 RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMin
6495 : llvm::AtomicRMWInst::FMax;
6496 break;
6497 case BO_GT:
6498 if (IsInteger)
6499 RMWOp = X.getType()->hasSignedIntegerRepresentation()
6500 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
6501 : llvm::AtomicRMWInst::Min)
6502 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
6503 : llvm::AtomicRMWInst::UMin);
6504 else
6505 RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMax
6506 : llvm::AtomicRMWInst::FMin;
6507 break;
6508 case BO_Assign:
6509 RMWOp = llvm::AtomicRMWInst::Xchg;
6510 break;
6511 case BO_Mul:
6512 case BO_Div:
6513 case BO_Rem:
6514 case BO_Shl:
6515 case BO_Shr:
6516 case BO_LAnd:
6517 case BO_LOr:
6518 return std::make_pair(x: false, y: RValue::get(V: nullptr));
6519 case BO_PtrMemD:
6520 case BO_PtrMemI:
6521 case BO_LE:
6522 case BO_GE:
6523 case BO_EQ:
6524 case BO_NE:
6525 case BO_Cmp:
6526 case BO_AddAssign:
6527 case BO_SubAssign:
6528 case BO_AndAssign:
6529 case BO_OrAssign:
6530 case BO_XorAssign:
6531 case BO_MulAssign:
6532 case BO_DivAssign:
6533 case BO_RemAssign:
6534 case BO_ShlAssign:
6535 case BO_ShrAssign:
6536 case BO_Comma:
6537 llvm_unreachable("Unsupported atomic update operation");
6538 }
6539 llvm::Value *UpdateVal = Update.getScalarVal();
6540 if (auto *IC = dyn_cast<llvm::ConstantInt>(Val: UpdateVal)) {
6541 if (IsInteger)
6542 UpdateVal = CGF.Builder.CreateIntCast(
6543 V: IC, DestTy: X.getAddress().getElementType(),
6544 isSigned: X.getType()->hasSignedIntegerRepresentation());
6545 else
6546 UpdateVal = CGF.Builder.CreateCast(Op: llvm::Instruction::CastOps::UIToFP, V: IC,
6547 DestTy: X.getAddress().getElementType());
6548 }
6549 llvm::AtomicRMWInst *Res =
6550 CGF.emitAtomicRMWInst(Op: RMWOp, Addr: X.getAddress(), Val: UpdateVal, Order: AO);
6551 return std::make_pair(x: true, y: RValue::get(V: Res));
6552}
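
// Illustrative mapping performed above (simplified; operands are
// placeholders):
//
//   x += expr;   // integer 'x' -> atomicrmw add
//   x -= expr;   // float 'x'   -> atomicrmw fsub
//   x |= expr;   //             -> atomicrmw or
//
// Forms that cannot be expressed as a single atomicrmw (e.g. 'x *= expr', or
// 'x = expr - x', where 'x' is the RHS of the update) return
// {false, nullptr} so the caller can fall back to a compare-and-swap loop.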
6553
6554std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
6555 LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
6556 llvm::AtomicOrdering AO, SourceLocation Loc,
6557 const llvm::function_ref<RValue(RValue)> CommonGen) {
6558  // Update expressions are allowed to have the following forms:
6559  // x binop= expr; -> xrval binop expr;
6560  // x++, ++x -> xrval + 1;
6561  // x--, --x -> xrval - 1;
6562  // x = x binop expr; -> xrval binop expr;
6563  // x = expr binop x; -> expr binop xrval;
6564 auto Res = emitOMPAtomicRMW(CGF&: *this, X, Update: E, BO, AO, IsXLHSInRHSPart);
6565 if (!Res.first) {
6566 if (X.isGlobalReg()) {
6567 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
6568 // 'xrval'.
6569 EmitStoreThroughLValue(Src: CommonGen(EmitLoadOfLValue(V: X, Loc)), Dst: X);
6570 } else {
6571 // Perform compare-and-swap procedure.
6572 EmitAtomicUpdate(LVal: X, AO, UpdateOp: CommonGen, IsVolatile: X.getType().isVolatileQualified());
6573 }
6574 }
6575 return Res;
6576}
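
// Summary of the dispatch above: the update is lowered in one of three ways.
// A single 'atomicrmw' is used when emitOMPAtomicRMW accepts the form; a
// non-atomic load/compute/store is used when 'x' lives in a global register;
// otherwise EmitAtomicUpdate emits a compare-and-swap loop around CommonGen.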
6577
6578static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
6579 llvm::AtomicOrdering AO, const Expr *X,
6580 const Expr *E, const Expr *UE,
6581 bool IsXLHSInRHSPart, SourceLocation Loc) {
6582 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
6583 "Update expr in 'atomic update' must be a binary operator.");
6584 const auto *BOUE = cast<BinaryOperator>(Val: UE->IgnoreImpCasts());
6585  // Update expressions are allowed to have the following forms:
6586  // x binop= expr; -> xrval binop expr;
6587  // x++, ++x -> xrval + 1;
6588  // x--, --x -> xrval - 1;
6589  // x = x binop expr; -> xrval binop expr;
6590  // x = expr binop x; -> expr binop xrval;
6591 assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
6592 LValue XLValue = CGF.EmitLValue(E: X);
6593 RValue ExprRValue = CGF.EmitAnyExpr(E);
6594 const auto *LHS = cast<OpaqueValueExpr>(Val: BOUE->getLHS()->IgnoreImpCasts());
6595 const auto *RHS = cast<OpaqueValueExpr>(Val: BOUE->getRHS()->IgnoreImpCasts());
6596 const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
6597 const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
6598 auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
6599 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6600 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
6601 return CGF.EmitAnyExpr(E: UE);
6602 };
6603 (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
6604 X: XLValue, E: ExprRValue, BO: BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, CommonGen: Gen);
6605 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X);
6606 // OpenMP, 2.17.7, atomic Construct
6607 // If the write, update, or capture clause is specified and the release,
6608 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6609 // the atomic operation is also a release flush.
6610 switch (AO) {
6611 case llvm::AtomicOrdering::Release:
6612 case llvm::AtomicOrdering::AcquireRelease:
6613 case llvm::AtomicOrdering::SequentiallyConsistent:
6614 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: {}, Loc,
6615 AO: llvm::AtomicOrdering::Release);
6616 break;
6617 case llvm::AtomicOrdering::Acquire:
6618 case llvm::AtomicOrdering::Monotonic:
6619 break;
6620 case llvm::AtomicOrdering::NotAtomic:
6621 case llvm::AtomicOrdering::Unordered:
6622 llvm_unreachable("Unexpected ordering.");
6623 }
6624}
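
// Illustrative sketch (simplified; 'x' and 'expr' are placeholders): for
//
//   #pragma omp atomic update
//   x = expr * x;
//
// 'UE' is 'expr * xrval' with both operands wrapped in OpaqueValueExprs.
// Since BO_Mul cannot be a single atomicrmw, the 'Gen' lambda above binds
// 'expr' to its precomputed RValue and 'xrval' to the value of 'x' supplied
// by the compare-and-swap loop, then re-evaluates 'UE' to get the new value.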
6625
6626static RValue convertToType(CodeGenFunction &CGF, RValue Value,
6627 QualType SourceType, QualType ResType,
6628 SourceLocation Loc) {
6629 switch (CGF.getEvaluationKind(T: ResType)) {
6630 case TEK_Scalar:
6631 return RValue::get(
6632 V: convertToScalarValue(CGF, Val: Value, SrcType: SourceType, DestType: ResType, Loc));
6633 case TEK_Complex: {
6634 auto Res = convertToComplexValue(CGF, Val: Value, SrcType: SourceType, DestType: ResType, Loc);
6635 return RValue::getComplex(V1: Res.first, V2: Res.second);
6636 }
6637 case TEK_Aggregate:
6638 break;
6639 }
6640 llvm_unreachable("Must be a scalar or complex.");
6641}
6642
6643static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
6644 llvm::AtomicOrdering AO,
6645 bool IsPostfixUpdate, const Expr *V,
6646 const Expr *X, const Expr *E,
6647 const Expr *UE, bool IsXLHSInRHSPart,
6648 SourceLocation Loc) {
6649 assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
6650 assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
6651 RValue NewVVal;
6652 LValue VLValue = CGF.EmitLValue(E: V);
6653 LValue XLValue = CGF.EmitLValue(E: X);
6654 RValue ExprRValue = CGF.EmitAnyExpr(E);
6655 QualType NewVValType;
6656 if (UE) {
6657 // 'x' is updated with some additional value.
6658 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
6659 "Update expr in 'atomic capture' must be a binary operator.");
6660 const auto *BOUE = cast<BinaryOperator>(Val: UE->IgnoreImpCasts());
6661    // Update expressions are allowed to have the following forms:
6662    // x binop= expr; -> xrval binop expr;
6663    // x++, ++x -> xrval + 1;
6664    // x--, --x -> xrval - 1;
6665    // x = x binop expr; -> xrval binop expr;
6666    // x = expr binop x; -> expr binop xrval;
6667 const auto *LHS = cast<OpaqueValueExpr>(Val: BOUE->getLHS()->IgnoreImpCasts());
6668 const auto *RHS = cast<OpaqueValueExpr>(Val: BOUE->getRHS()->IgnoreImpCasts());
6669 const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
6670 NewVValType = XRValExpr->getType();
6671 const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
6672 auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
6673 IsPostfixUpdate](RValue XRValue) {
6674 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6675 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
6676 RValue Res = CGF.EmitAnyExpr(E: UE);
6677 NewVVal = IsPostfixUpdate ? XRValue : Res;
6678 return Res;
6679 };
6680 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
6681 X: XLValue, E: ExprRValue, BO: BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, CommonGen: Gen);
6682 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X);
6683 if (Res.first) {
6684 // 'atomicrmw' instruction was generated.
6685 if (IsPostfixUpdate) {
6686 // Use old value from 'atomicrmw'.
6687 NewVVal = Res.second;
6688 } else {
6689 // 'atomicrmw' does not provide new value, so evaluate it using old
6690 // value of 'x'.
6691 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6692 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
6693 NewVVal = CGF.EmitAnyExpr(E: UE);
6694 }
6695 }
6696 } else {
6697 // 'x' is simply rewritten with some 'expr'.
6698 NewVValType = X->getType().getNonReferenceType();
6699 ExprRValue = convertToType(CGF, Value: ExprRValue, SourceType: E->getType(),
6700 ResType: X->getType().getNonReferenceType(), Loc);
6701 auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
6702 NewVVal = XRValue;
6703 return ExprRValue;
6704 };
6705    // Try an 'atomicrmw xchg'; otherwise, fall back to a simple exchange.
6706 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
6707 X: XLValue, E: ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
6708 Loc, CommonGen: Gen);
6709 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X);
6710 if (Res.first) {
6711 // 'atomicrmw' instruction was generated.
6712 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
6713 }
6714 }
6715 // Emit post-update store to 'v' of old/new 'x' value.
6716 CGF.emitOMPSimpleStore(LVal: VLValue, RVal: NewVVal, RValTy: NewVValType, Loc);
6717 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: V);
6718 // OpenMP 5.1 removes the required flush for capture clause.
6719 if (CGF.CGM.getLangOpts().OpenMP < 51) {
6720 // OpenMP, 2.17.7, atomic Construct
6721 // If the write, update, or capture clause is specified and the release,
6722 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6723 // the atomic operation is also a release flush.
6724 // If the read or capture clause is specified and the acquire, acq_rel, or
6725 // seq_cst clause is specified then the strong flush on exit from the atomic
6726 // operation is also an acquire flush.
6727 switch (AO) {
6728 case llvm::AtomicOrdering::Release:
6729 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: {}, Loc,
6730 AO: llvm::AtomicOrdering::Release);
6731 break;
6732 case llvm::AtomicOrdering::Acquire:
6733 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: {}, Loc,
6734 AO: llvm::AtomicOrdering::Acquire);
6735 break;
6736 case llvm::AtomicOrdering::AcquireRelease:
6737 case llvm::AtomicOrdering::SequentiallyConsistent:
6738 CGF.CGM.getOpenMPRuntime().emitFlush(
6739 CGF, Vars: {}, Loc, AO: llvm::AtomicOrdering::AcquireRelease);
6740 break;
6741 case llvm::AtomicOrdering::Monotonic:
6742 break;
6743 case llvm::AtomicOrdering::NotAtomic:
6744 case llvm::AtomicOrdering::Unordered:
6745 llvm_unreachable("Unexpected ordering.");
6746 }
6747 }
6748}
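
// Illustrative sketch (simplified; 'v' and 'x' are placeholders):
//
//   #pragma omp atomic capture
//   v = x++;
//
// is a postfix update, so 'v' receives the old value of 'x' (the atomicrmw
// result, or the value observed by the compare-and-swap loop), whereas
//
//   #pragma omp atomic capture
//   v = ++x;
//
// stores the updated value of 'x' into 'v'.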
6749
6750static void emitOMPAtomicCompareExpr(
6751 CodeGenFunction &CGF, llvm::AtomicOrdering AO, llvm::AtomicOrdering FailAO,
6752 const Expr *X, const Expr *V, const Expr *R, const Expr *E, const Expr *D,
6753 const Expr *CE, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly,
6754 SourceLocation Loc) {
6755 llvm::OpenMPIRBuilder &OMPBuilder =
6756 CGF.CGM.getOpenMPRuntime().getOMPBuilder();
6757
6758 OMPAtomicCompareOp Op;
6759 assert(isa<BinaryOperator>(CE) && "CE is not a BinaryOperator");
6760 switch (cast<BinaryOperator>(Val: CE)->getOpcode()) {
6761 case BO_EQ:
6762 Op = OMPAtomicCompareOp::EQ;
6763 break;
6764 case BO_LT:
6765 Op = OMPAtomicCompareOp::MIN;
6766 break;
6767 case BO_GT:
6768 Op = OMPAtomicCompareOp::MAX;
6769 break;
6770 default:
6771 llvm_unreachable("unsupported atomic compare binary operator");
6772 }
6773
6774 LValue XLVal = CGF.EmitLValue(E: X);
6775 Address XAddr = XLVal.getAddress();
6776
6777 auto EmitRValueWithCastIfNeeded = [&CGF, Loc](const Expr *X, const Expr *E) {
6778 if (X->getType() == E->getType())
6779 return CGF.EmitScalarExpr(E);
6780 const Expr *NewE = E->IgnoreImplicitAsWritten();
6781 llvm::Value *V = CGF.EmitScalarExpr(E: NewE);
6782 if (NewE->getType() == X->getType())
6783 return V;
6784 return CGF.EmitScalarConversion(Src: V, SrcTy: NewE->getType(), DstTy: X->getType(), Loc);
6785 };
6786
6787 llvm::Value *EVal = EmitRValueWithCastIfNeeded(X, E);
6788 llvm::Value *DVal = D ? EmitRValueWithCastIfNeeded(X, D) : nullptr;
6789 if (auto *CI = dyn_cast<llvm::ConstantInt>(Val: EVal))
6790 EVal = CGF.Builder.CreateIntCast(
6791 V: CI, DestTy: XLVal.getAddress().getElementType(),
6792 isSigned: E->getType()->hasSignedIntegerRepresentation());
6793 if (DVal)
6794 if (auto *CI = dyn_cast<llvm::ConstantInt>(Val: DVal))
6795 DVal = CGF.Builder.CreateIntCast(
6796 V: CI, DestTy: XLVal.getAddress().getElementType(),
6797 isSigned: D->getType()->hasSignedIntegerRepresentation());
6798
6799 llvm::OpenMPIRBuilder::AtomicOpValue XOpVal{
6800 .Var: XAddr.emitRawPointer(CGF), .ElemTy: XAddr.getElementType(),
6801 .IsSigned: X->getType()->hasSignedIntegerRepresentation(),
6802 .IsVolatile: X->getType().isVolatileQualified()};
6803 llvm::OpenMPIRBuilder::AtomicOpValue VOpVal, ROpVal;
6804 if (V) {
6805 LValue LV = CGF.EmitLValue(E: V);
6806 Address Addr = LV.getAddress();
6807 VOpVal = {.Var: Addr.emitRawPointer(CGF), .ElemTy: Addr.getElementType(),
6808 .IsSigned: V->getType()->hasSignedIntegerRepresentation(),
6809 .IsVolatile: V->getType().isVolatileQualified()};
6810 }
6811 if (R) {
6812 LValue LV = CGF.EmitLValue(E: R);
6813 Address Addr = LV.getAddress();
6814 ROpVal = {.Var: Addr.emitRawPointer(CGF), .ElemTy: Addr.getElementType(),
6815 .IsSigned: R->getType()->hasSignedIntegerRepresentation(),
6816 .IsVolatile: R->getType().isVolatileQualified()};
6817 }
6818
6819 if (FailAO == llvm::AtomicOrdering::NotAtomic) {
6820    // No 'fail' clause was specified on the
6821    // '#pragma omp atomic compare' construct.
6822 CGF.Builder.restoreIP(IP: OMPBuilder.createAtomicCompare(
6823 Loc: CGF.Builder, X&: XOpVal, V&: VOpVal, R&: ROpVal, E: EVal, D: DVal, AO, Op, IsXBinopExpr,
6824 IsPostfixUpdate, IsFailOnly));
6825 } else
6826 CGF.Builder.restoreIP(IP: OMPBuilder.createAtomicCompare(
6827 Loc: CGF.Builder, X&: XOpVal, V&: VOpVal, R&: ROpVal, E: EVal, D: DVal, AO, Op, IsXBinopExpr,
6828 IsPostfixUpdate, IsFailOnly, Failure: FailAO));
6829}
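
// Illustrative sketch (simplified; names are placeholders): a conditional
// update such as
//
//   #pragma omp atomic compare
//   if (x == e) { x = d; }
//
// maps to OMPAtomicCompareOp::EQ and is emitted by the OpenMPIRBuilder as a
// compare-exchange, while the ternary min/max forms (BO_LT/BO_GT above) map
// to the MIN/MAX operations.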
6830
6831static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
6832 llvm::AtomicOrdering AO,
6833 llvm::AtomicOrdering FailAO, bool IsPostfixUpdate,
6834 const Expr *X, const Expr *V, const Expr *R,
6835 const Expr *E, const Expr *UE, const Expr *D,
6836 const Expr *CE, bool IsXLHSInRHSPart,
6837 bool IsFailOnly, SourceLocation Loc) {
6838 switch (Kind) {
6839 case OMPC_read:
6840 emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
6841 break;
6842 case OMPC_write:
6843 emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
6844 break;
6845 case OMPC_unknown:
6846 case OMPC_update:
6847 emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
6848 break;
6849 case OMPC_capture:
6850 emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
6851 IsXLHSInRHSPart, Loc);
6852 break;
6853 case OMPC_compare: {
6854 emitOMPAtomicCompareExpr(CGF, AO, FailAO, X, V, R, E, D, CE,
6855 IsXBinopExpr: IsXLHSInRHSPart, IsPostfixUpdate, IsFailOnly, Loc);
6856 break;
6857 }
6858 default:
6859 llvm_unreachable("Clause is not allowed in 'omp atomic'.");
6860 }
6861}
6862
6863void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
6864 llvm::AtomicOrdering AO = CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
6865  // Memory ordering for the 'fail' clause, if one is present.
6866 llvm::AtomicOrdering FailAO = llvm::AtomicOrdering::NotAtomic;
6867 bool MemOrderingSpecified = false;
6868 if (S.getSingleClause<OMPSeqCstClause>()) {
6869 AO = llvm::AtomicOrdering::SequentiallyConsistent;
6870 MemOrderingSpecified = true;
6871 } else if (S.getSingleClause<OMPAcqRelClause>()) {
6872 AO = llvm::AtomicOrdering::AcquireRelease;
6873 MemOrderingSpecified = true;
6874 } else if (S.getSingleClause<OMPAcquireClause>()) {
6875 AO = llvm::AtomicOrdering::Acquire;
6876 MemOrderingSpecified = true;
6877 } else if (S.getSingleClause<OMPReleaseClause>()) {
6878 AO = llvm::AtomicOrdering::Release;
6879 MemOrderingSpecified = true;
6880 } else if (S.getSingleClause<OMPRelaxedClause>()) {
6881 AO = llvm::AtomicOrdering::Monotonic;
6882 MemOrderingSpecified = true;
6883 }
6884 llvm::SmallSet<OpenMPClauseKind, 2> KindsEncountered;
6885 OpenMPClauseKind Kind = OMPC_unknown;
6886 for (const OMPClause *C : S.clauses()) {
6887    // Find the first clause (skip the seq_cst|acq_rel|acquire|release|relaxed
6888    // clause, if it is first).
6889 OpenMPClauseKind K = C->getClauseKind();
6890    // TODO: the 'weak' clause is not supported yet; bail out for now.
6891 if (K == OMPC_weak)
6892 return;
6893 if (K == OMPC_seq_cst || K == OMPC_acq_rel || K == OMPC_acquire ||
6894 K == OMPC_release || K == OMPC_relaxed || K == OMPC_hint)
6895 continue;
6896 Kind = K;
6897 KindsEncountered.insert(V: K);
6898 }
6899 // We just need to correct Kind here. No need to set a bool saying it is
6900 // actually compare capture because we can tell from whether V and R are
6901 // nullptr.
6902 if (KindsEncountered.contains(V: OMPC_compare) &&
6903 KindsEncountered.contains(V: OMPC_capture))
6904 Kind = OMPC_compare;
6905 if (!MemOrderingSpecified) {
6906 llvm::AtomicOrdering DefaultOrder =
6907 CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
6908 if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
6909 DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
6910 (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
6911 Kind == OMPC_capture)) {
6912 AO = DefaultOrder;
6913 } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
6914 if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
6915 AO = llvm::AtomicOrdering::Release;
6916 } else if (Kind == OMPC_read) {
6917 assert(Kind == OMPC_read && "Unexpected atomic kind.");
6918 AO = llvm::AtomicOrdering::Acquire;
6919 }
6920 }
6921 }
6922
6923 if (KindsEncountered.contains(V: OMPC_compare) &&
6924 KindsEncountered.contains(V: OMPC_fail)) {
6925 Kind = OMPC_compare;
6926 const auto *FailClause = S.getSingleClause<OMPFailClause>();
6927 if (FailClause) {
6928 OpenMPClauseKind FailParameter = FailClause->getFailParameter();
6929 if (FailParameter == llvm::omp::OMPC_relaxed)
6930 FailAO = llvm::AtomicOrdering::Monotonic;
6931 else if (FailParameter == llvm::omp::OMPC_acquire)
6932 FailAO = llvm::AtomicOrdering::Acquire;
6933 else if (FailParameter == llvm::omp::OMPC_seq_cst)
6934 FailAO = llvm::AtomicOrdering::SequentiallyConsistent;
6935 }
6936 }
6937
6938 LexicalScope Scope(*this, S.getSourceRange());
6939 EmitStopPoint(S: S.getAssociatedStmt());
6940 emitOMPAtomicExpr(CGF&: *this, Kind, AO, FailAO, IsPostfixUpdate: S.isPostfixUpdate(), X: S.getX(),
6941 V: S.getV(), R: S.getR(), E: S.getExpr(), UE: S.getUpdateExpr(),
6942 D: S.getD(), CE: S.getCondExpr(), IsXLHSInRHSPart: S.isXLHSInRHSPart(),
6943 IsFailOnly: S.isFailOnly(), Loc: S.getBeginLoc());
6944}
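
// Illustrative example of the defaulting logic above: under
//
//   #pragma omp requires atomic_default_mem_order(acq_rel)
//
// an 'atomic' directive without a memory-order clause gets Release for
// write/update (and unspecified) kinds and Acquire for reads; acq_rel itself
// is retained only for 'capture'.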
6945
6946static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
6947 const OMPExecutableDirective &S,
6948 const RegionCodeGenTy &CodeGen) {
6949 assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
6950 CodeGenModule &CGM = CGF.CGM;
6951
6952  // On the device, emit this construct as inlined code.
6953 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
6954 OMPLexicalScope Scope(CGF, S, OMPD_target);
6955 CGM.getOpenMPRuntime().emitInlinedDirective(
6956 CGF, InnermostKind: OMPD_target, CodeGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6957 CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
6958 });
6959 return;
6960 }
6961
6962 auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
6963 llvm::Function *Fn = nullptr;
6964 llvm::Constant *FnID = nullptr;
6965
6966 const Expr *IfCond = nullptr;
6967  // Check for at most one 'if' clause associated with the target region.
6968 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
6969 if (C->getNameModifier() == OMPD_unknown ||
6970 C->getNameModifier() == OMPD_target) {
6971 IfCond = C->getCondition();
6972 break;
6973 }
6974 }
6975
6976 // Check if we have any device clause associated with the directive.
6977 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
6978 nullptr, OMPC_DEVICE_unknown);
6979 if (auto *C = S.getSingleClause<OMPDeviceClause>())
6980 Device.setPointerAndInt(PtrVal: C->getDevice(), IntVal: C->getModifier());
6981
6982 // Check if we have an if clause whose conditional always evaluates to false
6983 // or if we do not have any targets specified. If so the target region is not
6984 // an offload entry point.
6985 bool IsOffloadEntry = true;
6986 if (IfCond) {
6987 bool Val;
6988 if (CGF.ConstantFoldsToSimpleInteger(Cond: IfCond, Result&: Val) && !Val)
6989 IsOffloadEntry = false;
6990 }
6991 if (CGM.getLangOpts().OMPTargetTriples.empty())
6992 IsOffloadEntry = false;
6993
6994 if (CGM.getLangOpts().OpenMPOffloadMandatory && !IsOffloadEntry) {
6995 CGM.getDiags().Report(DiagID: diag::err_missing_mandatory_offloading);
6996 }
6997
6998 assert(CGF.CurFuncDecl && "No parent declaration for target region!");
6999 StringRef ParentName;
7000  // In case we have Ctors/Dtors, we use the complete-type variant to produce
7001  // the mangled name of the device-outlined kernel.
7002 if (const auto *D = dyn_cast<CXXConstructorDecl>(Val: CGF.CurFuncDecl))
7003 ParentName = CGM.getMangledName(GD: GlobalDecl(D, Ctor_Complete));
7004 else if (const auto *D = dyn_cast<CXXDestructorDecl>(Val: CGF.CurFuncDecl))
7005 ParentName = CGM.getMangledName(GD: GlobalDecl(D, Dtor_Complete));
7006 else
7007 ParentName =
7008 CGM.getMangledName(GD: GlobalDecl(cast<FunctionDecl>(Val: CGF.CurFuncDecl)));
7009
7010 // Emit target region as a standalone region.
7011 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: FnID,
7012 IsOffloadEntry, CodeGen);
7013 OMPLexicalScope Scope(CGF, S, OMPD_task);
7014 auto &&SizeEmitter =
7015 [IsOffloadEntry](CodeGenFunction &CGF,
7016 const OMPLoopDirective &D) -> llvm::Value * {
7017 if (IsOffloadEntry) {
7018 OMPLoopScope(CGF, D);
7019 // Emit calculation of the iterations count.
7020 llvm::Value *NumIterations = CGF.EmitScalarExpr(E: D.getNumIterations());
7021 NumIterations = CGF.Builder.CreateIntCast(V: NumIterations, DestTy: CGF.Int64Ty,
7022 /*isSigned=*/false);
7023 return NumIterations;
7024 }
7025 return nullptr;
7026 };
7027 CGM.getOpenMPRuntime().emitTargetCall(CGF, D: S, OutlinedFn: Fn, OutlinedFnID: FnID, IfCond, Device,
7028 SizeEmitter);
7029}
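
// Illustrative sketch (simplified; 'cond' and 'dev' are placeholders): for
//
//   #pragma omp target if(cond) device(dev)
//   { /* body */ }
//
// the body is outlined into a kernel via emitTargetOutlinedFunction, and
// emitTargetCall emits the offloading invocation guarded by 'cond', with a
// host-side call to the outlined function as the fallback path.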
7030
7031static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
7032 PrePostActionTy &Action) {
7033 Action.Enter(CGF);
7034 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7035 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
7036 CGF.EmitOMPPrivateClause(D: S, PrivateScope);
7037 (void)PrivateScope.Privatize();
7038 if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()))
7039 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S);
7040
7041 CGF.EmitStmt(S: S.getCapturedStmt(RegionKind: OMPD_target)->getCapturedStmt());
7042 CGF.EnsureInsertPoint();
7043}
7044
7045void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
7046 StringRef ParentName,
7047 const OMPTargetDirective &S) {
7048 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7049 emitTargetRegion(CGF, S, Action);
7050 };
7051 llvm::Function *Fn;
7052 llvm::Constant *Addr;
7053 // Emit target region as a standalone region.
7054 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7055 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7056 assert(Fn && Addr && "Target device function emission failed.");
7057}
7058
7059void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
7060 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7061 emitTargetRegion(CGF, S, Action);
7062 };
7063 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7064}
7065
7066static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
7067 const OMPExecutableDirective &S,
7068 OpenMPDirectiveKind InnermostKind,
7069 const RegionCodeGenTy &CodeGen) {
7070 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_teams);
7071 llvm::Function *OutlinedFn =
7072 CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
7073 CGF, D: S, ThreadIDVar: *CS->getCapturedDecl()->param_begin(), InnermostKind,
7074 CodeGen);
7075
7076 const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
7077 const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
7078 if (NT || TL) {
7079 const Expr *NumTeams = NT ? NT->getNumTeams().front() : nullptr;
7080 const Expr *ThreadLimit = TL ? TL->getThreadLimit().front() : nullptr;
7081
7082 CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
7083 Loc: S.getBeginLoc());
7084 }
7085
7086 OMPTeamsScope Scope(CGF, S);
7087 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
7088 CGF.GenerateOpenMPCapturedVars(S: *CS, CapturedVars);
7089 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, D: S, Loc: S.getBeginLoc(), OutlinedFn,
7090 CapturedVars);
7091}
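
// Illustrative sketch (simplified): for
//
//   #pragma omp teams num_teams(4) thread_limit(8)
//   { /* body */ }
//
// emitNumTeamsClause forwards (4, 8) to the runtime (a
// __kmpc_push_num_teams-style call), and emitTeamsCall then invokes the
// outlined teams function with the captured variables as arguments.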
7092
7093void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
7094 // Emit teams region as a standalone region.
7095 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7096 Action.Enter(CGF);
7097 OMPPrivateScope PrivateScope(CGF);
7098 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
7099 CGF.EmitOMPPrivateClause(D: S, PrivateScope);
7100 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7101 (void)PrivateScope.Privatize();
7102 CGF.EmitStmt(S: S.getCapturedStmt(RegionKind: OMPD_teams)->getCapturedStmt());
7103 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7104 };
7105 emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute, CodeGen);
7106 emitPostUpdateForReductionClause(CGF&: *this, D: S,
7107 CondGen: [](CodeGenFunction &) { return nullptr; });
7108}
7109
7110static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
7111 const OMPTargetTeamsDirective &S) {
7112 auto *CS = S.getCapturedStmt(RegionKind: OMPD_teams);
7113 Action.Enter(CGF);
7114 // Emit teams region as a standalone region.
7115 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
7116 Action.Enter(CGF);
7117 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7118 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
7119 CGF.EmitOMPPrivateClause(D: S, PrivateScope);
7120 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7121 (void)PrivateScope.Privatize();
7122 if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()))
7123 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S);
7124 CGF.EmitStmt(S: CS->getCapturedStmt());
7125 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7126 };
7127 emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_teams, CodeGen);
7128 emitPostUpdateForReductionClause(CGF, D: S,
7129 CondGen: [](CodeGenFunction &) { return nullptr; });
7130}
7131
7132void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
7133 CodeGenModule &CGM, StringRef ParentName,
7134 const OMPTargetTeamsDirective &S) {
7135 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7136 emitTargetTeamsRegion(CGF, Action, S);
7137 };
7138 llvm::Function *Fn;
7139 llvm::Constant *Addr;
7140 // Emit target region as a standalone region.
7141 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7142 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7143 assert(Fn && Addr && "Target device function emission failed.");
7144}
7145
7146void CodeGenFunction::EmitOMPTargetTeamsDirective(
7147 const OMPTargetTeamsDirective &S) {
7148 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7149 emitTargetTeamsRegion(CGF, Action, S);
7150 };
7151 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7152}
7153
7154static void
7155emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
7156 const OMPTargetTeamsDistributeDirective &S) {
7157 Action.Enter(CGF);
7158 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7159 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
7160 };
7161
7162 // Emit teams region as a standalone region.
7163 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7164 PrePostActionTy &Action) {
7165 Action.Enter(CGF);
7166 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7167 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7168 (void)PrivateScope.Privatize();
7169 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute,
7170 CodeGen: CodeGenDistribute);
7171 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7172 };
7173 emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute, CodeGen);
7174 emitPostUpdateForReductionClause(CGF, D: S,
7175 CondGen: [](CodeGenFunction &) { return nullptr; });
7176}
7177
7178void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
7179 CodeGenModule &CGM, StringRef ParentName,
7180 const OMPTargetTeamsDistributeDirective &S) {
7181 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7182 emitTargetTeamsDistributeRegion(CGF, Action, S);
7183 };
7184 llvm::Function *Fn;
7185 llvm::Constant *Addr;
7186 // Emit target region as a standalone region.
7187 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7188 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7189 assert(Fn && Addr && "Target device function emission failed.");
7190}
7191
7192void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
7193 const OMPTargetTeamsDistributeDirective &S) {
7194 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7195 emitTargetTeamsDistributeRegion(CGF, Action, S);
7196 };
7197 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7198}
7199
7200static void emitTargetTeamsDistributeSimdRegion(
7201 CodeGenFunction &CGF, PrePostActionTy &Action,
7202 const OMPTargetTeamsDistributeSimdDirective &S) {
7203 Action.Enter(CGF);
7204 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7205 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
7206 };
7207
7208 // Emit teams region as a standalone region.
7209 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7210 PrePostActionTy &Action) {
7211 Action.Enter(CGF);
7212 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7213 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7214 (void)PrivateScope.Privatize();
7215 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute,
7216 CodeGen: CodeGenDistribute);
7217 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7218 };
7219 emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute_simd, CodeGen);
7220 emitPostUpdateForReductionClause(CGF, D: S,
7221 CondGen: [](CodeGenFunction &) { return nullptr; });
7222}
7223
7224void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
7225 CodeGenModule &CGM, StringRef ParentName,
7226 const OMPTargetTeamsDistributeSimdDirective &S) {
7227 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7228 emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
7229 };
7230 llvm::Function *Fn;
7231 llvm::Constant *Addr;
7232 // Emit target region as a standalone region.
7233 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7234 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7235 assert(Fn && Addr && "Target device function emission failed.");
7236}
7237
7238void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
7239 const OMPTargetTeamsDistributeSimdDirective &S) {
7240 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7241 emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
7242 };
7243 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7244}
7245
7246void CodeGenFunction::EmitOMPTeamsDistributeDirective(
7247 const OMPTeamsDistributeDirective &S) {
7248
7249 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7250 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
7251 };
7252
7253 // Emit teams region as a standalone region.
7254 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7255 PrePostActionTy &Action) {
7256 Action.Enter(CGF);
7257 OMPPrivateScope PrivateScope(CGF);
7258 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7259 (void)PrivateScope.Privatize();
7260 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute,
7261 CodeGen: CodeGenDistribute);
7262 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7263 };
7264 emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute, CodeGen);
7265 emitPostUpdateForReductionClause(CGF&: *this, D: S,
7266 CondGen: [](CodeGenFunction &) { return nullptr; });
7267}
7268
7269void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
7270 const OMPTeamsDistributeSimdDirective &S) {
7271 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7272 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
7273 };
7274
7275 // Emit teams region as a standalone region.
7276 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7277 PrePostActionTy &Action) {
7278 Action.Enter(CGF);
7279 OMPPrivateScope PrivateScope(CGF);
7280 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7281 (void)PrivateScope.Privatize();
7282 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_simd,
7283 CodeGen: CodeGenDistribute);
7284 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7285 };
7286 emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute_simd, CodeGen);
7287 emitPostUpdateForReductionClause(CGF&: *this, D: S,
7288 CondGen: [](CodeGenFunction &) { return nullptr; });
7289}
7290
7291void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
7292 const OMPTeamsDistributeParallelForDirective &S) {
7293 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7294 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
7295 IncExpr: S.getDistInc());
7296 };
7297
7298 // Emit teams region as a standalone region.
7299 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7300 PrePostActionTy &Action) {
7301 Action.Enter(CGF);
7302 OMPPrivateScope PrivateScope(CGF);
7303 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7304 (void)PrivateScope.Privatize();
7305 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute,
7306 CodeGen: CodeGenDistribute);
7307 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7308 };
7309 emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute_parallel_for, CodeGen);
7310 emitPostUpdateForReductionClause(CGF&: *this, D: S,
7311 CondGen: [](CodeGenFunction &) { return nullptr; });
7312}
7313
7314void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
7315 const OMPTeamsDistributeParallelForSimdDirective &S) {
7316 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7317 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
7318 IncExpr: S.getDistInc());
7319 };
7320
7321 // Emit teams region as a standalone region.
7322 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7323 PrePostActionTy &Action) {
7324 Action.Enter(CGF);
7325 OMPPrivateScope PrivateScope(CGF);
7326 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7327 (void)PrivateScope.Privatize();
7328 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
7329 CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false);
7330 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7331 };
7332 emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute_parallel_for_simd,
7333 CodeGen);
7334 emitPostUpdateForReductionClause(CGF&: *this, D: S,
7335 CondGen: [](CodeGenFunction &) { return nullptr; });
7336}
7337
7338void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective &S) {
7339 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
7340 llvm::Value *Device = nullptr;
7341 llvm::Value *NumDependences = nullptr;
7342 llvm::Value *DependenceList = nullptr;
7343
7344 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7345 Device = EmitScalarExpr(E: C->getDevice());
7346
7347  // Build the dependence list and emit the dependences.
7348 OMPTaskDataTy Data;
7349 buildDependences(S, Data);
7350 if (!Data.Dependences.empty()) {
7351 Address DependenciesArray = Address::invalid();
7352 std::tie(args&: NumDependences, args&: DependenciesArray) =
7353 CGM.getOpenMPRuntime().emitDependClause(CGF&: *this, Dependencies: Data.Dependences,
7354 Loc: S.getBeginLoc());
7355 DependenceList = DependenciesArray.emitRawPointer(CGF&: *this);
7356 }
7357 Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();
7358
7359 assert(!(Data.HasNowaitClause && !(S.getSingleClause<OMPInitClause>() ||
7360 S.getSingleClause<OMPDestroyClause>() ||
7361 S.getSingleClause<OMPUseClause>())) &&
7362         "A 'nowait' clause on 'interop' requires an 'init', 'use', or 'destroy' clause.");
7363
7364 auto ItOMPInitClause = S.getClausesOfKind<OMPInitClause>();
7365 if (!ItOMPInitClause.empty()) {
7366 // Look at the multiple init clauses
7367 for (const OMPInitClause *C : ItOMPInitClause) {
7368 llvm::Value *InteropvarPtr =
7369 EmitLValue(E: C->getInteropVar()).getPointer(CGF&: *this);
7370 llvm::omp::OMPInteropType InteropType =
7371 llvm::omp::OMPInteropType::Unknown;
7372 if (C->getIsTarget()) {
7373 InteropType = llvm::omp::OMPInteropType::Target;
7374 } else {
7375 assert(C->getIsTargetSync() &&
7376 "Expected interop-type target/targetsync");
7377 InteropType = llvm::omp::OMPInteropType::TargetSync;
7378 }
7379 OMPBuilder.createOMPInteropInit(Loc: Builder, InteropVar: InteropvarPtr, InteropType,
7380 Device, NumDependences, DependenceAddress: DependenceList,
7381 HaveNowaitClause: Data.HasNowaitClause);
7382 }
7383 }
7384 auto ItOMPDestroyClause = S.getClausesOfKind<OMPDestroyClause>();
7385 if (!ItOMPDestroyClause.empty()) {
7386 // Look at the multiple destroy clauses
7387 for (const OMPDestroyClause *C : ItOMPDestroyClause) {
7388 llvm::Value *InteropvarPtr =
7389 EmitLValue(E: C->getInteropVar()).getPointer(CGF&: *this);
7390 OMPBuilder.createOMPInteropDestroy(Loc: Builder, InteropVar: InteropvarPtr, Device,
7391 NumDependences, DependenceAddress: DependenceList,
7392 HaveNowaitClause: Data.HasNowaitClause);
7393 }
7394 }
7395 auto ItOMPUseClause = S.getClausesOfKind<OMPUseClause>();
7396 if (!ItOMPUseClause.empty()) {
7397 // Look at the multiple use clauses
7398 for (const OMPUseClause *C : ItOMPUseClause) {
7399 llvm::Value *InteropvarPtr =
7400 EmitLValue(E: C->getInteropVar()).getPointer(CGF&: *this);
7401 OMPBuilder.createOMPInteropUse(Loc: Builder, InteropVar: InteropvarPtr, Device,
7402 NumDependences, DependenceAddress: DependenceList,
7403 HaveNowaitClause: Data.HasNowaitClause);
7404 }
7405 }
7406}
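
// Illustrative example ('obj' and 'dev' are placeholders):
//
//   #pragma omp interop init(targetsync : obj) device(dev) nowait
//
// reaches the init loop above with InteropType == TargetSync and is lowered
// via OMPBuilder.createOMPInteropInit; 'destroy(obj)' and 'use(obj)' take
// the analogous createOMPInteropDestroy/createOMPInteropUse paths.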
7407
7408static void emitTargetTeamsDistributeParallelForRegion(
7409 CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
7410 PrePostActionTy &Action) {
7411 Action.Enter(CGF);
7412 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7413 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
7414 IncExpr: S.getDistInc());
7415 };
7416
7417 // Emit teams region as a standalone region.
7418 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7419 PrePostActionTy &Action) {
7420 Action.Enter(CGF);
7421 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7422 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7423 (void)PrivateScope.Privatize();
7424 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
7425 CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false);
7426 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7427 };
7428
7429 emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute_parallel_for,
7430 CodeGen: CodeGenTeams);
7431 emitPostUpdateForReductionClause(CGF, D: S,
7432 CondGen: [](CodeGenFunction &) { return nullptr; });
7433}
7434
7435void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
7436 CodeGenModule &CGM, StringRef ParentName,
7437 const OMPTargetTeamsDistributeParallelForDirective &S) {
7438 // Emit SPMD target teams distribute parallel for region as a standalone
7439 // region.
7440 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7441 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
7442 };
7443 llvm::Function *Fn;
7444 llvm::Constant *Addr;
7445 // Emit target region as a standalone region.
7446 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7447 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7448 assert(Fn && Addr && "Target device function emission failed.");
7449}
7450
7451void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
7452 const OMPTargetTeamsDistributeParallelForDirective &S) {
7453 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7454 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
7455 };
7456 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7457}
7458
7459static void emitTargetTeamsDistributeParallelForSimdRegion(
7460 CodeGenFunction &CGF,
7461 const OMPTargetTeamsDistributeParallelForSimdDirective &S,
7462 PrePostActionTy &Action) {
7463 Action.Enter(CGF);
7464 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7465 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
7466 IncExpr: S.getDistInc());
7467 };
7468
7469 // Emit teams region as a standalone region.
7470 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7471 PrePostActionTy &Action) {
7472 Action.Enter(CGF);
7473 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7474 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7475 (void)PrivateScope.Privatize();
7476 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
7477 CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false);
7478 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7479 };
7480
7481 emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute_parallel_for_simd,
7482 CodeGen: CodeGenTeams);
7483 emitPostUpdateForReductionClause(CGF, D: S,
7484 CondGen: [](CodeGenFunction &) { return nullptr; });
7485}
7486
7487void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
7488 CodeGenModule &CGM, StringRef ParentName,
7489 const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
7490 // Emit SPMD target teams distribute parallel for simd region as a standalone
7491 // region.
7492 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7493 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
7494 };
7495 llvm::Function *Fn;
7496 llvm::Constant *Addr;
7497 // Emit target region as a standalone region.
7498 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7499 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7500 assert(Fn && Addr && "Target device function emission failed.");
7501}
7502
7503void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
7504 const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
7505 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7506 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
7507 };
7508 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7509}
7510
7511void CodeGenFunction::EmitOMPCancellationPointDirective(
7512 const OMPCancellationPointDirective &S) {
7513 CGM.getOpenMPRuntime().emitCancellationPointCall(CGF&: *this, Loc: S.getBeginLoc(),
7514 CancelRegion: S.getCancelRegion());
7515}
7516
7517void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
7518 const Expr *IfCond = nullptr;
7519 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
7520 if (C->getNameModifier() == OMPD_unknown ||
7521 C->getNameModifier() == OMPD_cancel) {
7522 IfCond = C->getCondition();
7523 break;
7524 }
7525 }
7526 if (CGM.getLangOpts().OpenMPIRBuilder) {
7527 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
7528 // TODO: This check is necessary as we only generate `omp parallel` through
7529 // the OpenMPIRBuilder for now.
7530 if (S.getCancelRegion() == OMPD_parallel ||
7531 S.getCancelRegion() == OMPD_sections ||
7532 S.getCancelRegion() == OMPD_section) {
7533 llvm::Value *IfCondition = nullptr;
7534 if (IfCond)
7535 IfCondition = EmitScalarExpr(E: IfCond,
7536 /*IgnoreResultAssign=*/true);
7537 llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(
7538 ValOrErr: OMPBuilder.createCancel(Loc: Builder, IfCondition, CanceledDirective: S.getCancelRegion()));
7539 return Builder.restoreIP(IP: AfterIP);
7540 }
7541 }
7542
7543 CGM.getOpenMPRuntime().emitCancelCall(CGF&: *this, Loc: S.getBeginLoc(), IfCond,
7544 CancelRegion: S.getCancelRegion());
7545}
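
// Illustrative example ('cond' is a placeholder): for
//
//   #pragma omp cancel for if(cond)
//
// the cancel region is OMPD_for, so the OpenMPIRBuilder path above is
// skipped and emitCancelCall emits the runtime cancellation check guarded by
// 'cond'; 'cancel parallel' takes the createCancel route when the IRBuilder
// is enabled.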
7546
7547CodeGenFunction::JumpDest
7548CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
7549 if (Kind == OMPD_parallel || Kind == OMPD_task ||
7550 Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
7551 Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
7552 return ReturnBlock;
7553 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
7554 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
7555 Kind == OMPD_distribute_parallel_for ||
7556 Kind == OMPD_target_parallel_for ||
7557 Kind == OMPD_teams_distribute_parallel_for ||
7558 Kind == OMPD_target_teams_distribute_parallel_for);
7559 return OMPCancelStack.getExitBlock();
7560}
7561
7562void CodeGenFunction::EmitOMPUseDevicePtrClause(
7563 const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
7564 const llvm::DenseMap<const ValueDecl *, llvm::Value *>
7565 CaptureDeviceAddrMap) {
7566 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
7567 for (const Expr *OrigVarIt : C.varlist()) {
7568 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: OrigVarIt)->getDecl());
7569 if (!Processed.insert(V: OrigVD).second)
7570 continue;
7571
7572    // In order to identify the right initializer we need to match the
7573    // declaration used by the mapping logic. In some cases we may get an
7574    // OMPCapturedExprDecl that refers to the original declaration.
7575 const ValueDecl *MatchingVD = OrigVD;
7576 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: MatchingVD)) {
7577      // OMPCapturedExprDecls are used to privatize fields of the current
7578      // structure.
7579 const auto *ME = cast<MemberExpr>(Val: OED->getInit());
7580 assert(isa<CXXThisExpr>(ME->getBase()->IgnoreImpCasts()) &&
7581 "Base should be the current struct!");
7582 MatchingVD = ME->getMemberDecl();
7583 }
7584
7585 // If we don't have information about the current list item, move on to
7586 // the next one.
7587 auto InitAddrIt = CaptureDeviceAddrMap.find(Val: MatchingVD);
7588 if (InitAddrIt == CaptureDeviceAddrMap.end())
7589 continue;
7590
7591 llvm::Type *Ty = ConvertTypeForMem(T: OrigVD->getType().getNonReferenceType());
7592
7593 // Return the address of the private variable.
7594 bool IsRegistered = PrivateScope.addPrivate(
7595 LocalVD: OrigVD,
7596 Addr: Address(InitAddrIt->second, Ty,
7597 getContext().getTypeAlignInChars(T: getContext().VoidPtrTy)));
7598 assert(IsRegistered && "firstprivate var already registered as private");
7599 // Silence the warning about unused variable.
7600 (void)IsRegistered;
7601 }
7602}
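
// Illustrative example ('p' and 'n' are placeholders): for
//
//   #pragma omp target data map(to: p[0:n]) use_device_ptr(p)
//
// the runtime returns the translated device pointer through
// CaptureDeviceAddrMap, and the loop above registers it as the private copy
// of 'p', so references to 'p' inside the construct use the device address.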
7603
7604static const VarDecl *getBaseDecl(const Expr *Ref) {
7605 const Expr *Base = Ref->IgnoreParenImpCasts();
7606 while (const auto *OASE = dyn_cast<ArraySectionExpr>(Val: Base))
7607 Base = OASE->getBase()->IgnoreParenImpCasts();
7608 while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Val: Base))
7609 Base = ASE->getBase()->IgnoreParenImpCasts();
7610 return cast<VarDecl>(Val: cast<DeclRefExpr>(Val: Base)->getDecl());
7611}
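
// For example, given a 'use_device_addr(a[0:n])' list item ('a' and 'n' are
// placeholders), getBaseDecl strips the array-section and subscript
// expressions and returns the VarDecl for 'a'.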
7612
7613void CodeGenFunction::EmitOMPUseDeviceAddrClause(
7614 const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
7615 const llvm::DenseMap<const ValueDecl *, llvm::Value *>
7616 CaptureDeviceAddrMap) {
7617 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
7618 for (const Expr *Ref : C.varlist()) {
7619 const VarDecl *OrigVD = getBaseDecl(Ref);
7620 if (!Processed.insert(V: OrigVD).second)
7621 continue;
7622    // In order to identify the right initializer we need to match the
7623    // declaration used by the mapping logic. In some cases we may get an
7624    // OMPCapturedExprDecl that refers to the original declaration.
7625 const ValueDecl *MatchingVD = OrigVD;
7626 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: MatchingVD)) {
7627      // OMPCapturedExprDecls are used to privatize fields of the current
7628      // structure.
7629 const auto *ME = cast<MemberExpr>(Val: OED->getInit());
7630 assert(isa<CXXThisExpr>(ME->getBase()) &&
7631 "Base should be the current struct!");
7632 MatchingVD = ME->getMemberDecl();
7633 }
7634
7635 // If we don't have information about the current list item, move on to
7636 // the next one.
7637 auto InitAddrIt = CaptureDeviceAddrMap.find(Val: MatchingVD);
7638 if (InitAddrIt == CaptureDeviceAddrMap.end())
7639 continue;
7640
7641 llvm::Type *Ty = ConvertTypeForMem(T: OrigVD->getType().getNonReferenceType());
7642
7643 Address PrivAddr =
7644 Address(InitAddrIt->second, Ty,
7645 getContext().getTypeAlignInChars(T: getContext().VoidPtrTy));
7646    // For declrefs and variable length arrays we need to load the pointer for
7647    // correct mapping, since the pointer to the data was passed to the runtime.
7648 if (isa<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts()) ||
7649 MatchingVD->getType()->isArrayType()) {
7650 QualType PtrTy = getContext().getPointerType(
7651 T: OrigVD->getType().getNonReferenceType());
7652 PrivAddr =
7653 EmitLoadOfPointer(Ptr: PrivAddr.withElementType(ElemTy: ConvertTypeForMem(T: PtrTy)),
7654 PtrTy: PtrTy->castAs<PointerType>());
7655 }
7656
7657 (void)PrivateScope.addPrivate(LocalVD: OrigVD, Addr: PrivAddr);
7658 }
7659}
7660
7661// Generate the instructions for '#pragma omp target data' directive.
7662void CodeGenFunction::EmitOMPTargetDataDirective(
7663 const OMPTargetDataDirective &S) {
7664  // Emit the vtable only on the host for the 'target data' directive.
7665 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
7666 CGM.getOpenMPRuntime().registerVTable(D: S);
7667
7668 CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
7669 /*SeparateBeginEndCalls=*/true);
7670
7671 // Create a pre/post action to signal the privatization of the device pointer.
7672 // This action can be replaced by the OpenMP runtime code generation to
7673 // deactivate privatization.
7674 bool PrivatizeDevicePointers = false;
7675 class DevicePointerPrivActionTy : public PrePostActionTy {
7676 bool &PrivatizeDevicePointers;
7677
7678 public:
7679 explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
7680 : PrivatizeDevicePointers(PrivatizeDevicePointers) {}
7681 void Enter(CodeGenFunction &CGF) override {
7682 PrivatizeDevicePointers = true;
7683 }
7684 };
7685 DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
7686
7687 auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
7688 auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7689 CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
7690 };
7691
7692 // Codegen that selects whether to generate the privatization code or not.
7693 auto &&PrivCodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
7694 RegionCodeGenTy RCG(InnermostCodeGen);
7695 PrivatizeDevicePointers = false;
7696
7697 // Call the pre-action to change the status of PrivatizeDevicePointers if
7698 // needed.
7699 Action.Enter(CGF);
7700
7701 if (PrivatizeDevicePointers) {
7702 OMPPrivateScope PrivateScope(CGF);
7703 // Emit all instances of the use_device_ptr clause.
7704 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
7705 CGF.EmitOMPUseDevicePtrClause(C: *C, PrivateScope,
7706 CaptureDeviceAddrMap: Info.CaptureDeviceAddrMap);
7707 for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
7708 CGF.EmitOMPUseDeviceAddrClause(C: *C, PrivateScope,
7709 CaptureDeviceAddrMap: Info.CaptureDeviceAddrMap);
7710 (void)PrivateScope.Privatize();
7711 RCG(CGF);
7712 } else {
7713 // If we don't have target devices, don't bother emitting the data
7714 // mapping code.
7715 std::optional<OpenMPDirectiveKind> CaptureRegion;
7716 if (CGM.getLangOpts().OMPTargetTriples.empty()) {
7717 // Emit helper decls of the use_device_ptr/use_device_addr clauses.
7718 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
7719 for (const Expr *E : C->varlist()) {
7720 const Decl *D = cast<DeclRefExpr>(Val: E)->getDecl();
7721 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: D))
7722 CGF.EmitVarDecl(D: *OED);
7723 }
7724 for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
7725 for (const Expr *E : C->varlist()) {
7726 const Decl *D = getBaseDecl(Ref: E);
7727 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: D))
7728 CGF.EmitVarDecl(D: *OED);
7729 }
7730 } else {
7731 CaptureRegion = OMPD_unknown;
7732 }
7733
7734 OMPLexicalScope Scope(CGF, S, CaptureRegion);
7735 RCG(CGF);
7736 }
7737 };
7738
7739 // Forward the provided action to the privatization codegen.
7740 RegionCodeGenTy PrivRCG(PrivCodeGen);
7741 PrivRCG.setAction(Action);
7742
7743    // Although the body of the region is emitted as an inlined directive, we
7744    // don't use an inlined scope, because changes to the references inside the
7745    // region are expected to be visible outside, so we do not privatize them.
7746 OMPLexicalScope Scope(CGF, S);
7747 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_target_data,
7748 CodeGen: PrivRCG);
7749 };
7750
7751 RegionCodeGenTy RCG(CodeGen);
7752
7753 // If we don't have target devices, don't bother emitting the data mapping
7754 // code.
7755 if (CGM.getLangOpts().OMPTargetTriples.empty()) {
7756 RCG(*this);
7757 return;
7758 }
7759
7760 // Check if we have any if clause associated with the directive.
7761 const Expr *IfCond = nullptr;
7762 if (const auto *C = S.getSingleClause<OMPIfClause>())
7763 IfCond = C->getCondition();
7764
7765 // Check if we have any device clause associated with the directive.
7766 const Expr *Device = nullptr;
7767 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7768 Device = C->getDevice();
7769
7770 // Set the action to signal privatization of device pointers.
7771 RCG.setAction(PrivAction);
7772
7773 // Emit region code.
7774 CGM.getOpenMPRuntime().emitTargetDataCalls(CGF&: *this, D: S, IfCond, Device, CodeGen: RCG,
7775 Info);
7776}
7777
7778void CodeGenFunction::EmitOMPTargetEnterDataDirective(
7779 const OMPTargetEnterDataDirective &S) {
7780 // If we don't have target devices, don't bother emitting the data mapping
7781 // code.
7782 if (CGM.getLangOpts().OMPTargetTriples.empty())
7783 return;
7784
7785 // Check if we have any if clause associated with the directive.
7786 const Expr *IfCond = nullptr;
7787 if (const auto *C = S.getSingleClause<OMPIfClause>())
7788 IfCond = C->getCondition();
7789
7790 // Check if we have any device clause associated with the directive.
7791 const Expr *Device = nullptr;
7792 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7793 Device = C->getDevice();
7794
7795 OMPLexicalScope Scope(*this, S, OMPD_task);
7796 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF&: *this, D: S, IfCond, Device);
7797}
7798
7799void CodeGenFunction::EmitOMPTargetExitDataDirective(
7800 const OMPTargetExitDataDirective &S) {
7801 // If we don't have target devices, don't bother emitting the data mapping
7802 // code.
7803 if (CGM.getLangOpts().OMPTargetTriples.empty())
7804 return;
7805
7806 // Check if we have any if clause associated with the directive.
7807 const Expr *IfCond = nullptr;
7808 if (const auto *C = S.getSingleClause<OMPIfClause>())
7809 IfCond = C->getCondition();
7810
7811 // Check if we have any device clause associated with the directive.
7812 const Expr *Device = nullptr;
7813 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7814 Device = C->getDevice();
7815
7816 OMPLexicalScope Scope(*this, S, OMPD_task);
7817 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF&: *this, D: S, IfCond, Device);
7818}
7819
7820static void emitTargetParallelRegion(CodeGenFunction &CGF,
7821 const OMPTargetParallelDirective &S,
7822 PrePostActionTy &Action) {
7823 // Get the captured statement associated with the 'parallel' region.
7824 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_parallel);
7825 Action.Enter(CGF);
7826 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
7827 Action.Enter(CGF);
7828 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7829 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
7830 CGF.EmitOMPPrivateClause(D: S, PrivateScope);
7831 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7832 (void)PrivateScope.Privatize();
7833 if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()))
7834 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S);
7835 // TODO: Add support for clauses.
7836 CGF.EmitStmt(S: CS->getCapturedStmt());
7837 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
7838 };
7839 emitCommonOMPParallelDirective(CGF, S, InnermostKind: OMPD_parallel, CodeGen,
7840 CodeGenBoundParameters: emitEmptyBoundParameters);
7841 emitPostUpdateForReductionClause(CGF, D: S,
7842 CondGen: [](CodeGenFunction &) { return nullptr; });
7843}
7844
7845void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
7846 CodeGenModule &CGM, StringRef ParentName,
7847 const OMPTargetParallelDirective &S) {
7848 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7849 emitTargetParallelRegion(CGF, S, Action);
7850 };
7851 llvm::Function *Fn;
7852 llvm::Constant *Addr;
7853 // Emit target region as a standalone region.
7854 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7855 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7856 assert(Fn && Addr && "Target device function emission failed.");
7857}
7858
7859void CodeGenFunction::EmitOMPTargetParallelDirective(
7860 const OMPTargetParallelDirective &S) {
7861 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7862 emitTargetParallelRegion(CGF, S, Action);
7863 };
7864 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7865}
7866
7867static void emitTargetParallelForRegion(CodeGenFunction &CGF,
7868 const OMPTargetParallelForDirective &S,
7869 PrePostActionTy &Action) {
7870 Action.Enter(CGF);
7871 // Emit directive as a combined directive that consists of two implicit
7872 // directives: 'parallel' with 'for' directive.
7873 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7874 Action.Enter(CGF);
7875 CodeGenFunction::OMPCancelStackRAII CancelRegion(
7876 CGF, OMPD_target_parallel_for, S.hasCancel());
7877 CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), CodeGenLoopBounds: emitForLoopBounds,
7878 CGDispatchBounds: emitDispatchForLoopBounds);
7879 };
7880 emitCommonOMPParallelDirective(CGF, S, InnermostKind: OMPD_for, CodeGen,
7881 CodeGenBoundParameters: emitEmptyBoundParameters);
7882}
7883
7884void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
7885 CodeGenModule &CGM, StringRef ParentName,
7886 const OMPTargetParallelForDirective &S) {
7887 // Emit SPMD target parallel for region as a standalone region.
7888 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7889 emitTargetParallelForRegion(CGF, S, Action);
7890 };
7891 llvm::Function *Fn;
7892 llvm::Constant *Addr;
7893 // Emit target region as a standalone region.
7894 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7895 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7896 assert(Fn && Addr && "Target device function emission failed.");
7897}
7898
7899void CodeGenFunction::EmitOMPTargetParallelForDirective(
7900 const OMPTargetParallelForDirective &S) {
7901 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7902 emitTargetParallelForRegion(CGF, S, Action);
7903 };
7904 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7905}
7906
7907static void
7908emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
7909 const OMPTargetParallelForSimdDirective &S,
7910 PrePostActionTy &Action) {
7911 Action.Enter(CGF);
7912 // Emit directive as a combined directive that consists of two implicit
7913 // directives: 'parallel' with 'for' directive.
7914 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7915 Action.Enter(CGF);
7916 CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), CodeGenLoopBounds: emitForLoopBounds,
7917 CGDispatchBounds: emitDispatchForLoopBounds);
7918 };
7919 emitCommonOMPParallelDirective(CGF, S, InnermostKind: OMPD_simd, CodeGen,
7920 CodeGenBoundParameters: emitEmptyBoundParameters);
7921}
7922
7923void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
7924 CodeGenModule &CGM, StringRef ParentName,
7925 const OMPTargetParallelForSimdDirective &S) {
7926 // Emit SPMD target parallel for region as a standalone region.
7927 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7928 emitTargetParallelForSimdRegion(CGF, S, Action);
7929 };
7930 llvm::Function *Fn;
7931 llvm::Constant *Addr;
7932 // Emit target region as a standalone region.
7933 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7934 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7935 assert(Fn && Addr && "Target device function emission failed.");
7936}
7937
7938void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
7939 const OMPTargetParallelForSimdDirective &S) {
7940 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7941 emitTargetParallelForSimdRegion(CGF, S, Action);
7942 };
7943 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7944}
7945
7946/// Emit a helper variable and return corresponding lvalue.
7947static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
7948 const ImplicitParamDecl *PVD,
7949 CodeGenFunction::OMPPrivateScope &Privates) {
7950 const auto *VDecl = cast<VarDecl>(Val: Helper->getDecl());
7951 Privates.addPrivate(LocalVD: VDecl, Addr: CGF.GetAddrOfLocalVar(VD: PVD));
7952}
7953
7954void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
7955 assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
7956 // Emit outlined function for task construct.
7957 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_taskloop);
7958 Address CapturedStruct = Address::invalid();
7959 {
7960 OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
7961 CapturedStruct = GenerateCapturedStmtArgument(S: *CS);
7962 }
7963 CanQualType SharedsTy =
7964 getContext().getCanonicalTagType(TD: CS->getCapturedRecordDecl());
7965 const Expr *IfCond = nullptr;
7966 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
7967 if (C->getNameModifier() == OMPD_unknown ||
7968 C->getNameModifier() == OMPD_taskloop) {
7969 IfCond = C->getCondition();
7970 break;
7971 }
7972 }
7973
7974 OMPTaskDataTy Data;
7975 // Check if taskloop must be emitted without taskgroup.
7976 Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
7977 // TODO: Check if we should emit tied or untied task.
7978 Data.Tied = true;
7979 // Set scheduling for taskloop
7980 if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
7981 // grainsize clause
7982 Data.Schedule.setInt(/*IntVal=*/false);
7983 Data.Schedule.setPointer(EmitScalarExpr(E: Clause->getGrainsize()));
7984 Data.HasModifier =
7985 (Clause->getModifier() == OMPC_GRAINSIZE_strict) ? true : false;
7986 } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
7987 // num_tasks clause
7988 Data.Schedule.setInt(/*IntVal=*/true);
7989 Data.Schedule.setPointer(EmitScalarExpr(E: Clause->getNumTasks()));
7990 Data.HasModifier =
7991 (Clause->getModifier() == OMPC_NUMTASKS_strict) ? true : false;
7992 }
7993
7994 auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
7995 // if (PreCond) {
7996 // for (IV in 0..LastIteration) BODY;
7997 // <Final counter/linear vars updates>;
7998 // }
7999 //
8000
8001 // Emit: if (PreCond) - begin.
8002 // If the condition constant folds and can be elided, avoid emitting the
8003 // whole loop.
8004 bool CondConstant;
8005 llvm::BasicBlock *ContBlock = nullptr;
8006 OMPLoopScope PreInitScope(CGF, S);
8007 if (CGF.ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) {
8008 if (!CondConstant)
8009 return;
8010 } else {
8011 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock(name: "taskloop.if.then");
8012 ContBlock = CGF.createBasicBlock(name: "taskloop.if.end");
8013 emitPreCond(CGF, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock,
8014 TrueCount: CGF.getProfileCount(S: &S));
8015 CGF.EmitBlock(BB: ThenBlock);
8016 CGF.incrementProfileCounter(S: &S);
8017 }
8018
8019 (void)CGF.EmitOMPLinearClauseInit(D: S);
8020
8021 OMPPrivateScope LoopScope(CGF);
8022 // Emit helper vars inits.
8023 enum { LowerBound = 5, UpperBound, Stride, LastIter };
8024 auto *I = CS->getCapturedDecl()->param_begin();
8025 auto *LBP = std::next(x: I, n: LowerBound);
8026 auto *UBP = std::next(x: I, n: UpperBound);
8027 auto *STP = std::next(x: I, n: Stride);
8028 auto *LIP = std::next(x: I, n: LastIter);
8029 mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getLowerBoundVariable()), PVD: *LBP,
8030 Privates&: LoopScope);
8031 mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getUpperBoundVariable()), PVD: *UBP,
8032 Privates&: LoopScope);
8033 mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getStrideVariable()), PVD: *STP, Privates&: LoopScope);
8034 mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getIsLastIterVariable()), PVD: *LIP,
8035 Privates&: LoopScope);
8036 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
8037 CGF.EmitOMPLinearClause(D: S, PrivateScope&: LoopScope);
8038 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope);
8039 (void)LoopScope.Privatize();
8040 // Emit the loop iteration variable.
8041 const Expr *IVExpr = S.getIterationVariable();
8042 const auto *IVDecl = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IVExpr)->getDecl());
8043 CGF.EmitVarDecl(D: *IVDecl);
8044 CGF.EmitIgnoredExpr(E: S.getInit());
8045
8046 // Emit the iterations count variable.
8047 // If it is not a variable, Sema decided to calculate iterations count on
8048 // each iteration (e.g., it is foldable into a constant).
8049 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) {
8050 CGF.EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl()));
8051 // Emit calculation of the iterations count.
8052 CGF.EmitIgnoredExpr(E: S.getCalcLastIteration());
8053 }
8054
8055 {
8056 OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
8057 emitCommonSimdLoop(
8058 CGF, S,
8059 SimdInitGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) {
8060 if (isOpenMPSimdDirective(DKind: S.getDirectiveKind()))
8061 CGF.EmitOMPSimdInit(D: S);
8062 },
8063 BodyCodeGen: [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
8064 CGF.EmitOMPInnerLoop(
8065 S, RequiresCleanup: LoopScope.requiresCleanups(), LoopCond: S.getCond(), IncExpr: S.getInc(),
8066 BodyGen: [&S](CodeGenFunction &CGF) {
8067 emitOMPLoopBodyWithStopPoint(CGF, S,
8068 LoopExit: CodeGenFunction::JumpDest());
8069 },
8070 PostIncGen: [](CodeGenFunction &) {});
8071 });
8072 }
8073 // Emit: if (PreCond) - end.
8074 if (ContBlock) {
8075 CGF.EmitBranch(Block: ContBlock);
8076 CGF.EmitBlock(BB: ContBlock, IsFinished: true);
8077 }
8078 // Emit final copy of the lastprivate variables if IsLastIter != 0.
8079 if (HasLastprivateClause) {
8080 CGF.EmitOMPLastprivateClauseFinal(
8081 D: S, NoFinals: isOpenMPSimdDirective(DKind: S.getDirectiveKind()),
8082 IsLastIterCond: CGF.Builder.CreateIsNotNull(Arg: CGF.EmitLoadOfScalar(
8083 Addr: CGF.GetAddrOfLocalVar(VD: *LIP), /*Volatile=*/false,
8084 Ty: (*LIP)->getType(), Loc: S.getBeginLoc())));
8085 }
8086 LoopScope.restoreMap();
8087 CGF.EmitOMPLinearClauseFinal(D: S, CondGen: [LIP, &S](CodeGenFunction &CGF) {
8088 return CGF.Builder.CreateIsNotNull(
8089 Arg: CGF.EmitLoadOfScalar(Addr: CGF.GetAddrOfLocalVar(VD: *LIP), /*Volatile=*/false,
8090 Ty: (*LIP)->getType(), Loc: S.getBeginLoc()));
8091 });
8092 };
8093 auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
8094 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
8095 const OMPTaskDataTy &Data) {
8096 auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
8097 &Data](CodeGenFunction &CGF, PrePostActionTy &) {
8098 OMPLoopScope PreInitScope(CGF, S);
8099 CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, Loc: S.getBeginLoc(), D: S,
8100 TaskFunction: OutlinedFn, SharedsTy,
8101 Shareds: CapturedStruct, IfCond, Data);
8102 };
8103 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_taskloop,
8104 CodeGen);
8105 };
8106 if (Data.Nogroup) {
8107 EmitOMPTaskBasedDirective(S, CapturedRegion: OMPD_taskloop, BodyGen, TaskGen, Data);
8108 } else {
8109 CGM.getOpenMPRuntime().emitTaskgroupRegion(
8110 CGF&: *this,
8111 TaskgroupOpGen: [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
8112 PrePostActionTy &Action) {
8113 Action.Enter(CGF);
8114 CGF.EmitOMPTaskBasedDirective(S, CapturedRegion: OMPD_taskloop, BodyGen, TaskGen,
8115 Data);
8116 },
8117 Loc: S.getBeginLoc());
8118 }
8119}
8120
8121void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
8122 auto LPCRegion =
8123 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8124 EmitOMPTaskLoopBasedDirective(S);
8125}
8126
8127void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
8128 const OMPTaskLoopSimdDirective &S) {
8129 auto LPCRegion =
8130 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8131 OMPLexicalScope Scope(*this, S);
8132 EmitOMPTaskLoopBasedDirective(S);
8133}
8134
8135void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
8136 const OMPMasterTaskLoopDirective &S) {
8137 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8138 Action.Enter(CGF);
8139 EmitOMPTaskLoopBasedDirective(S);
8140 };
8141 auto LPCRegion =
8142 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8143 OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false);
8144 CGM.getOpenMPRuntime().emitMasterRegion(CGF&: *this, MasterOpGen: CodeGen, Loc: S.getBeginLoc());
8145}
8146
8147void CodeGenFunction::EmitOMPMaskedTaskLoopDirective(
8148 const OMPMaskedTaskLoopDirective &S) {
8149 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8150 Action.Enter(CGF);
8151 EmitOMPTaskLoopBasedDirective(S);
8152 };
8153 auto LPCRegion =
8154 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8155 OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false);
8156 CGM.getOpenMPRuntime().emitMaskedRegion(CGF&: *this, MaskedOpGen: CodeGen, Loc: S.getBeginLoc());
8157}
8158
8159void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
8160 const OMPMasterTaskLoopSimdDirective &S) {
8161 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8162 Action.Enter(CGF);
8163 EmitOMPTaskLoopBasedDirective(S);
8164 };
8165 auto LPCRegion =
8166 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8167 OMPLexicalScope Scope(*this, S);
8168 CGM.getOpenMPRuntime().emitMasterRegion(CGF&: *this, MasterOpGen: CodeGen, Loc: S.getBeginLoc());
8169}
8170
8171void CodeGenFunction::EmitOMPMaskedTaskLoopSimdDirective(
8172 const OMPMaskedTaskLoopSimdDirective &S) {
8173 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8174 Action.Enter(CGF);
8175 EmitOMPTaskLoopBasedDirective(S);
8176 };
8177 auto LPCRegion =
8178 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8179 OMPLexicalScope Scope(*this, S);
8180 CGM.getOpenMPRuntime().emitMaskedRegion(CGF&: *this, MaskedOpGen: CodeGen, Loc: S.getBeginLoc());
8181}
8182
8183void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
8184 const OMPParallelMasterTaskLoopDirective &S) {
8185 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8186 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
8187 PrePostActionTy &Action) {
8188 Action.Enter(CGF);
8189 CGF.EmitOMPTaskLoopBasedDirective(S);
8190 };
8191 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
8192 CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: TaskLoopCodeGen,
8193 Loc: S.getBeginLoc());
8194 };
8195 auto LPCRegion =
8196 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8197 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_master_taskloop, CodeGen,
8198 CodeGenBoundParameters: emitEmptyBoundParameters);
8199}
8200
8201void CodeGenFunction::EmitOMPParallelMaskedTaskLoopDirective(
8202 const OMPParallelMaskedTaskLoopDirective &S) {
8203 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8204 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
8205 PrePostActionTy &Action) {
8206 Action.Enter(CGF);
8207 CGF.EmitOMPTaskLoopBasedDirective(S);
8208 };
8209 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
8210 CGM.getOpenMPRuntime().emitMaskedRegion(CGF, MaskedOpGen: TaskLoopCodeGen,
8211 Loc: S.getBeginLoc());
8212 };
8213 auto LPCRegion =
8214 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8215 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_masked_taskloop, CodeGen,
8216 CodeGenBoundParameters: emitEmptyBoundParameters);
8217}
8218
8219void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
8220 const OMPParallelMasterTaskLoopSimdDirective &S) {
8221 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8222 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
8223 PrePostActionTy &Action) {
8224 Action.Enter(CGF);
8225 CGF.EmitOMPTaskLoopBasedDirective(S);
8226 };
8227 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
8228 CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: TaskLoopCodeGen,
8229 Loc: S.getBeginLoc());
8230 };
8231 auto LPCRegion =
8232 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8233 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_master_taskloop_simd, CodeGen,
8234 CodeGenBoundParameters: emitEmptyBoundParameters);
8235}
8236
8237void CodeGenFunction::EmitOMPParallelMaskedTaskLoopSimdDirective(
8238 const OMPParallelMaskedTaskLoopSimdDirective &S) {
8239 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8240 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
8241 PrePostActionTy &Action) {
8242 Action.Enter(CGF);
8243 CGF.EmitOMPTaskLoopBasedDirective(S);
8244 };
8245 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
8246 CGM.getOpenMPRuntime().emitMaskedRegion(CGF, MaskedOpGen: TaskLoopCodeGen,
8247 Loc: S.getBeginLoc());
8248 };
8249 auto LPCRegion =
8250 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8251 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_masked_taskloop_simd, CodeGen,
8252 CodeGenBoundParameters: emitEmptyBoundParameters);
8253}
8254
8255// Generate the instructions for '#pragma omp target update' directive.
8256void CodeGenFunction::EmitOMPTargetUpdateDirective(
8257 const OMPTargetUpdateDirective &S) {
8258 // If we don't have target devices, don't bother emitting the data mapping
8259 // code.
8260 if (CGM.getLangOpts().OMPTargetTriples.empty())
8261 return;
8262
8263 // Check if we have any if clause associated with the directive.
8264 const Expr *IfCond = nullptr;
8265 if (const auto *C = S.getSingleClause<OMPIfClause>())
8266 IfCond = C->getCondition();
8267
8268 // Check if we have any device clause associated with the directive.
8269 const Expr *Device = nullptr;
8270 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
8271 Device = C->getDevice();
8272
8273 OMPLexicalScope Scope(*this, S, OMPD_task);
8274 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF&: *this, D: S, IfCond, Device);
8275}
8276
8277void CodeGenFunction::EmitOMPGenericLoopDirective(
8278 const OMPGenericLoopDirective &S) {
8279 // Always expect a bind clause on the loop directive. It it wasn't
8280 // in the source, it should have been added in sema.
8281
8282 OpenMPBindClauseKind BindKind = OMPC_BIND_unknown;
8283 if (const auto *C = S.getSingleClause<OMPBindClause>())
8284 BindKind = C->getBindKind();
8285
8286 switch (BindKind) {
8287 case OMPC_BIND_parallel: // for
8288 return emitOMPForDirective(S, CGF&: *this, CGM, /*HasCancel=*/false);
8289 case OMPC_BIND_teams: // distribute
8290 return emitOMPDistributeDirective(S, CGF&: *this, CGM);
8291 case OMPC_BIND_thread: // simd
8292 return emitOMPSimdDirective(S, CGF&: *this, CGM);
8293 case OMPC_BIND_unknown:
8294 break;
8295 }
8296
8297 // Unimplemented, just inline the underlying statement for now.
8298 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8299 // Emit the loop iteration variable.
8300 const Stmt *CS =
8301 cast<CapturedStmt>(Val: S.getAssociatedStmt())->getCapturedStmt();
8302 const auto *ForS = dyn_cast<ForStmt>(Val: CS);
8303 if (ForS && !isa<DeclStmt>(Val: ForS->getInit())) {
8304 OMPPrivateScope LoopScope(CGF);
8305 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
8306 (void)LoopScope.Privatize();
8307 CGF.EmitStmt(S: CS);
8308 LoopScope.restoreMap();
8309 } else {
8310 CGF.EmitStmt(S: CS);
8311 }
8312 };
8313 OMPLexicalScope Scope(*this, S, OMPD_unknown);
8314 CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_loop, CodeGen);
8315}
8316
8317void CodeGenFunction::EmitOMPParallelGenericLoopDirective(
8318 const OMPLoopDirective &S) {
8319 // Emit combined directive as if its constituent constructs are 'parallel'
8320 // and 'for'.
8321 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8322 Action.Enter(CGF);
8323 emitOMPCopyinClause(CGF, S);
8324 (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
8325 };
8326 {
8327 auto LPCRegion =
8328 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8329 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_for, CodeGen,
8330 CodeGenBoundParameters: emitEmptyBoundParameters);
8331 }
8332 // Check for outer lastprivate conditional update.
8333 checkForLastprivateConditionalUpdate(CGF&: *this, S);
8334}
8335
8336void CodeGenFunction::EmitOMPTeamsGenericLoopDirective(
8337 const OMPTeamsGenericLoopDirective &S) {
8338 // To be consistent with current behavior of 'target teams loop', emit
8339 // 'teams loop' as if its constituent constructs are 'teams' and 'distribute'.
8340 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
8341 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
8342 };
8343
8344 // Emit teams region as a standalone region.
8345 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
8346 PrePostActionTy &Action) {
8347 Action.Enter(CGF);
8348 OMPPrivateScope PrivateScope(CGF);
8349 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
8350 (void)PrivateScope.Privatize();
8351 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute,
8352 CodeGen: CodeGenDistribute);
8353 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
8354 };
8355 emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute, CodeGen);
8356 emitPostUpdateForReductionClause(CGF&: *this, D: S,
8357 CondGen: [](CodeGenFunction &) { return nullptr; });
8358}
8359
8360#ifndef NDEBUG
8361static void emitTargetTeamsLoopCodegenStatus(CodeGenFunction &CGF,
8362 std::string StatusMsg,
8363 const OMPExecutableDirective &D) {
8364 bool IsDevice = CGF.CGM.getLangOpts().OpenMPIsTargetDevice;
8365 if (IsDevice)
8366 StatusMsg += ": DEVICE";
8367 else
8368 StatusMsg += ": HOST";
8369 SourceLocation L = D.getBeginLoc();
8370 auto &SM = CGF.getContext().getSourceManager();
8371 PresumedLoc PLoc = SM.getPresumedLoc(L);
8372 const char *FileName = PLoc.isValid() ? PLoc.getFilename() : nullptr;
8373 unsigned LineNo =
8374 PLoc.isValid() ? PLoc.getLine() : SM.getExpansionLineNumber(L);
8375 llvm::dbgs() << StatusMsg << ": " << FileName << ": " << LineNo << "\n";
8376}
8377#endif
8378
8379static void emitTargetTeamsGenericLoopRegionAsParallel(
8380 CodeGenFunction &CGF, PrePostActionTy &Action,
8381 const OMPTargetTeamsGenericLoopDirective &S) {
8382 Action.Enter(CGF);
8383 // Emit 'teams loop' as if its constituent constructs are 'distribute,
8384 // 'parallel, and 'for'.
8385 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
8386 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
8387 IncExpr: S.getDistInc());
8388 };
8389
8390 // Emit teams region as a standalone region.
8391 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
8392 PrePostActionTy &Action) {
8393 Action.Enter(CGF);
8394 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
8395 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
8396 (void)PrivateScope.Privatize();
8397 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
8398 CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false);
8399 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
8400 };
8401 DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
8402 emitTargetTeamsLoopCodegenStatus(
8403 CGF, TTL_CODEGEN_TYPE " as parallel for", S));
8404 emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute_parallel_for,
8405 CodeGen: CodeGenTeams);
8406 emitPostUpdateForReductionClause(CGF, D: S,
8407 CondGen: [](CodeGenFunction &) { return nullptr; });
8408}
8409
8410static void emitTargetTeamsGenericLoopRegionAsDistribute(
8411 CodeGenFunction &CGF, PrePostActionTy &Action,
8412 const OMPTargetTeamsGenericLoopDirective &S) {
8413 Action.Enter(CGF);
8414 // Emit 'teams loop' as if its constituent construct is 'distribute'.
8415 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
8416 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
8417 };
8418
8419 // Emit teams region as a standalone region.
8420 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
8421 PrePostActionTy &Action) {
8422 Action.Enter(CGF);
8423 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
8424 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
8425 (void)PrivateScope.Privatize();
8426 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
8427 CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false);
8428 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
8429 };
8430 DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
8431 emitTargetTeamsLoopCodegenStatus(
8432 CGF, TTL_CODEGEN_TYPE " as distribute", S));
8433 emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute, CodeGen);
8434 emitPostUpdateForReductionClause(CGF, D: S,
8435 CondGen: [](CodeGenFunction &) { return nullptr; });
8436}
8437
8438void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective(
8439 const OMPTargetTeamsGenericLoopDirective &S) {
8440 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8441 if (S.canBeParallelFor())
8442 emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
8443 else
8444 emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
8445 };
8446 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
8447}
8448
8449void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
8450 CodeGenModule &CGM, StringRef ParentName,
8451 const OMPTargetTeamsGenericLoopDirective &S) {
8452 // Emit SPMD target parallel loop region as a standalone region.
8453 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8454 if (S.canBeParallelFor())
8455 emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
8456 else
8457 emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
8458 };
8459 llvm::Function *Fn;
8460 llvm::Constant *Addr;
8461 // Emit target region as a standalone region.
8462 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
8463 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
8464 assert(Fn && Addr &&
8465 "Target device function emission failed for 'target teams loop'.");
8466}
8467
8468static void emitTargetParallelGenericLoopRegion(
8469 CodeGenFunction &CGF, const OMPTargetParallelGenericLoopDirective &S,
8470 PrePostActionTy &Action) {
8471 Action.Enter(CGF);
8472 // Emit as 'parallel for'.
8473 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8474 Action.Enter(CGF);
8475 CodeGenFunction::OMPCancelStackRAII CancelRegion(
8476 CGF, OMPD_target_parallel_loop, /*hasCancel=*/false);
8477 CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), CodeGenLoopBounds: emitForLoopBounds,
8478 CGDispatchBounds: emitDispatchForLoopBounds);
8479 };
8480 emitCommonOMPParallelDirective(CGF, S, InnermostKind: OMPD_for, CodeGen,
8481 CodeGenBoundParameters: emitEmptyBoundParameters);
8482}
8483
8484void CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
8485 CodeGenModule &CGM, StringRef ParentName,
8486 const OMPTargetParallelGenericLoopDirective &S) {
8487 // Emit target parallel loop region as a standalone region.
8488 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8489 emitTargetParallelGenericLoopRegion(CGF, S, Action);
8490 };
8491 llvm::Function *Fn;
8492 llvm::Constant *Addr;
8493 // Emit target region as a standalone region.
8494 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
8495 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
8496 assert(Fn && Addr && "Target device function emission failed.");
8497}
8498
8499/// Emit combined directive 'target parallel loop' as if its constituent
8500/// constructs are 'target', 'parallel', and 'for'.
8501void CodeGenFunction::EmitOMPTargetParallelGenericLoopDirective(
8502 const OMPTargetParallelGenericLoopDirective &S) {
8503 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8504 emitTargetParallelGenericLoopRegion(CGF, S, Action);
8505 };
8506 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
8507}
8508
8509void CodeGenFunction::EmitSimpleOMPExecutableDirective(
8510 const OMPExecutableDirective &D) {
8511 if (const auto *SD = dyn_cast<OMPScanDirective>(Val: &D)) {
8512 EmitOMPScanDirective(S: *SD);
8513 return;
8514 }
8515 if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
8516 return;
8517 auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
8518 OMPPrivateScope GlobalsScope(CGF);
8519 if (isOpenMPTaskingDirective(Kind: D.getDirectiveKind())) {
8520 // Capture global firstprivates to avoid crash.
8521 for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
8522 for (const Expr *Ref : C->varlist()) {
8523 const auto *DRE = cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
8524 if (!DRE)
8525 continue;
8526 const auto *VD = dyn_cast<VarDecl>(Val: DRE->getDecl());
8527 if (!VD || VD->hasLocalStorage())
8528 continue;
8529 if (!CGF.LocalDeclMap.count(Val: VD)) {
8530 LValue GlobLVal = CGF.EmitLValue(E: Ref);
8531 GlobalsScope.addPrivate(LocalVD: VD, Addr: GlobLVal.getAddress());
8532 }
8533 }
8534 }
8535 }
8536 if (isOpenMPSimdDirective(DKind: D.getDirectiveKind())) {
8537 (void)GlobalsScope.Privatize();
8538 ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
8539 emitOMPSimdRegion(CGF, S: cast<OMPLoopDirective>(Val: D), Action);
8540 } else {
8541 if (const auto *LD = dyn_cast<OMPLoopDirective>(Val: &D)) {
8542 for (const Expr *E : LD->counters()) {
8543 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
8544 if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(Val: VD)) {
8545 LValue GlobLVal = CGF.EmitLValue(E);
8546 GlobalsScope.addPrivate(LocalVD: VD, Addr: GlobLVal.getAddress());
8547 }
8548 if (isa<OMPCapturedExprDecl>(Val: VD)) {
8549 // Emit only those that were not explicitly referenced in clauses.
8550 if (!CGF.LocalDeclMap.count(Val: VD))
8551 CGF.EmitVarDecl(D: *VD);
8552 }
8553 }
8554 for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
8555 if (!C->getNumForLoops())
8556 continue;
8557 for (unsigned I = LD->getLoopsNumber(),
8558 E = C->getLoopNumIterations().size();
8559 I < E; ++I) {
8560 if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
8561 Val: cast<DeclRefExpr>(Val: C->getLoopCounter(NumLoop: I))->getDecl())) {
8562 // Emit only those that were not explicitly referenced in clauses.
8563 if (!CGF.LocalDeclMap.count(Val: VD))
8564 CGF.EmitVarDecl(D: *VD);
8565 }
8566 }
8567 }
8568 }
8569 (void)GlobalsScope.Privatize();
8570 CGF.EmitStmt(S: D.getInnermostCapturedStmt()->getCapturedStmt());
8571 }
8572 };
8573 if (D.getDirectiveKind() == OMPD_atomic ||
8574 D.getDirectiveKind() == OMPD_critical ||
8575 D.getDirectiveKind() == OMPD_section ||
8576 D.getDirectiveKind() == OMPD_master ||
8577 D.getDirectiveKind() == OMPD_masked ||
8578 D.getDirectiveKind() == OMPD_unroll ||
8579 D.getDirectiveKind() == OMPD_assume) {
8580 EmitStmt(S: D.getAssociatedStmt());
8581 } else {
8582 auto LPCRegion =
8583 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S: D);
8584 OMPSimdLexicalScope Scope(*this, D);
8585 CGM.getOpenMPRuntime().emitInlinedDirective(
8586 CGF&: *this,
8587 InnermostKind: isOpenMPSimdDirective(DKind: D.getDirectiveKind()) ? OMPD_simd
8588 : D.getDirectiveKind(),
8589 CodeGen);
8590 }
8591 // Check for outer lastprivate conditional update.
8592 checkForLastprivateConditionalUpdate(CGF&: *this, S: D);
8593}
8594
8595void CodeGenFunction::EmitOMPAssumeDirective(const OMPAssumeDirective &S) {
8596 EmitStmt(S: S.getAssociatedStmt());
8597}
8598