1//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit OpenMP nodes as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGCleanup.h"
14#include "CGDebugInfo.h"
15#include "CGOpenMPRuntime.h"
16#include "CodeGenFunction.h"
17#include "CodeGenModule.h"
18#include "CodeGenPGO.h"
19#include "TargetInfo.h"
20#include "clang/AST/ASTContext.h"
21#include "clang/AST/Attr.h"
22#include "clang/AST/DeclOpenMP.h"
23#include "clang/AST/OpenMPClause.h"
24#include "clang/AST/Stmt.h"
25#include "clang/AST/StmtOpenMP.h"
26#include "clang/AST/StmtVisitor.h"
27#include "clang/Basic/DiagnosticFrontend.h"
28#include "clang/Basic/OpenMPKinds.h"
29#include "clang/Basic/PrettyStackTrace.h"
30#include "clang/Basic/SourceManager.h"
31#include "llvm/ADT/SmallSet.h"
32#include "llvm/BinaryFormat/Dwarf.h"
33#include "llvm/Frontend/OpenMP/OMPConstants.h"
34#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DebugInfoMetadata.h"
37#include "llvm/IR/Instructions.h"
38#include "llvm/IR/IntrinsicInst.h"
39#include "llvm/IR/Metadata.h"
40#include "llvm/Support/AtomicOrdering.h"
41#include "llvm/Support/Debug.h"
42#include <optional>
43using namespace clang;
44using namespace CodeGen;
45using namespace llvm::omp;
46
47#define TTL_CODEGEN_TYPE "target-teams-loop-codegen"
48
49static const VarDecl *getBaseDecl(const Expr *Ref);
50static OpenMPDirectiveKind
51getEffectiveDirectiveKind(const OMPExecutableDirective &S);
52
53/// Whether a combined `distribute parallel for` may use the fused
54/// distr_static_chunk + static_chunkone schedule (enum 93): one
55/// for_static_init, no surrounding distribute_static_init.
56static bool canEmitGPUFusedDistSchedule(const CodeGenModule &CGM,
57 const OMPLoopDirective &S,
58 OpenMPDirectiveKind DKind) {
59 // Reduction-only for now. Non-reduction cases might follow in the future, but
60 // need more analysis for maximum profit.
61 return CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU() &&
62 isOpenMPLoopBoundSharingDirective(Kind: DKind) &&
63 S.hasClausesOfKind<OMPReductionClause>() &&
64 !S.getSingleClause<OMPDistScheduleClause>() &&
65 !S.getSingleClause<OMPScheduleClause>() &&
66 !S.getSingleClause<OMPOrderedClause>();
67}
68
69namespace {
70/// Lexical scope for OpenMP executable constructs, that handles correct codegen
71/// for captured expressions.
72class OMPLexicalScope : public CodeGenFunction::LexicalScope {
73 void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
74 for (const auto *C : S.clauses()) {
75 if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
76 if (const auto *PreInit =
77 cast_or_null<DeclStmt>(Val: CPI->getPreInitStmt())) {
78 for (const auto *I : PreInit->decls()) {
79 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
80 CGF.EmitVarDecl(D: cast<VarDecl>(Val: *I));
81 } else {
82 CodeGenFunction::AutoVarEmission Emission =
83 CGF.EmitAutoVarAlloca(var: cast<VarDecl>(Val: *I));
84 CGF.EmitAutoVarCleanups(emission: Emission);
85 }
86 }
87 }
88 }
89 }
90 }
91 CodeGenFunction::OMPPrivateScope InlinedShareds;
92
93 static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
94 return CGF.LambdaCaptureFields.lookup(Val: VD) ||
95 (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
96 (isa_and_nonnull<BlockDecl>(Val: CGF.CurCodeDecl) &&
97 cast<BlockDecl>(Val: CGF.CurCodeDecl)->capturesVariable(var: VD));
98 }
99
100public:
101 OMPLexicalScope(
102 CodeGenFunction &CGF, const OMPExecutableDirective &S,
103 const std::optional<OpenMPDirectiveKind> CapturedRegion = std::nullopt,
104 const bool EmitPreInitStmt = true)
105 : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
106 InlinedShareds(CGF) {
107 if (EmitPreInitStmt)
108 emitPreInitStmt(CGF, S);
109 if (!CapturedRegion)
110 return;
111 assert(S.hasAssociatedStmt() &&
112 "Expected associated statement for inlined directive.");
113 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: *CapturedRegion);
114 for (const auto &C : CS->captures()) {
115 if (C.capturesVariable() || C.capturesVariableByCopy()) {
116 auto *VD = C.getCapturedVar();
117 assert(VD == VD->getCanonicalDecl() &&
118 "Canonical decl must be captured.");
119 DeclRefExpr DRE(
120 CGF.getContext(), const_cast<VarDecl *>(VD),
121 isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
122 InlinedShareds.isGlobalVarCaptured(VD)),
123 VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
124 InlinedShareds.addPrivate(LocalVD: VD, Addr: CGF.EmitLValue(E: &DRE).getAddress());
125 }
126 }
127 (void)InlinedShareds.Privatize();
128 }
129};
130
131/// Lexical scope for OpenMP parallel construct, that handles correct codegen
132/// for captured expressions.
133class OMPParallelScope final : public OMPLexicalScope {
134 bool EmitPreInitStmt(const OMPExecutableDirective &S) {
135 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
136 return !(isOpenMPTargetExecutionDirective(DKind: EKind) ||
137 isOpenMPLoopBoundSharingDirective(Kind: EKind)) &&
138 isOpenMPParallelDirective(DKind: EKind);
139 }
140
141public:
142 OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
143 : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
144 EmitPreInitStmt(S)) {}
145};
146
147/// Lexical scope for OpenMP teams construct, that handles correct codegen
148/// for captured expressions.
149class OMPTeamsScope final : public OMPLexicalScope {
150 bool EmitPreInitStmt(const OMPExecutableDirective &S) {
151 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
152 return !isOpenMPTargetExecutionDirective(DKind: EKind) &&
153 isOpenMPTeamsDirective(DKind: EKind);
154 }
155
156public:
157 OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
158 : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
159 EmitPreInitStmt(S)) {}
160};
161
162/// Private scope for OpenMP loop-based directives, that supports capturing
163/// of used expression from loop statement.
164class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
165 void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
166 const Stmt *PreInits;
167 CodeGenFunction::OMPMapVars PreCondVars;
168 if (auto *LD = dyn_cast<OMPLoopDirective>(Val: &S)) {
169 // Emit init, __range, __begin and __end variables for C++ range loops.
170 (void)OMPLoopBasedDirective::doForAllLoops(
171 CurStmt: LD->getInnermostCapturedStmt()->getCapturedStmt(),
172 /*TryImperfectlyNestedLoops=*/true, NumLoops: LD->getLoopsNumber(),
173 Callback: [&CGF](unsigned Cnt, const Stmt *CurStmt) {
174 if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(Val: CurStmt)) {
175 if (const Stmt *Init = CXXFor->getInit())
176 CGF.EmitStmt(S: Init);
177 CGF.EmitStmt(S: CXXFor->getRangeStmt());
178 CGF.EmitStmt(S: CXXFor->getBeginStmt());
179 CGF.EmitStmt(S: CXXFor->getEndStmt());
180 }
181 return false;
182 });
183 llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
184 for (const auto *E : LD->counters()) {
185 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
186 EmittedAsPrivate.insert(V: VD->getCanonicalDecl());
187 (void)PreCondVars.setVarAddr(
188 CGF, LocalVD: VD, TempAddr: CGF.CreateMemTemp(T: VD->getType().getNonReferenceType()));
189 }
190 // Mark private vars as undefs.
191 for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
192 for (const Expr *IRef : C->varlist()) {
193 const auto *OrigVD =
194 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IRef)->getDecl());
195 if (EmittedAsPrivate.insert(V: OrigVD->getCanonicalDecl()).second) {
196 QualType OrigVDTy = OrigVD->getType().getNonReferenceType();
197 (void)PreCondVars.setVarAddr(
198 CGF, LocalVD: OrigVD,
199 TempAddr: Address(llvm::UndefValue::get(T: CGF.ConvertTypeForMem(
200 T: CGF.getContext().getPointerType(T: OrigVDTy))),
201 CGF.ConvertTypeForMem(T: OrigVDTy),
202 CGF.getContext().getDeclAlign(D: OrigVD)));
203 }
204 }
205 }
206 (void)PreCondVars.apply(CGF);
207 PreInits = LD->getPreInits();
208 } else if (const auto *Tile = dyn_cast<OMPTileDirective>(Val: &S)) {
209 PreInits = Tile->getPreInits();
210 } else if (const auto *Stripe = dyn_cast<OMPStripeDirective>(Val: &S)) {
211 PreInits = Stripe->getPreInits();
212 } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(Val: &S)) {
213 PreInits = Unroll->getPreInits();
214 } else if (const auto *Reverse = dyn_cast<OMPReverseDirective>(Val: &S)) {
215 PreInits = Reverse->getPreInits();
216 } else if (const auto *Split = dyn_cast<OMPSplitDirective>(Val: &S)) {
217 PreInits = Split->getPreInits();
218 } else if (const auto *Interchange =
219 dyn_cast<OMPInterchangeDirective>(Val: &S)) {
220 PreInits = Interchange->getPreInits();
221 } else {
222 llvm_unreachable("Unknown loop-based directive kind.");
223 }
224 doEmitPreinits(PreInits);
225 PreCondVars.restore(CGF);
226 }
227
228 void
229 emitPreInitStmt(CodeGenFunction &CGF,
230 const OMPCanonicalLoopSequenceTransformationDirective &S) {
231 const Stmt *PreInits;
232 if (const auto *Fuse = dyn_cast<OMPFuseDirective>(Val: &S)) {
233 PreInits = Fuse->getPreInits();
234 } else {
235 llvm_unreachable(
236 "Unknown canonical loop sequence transform directive kind.");
237 }
238 doEmitPreinits(PreInits);
239 }
240
241 void doEmitPreinits(const Stmt *PreInits) {
242 if (PreInits) {
243 // CompoundStmts and DeclStmts are used as lists of PreInit statements and
244 // declarations. Since declarations must be visible in the the following
245 // that they initialize, unpack the CompoundStmt they are nested in.
246 SmallVector<const Stmt *> PreInitStmts;
247 if (auto *PreInitCompound = dyn_cast<CompoundStmt>(Val: PreInits))
248 llvm::append_range(C&: PreInitStmts, R: PreInitCompound->body());
249 else
250 PreInitStmts.push_back(Elt: PreInits);
251
252 for (const Stmt *S : PreInitStmts) {
253 // EmitStmt skips any OMPCapturedExprDecls, but needs to be emitted
254 // here.
255 if (auto *PreInitDecl = dyn_cast<DeclStmt>(Val: S)) {
256 for (Decl *I : PreInitDecl->decls())
257 CGF.EmitVarDecl(D: cast<VarDecl>(Val&: *I));
258 continue;
259 }
260 CGF.EmitStmt(S);
261 }
262 }
263 }
264
265public:
266 OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
267 : CodeGenFunction::RunCleanupsScope(CGF) {
268 emitPreInitStmt(CGF, S);
269 }
270 OMPLoopScope(CodeGenFunction &CGF,
271 const OMPCanonicalLoopSequenceTransformationDirective &S)
272 : CodeGenFunction::RunCleanupsScope(CGF) {
273 emitPreInitStmt(CGF, S);
274 }
275};
276
277class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
278 CodeGenFunction::OMPPrivateScope InlinedShareds;
279
280 static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
281 return CGF.LambdaCaptureFields.lookup(Val: VD) ||
282 (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
283 (isa_and_nonnull<BlockDecl>(Val: CGF.CurCodeDecl) &&
284 cast<BlockDecl>(Val: CGF.CurCodeDecl)->capturesVariable(var: VD));
285 }
286
287public:
288 OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
289 : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
290 InlinedShareds(CGF) {
291 for (const auto *C : S.clauses()) {
292 if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
293 if (const auto *PreInit =
294 cast_or_null<DeclStmt>(Val: CPI->getPreInitStmt())) {
295 for (const auto *I : PreInit->decls()) {
296 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
297 CGF.EmitVarDecl(D: cast<VarDecl>(Val: *I));
298 } else {
299 CodeGenFunction::AutoVarEmission Emission =
300 CGF.EmitAutoVarAlloca(var: cast<VarDecl>(Val: *I));
301 CGF.EmitAutoVarCleanups(emission: Emission);
302 }
303 }
304 }
305 } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(Val: C)) {
306 for (const Expr *E : UDP->varlist()) {
307 const Decl *D = cast<DeclRefExpr>(Val: E)->getDecl();
308 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: D))
309 CGF.EmitVarDecl(D: *OED);
310 }
311 } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(Val: C)) {
312 for (const Expr *E : UDP->varlist()) {
313 const Decl *D = getBaseDecl(Ref: E);
314 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: D))
315 CGF.EmitVarDecl(D: *OED);
316 }
317 }
318 }
319 if (!isOpenMPSimdDirective(DKind: getEffectiveDirectiveKind(S)))
320 CGF.EmitOMPPrivateClause(D: S, PrivateScope&: InlinedShareds);
321 if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(Val: &S)) {
322 if (const Expr *E = TG->getReductionRef())
323 CGF.EmitVarDecl(D: *cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()));
324 }
325 // Temp copy arrays for inscan reductions should not be emitted as they are
326 // not used in simd only mode.
327 llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
328 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
329 if (C->getModifier() != OMPC_REDUCTION_inscan)
330 continue;
331 for (const Expr *E : C->copy_array_temps())
332 CopyArrayTemps.insert(V: cast<DeclRefExpr>(Val: E)->getDecl());
333 }
334 const auto *CS = cast_or_null<CapturedStmt>(Val: S.getAssociatedStmt());
335 while (CS) {
336 for (auto &C : CS->captures()) {
337 if (C.capturesVariable() || C.capturesVariableByCopy()) {
338 auto *VD = C.getCapturedVar();
339 if (CopyArrayTemps.contains(V: VD))
340 continue;
341 assert(VD == VD->getCanonicalDecl() &&
342 "Canonical decl must be captured.");
343 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
344 isCapturedVar(CGF, VD) ||
345 (CGF.CapturedStmtInfo &&
346 InlinedShareds.isGlobalVarCaptured(VD)),
347 VD->getType().getNonReferenceType(), VK_LValue,
348 C.getLocation());
349 InlinedShareds.addPrivate(LocalVD: VD, Addr: CGF.EmitLValue(E: &DRE).getAddress());
350 }
351 }
352 CS = dyn_cast<CapturedStmt>(Val: CS->getCapturedStmt());
353 }
354 (void)InlinedShareds.Privatize();
355 }
356};
357
358} // namespace
359
360// The loop directive with a bind clause will be mapped to a different
361// directive with corresponding semantics.
362static OpenMPDirectiveKind
363getEffectiveDirectiveKind(const OMPExecutableDirective &S) {
364 OpenMPDirectiveKind Kind = S.getDirectiveKind();
365 if (Kind != OMPD_loop)
366 return Kind;
367
368 OpenMPBindClauseKind BindKind = OMPC_BIND_unknown;
369 if (const auto *C = S.getSingleClause<OMPBindClause>())
370 BindKind = C->getBindKind();
371
372 switch (BindKind) {
373 case OMPC_BIND_parallel:
374 return OMPD_for;
375 case OMPC_BIND_teams:
376 return OMPD_distribute;
377 case OMPC_BIND_thread:
378 return OMPD_simd;
379 default:
380 return OMPD_loop;
381 }
382}
383
384static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
385 const OMPExecutableDirective &S,
386 const RegionCodeGenTy &CodeGen);
387
388LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
389 if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(Val: E)) {
390 if (const auto *OrigVD = dyn_cast<VarDecl>(Val: OrigDRE->getDecl())) {
391 OrigVD = OrigVD->getCanonicalDecl();
392 bool IsCaptured =
393 LambdaCaptureFields.lookup(Val: OrigVD) ||
394 (CapturedStmtInfo && CapturedStmtInfo->lookup(VD: OrigVD)) ||
395 (isa_and_nonnull<BlockDecl>(Val: CurCodeDecl));
396 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
397 OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
398 return EmitLValue(E: &DRE);
399 }
400 }
401 return EmitLValue(E);
402}
403
404llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
405 ASTContext &C = getContext();
406 llvm::Value *Size = nullptr;
407 auto SizeInChars = C.getTypeSizeInChars(T: Ty);
408 if (SizeInChars.isZero()) {
409 // getTypeSizeInChars() returns 0 for a VLA.
410 while (const VariableArrayType *VAT = C.getAsVariableArrayType(T: Ty)) {
411 VlaSizePair VlaSize = getVLASize(vla: VAT);
412 Ty = VlaSize.Type;
413 Size =
414 Size ? Builder.CreateNUWMul(LHS: Size, RHS: VlaSize.NumElts) : VlaSize.NumElts;
415 }
416 SizeInChars = C.getTypeSizeInChars(T: Ty);
417 if (SizeInChars.isZero())
418 return llvm::ConstantInt::get(Ty: SizeTy, /*V=*/0);
419 return Builder.CreateNUWMul(LHS: Size, RHS: CGM.getSize(numChars: SizeInChars));
420 }
421 return CGM.getSize(numChars: SizeInChars);
422}
423
424void CodeGenFunction::GenerateOpenMPCapturedVars(
425 const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
426 const RecordDecl *RD = S.getCapturedRecordDecl();
427 auto CurField = RD->field_begin();
428 auto CurCap = S.captures().begin();
429 for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
430 E = S.capture_init_end();
431 I != E; ++I, ++CurField, ++CurCap) {
432 if (CurField->hasCapturedVLAType()) {
433 const VariableArrayType *VAT = CurField->getCapturedVLAType();
434 llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
435 CapturedVars.push_back(Elt: Val);
436 } else if (CurCap->capturesThis()) {
437 CapturedVars.push_back(Elt: CXXThisValue);
438 } else if (CurCap->capturesVariableByCopy()) {
439 llvm::Value *CV = EmitLoadOfScalar(lvalue: EmitLValue(E: *I), Loc: CurCap->getLocation());
440
441 // If the field is not a pointer, we need to save the actual value
442 // and load it as a void pointer.
443 if (!CurField->getType()->isAnyPointerType()) {
444 ASTContext &Ctx = getContext();
445 Address DstAddr = CreateMemTempWithoutCast(
446 T: Ctx.getUIntPtrType(),
447 Name: Twine(CurCap->getCapturedVar()->getName(), ".casted"));
448 LValue DstLV = MakeAddrLValue(Addr: DstAddr, T: Ctx.getUIntPtrType());
449
450 llvm::Value *SrcAddrVal = EmitScalarConversion(
451 Src: DstAddr.emitRawPointer(CGF&: *this),
452 SrcTy: Ctx.getPointerType(T: Ctx.getUIntPtrType()),
453 DstTy: Ctx.getPointerType(T: CurField->getType()), Loc: CurCap->getLocation());
454 LValue SrcLV =
455 MakeNaturalAlignAddrLValue(V: SrcAddrVal, T: CurField->getType());
456
457 // Store the value using the source type pointer.
458 EmitStoreThroughLValue(Src: RValue::get(V: CV), Dst: SrcLV);
459
460 // Load the value using the destination type pointer.
461 CV = EmitLoadOfScalar(lvalue: DstLV, Loc: CurCap->getLocation());
462 }
463 CapturedVars.push_back(Elt: CV);
464 } else {
465 assert(CurCap->capturesVariable() && "Expected capture by reference.");
466 CapturedVars.push_back(Elt: EmitLValue(E: *I).getAddress().emitRawPointer(CGF&: *this));
467 }
468 }
469}
470
471static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
472 QualType DstType, StringRef Name,
473 LValue AddrLV) {
474 ASTContext &Ctx = CGF.getContext();
475
476 llvm::Value *CastedPtr = CGF.EmitScalarConversion(
477 Src: AddrLV.getAddress().emitRawPointer(CGF), SrcTy: Ctx.getUIntPtrType(),
478 DstTy: Ctx.getPointerType(T: DstType), Loc);
479 // FIXME: should the pointee type (DstType) be passed?
480 Address TmpAddr =
481 CGF.MakeNaturalAlignAddrLValue(V: CastedPtr, T: DstType).getAddress();
482 return TmpAddr;
483}
484
485static QualType getCanonicalParamType(ASTContext &C, QualType T) {
486 if (T->isLValueReferenceType())
487 return C.getLValueReferenceType(
488 T: getCanonicalParamType(C, T: T.getNonReferenceType()),
489 /*SpelledAsLValue=*/false);
490 if (T->isPointerType())
491 return C.getPointerType(T: getCanonicalParamType(C, T: T->getPointeeType()));
492 if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
493 if (const auto *VLA = dyn_cast<VariableArrayType>(Val: A))
494 return getCanonicalParamType(C, T: VLA->getElementType());
495 if (!A->isVariablyModifiedType())
496 return C.getCanonicalType(T);
497 }
498 return C.getCanonicalParamType(T);
499}
500
501namespace {
502/// Contains required data for proper outlined function codegen.
503struct FunctionOptions {
504 /// Captured statement for which the function is generated.
505 const CapturedStmt *S = nullptr;
506 /// true if cast to/from UIntPtr is required for variables captured by
507 /// value.
508 const bool UIntPtrCastRequired = true;
509 /// true if only casted arguments must be registered as local args or VLA
510 /// sizes.
511 const bool RegisterCastedArgsOnly = false;
512 /// Name of the generated function.
513 const StringRef FunctionName;
514 /// Location of the non-debug version of the outlined function.
515 SourceLocation Loc;
516 const bool IsDeviceKernel = false;
517 explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
518 bool RegisterCastedArgsOnly, StringRef FunctionName,
519 SourceLocation Loc, bool IsDeviceKernel)
520 : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
521 RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
522 FunctionName(FunctionName), Loc(Loc), IsDeviceKernel(IsDeviceKernel) {}
523};
524} // namespace
525
526static llvm::Function *emitOutlinedFunctionPrologue(
527 CodeGenFunction &CGF, FunctionArgList &Args,
528 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
529 &LocalAddrs,
530 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
531 &VLASizes,
532 llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
533 const CapturedDecl *CD = FO.S->getCapturedDecl();
534 const RecordDecl *RD = FO.S->getCapturedRecordDecl();
535 assert(CD->hasBody() && "missing CapturedDecl body");
536
537 CXXThisValue = nullptr;
538 // Build the argument list.
539 CodeGenModule &CGM = CGF.CGM;
540 ASTContext &Ctx = CGM.getContext();
541 FunctionArgList TargetArgs;
542 Args.append(in_start: CD->param_begin(),
543 in_end: std::next(x: CD->param_begin(), n: CD->getContextParamPosition()));
544 TargetArgs.append(
545 in_start: CD->param_begin(),
546 in_end: std::next(x: CD->param_begin(), n: CD->getContextParamPosition()));
547 auto I = FO.S->captures().begin();
548 FunctionDecl *DebugFunctionDecl = nullptr;
549 if (!FO.UIntPtrCastRequired) {
550 FunctionProtoType::ExtProtoInfo EPI;
551 QualType FunctionTy = Ctx.getFunctionType(ResultTy: Ctx.VoidTy, Args: {}, EPI);
552 DebugFunctionDecl = FunctionDecl::Create(
553 C&: Ctx, DC: Ctx.getTranslationUnitDecl(), StartLoc: FO.S->getBeginLoc(),
554 NLoc: SourceLocation(), N: DeclarationName(), T: FunctionTy,
555 TInfo: Ctx.getTrivialTypeSourceInfo(T: FunctionTy), SC: SC_Static,
556 /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
557 /*hasWrittenPrototype=*/false);
558 }
559 for (const FieldDecl *FD : RD->fields()) {
560 QualType ArgType = FD->getType();
561 IdentifierInfo *II = nullptr;
562 VarDecl *CapVar = nullptr;
563
564 // If this is a capture by copy and the type is not a pointer, the outlined
565 // function argument type should be uintptr and the value properly casted to
566 // uintptr. This is necessary given that the runtime library is only able to
567 // deal with pointers. We can pass in the same way the VLA type sizes to the
568 // outlined function.
569 if (FO.UIntPtrCastRequired &&
570 ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
571 I->capturesVariableArrayType()))
572 ArgType = Ctx.getUIntPtrType();
573
574 if (I->capturesVariable() || I->capturesVariableByCopy()) {
575 CapVar = I->getCapturedVar();
576 II = CapVar->getIdentifier();
577 } else if (I->capturesThis()) {
578 II = &Ctx.Idents.get(Name: "this");
579 } else {
580 assert(I->capturesVariableArrayType());
581 II = &Ctx.Idents.get(Name: "vla");
582 }
583 if (ArgType->isVariablyModifiedType())
584 ArgType = getCanonicalParamType(C&: Ctx, T: ArgType);
585 VarDecl *Arg;
586 if (CapVar && (CapVar->getTLSKind() != clang::VarDecl::TLS_None)) {
587 Arg = ImplicitParamDecl::Create(C&: Ctx, /*DC=*/nullptr, IdLoc: FD->getLocation(),
588 Id: II, T: ArgType,
589 ParamKind: ImplicitParamKind::ThreadPrivateVar);
590 } else if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
591 Arg = ParmVarDecl::Create(
592 C&: Ctx, DC: DebugFunctionDecl,
593 StartLoc: CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
594 IdLoc: CapVar ? CapVar->getLocation() : FD->getLocation(), Id: II, T: ArgType,
595 /*TInfo=*/nullptr, S: SC_None, /*DefArg=*/nullptr);
596 } else {
597 Arg = ImplicitParamDecl::Create(C&: Ctx, /*DC=*/nullptr, IdLoc: FD->getLocation(),
598 Id: II, T: ArgType, ParamKind: ImplicitParamKind::Other);
599 }
600 Args.emplace_back(Args&: Arg);
601 // Do not cast arguments if we emit function with non-original types.
602 TargetArgs.emplace_back(
603 Args: FO.UIntPtrCastRequired
604 ? Arg
605 : CGM.getOpenMPRuntime().translateParameter(FD, NativeParam: Arg));
606 ++I;
607 }
608 Args.append(in_start: std::next(x: CD->param_begin(), n: CD->getContextParamPosition() + 1),
609 in_end: CD->param_end());
610 TargetArgs.append(
611 in_start: std::next(x: CD->param_begin(), n: CD->getContextParamPosition() + 1),
612 in_end: CD->param_end());
613
614 // Create the function declaration.
615 const CGFunctionInfo &FuncInfo =
616 FO.IsDeviceKernel
617 ? CGM.getTypes().arrangeDeviceKernelCallerDeclaration(resultType: Ctx.VoidTy,
618 args: TargetArgs)
619 : CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: Ctx.VoidTy,
620 args: TargetArgs);
621 llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(Info: FuncInfo);
622
623 auto *F =
624 llvm::Function::Create(Ty: FuncLLVMTy, Linkage: llvm::GlobalValue::InternalLinkage,
625 N: FO.FunctionName, M: &CGM.getModule());
626 CGM.SetInternalFunctionAttributes(GD: CD, F, FI: FuncInfo);
627
628 // Adjust the calling convention for SPIR-V targets to avoid mismatches
629 // between callee and caller.
630 if (CGM.getTriple().isSPIRV() && !FO.IsDeviceKernel)
631 F->setCallingConv(llvm::CallingConv::SPIR_FUNC);
632
633 if (CD->isNothrow())
634 F->setDoesNotThrow();
635 F->setDoesNotRecurse();
636
637 // Always inline the outlined function if optimizations are enabled.
638 if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
639 F->removeFnAttr(Kind: llvm::Attribute::NoInline);
640 F->addFnAttr(Kind: llvm::Attribute::AlwaysInline);
641 }
642 if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
643 F->addFnAttr(Kind: "sample-profile-suffix-elision-policy", Val: "selected");
644
645 // Generate the function.
646 CGF.StartFunction(GD: CD, RetTy: Ctx.VoidTy, Fn: F, FnInfo: FuncInfo, Args: TargetArgs,
647 Loc: FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
648 StartLoc: FO.UIntPtrCastRequired ? FO.Loc
649 : CD->getBody()->getBeginLoc());
650 unsigned Cnt = CD->getContextParamPosition();
651 I = FO.S->captures().begin();
652 for (const FieldDecl *FD : RD->fields()) {
653 // Do not map arguments if we emit function with non-original types.
654 Address LocalAddr(Address::invalid());
655 if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
656 LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, NativeParam: Args[Cnt],
657 TargetParam: TargetArgs[Cnt]);
658 } else {
659 LocalAddr = CGF.GetAddrOfLocalVar(VD: Args[Cnt]);
660 }
661 // If we are capturing a pointer by copy we don't need to do anything, just
662 // use the value that we get from the arguments.
663 if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
664 const VarDecl *CurVD = I->getCapturedVar();
665 if (!FO.RegisterCastedArgsOnly)
666 LocalAddrs.insert(KV: {Args[Cnt], {CurVD, LocalAddr}});
667 ++Cnt;
668 ++I;
669 continue;
670 }
671
672 LValue ArgLVal = CGF.MakeAddrLValue(Addr: LocalAddr, T: Args[Cnt]->getType(),
673 Source: AlignmentSource::Decl);
674 if (FD->hasCapturedVLAType()) {
675 if (FO.UIntPtrCastRequired) {
676 ArgLVal = CGF.MakeAddrLValue(
677 Addr: castValueFromUintptr(CGF, Loc: I->getLocation(), DstType: FD->getType(),
678 Name: Args[Cnt]->getName(), AddrLV: ArgLVal),
679 T: FD->getType(), Source: AlignmentSource::Decl);
680 }
681 llvm::Value *ExprArg = CGF.EmitLoadOfScalar(lvalue: ArgLVal, Loc: I->getLocation());
682 const VariableArrayType *VAT = FD->getCapturedVLAType();
683 VLASizes.try_emplace(Key: Args[Cnt], Args: VAT->getSizeExpr(), Args&: ExprArg);
684 } else if (I->capturesVariable()) {
685 const VarDecl *Var = I->getCapturedVar();
686 QualType VarTy = Var->getType();
687 Address ArgAddr = ArgLVal.getAddress();
688 if (ArgLVal.getType()->isLValueReferenceType()) {
689 ArgAddr = CGF.EmitLoadOfReference(RefLVal: ArgLVal);
690 } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
691 assert(ArgLVal.getType()->isPointerType());
692 ArgAddr = CGF.EmitLoadOfPointer(
693 Ptr: ArgAddr, PtrTy: ArgLVal.getType()->castAs<PointerType>());
694 }
695 if (!FO.RegisterCastedArgsOnly) {
696 LocalAddrs.insert(
697 KV: {Args[Cnt], {Var, ArgAddr.withAlignment(NewAlignment: Ctx.getDeclAlign(D: Var))}});
698 }
699 } else if (I->capturesVariableByCopy()) {
700 assert(!FD->getType()->isAnyPointerType() &&
701 "Not expecting a captured pointer.");
702 const VarDecl *Var = I->getCapturedVar();
703 LocalAddrs.insert(KV: {Args[Cnt],
704 {Var, FO.UIntPtrCastRequired
705 ? castValueFromUintptr(
706 CGF, Loc: I->getLocation(), DstType: FD->getType(),
707 Name: Args[Cnt]->getName(), AddrLV: ArgLVal)
708 : ArgLVal.getAddress()}});
709 } else {
710 // If 'this' is captured, load it into CXXThisValue.
711 assert(I->capturesThis());
712 CXXThisValue = CGF.EmitLoadOfScalar(lvalue: ArgLVal, Loc: I->getLocation());
713 LocalAddrs.insert(KV: {Args[Cnt], {nullptr, ArgLVal.getAddress()}});
714 }
715 ++Cnt;
716 ++I;
717 }
718
719 return F;
720}
721
722static llvm::Function *emitOutlinedFunctionPrologueAggregate(
723 CodeGenFunction &CGF, FunctionArgList &Args,
724 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
725 &LocalAddrs,
726 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
727 &VLASizes,
728 llvm::Value *&CXXThisValue, llvm::Value *&ContextV, const CapturedStmt &CS,
729 SourceLocation Loc, StringRef FunctionName) {
730 const CapturedDecl *CD = CS.getCapturedDecl();
731 const RecordDecl *RD = CS.getCapturedRecordDecl();
732
733 CXXThisValue = nullptr;
734 CodeGenModule &CGM = CGF.CGM;
735 ASTContext &Ctx = CGM.getContext();
736 Args.push_back(Elt: CD->getContextParam());
737
738 const CGFunctionInfo &FuncInfo =
739 CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: Ctx.VoidTy, args: Args);
740 llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(Info: FuncInfo);
741
742 auto *F =
743 llvm::Function::Create(Ty: FuncLLVMTy, Linkage: llvm::GlobalValue::InternalLinkage,
744 N: FunctionName, M: &CGM.getModule());
745 CGM.SetInternalFunctionAttributes(GD: CD, F, FI: FuncInfo);
746 if (CD->isNothrow())
747 F->setDoesNotThrow();
748 F->setDoesNotRecurse();
749
750 CGF.StartFunction(GD: CD, RetTy: Ctx.VoidTy, Fn: F, FnInfo: FuncInfo, Args, Loc, StartLoc: Loc);
751 Address ContextAddr = CGF.GetAddrOfLocalVar(VD: CD->getContextParam());
752 ContextV = CGF.Builder.CreateLoad(Addr: ContextAddr);
753
754 // The runtime passes arguments as an array of pointers.
755 llvm::Type *PtrTy = CGF.Builder.getPtrTy();
756 llvm::Align PtrAlign = CGM.getDataLayout().getPointerABIAlignment(AS: 0);
757 CharUnits SlotAlign = CharUnits::fromQuantity(Quantity: PtrAlign.value());
758
759 for (auto [FD, C, FieldIdx] :
760 llvm::zip(t: RD->fields(), u: CS.captures(),
761 args: llvm::seq<unsigned>(Size: RD->getNumFields()))) {
762 llvm::Value *SlotPtr =
763 CGF.Builder.CreateConstInBoundsGEP1_32(Ty: PtrTy, Ptr: ContextV, Idx0: FieldIdx);
764 llvm::Value *Slot = CGF.Builder.CreateAlignedLoad(Ty: PtrTy, Ptr: SlotPtr, Align: PtrAlign);
765
766 // Generate the appropriate load from the per-argument storage. This
767 // includes all of the user arguments as well as the implicit kernel
768 // argument pointer.
769 if (C.capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
770 const VarDecl *CurVD = C.getCapturedVar();
771 Slot->setName(CurVD->getName());
772 Address SlotAddr(Slot, PtrTy, SlotAlign);
773 LocalAddrs.insert(KV: {FD, {CurVD, SlotAddr}});
774 } else if (FD->hasCapturedVLAType()) {
775 // VLA size is stored as intptr_t directly in the slot.
776 Address SlotAddr(Slot, CGF.ConvertTypeForMem(T: FD->getType()), SlotAlign);
777 LValue ArgLVal =
778 CGF.MakeAddrLValue(Addr: SlotAddr, T: FD->getType(), Source: AlignmentSource::Decl);
779 llvm::Value *ExprArg = CGF.EmitLoadOfScalar(lvalue: ArgLVal, Loc: C.getLocation());
780 const VariableArrayType *VAT = FD->getCapturedVLAType();
781 VLASizes.try_emplace(Key: FD, Args: VAT->getSizeExpr(), Args&: ExprArg);
782 } else if (C.capturesVariable()) {
783 const VarDecl *Var = C.getCapturedVar();
784 QualType VarTy = Var->getType();
785
786 if (VarTy->isVariablyModifiedType() && VarTy->isPointerType()) {
787 Slot->setName(Var->getName() + ".addr");
788 Address SlotAddr(Slot, PtrTy, SlotAlign);
789 LocalAddrs.insert(KV: {FD, {Var, SlotAddr}});
790 } else {
791 llvm::Value *VarAddr = CGF.Builder.CreateAlignedLoad(
792 Ty: PtrTy, Ptr: Slot, Align: PtrAlign, Name: Var->getName());
793 LocalAddrs.insert(KV: {FD,
794 {Var, Address(VarAddr, CGF.ConvertTypeForMem(T: VarTy),
795 Ctx.getDeclAlign(D: Var))}});
796 }
797 } else if (C.capturesVariableByCopy()) {
798 assert(!FD->getType()->isAnyPointerType() &&
799 "Not expecting a captured pointer.");
800 const VarDecl *Var = C.getCapturedVar();
801 QualType FieldTy = FD->getType();
802
803 // Scalar values are promoted and stored directly in the slot.
804 Address SlotAddr(Slot, CGF.ConvertTypeForMem(T: FieldTy), SlotAlign);
805 Address CopyAddr =
806 CGF.CreateMemTemp(T: FieldTy, Align: Ctx.getDeclAlign(D: FD), Name: Var->getName());
807 LValue SrcLVal =
808 CGF.MakeAddrLValue(Addr: SlotAddr, T: FieldTy, Source: AlignmentSource::Decl);
809 LValue CopyLVal =
810 CGF.MakeAddrLValue(Addr: CopyAddr, T: FieldTy, Source: AlignmentSource::Decl);
811
812 RValue ArgRVal = CGF.EmitLoadOfLValue(V: SrcLVal, Loc: C.getLocation());
813 CGF.EmitStoreThroughLValue(Src: ArgRVal, Dst: CopyLVal);
814
815 LocalAddrs.insert(KV: {FD, {Var, CopyAddr}});
816 } else {
817 assert(C.capturesThis() && "Default case expected to be CXX 'this'");
818 CXXThisValue =
819 CGF.Builder.CreateAlignedLoad(Ty: PtrTy, Ptr: Slot, Align: PtrAlign, Name: "this");
820 Address SlotAddr(Slot, PtrTy, SlotAlign);
821 LocalAddrs.insert(KV: {FD, {nullptr, SlotAddr}});
822 }
823 }
824
825 return F;
826}
827
828llvm::Function *CodeGenFunction::GenerateOpenMPCapturedStmtFunction(
829 const CapturedStmt &S, const OMPExecutableDirective &D) {
830 SourceLocation Loc = D.getBeginLoc();
831 assert(
832 CapturedStmtInfo &&
833 "CapturedStmtInfo should be set when generating the captured function");
834 const CapturedDecl *CD = S.getCapturedDecl();
835 // Build the argument list.
836 bool NeedWrapperFunction =
837 getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
838 FunctionArgList Args, WrapperArgs;
839 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs,
840 WrapperLocalAddrs;
841 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes,
842 WrapperVLASizes;
843 SmallString<256> Buffer;
844 llvm::raw_svector_ostream Out(Buffer);
845 Out << CapturedStmtInfo->getHelperName();
846 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
847 bool IsDeviceKernel = CGM.getOpenMPRuntime().isGPU() &&
848 isOpenMPTargetExecutionDirective(DKind: EKind) &&
849 D.getCapturedStmt(RegionKind: OMPD_target) == &S;
850 CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
851 llvm::Function *WrapperF = nullptr;
852 if (NeedWrapperFunction) {
853 // Emit the final kernel early to allow attributes to be added by the
854 // OpenMPI-IR-Builder.
855 FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
856 /*RegisterCastedArgsOnly=*/true,
857 CapturedStmtInfo->getHelperName(), Loc,
858 IsDeviceKernel);
859 WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
860 WrapperF =
861 emitOutlinedFunctionPrologue(CGF&: WrapperCGF, Args, LocalAddrs, VLASizes,
862 CXXThisValue&: WrapperCGF.CXXThisValue, FO: WrapperFO);
863 Out << "_debug__";
864 }
865 FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
866 Out.str(), Loc, !NeedWrapperFunction && IsDeviceKernel);
867 llvm::Function *F = emitOutlinedFunctionPrologue(
868 CGF&: *this, Args&: WrapperArgs, LocalAddrs&: WrapperLocalAddrs, VLASizes&: WrapperVLASizes, CXXThisValue, FO);
869 CodeGenFunction::OMPPrivateScope LocalScope(*this);
870 for (const auto &LocalAddrPair : WrapperLocalAddrs) {
871 if (LocalAddrPair.second.first) {
872 LocalScope.addPrivate(LocalVD: LocalAddrPair.second.first,
873 Addr: LocalAddrPair.second.second);
874 }
875 }
876 (void)LocalScope.Privatize();
877 for (const auto &VLASizePair : WrapperVLASizes)
878 VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
879 PGO->assignRegionCounters(GD: GlobalDecl(CD), Fn: F);
880 CapturedStmtInfo->EmitBody(CGF&: *this, S: CD->getBody());
881 LocalScope.ForceCleanup();
882 FinishFunction(EndLoc: CD->getBodyRBrace());
883 if (!NeedWrapperFunction)
884 return F;
885
886 // Reverse the order.
887 WrapperF->removeFromParent();
888 F->getParent()->getFunctionList().insertAfter(where: F->getIterator(), New: WrapperF);
889
890 llvm::SmallVector<llvm::Value *, 4> CallArgs;
891 auto *PI = F->arg_begin();
892 for (const auto *Arg : Args) {
893 llvm::Value *CallArg;
894 auto I = LocalAddrs.find(Key: Arg);
895 if (I != LocalAddrs.end()) {
896 LValue LV = WrapperCGF.MakeAddrLValue(
897 Addr: I->second.second,
898 T: I->second.first ? I->second.first->getType() : Arg->getType(),
899 Source: AlignmentSource::Decl);
900 if (LV.getType()->isAnyComplexType())
901 LV.setAddress(LV.getAddress().withElementType(ElemTy: PI->getType()));
902 CallArg = WrapperCGF.EmitLoadOfScalar(lvalue: LV, Loc: S.getBeginLoc());
903 } else {
904 auto EI = VLASizes.find(Val: Arg);
905 if (EI != VLASizes.end()) {
906 CallArg = EI->second.second;
907 } else {
908 LValue LV =
909 WrapperCGF.MakeAddrLValue(Addr: WrapperCGF.GetAddrOfLocalVar(VD: Arg),
910 T: Arg->getType(), Source: AlignmentSource::Decl);
911 CallArg = WrapperCGF.EmitLoadOfScalar(lvalue: LV, Loc: S.getBeginLoc());
912 }
913 }
914 CallArgs.emplace_back(Args: WrapperCGF.EmitFromMemory(Value: CallArg, Ty: Arg->getType()));
915 ++PI;
916 }
917 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF&: WrapperCGF, Loc, OutlinedFn: F, Args: CallArgs);
918 WrapperCGF.FinishFunction();
919 return WrapperF;
920}
921
922llvm::Function *CodeGenFunction::GenerateOpenMPCapturedStmtFunctionAggregate(
923 const CapturedStmt &S, const OMPExecutableDirective &D) {
924 SourceLocation Loc = D.getBeginLoc();
925 assert(
926 CapturedStmtInfo &&
927 "CapturedStmtInfo should be set when generating the captured function");
928 const CapturedDecl *CD = S.getCapturedDecl();
929 const RecordDecl *RD = S.getCapturedRecordDecl();
930 StringRef FunctionName = CapturedStmtInfo->getHelperName();
931 bool NeedWrapperFunction =
932 getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
933
934 CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
935 llvm::Function *WrapperF = nullptr;
936 llvm::Value *WrapperContextV = nullptr;
937 if (NeedWrapperFunction) {
938 WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
939 FunctionArgList WrapperArgs;
940 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
941 WrapperLocalAddrs;
942 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
943 WrapperVLASizes;
944 WrapperF = emitOutlinedFunctionPrologueAggregate(
945 CGF&: WrapperCGF, Args&: WrapperArgs, LocalAddrs&: WrapperLocalAddrs, VLASizes&: WrapperVLASizes,
946 CXXThisValue&: WrapperCGF.CXXThisValue, ContextV&: WrapperContextV, CS: S, Loc, FunctionName);
947 }
948
949 FunctionArgList Args;
950 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
951 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
952 llvm::Function *F;
953
954 if (NeedWrapperFunction) {
955 SmallString<256> Buffer;
956 llvm::raw_svector_ostream Out(Buffer);
957 Out << FunctionName << "_debug__";
958
959 FunctionOptions FO(&S, /*UIntPtrCastRequired=*/false,
960 /*RegisterCastedArgsOnly=*/false, Out.str(), Loc,
961 /*IsDeviceKernel=*/false);
962 F = emitOutlinedFunctionPrologue(CGF&: *this, Args, LocalAddrs, VLASizes,
963 CXXThisValue, FO);
964 } else {
965 llvm::Value *ContextV = nullptr;
966 F = emitOutlinedFunctionPrologueAggregate(CGF&: *this, Args, LocalAddrs, VLASizes,
967 CXXThisValue, ContextV, CS: S, Loc,
968 FunctionName);
969
970 const RecordDecl *RD = S.getCapturedRecordDecl();
971 unsigned FieldIdx = RD->getNumFields();
972 for (unsigned I = 0; I < CD->getNumParams(); ++I) {
973 const ImplicitParamDecl *Param = CD->getParam(i: I);
974 if (Param == CD->getContextParam())
975 continue;
976 llvm::Align PtrAlign = CGM.getDataLayout().getPointerABIAlignment(AS: 0);
977 llvm::Value *SlotPtr = Builder.CreateConstInBoundsGEP1_32(
978 Ty: Builder.getPtrTy(), Ptr: ContextV, Idx0: FieldIdx,
979 Name: Twine(Param->getName()) + ".addr");
980 llvm::Value *ParamAddr =
981 Builder.CreateAlignedLoad(Ty: Builder.getPtrTy(), Ptr: SlotPtr, Align: PtrAlign);
982 llvm::Value *ParamVal = Builder.CreateAlignedLoad(
983 Ty: Builder.getPtrTy(), Ptr: ParamAddr, Align: PtrAlign, Name: Param->getName());
984 Address ParamLocalAddr =
985 CreateMemTemp(T: Param->getType(), Name: Param->getName());
986 Builder.CreateStore(Val: ParamVal, Addr: ParamLocalAddr);
987 LocalAddrs.insert(KV: {Param, {Param, ParamLocalAddr}});
988 ++FieldIdx;
989 }
990 }
991
992 CodeGenFunction::OMPPrivateScope LocalScope(*this);
993 for (const auto &LocalAddrPair : LocalAddrs) {
994 if (LocalAddrPair.second.first)
995 LocalScope.addPrivate(LocalVD: LocalAddrPair.second.first,
996 Addr: LocalAddrPair.second.second);
997 }
998 (void)LocalScope.Privatize();
999 for (const auto &VLASizePair : VLASizes)
1000 VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
1001 PGO->assignRegionCounters(GD: GlobalDecl(CD), Fn: F);
1002 CapturedStmtInfo->EmitBody(CGF&: *this, S: CD->getBody());
1003 (void)LocalScope.ForceCleanup();
1004 FinishFunction(EndLoc: CD->getBodyRBrace());
1005
1006 if (!NeedWrapperFunction)
1007 return F;
1008
1009 // Reverse the order.
1010 WrapperF->removeFromParent();
1011 F->getParent()->getFunctionList().insertAfter(where: F->getIterator(), New: WrapperF);
1012
1013 llvm::Align PtrAlign = CGM.getDataLayout().getPointerABIAlignment(AS: 0);
1014 llvm::SmallVector<llvm::Value *, 16> CallArgs;
1015 assert(CD->getContextParamPosition() == 0 &&
1016 "Expected context param at position 0 for target regions");
1017 assert(RD->getNumFields() + 1 == F->getNumOperands() &&
1018 "Argument count mismatch");
1019
1020 for (auto [FD, InnerParam, SlotIdx] : llvm::zip(
1021 t: RD->fields(), u: F->args(), args: llvm::seq<unsigned>(Size: RD->getNumFields()))) {
1022 llvm::Value *SlotPtr = WrapperCGF.Builder.CreateConstInBoundsGEP1_32(
1023 Ty: WrapperCGF.Builder.getPtrTy(), Ptr: WrapperContextV, Idx0: SlotIdx);
1024 llvm::Value *Slot = WrapperCGF.Builder.CreateAlignedLoad(
1025 Ty: WrapperCGF.Builder.getPtrTy(), Ptr: SlotPtr, Align: PtrAlign);
1026 llvm::Value *Val = WrapperCGF.Builder.CreateAlignedLoad(
1027 Ty: InnerParam.getType(), Ptr: Slot, Align: PtrAlign, Name: InnerParam.getName());
1028 CallArgs.push_back(Elt: Val);
1029 }
1030
1031 // Handle the load from the implicit dyn_ptr at the end of the __context.
1032 unsigned SlotIdx = RD->getNumFields();
1033 auto InnerParam = F->arg_begin() + SlotIdx;
1034 llvm::Value *SlotPtr = WrapperCGF.Builder.CreateConstInBoundsGEP1_32(
1035 Ty: WrapperCGF.Builder.getPtrTy(), Ptr: WrapperContextV, Idx0: SlotIdx);
1036 llvm::Value *Slot = WrapperCGF.Builder.CreateAlignedLoad(
1037 Ty: WrapperCGF.Builder.getPtrTy(), Ptr: SlotPtr, Align: PtrAlign);
1038 llvm::Value *Val = WrapperCGF.Builder.CreateAlignedLoad(
1039 Ty: InnerParam->getType(), Ptr: Slot, Align: PtrAlign, Name: InnerParam->getName());
1040 CallArgs.push_back(Elt: Val);
1041
1042 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF&: WrapperCGF, Loc, OutlinedFn: F, Args: CallArgs);
1043 WrapperCGF.FinishFunction();
1044 return WrapperF;
1045}
1046
1047//===----------------------------------------------------------------------===//
1048// OpenMP Directive Emission
1049//===----------------------------------------------------------------------===//
1050void CodeGenFunction::EmitOMPAggregateAssign(
1051 Address DestAddr, Address SrcAddr, QualType OriginalType,
1052 const llvm::function_ref<void(Address, Address)> CopyGen) {
1053 // Perform element-by-element initialization.
1054 QualType ElementTy;
1055
1056 // Drill down to the base element type on both arrays.
1057 const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
1058 llvm::Value *NumElements = emitArrayLength(arrayType: ArrayTy, baseType&: ElementTy, addr&: DestAddr);
1059 SrcAddr = SrcAddr.withElementType(ElemTy: DestAddr.getElementType());
1060
1061 llvm::Value *SrcBegin = SrcAddr.emitRawPointer(CGF&: *this);
1062 llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF&: *this);
1063 // Cast from pointer to array type to pointer to single element.
1064 llvm::Value *DestEnd = Builder.CreateInBoundsGEP(Ty: DestAddr.getElementType(),
1065 Ptr: DestBegin, IdxList: NumElements);
1066
1067 // The basic structure here is a while-do loop.
1068 llvm::BasicBlock *BodyBB = createBasicBlock(name: "omp.arraycpy.body");
1069 llvm::BasicBlock *DoneBB = createBasicBlock(name: "omp.arraycpy.done");
1070 llvm::Value *IsEmpty =
1071 Builder.CreateICmpEQ(LHS: DestBegin, RHS: DestEnd, Name: "omp.arraycpy.isempty");
1072 Builder.CreateCondBr(Cond: IsEmpty, True: DoneBB, False: BodyBB);
1073
1074 // Enter the loop body, making that address the current address.
1075 llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
1076 EmitBlock(BB: BodyBB);
1077
1078 CharUnits ElementSize = getContext().getTypeSizeInChars(T: ElementTy);
1079
1080 llvm::PHINode *SrcElementPHI =
1081 Builder.CreatePHI(Ty: SrcBegin->getType(), NumReservedValues: 2, Name: "omp.arraycpy.srcElementPast");
1082 SrcElementPHI->addIncoming(V: SrcBegin, BB: EntryBB);
1083 Address SrcElementCurrent =
1084 Address(SrcElementPHI, SrcAddr.getElementType(),
1085 SrcAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize));
1086
1087 llvm::PHINode *DestElementPHI = Builder.CreatePHI(
1088 Ty: DestBegin->getType(), NumReservedValues: 2, Name: "omp.arraycpy.destElementPast");
1089 DestElementPHI->addIncoming(V: DestBegin, BB: EntryBB);
1090 Address DestElementCurrent =
1091 Address(DestElementPHI, DestAddr.getElementType(),
1092 DestAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize));
1093
1094 // Emit copy.
1095 CopyGen(DestElementCurrent, SrcElementCurrent);
1096
1097 // Shift the address forward by one element.
1098 llvm::Value *DestElementNext =
1099 Builder.CreateConstGEP1_32(Ty: DestAddr.getElementType(), Ptr: DestElementPHI,
1100 /*Idx0=*/1, Name: "omp.arraycpy.dest.element");
1101 llvm::Value *SrcElementNext =
1102 Builder.CreateConstGEP1_32(Ty: SrcAddr.getElementType(), Ptr: SrcElementPHI,
1103 /*Idx0=*/1, Name: "omp.arraycpy.src.element");
1104 // Check whether we've reached the end.
1105 llvm::Value *Done =
1106 Builder.CreateICmpEQ(LHS: DestElementNext, RHS: DestEnd, Name: "omp.arraycpy.done");
1107 Builder.CreateCondBr(Cond: Done, True: DoneBB, False: BodyBB);
1108 DestElementPHI->addIncoming(V: DestElementNext, BB: Builder.GetInsertBlock());
1109 SrcElementPHI->addIncoming(V: SrcElementNext, BB: Builder.GetInsertBlock());
1110
1111 // Done.
1112 EmitBlock(BB: DoneBB, /*IsFinished=*/true);
1113}
1114
1115void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
1116 Address SrcAddr, const VarDecl *DestVD,
1117 const VarDecl *SrcVD, const Expr *Copy) {
1118 if (OriginalType->isArrayType()) {
1119 const auto *BO = dyn_cast<BinaryOperator>(Val: Copy);
1120 if (BO && BO->getOpcode() == BO_Assign) {
1121 // Perform simple memcpy for simple copying.
1122 LValue Dest = MakeAddrLValue(Addr: DestAddr, T: OriginalType);
1123 LValue Src = MakeAddrLValue(Addr: SrcAddr, T: OriginalType);
1124 EmitAggregateAssign(Dest, Src, EltTy: OriginalType);
1125 } else {
1126 // For arrays with complex element types perform element by element
1127 // copying.
1128 EmitOMPAggregateAssign(
1129 DestAddr, SrcAddr, OriginalType,
1130 CopyGen: [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
1131 // Working with the single array element, so have to remap
1132 // destination and source variables to corresponding array
1133 // elements.
1134 CodeGenFunction::OMPPrivateScope Remap(*this);
1135 Remap.addPrivate(LocalVD: DestVD, Addr: DestElement);
1136 Remap.addPrivate(LocalVD: SrcVD, Addr: SrcElement);
1137 (void)Remap.Privatize();
1138 EmitIgnoredExpr(E: Copy);
1139 });
1140 }
1141 } else {
1142 // Remap pseudo source variable to private copy.
1143 CodeGenFunction::OMPPrivateScope Remap(*this);
1144 Remap.addPrivate(LocalVD: SrcVD, Addr: SrcAddr);
1145 Remap.addPrivate(LocalVD: DestVD, Addr: DestAddr);
1146 (void)Remap.Privatize();
1147 // Emit copying of the whole variable.
1148 EmitIgnoredExpr(E: Copy);
1149 }
1150}
1151
1152bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
1153 OMPPrivateScope &PrivateScope) {
1154 if (!HaveInsertPoint())
1155 return false;
1156 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
1157 bool DeviceConstTarget = getLangOpts().OpenMPIsTargetDevice &&
1158 isOpenMPTargetExecutionDirective(DKind: EKind);
1159 bool FirstprivateIsLastprivate = false;
1160 llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
1161 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1162 for (const auto *D : C->varlist())
1163 Lastprivates.try_emplace(
1164 Key: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D)->getDecl())->getCanonicalDecl(),
1165 Args: C->getKind());
1166 }
1167 llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
1168 llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
1169 getOpenMPCaptureRegions(CaptureRegions, DKind: EKind);
1170 // Force emission of the firstprivate copy if the directive does not emit
1171 // outlined function, like omp for, omp simd, omp distribute etc.
1172 bool MustEmitFirstprivateCopy =
1173 CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
1174 for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
1175 const auto *IRef = C->varlist_begin();
1176 const auto *InitsRef = C->inits().begin();
1177 for (const Expr *IInit : C->private_copies()) {
1178 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
1179 bool ThisFirstprivateIsLastprivate =
1180 Lastprivates.count(Val: OrigVD->getCanonicalDecl()) > 0;
1181 const FieldDecl *FD = CapturedStmtInfo->lookup(VD: OrigVD);
1182 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IInit)->getDecl());
1183 if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
1184 !FD->getType()->isReferenceType() &&
1185 (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
1186 EmittedAsFirstprivate.insert(V: OrigVD->getCanonicalDecl());
1187 ++IRef;
1188 ++InitsRef;
1189 continue;
1190 }
1191 // Do not emit copy for firstprivate constant variables in target regions,
1192 // captured by reference.
1193 if (DeviceConstTarget && OrigVD->getType().isConstant(Ctx: getContext()) &&
1194 FD && FD->getType()->isReferenceType() &&
1195 (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
1196 EmittedAsFirstprivate.insert(V: OrigVD->getCanonicalDecl());
1197 ++IRef;
1198 ++InitsRef;
1199 continue;
1200 }
1201 FirstprivateIsLastprivate =
1202 FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
1203 if (EmittedAsFirstprivate.insert(V: OrigVD->getCanonicalDecl()).second) {
1204 const auto *VDInit =
1205 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *InitsRef)->getDecl());
1206 bool IsRegistered;
1207 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
1208 /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
1209 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
1210 LValue OriginalLVal;
1211 if (!FD) {
1212 // Check if the firstprivate variable is just a constant value.
1213 ConstantEmission CE = tryEmitAsConstant(RefExpr: &DRE);
1214 if (CE && !CE.isReference()) {
1215 // Constant value, no need to create a copy.
1216 ++IRef;
1217 ++InitsRef;
1218 continue;
1219 }
1220 if (CE && CE.isReference()) {
1221 OriginalLVal = CE.getReferenceLValue(CGF&: *this, RefExpr: &DRE);
1222 } else {
1223 assert(!CE && "Expected non-constant firstprivate.");
1224 OriginalLVal = EmitLValue(E: &DRE);
1225 }
1226 } else {
1227 OriginalLVal = EmitLValue(E: &DRE);
1228 }
1229 QualType Type = VD->getType();
1230 if (Type->isArrayType()) {
1231 // Emit VarDecl with copy init for arrays.
1232 // Get the address of the original variable captured in current
1233 // captured region.
1234 AutoVarEmission Emission = EmitAutoVarAlloca(var: *VD);
1235 const Expr *Init = VD->getInit();
1236 if (!isa<CXXConstructExpr>(Val: Init) || isTrivialInitializer(Init)) {
1237 // Perform simple memcpy.
1238 LValue Dest = MakeAddrLValue(Addr: Emission.getAllocatedAddress(), T: Type);
1239 EmitAggregateAssign(Dest, Src: OriginalLVal, EltTy: Type);
1240 } else {
1241 EmitOMPAggregateAssign(
1242 DestAddr: Emission.getAllocatedAddress(), SrcAddr: OriginalLVal.getAddress(), OriginalType: Type,
1243 CopyGen: [this, VDInit, Init](Address DestElement, Address SrcElement) {
1244 // Clean up any temporaries needed by the
1245 // initialization.
1246 RunCleanupsScope InitScope(*this);
1247 // Emit initialization for single element.
1248 setAddrOfLocalVar(VD: VDInit, Addr: SrcElement);
1249 EmitAnyExprToMem(E: Init, Location: DestElement,
1250 Quals: Init->getType().getQualifiers(),
1251 /*IsInitializer*/ false);
1252 LocalDeclMap.erase(Val: VDInit);
1253 });
1254 }
1255 EmitAutoVarCleanups(emission: Emission);
1256 IsRegistered =
1257 PrivateScope.addPrivate(LocalVD: OrigVD, Addr: Emission.getAllocatedAddress());
1258 } else {
1259 Address OriginalAddr = OriginalLVal.getAddress();
1260 // Emit private VarDecl with copy init.
1261 // Remap temp VDInit variable to the address of the original
1262 // variable (for proper handling of captured global variables).
1263 setAddrOfLocalVar(VD: VDInit, Addr: OriginalAddr);
1264 EmitDecl(D: *VD);
1265 LocalDeclMap.erase(Val: VDInit);
1266 Address VDAddr = GetAddrOfLocalVar(VD);
1267 if (ThisFirstprivateIsLastprivate &&
1268 Lastprivates[OrigVD->getCanonicalDecl()] ==
1269 OMPC_LASTPRIVATE_conditional) {
1270 // Create/init special variable for lastprivate conditionals.
1271 llvm::Value *V =
1272 EmitLoadOfScalar(lvalue: MakeAddrLValue(Addr: VDAddr, T: (*IRef)->getType(),
1273 Source: AlignmentSource::Decl),
1274 Loc: (*IRef)->getExprLoc());
1275 VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
1276 CGF&: *this, VD: OrigVD);
1277 EmitStoreOfScalar(value: V, lvalue: MakeAddrLValue(Addr: VDAddr, T: (*IRef)->getType(),
1278 Source: AlignmentSource::Decl));
1279 LocalDeclMap.erase(Val: VD);
1280 setAddrOfLocalVar(VD, Addr: VDAddr);
1281 }
1282 IsRegistered = PrivateScope.addPrivate(LocalVD: OrigVD, Addr: VDAddr);
1283 }
1284 assert(IsRegistered &&
1285 "firstprivate var already registered as private");
1286 // Silence the warning about unused variable.
1287 (void)IsRegistered;
1288 }
1289 ++IRef;
1290 ++InitsRef;
1291 }
1292 }
1293 return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
1294}
1295
1296void CodeGenFunction::EmitOMPPrivateClause(
1297 const OMPExecutableDirective &D,
1298 CodeGenFunction::OMPPrivateScope &PrivateScope) {
1299 if (!HaveInsertPoint())
1300 return;
1301 llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
1302 for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
1303 auto IRef = C->varlist_begin();
1304 for (const Expr *IInit : C->private_copies()) {
1305 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
1306 if (EmittedAsPrivate.insert(V: OrigVD->getCanonicalDecl()).second) {
1307 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IInit)->getDecl());
1308 EmitDecl(D: *VD);
1309 // Emit private VarDecl with copy init.
1310 bool IsRegistered =
1311 PrivateScope.addPrivate(LocalVD: OrigVD, Addr: GetAddrOfLocalVar(VD));
1312 assert(IsRegistered && "private var already registered as private");
1313 // Silence the warning about unused variable.
1314 (void)IsRegistered;
1315 }
1316 ++IRef;
1317 }
1318 }
1319}
1320
1321bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
1322 if (!HaveInsertPoint())
1323 return false;
1324 // threadprivate_var1 = master_threadprivate_var1;
1325 // operator=(threadprivate_var2, master_threadprivate_var2);
1326 // ...
1327 // __kmpc_barrier(&loc, global_tid);
1328 llvm::DenseSet<const VarDecl *> CopiedVars;
1329 llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
1330 for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
1331 auto IRef = C->varlist_begin();
1332 auto ISrcRef = C->source_exprs().begin();
1333 auto IDestRef = C->destination_exprs().begin();
1334 for (const Expr *AssignOp : C->assignment_ops()) {
1335 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
1336 QualType Type = VD->getType();
1337 if (CopiedVars.insert(V: VD->getCanonicalDecl()).second) {
1338 // Get the address of the master variable. If we are emitting code with
1339 // TLS support, the address is passed from the master as field in the
1340 // captured declaration.
1341 Address MasterAddr = Address::invalid();
1342 if (getLangOpts().OpenMPUseTLS &&
1343 getContext().getTargetInfo().isTLSSupported()) {
1344 assert(CapturedStmtInfo->lookup(VD) &&
1345 "Copyin threadprivates should have been captured!");
1346 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
1347 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
1348 MasterAddr = EmitLValue(E: &DRE).getAddress();
1349 LocalDeclMap.erase(Val: VD);
1350 } else {
1351 MasterAddr =
1352 Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(D: VD)
1353 : CGM.GetAddrOfGlobal(GD: VD),
1354 CGM.getTypes().ConvertTypeForMem(T: VD->getType()),
1355 getContext().getDeclAlign(D: VD));
1356 }
1357 // Get the address of the threadprivate variable.
1358 Address PrivateAddr = EmitLValue(E: *IRef).getAddress();
1359 if (CopiedVars.size() == 1) {
1360 // At first check if current thread is a master thread. If it is, no
1361 // need to copy data.
1362 CopyBegin = createBasicBlock(name: "copyin.not.master");
1363 CopyEnd = createBasicBlock(name: "copyin.not.master.end");
1364 // TODO: Avoid ptrtoint conversion.
1365 auto *MasterAddrInt = Builder.CreatePtrToInt(
1366 V: MasterAddr.emitRawPointer(CGF&: *this), DestTy: CGM.IntPtrTy);
1367 auto *PrivateAddrInt = Builder.CreatePtrToInt(
1368 V: PrivateAddr.emitRawPointer(CGF&: *this), DestTy: CGM.IntPtrTy);
1369 Builder.CreateCondBr(
1370 Cond: Builder.CreateICmpNE(LHS: MasterAddrInt, RHS: PrivateAddrInt), True: CopyBegin,
1371 False: CopyEnd);
1372 EmitBlock(BB: CopyBegin);
1373 }
1374 const auto *SrcVD =
1375 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ISrcRef)->getDecl());
1376 const auto *DestVD =
1377 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IDestRef)->getDecl());
1378 EmitOMPCopy(OriginalType: Type, DestAddr: PrivateAddr, SrcAddr: MasterAddr, DestVD, SrcVD, Copy: AssignOp);
1379 }
1380 ++IRef;
1381 ++ISrcRef;
1382 ++IDestRef;
1383 }
1384 }
1385 if (CopyEnd) {
1386 // Exit out of copying procedure for non-master thread.
1387 EmitBlock(BB: CopyEnd, /*IsFinished=*/true);
1388 return true;
1389 }
1390 return false;
1391}
1392
1393bool CodeGenFunction::EmitOMPLastprivateClauseInit(
1394 const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
1395 if (!HaveInsertPoint())
1396 return false;
1397 bool HasAtLeastOneLastprivate = false;
1398 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
1399 llvm::DenseSet<const VarDecl *> SIMDLCVs;
1400 if (isOpenMPSimdDirective(DKind: EKind)) {
1401 const auto *LoopDirective = cast<OMPLoopDirective>(Val: &D);
1402 for (const Expr *C : LoopDirective->counters()) {
1403 SIMDLCVs.insert(
1404 V: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: C)->getDecl())->getCanonicalDecl());
1405 }
1406 }
1407 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
1408 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1409 HasAtLeastOneLastprivate = true;
1410 if (isOpenMPTaskLoopDirective(DKind: EKind) && !getLangOpts().OpenMPSimd)
1411 break;
1412 const auto *IRef = C->varlist_begin();
1413 const auto *IDestRef = C->destination_exprs().begin();
1414 for (const Expr *IInit : C->private_copies()) {
1415 // Keep the address of the original variable for future update at the end
1416 // of the loop.
1417 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
1418 // Taskloops do not require additional initialization, it is done in
1419 // runtime support library.
1420 if (AlreadyEmittedVars.insert(V: OrigVD->getCanonicalDecl()).second) {
1421 const auto *DestVD =
1422 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IDestRef)->getDecl());
1423 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
1424 /*RefersToEnclosingVariableOrCapture=*/
1425 CapturedStmtInfo->lookup(VD: OrigVD) != nullptr,
1426 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
1427 PrivateScope.addPrivate(LocalVD: DestVD, Addr: EmitLValue(E: &DRE).getAddress());
1428 // Check if the variable is also a firstprivate: in this case IInit is
1429 // not generated. Initialization of this variable will happen in codegen
1430 // for 'firstprivate' clause.
1431 if (IInit && !SIMDLCVs.count(V: OrigVD->getCanonicalDecl())) {
1432 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IInit)->getDecl());
1433 Address VDAddr = Address::invalid();
1434 if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
1435 VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
1436 CGF&: *this, VD: OrigVD);
1437 setAddrOfLocalVar(VD, Addr: VDAddr);
1438 } else {
1439 // Emit private VarDecl with copy init.
1440 EmitDecl(D: *VD);
1441 VDAddr = GetAddrOfLocalVar(VD);
1442 }
1443 bool IsRegistered = PrivateScope.addPrivate(LocalVD: OrigVD, Addr: VDAddr);
1444 assert(IsRegistered &&
1445 "lastprivate var already registered as private");
1446 (void)IsRegistered;
1447 }
1448 }
1449 ++IRef;
1450 ++IDestRef;
1451 }
1452 }
1453 return HasAtLeastOneLastprivate;
1454}
1455
1456void CodeGenFunction::EmitOMPLastprivateClauseFinal(
1457 const OMPExecutableDirective &D, bool NoFinals,
1458 llvm::Value *IsLastIterCond) {
1459 if (!HaveInsertPoint())
1460 return;
1461 // Emit following code:
1462 // if (<IsLastIterCond>) {
1463 // orig_var1 = private_orig_var1;
1464 // ...
1465 // orig_varn = private_orig_varn;
1466 // }
1467 llvm::BasicBlock *ThenBB = nullptr;
1468 llvm::BasicBlock *DoneBB = nullptr;
1469 if (IsLastIterCond) {
1470 // Emit implicit barrier if at least one lastprivate conditional is found
1471 // and this is not a simd mode.
1472 if (!getLangOpts().OpenMPSimd &&
1473 llvm::any_of(Range: D.getClausesOfKind<OMPLastprivateClause>(),
1474 P: [](const OMPLastprivateClause *C) {
1475 return C->getKind() == OMPC_LASTPRIVATE_conditional;
1476 })) {
1477 CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: D.getBeginLoc(),
1478 Kind: OMPD_unknown,
1479 /*EmitChecks=*/false,
1480 /*ForceSimpleCall=*/true);
1481 }
1482 ThenBB = createBasicBlock(name: ".omp.lastprivate.then");
1483 DoneBB = createBasicBlock(name: ".omp.lastprivate.done");
1484 Builder.CreateCondBr(Cond: IsLastIterCond, True: ThenBB, False: DoneBB);
1485 EmitBlock(BB: ThenBB);
1486 }
1487 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
1488 llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
1489 if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(Val: &D)) {
1490 auto IC = LoopDirective->counters().begin();
1491 for (const Expr *F : LoopDirective->finals()) {
1492 const auto *D =
1493 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IC)->getDecl())->getCanonicalDecl();
1494 if (NoFinals)
1495 AlreadyEmittedVars.insert(V: D);
1496 else
1497 LoopCountersAndUpdates[D] = F;
1498 ++IC;
1499 }
1500 }
1501 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1502 auto IRef = C->varlist_begin();
1503 auto ISrcRef = C->source_exprs().begin();
1504 auto IDestRef = C->destination_exprs().begin();
1505 for (const Expr *AssignOp : C->assignment_ops()) {
1506 const auto *PrivateVD =
1507 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
1508 QualType Type = PrivateVD->getType();
1509 const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
1510 if (AlreadyEmittedVars.insert(V: CanonicalVD).second) {
1511 // If lastprivate variable is a loop control variable for loop-based
1512 // directive, update its value before copyin back to original
1513 // variable.
1514 if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(Val: CanonicalVD))
1515 EmitIgnoredExpr(E: FinalExpr);
1516 const auto *SrcVD =
1517 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ISrcRef)->getDecl());
1518 const auto *DestVD =
1519 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IDestRef)->getDecl());
1520 // Get the address of the private variable.
1521 Address PrivateAddr = GetAddrOfLocalVar(VD: PrivateVD);
1522 if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
1523 PrivateAddr = Address(
1524 Builder.CreateLoad(Addr: PrivateAddr),
1525 CGM.getTypes().ConvertTypeForMem(T: RefTy->getPointeeType()),
1526 CGM.getNaturalTypeAlignment(T: RefTy->getPointeeType()));
1527 // Store the last value to the private copy in the last iteration.
1528 if (C->getKind() == OMPC_LASTPRIVATE_conditional)
1529 CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
1530 CGF&: *this, PrivLVal: MakeAddrLValue(Addr: PrivateAddr, T: (*IRef)->getType()), VD: PrivateVD,
1531 Loc: (*IRef)->getExprLoc());
1532 // Get the address of the original variable.
1533 Address OriginalAddr = GetAddrOfLocalVar(VD: DestVD);
1534 EmitOMPCopy(OriginalType: Type, DestAddr: OriginalAddr, SrcAddr: PrivateAddr, DestVD, SrcVD, Copy: AssignOp);
1535 }
1536 ++IRef;
1537 ++ISrcRef;
1538 ++IDestRef;
1539 }
1540 if (const Expr *PostUpdate = C->getPostUpdateExpr())
1541 EmitIgnoredExpr(E: PostUpdate);
1542 }
1543 if (IsLastIterCond)
1544 EmitBlock(BB: DoneBB, /*IsFinished=*/true);
1545}
1546
1547void CodeGenFunction::EmitOMPReductionClauseInit(
1548 const OMPExecutableDirective &D,
1549 CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
1550 if (!HaveInsertPoint())
1551 return;
1552 SmallVector<const Expr *, 4> Shareds;
1553 SmallVector<const Expr *, 4> Privates;
1554 SmallVector<const Expr *, 4> ReductionOps;
1555 SmallVector<const Expr *, 4> LHSs;
1556 SmallVector<const Expr *, 4> RHSs;
1557 OMPTaskDataTy Data;
1558 SmallVector<const Expr *, 4> TaskLHSs;
1559 SmallVector<const Expr *, 4> TaskRHSs;
1560 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1561 if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
1562 continue;
1563 Shareds.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
1564 Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
1565 ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
1566 LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
1567 RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
1568 if (C->getModifier() == OMPC_REDUCTION_task) {
1569 Data.ReductionVars.append(in_start: C->privates().begin(), in_end: C->privates().end());
1570 Data.ReductionOrigs.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
1571 Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end());
1572 Data.ReductionOps.append(in_start: C->reduction_ops().begin(),
1573 in_end: C->reduction_ops().end());
1574 TaskLHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
1575 TaskRHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
1576 }
1577 }
1578 ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
1579 unsigned Count = 0;
1580 auto *ILHS = LHSs.begin();
1581 auto *IRHS = RHSs.begin();
1582 auto *IPriv = Privates.begin();
1583 for (const Expr *IRef : Shareds) {
1584 const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IPriv)->getDecl());
1585 // Emit private VarDecl with reduction init.
1586 RedCG.emitSharedOrigLValue(CGF&: *this, N: Count);
1587 RedCG.emitAggregateType(CGF&: *this, N: Count);
1588 AutoVarEmission Emission = EmitAutoVarAlloca(var: *PrivateVD);
1589 RedCG.emitInitialization(CGF&: *this, N: Count, PrivateAddr: Emission.getAllocatedAddress(),
1590 SharedAddr: RedCG.getSharedLValue(N: Count).getAddress(),
1591 DefaultInit: [&Emission](CodeGenFunction &CGF) {
1592 CGF.EmitAutoVarInit(emission: Emission);
1593 return true;
1594 });
1595 EmitAutoVarCleanups(emission: Emission);
1596 Address BaseAddr = RedCG.adjustPrivateAddress(
1597 CGF&: *this, N: Count, PrivateAddr: Emission.getAllocatedAddress());
1598 bool IsRegistered =
1599 PrivateScope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Count), Addr: BaseAddr);
1600 assert(IsRegistered && "private var already registered as private");
1601 // Silence the warning about unused variable.
1602 (void)IsRegistered;
1603
1604 const auto *LHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl());
1605 const auto *RHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl());
1606 QualType Type = PrivateVD->getType();
1607 bool isaOMPArraySectionExpr = isa<ArraySectionExpr>(Val: IRef);
1608 if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
1609 // Store the address of the original variable associated with the LHS
1610 // implicit variable.
1611 PrivateScope.addPrivate(LocalVD: LHSVD, Addr: RedCG.getSharedLValue(N: Count).getAddress());
1612 PrivateScope.addPrivate(LocalVD: RHSVD, Addr: GetAddrOfLocalVar(VD: PrivateVD));
1613 } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
1614 isa<ArraySubscriptExpr>(Val: IRef)) {
1615 // Store the address of the original variable associated with the LHS
1616 // implicit variable.
1617 PrivateScope.addPrivate(LocalVD: LHSVD, Addr: RedCG.getSharedLValue(N: Count).getAddress());
1618 PrivateScope.addPrivate(LocalVD: RHSVD,
1619 Addr: GetAddrOfLocalVar(VD: PrivateVD).withElementType(
1620 ElemTy: ConvertTypeForMem(T: RHSVD->getType())));
1621 } else {
1622 QualType Type = PrivateVD->getType();
1623 bool IsArray = getContext().getAsArrayType(T: Type) != nullptr;
1624 Address OriginalAddr = RedCG.getSharedLValue(N: Count).getAddress();
1625 // Store the address of the original variable associated with the LHS
1626 // implicit variable.
1627 if (IsArray) {
1628 OriginalAddr =
1629 OriginalAddr.withElementType(ElemTy: ConvertTypeForMem(T: LHSVD->getType()));
1630 }
1631 PrivateScope.addPrivate(LocalVD: LHSVD, Addr: OriginalAddr);
1632 PrivateScope.addPrivate(
1633 LocalVD: RHSVD, Addr: IsArray ? GetAddrOfLocalVar(VD: PrivateVD).withElementType(
1634 ElemTy: ConvertTypeForMem(T: RHSVD->getType()))
1635 : GetAddrOfLocalVar(VD: PrivateVD));
1636 }
1637 ++ILHS;
1638 ++IRHS;
1639 ++IPriv;
1640 ++Count;
1641 }
1642 if (!Data.ReductionVars.empty()) {
1643 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
1644 Data.IsReductionWithTaskMod = true;
1645 Data.IsWorksharingReduction = isOpenMPWorksharingDirective(DKind: EKind);
1646 llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
1647 CGF&: *this, Loc: D.getBeginLoc(), LHSExprs: TaskLHSs, RHSExprs: TaskRHSs, Data);
1648 const Expr *TaskRedRef = nullptr;
1649 switch (EKind) {
1650 case OMPD_parallel:
1651 TaskRedRef = cast<OMPParallelDirective>(Val: D).getTaskReductionRefExpr();
1652 break;
1653 case OMPD_for:
1654 TaskRedRef = cast<OMPForDirective>(Val: D).getTaskReductionRefExpr();
1655 break;
1656 case OMPD_sections:
1657 TaskRedRef = cast<OMPSectionsDirective>(Val: D).getTaskReductionRefExpr();
1658 break;
1659 case OMPD_parallel_for:
1660 TaskRedRef = cast<OMPParallelForDirective>(Val: D).getTaskReductionRefExpr();
1661 break;
1662 case OMPD_parallel_master:
1663 TaskRedRef =
1664 cast<OMPParallelMasterDirective>(Val: D).getTaskReductionRefExpr();
1665 break;
1666 case OMPD_parallel_sections:
1667 TaskRedRef =
1668 cast<OMPParallelSectionsDirective>(Val: D).getTaskReductionRefExpr();
1669 break;
1670 case OMPD_target_parallel:
1671 TaskRedRef =
1672 cast<OMPTargetParallelDirective>(Val: D).getTaskReductionRefExpr();
1673 break;
1674 case OMPD_target_parallel_for:
1675 TaskRedRef =
1676 cast<OMPTargetParallelForDirective>(Val: D).getTaskReductionRefExpr();
1677 break;
1678 case OMPD_distribute_parallel_for:
1679 TaskRedRef =
1680 cast<OMPDistributeParallelForDirective>(Val: D).getTaskReductionRefExpr();
1681 break;
1682 case OMPD_teams_distribute_parallel_for:
1683 TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(Val: D)
1684 .getTaskReductionRefExpr();
1685 break;
1686 case OMPD_target_teams_distribute_parallel_for:
1687 TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(Val: D)
1688 .getTaskReductionRefExpr();
1689 break;
1690 case OMPD_simd:
1691 case OMPD_for_simd:
1692 case OMPD_section:
1693 case OMPD_single:
1694 case OMPD_master:
1695 case OMPD_critical:
1696 case OMPD_parallel_for_simd:
1697 case OMPD_task:
1698 case OMPD_taskyield:
1699 case OMPD_error:
1700 case OMPD_barrier:
1701 case OMPD_taskwait:
1702 case OMPD_taskgroup:
1703 case OMPD_flush:
1704 case OMPD_depobj:
1705 case OMPD_scan:
1706 case OMPD_ordered:
1707 case OMPD_atomic:
1708 case OMPD_teams:
1709 case OMPD_target:
1710 case OMPD_cancellation_point:
1711 case OMPD_cancel:
1712 case OMPD_target_data:
1713 case OMPD_target_enter_data:
1714 case OMPD_target_exit_data:
1715 case OMPD_taskloop:
1716 case OMPD_taskloop_simd:
1717 case OMPD_master_taskloop:
1718 case OMPD_master_taskloop_simd:
1719 case OMPD_parallel_master_taskloop:
1720 case OMPD_parallel_master_taskloop_simd:
1721 case OMPD_distribute:
1722 case OMPD_target_update:
1723 case OMPD_distribute_parallel_for_simd:
1724 case OMPD_distribute_simd:
1725 case OMPD_target_parallel_for_simd:
1726 case OMPD_target_simd:
1727 case OMPD_teams_distribute:
1728 case OMPD_teams_distribute_simd:
1729 case OMPD_teams_distribute_parallel_for_simd:
1730 case OMPD_target_teams:
1731 case OMPD_target_teams_distribute:
1732 case OMPD_target_teams_distribute_parallel_for_simd:
1733 case OMPD_target_teams_distribute_simd:
1734 case OMPD_declare_target:
1735 case OMPD_end_declare_target:
1736 case OMPD_threadprivate:
1737 case OMPD_allocate:
1738 case OMPD_declare_reduction:
1739 case OMPD_declare_mapper:
1740 case OMPD_declare_simd:
1741 case OMPD_requires:
1742 case OMPD_declare_variant:
1743 case OMPD_begin_declare_variant:
1744 case OMPD_end_declare_variant:
1745 case OMPD_unknown:
1746 default:
1747 llvm_unreachable("Unexpected directive with task reductions.");
1748 }
1749
1750 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: TaskRedRef)->getDecl());
1751 EmitVarDecl(D: *VD);
1752 EmitStoreOfScalar(Value: ReductionDesc, Addr: GetAddrOfLocalVar(VD),
1753 /*Volatile=*/false, Ty: TaskRedRef->getType());
1754 }
1755}
1756
1757void CodeGenFunction::EmitOMPReductionClauseFinal(
1758 const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1759 if (!HaveInsertPoint())
1760 return;
1761 llvm::SmallVector<const Expr *, 8> Privates;
1762 llvm::SmallVector<const Expr *, 8> LHSExprs;
1763 llvm::SmallVector<const Expr *, 8> RHSExprs;
1764 llvm::SmallVector<const Expr *, 8> ReductionOps;
1765 llvm::SmallVector<bool, 8> IsPrivateVarReduction;
1766 bool HasAtLeastOneReduction = false;
1767 bool IsReductionWithTaskMod = false;
1768 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1769 // Do not emit for inscan reductions.
1770 if (C->getModifier() == OMPC_REDUCTION_inscan)
1771 continue;
1772 HasAtLeastOneReduction = true;
1773 Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
1774 LHSExprs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
1775 RHSExprs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
1776 IsPrivateVarReduction.append(in_start: C->private_var_reduction_flags().begin(),
1777 in_end: C->private_var_reduction_flags().end());
1778 ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
1779 IsReductionWithTaskMod =
1780 IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
1781 }
1782 if (HasAtLeastOneReduction) {
1783 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
1784 if (IsReductionWithTaskMod) {
1785 CGM.getOpenMPRuntime().emitTaskReductionFini(
1786 CGF&: *this, Loc: D.getBeginLoc(), IsWorksharingReduction: isOpenMPWorksharingDirective(DKind: EKind));
1787 }
1788 bool TeamsLoopCanBeParallel = false;
1789 if (auto *TTLD = dyn_cast<OMPTargetTeamsGenericLoopDirective>(Val: &D))
1790 TeamsLoopCanBeParallel = TTLD->canBeParallelFor();
1791 bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1792 isOpenMPParallelDirective(DKind: EKind) ||
1793 TeamsLoopCanBeParallel || ReductionKind == OMPD_simd;
1794 bool SimpleReduction = ReductionKind == OMPD_simd;
1795 // Emit nowait reduction if nowait clause is present or directive is a
1796 // parallel directive (it always has implicit barrier).
1797 CGM.getOpenMPRuntime().emitReduction(
1798 CGF&: *this, Loc: D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
1799 Options: {.WithNowait: WithNowait, .SimpleReduction: SimpleReduction, .IsPrivateVarReduction: IsPrivateVarReduction, .ReductionKind: ReductionKind});
1800 }
1801}
1802
1803static void emitPostUpdateForReductionClause(
1804 CodeGenFunction &CGF, const OMPExecutableDirective &D,
1805 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1806 if (!CGF.HaveInsertPoint())
1807 return;
1808 llvm::BasicBlock *DoneBB = nullptr;
1809 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1810 if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
1811 if (!DoneBB) {
1812 if (llvm::Value *Cond = CondGen(CGF)) {
1813 // If the first post-update expression is found, emit conditional
1814 // block if it was requested.
1815 llvm::BasicBlock *ThenBB = CGF.createBasicBlock(name: ".omp.reduction.pu");
1816 DoneBB = CGF.createBasicBlock(name: ".omp.reduction.pu.done");
1817 CGF.Builder.CreateCondBr(Cond, True: ThenBB, False: DoneBB);
1818 CGF.EmitBlock(BB: ThenBB);
1819 }
1820 }
1821 CGF.EmitIgnoredExpr(E: PostUpdate);
1822 }
1823 }
1824 if (DoneBB)
1825 CGF.EmitBlock(BB: DoneBB, /*IsFinished=*/true);
1826}
1827
1828namespace {
1829/// Codegen lambda for appending distribute lower and upper bounds to outlined
1830/// parallel function. This is necessary for combined constructs such as
1831/// 'distribute parallel for'
1832typedef llvm::function_ref<void(CodeGenFunction &,
1833 const OMPExecutableDirective &,
1834 llvm::SmallVectorImpl<llvm::Value *> &)>
1835 CodeGenBoundParametersTy;
1836} // anonymous namespace
1837
1838static void
1839checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
1840 const OMPExecutableDirective &S) {
1841 if (CGF.getLangOpts().OpenMP < 50)
1842 return;
1843 llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
1844 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
1845 for (const Expr *Ref : C->varlist()) {
1846 if (!Ref->getType()->isScalarType())
1847 continue;
1848 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
1849 if (!DRE)
1850 continue;
1851 PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl()));
1852 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: Ref);
1853 }
1854 }
1855 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
1856 for (const Expr *Ref : C->varlist()) {
1857 if (!Ref->getType()->isScalarType())
1858 continue;
1859 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
1860 if (!DRE)
1861 continue;
1862 PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl()));
1863 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: Ref);
1864 }
1865 }
1866 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
1867 for (const Expr *Ref : C->varlist()) {
1868 if (!Ref->getType()->isScalarType())
1869 continue;
1870 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
1871 if (!DRE)
1872 continue;
1873 PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl()));
1874 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: Ref);
1875 }
1876 }
1877 // Privates should ne analyzed since they are not captured at all.
1878 // Task reductions may be skipped - tasks are ignored.
1879 // Firstprivates do not return value but may be passed by reference - no need
1880 // to check for updated lastprivate conditional.
1881 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
1882 for (const Expr *Ref : C->varlist()) {
1883 if (!Ref->getType()->isScalarType())
1884 continue;
1885 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
1886 if (!DRE)
1887 continue;
1888 PrivateDecls.insert(V: cast<VarDecl>(Val: DRE->getDecl()));
1889 }
1890 }
1891 CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
1892 CGF, D: S, IgnoredDecls: PrivateDecls);
1893}
1894
1895static void emitCommonOMPParallelDirective(
1896 CodeGenFunction &CGF, const OMPExecutableDirective &S,
1897 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1898 const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1899 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_parallel);
1900 llvm::Value *NumThreads = nullptr;
1901 OpenMPNumThreadsClauseModifier Modifier = OMPC_NUMTHREADS_unknown;
1902 // OpenMP 6.0, 10.4: "If no severity clause is specified then the effect is as
1903 // if sev-level is fatal."
1904 OpenMPSeverityClauseKind Severity = OMPC_SEVERITY_fatal;
1905 clang::Expr *Message = nullptr;
1906 SourceLocation SeverityLoc = SourceLocation();
1907 SourceLocation MessageLoc = SourceLocation();
1908
1909 llvm::Function *OutlinedFn =
1910 CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1911 CGF, D: S, ThreadIDVar: *CS->getCapturedDecl()->param_begin(), InnermostKind,
1912 CodeGen);
1913
1914 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1915 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1916 NumThreads = CGF.EmitScalarExpr(E: NumThreadsClause->getNumThreads(),
1917 /*IgnoreResultAssign=*/true);
1918 Modifier = NumThreadsClause->getModifier();
1919 if (const auto *MessageClause = S.getSingleClause<OMPMessageClause>()) {
1920 Message = MessageClause->getMessageString();
1921 MessageLoc = MessageClause->getBeginLoc();
1922 }
1923 if (const auto *SeverityClause = S.getSingleClause<OMPSeverityClause>()) {
1924 Severity = SeverityClause->getSeverityKind();
1925 SeverityLoc = SeverityClause->getBeginLoc();
1926 }
1927 CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1928 CGF, NumThreads, Loc: NumThreadsClause->getBeginLoc(), Modifier, Severity,
1929 SeverityLoc, Message, MessageLoc);
1930 }
1931 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1932 CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1933 CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1934 CGF, ProcBind: ProcBindClause->getProcBindKind(), Loc: ProcBindClause->getBeginLoc());
1935 }
1936 const Expr *IfCond = nullptr;
1937 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1938 if (C->getNameModifier() == OMPD_unknown ||
1939 C->getNameModifier() == OMPD_parallel) {
1940 IfCond = C->getCondition();
1941 break;
1942 }
1943 }
1944
1945 OMPParallelScope Scope(CGF, S);
1946 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1947 // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1948 // lower and upper bounds with the pragma 'for' chunking mechanism.
1949 // The following lambda takes care of appending the lower and upper bound
1950 // parameters when necessary
1951 CodeGenBoundParameters(CGF, S, CapturedVars);
1952 CGF.GenerateOpenMPCapturedVars(S: *CS, CapturedVars);
1953 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, Loc: S.getBeginLoc(), OutlinedFn,
1954 CapturedVars, IfCond, NumThreads,
1955 NumThreadsModifier: Modifier, Severity, Message);
1956}
1957
1958static bool isAllocatableDecl(const VarDecl *VD) {
1959 const VarDecl *CVD = VD->getCanonicalDecl();
1960 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
1961 return false;
1962 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1963 // Use the default allocation.
1964 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
1965 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
1966 !AA->getAllocator());
1967}
1968
1969static void emitEmptyBoundParameters(CodeGenFunction &,
1970 const OMPExecutableDirective &,
1971 llvm::SmallVectorImpl<llvm::Value *> &) {}
1972
1973static void emitOMPCopyinClause(CodeGenFunction &CGF,
1974 const OMPExecutableDirective &S) {
1975 bool Copyins = CGF.EmitOMPCopyinClause(D: S);
1976 if (Copyins) {
1977 // Emit implicit barrier to synchronize threads and avoid data races on
1978 // propagation master's thread values of threadprivate variables to local
1979 // instances of that variables of all other implicit threads.
1980 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1981 CGF, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false,
1982 /*ForceSimpleCall=*/true);
1983 }
1984}
1985
1986Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
1987 CodeGenFunction &CGF, const VarDecl *VD) {
1988 CodeGenModule &CGM = CGF.CGM;
1989 auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1990
1991 if (!VD)
1992 return Address::invalid();
1993 const VarDecl *CVD = VD->getCanonicalDecl();
1994 if (!isAllocatableDecl(VD: CVD))
1995 return Address::invalid();
1996 llvm::Value *Size;
1997 CharUnits Align = CGM.getContext().getDeclAlign(D: CVD);
1998 if (CVD->getType()->isVariablyModifiedType()) {
1999 Size = CGF.getTypeSize(Ty: CVD->getType());
2000 // Align the size: ((size + align - 1) / align) * align
2001 Size = CGF.Builder.CreateNUWAdd(
2002 LHS: Size, RHS: CGM.getSize(numChars: Align - CharUnits::fromQuantity(Quantity: 1)));
2003 Size = CGF.Builder.CreateUDiv(LHS: Size, RHS: CGM.getSize(numChars: Align));
2004 Size = CGF.Builder.CreateNUWMul(LHS: Size, RHS: CGM.getSize(numChars: Align));
2005 } else {
2006 CharUnits Sz = CGM.getContext().getTypeSizeInChars(T: CVD->getType());
2007 Size = CGM.getSize(numChars: Sz.alignTo(Align));
2008 }
2009
2010 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
2011 assert(AA->getAllocator() &&
2012 "Expected allocator expression for non-default allocator.");
2013 llvm::Value *Allocator = CGF.EmitScalarExpr(E: AA->getAllocator());
2014 // According to the standard, the original allocator type is a enum (integer).
2015 // Convert to pointer type, if required.
2016 if (Allocator->getType()->isIntegerTy())
2017 Allocator = CGF.Builder.CreateIntToPtr(V: Allocator, DestTy: CGM.VoidPtrTy);
2018 else if (Allocator->getType()->isPointerTy())
2019 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(V: Allocator,
2020 DestTy: CGM.VoidPtrTy);
2021
2022 llvm::Value *Addr = OMPBuilder.createOMPAlloc(
2023 Loc: CGF.Builder, Size, Allocator,
2024 Name: getNameWithSeparators(Parts: {CVD->getName(), ".void.addr"}, FirstSeparator: ".", Separator: "."));
2025 llvm::CallInst *FreeCI =
2026 OMPBuilder.createOMPFree(Loc: CGF.Builder, Addr, Allocator);
2027
2028 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(Kind: NormalAndEHCleanup, A: FreeCI);
2029 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2030 V: Addr,
2031 DestTy: CGF.ConvertTypeForMem(T: CGM.getContext().getPointerType(T: CVD->getType())),
2032 Name: getNameWithSeparators(Parts: {CVD->getName(), ".addr"}, FirstSeparator: ".", Separator: "."));
2033 return Address(Addr, CGF.ConvertTypeForMem(T: CVD->getType()), Align);
2034}
2035
2036Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
2037 CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
2038 SourceLocation Loc) {
2039 CodeGenModule &CGM = CGF.CGM;
2040 if (CGM.getLangOpts().OpenMPUseTLS &&
2041 CGM.getContext().getTargetInfo().isTLSSupported())
2042 return VDAddr;
2043
2044 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
2045
2046 llvm::Type *VarTy = VDAddr.getElementType();
2047 llvm::Value *Data =
2048 CGF.Builder.CreatePointerCast(V: VDAddr.emitRawPointer(CGF), DestTy: CGM.Int8PtrTy);
2049 llvm::ConstantInt *Size = CGM.getSize(numChars: CGM.GetTargetTypeStoreSize(Ty: VarTy));
2050 std::string Suffix = getNameWithSeparators(Parts: {"cache", ""});
2051 llvm::Twine CacheName = Twine(CGM.getMangledName(GD: VD)).concat(Suffix);
2052
2053 llvm::CallInst *ThreadPrivateCacheCall =
2054 OMPBuilder.createCachedThreadPrivate(Loc: CGF.Builder, Pointer: Data, Size, Name: CacheName);
2055
2056 return Address(ThreadPrivateCacheCall, CGM.Int8Ty, VDAddr.getAlignment());
2057}
2058
2059std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
2060 ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
2061 SmallString<128> Buffer;
2062 llvm::raw_svector_ostream OS(Buffer);
2063 StringRef Sep = FirstSeparator;
2064 for (StringRef Part : Parts) {
2065 OS << Sep << Part;
2066 Sep = Separator;
2067 }
2068 return OS.str().str();
2069}
2070
2071void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
2072 CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
2073 InsertPointTy CodeGenIP, Twine RegionName) {
2074 CGBuilderTy &Builder = CGF.Builder;
2075 Builder.restoreIP(IP: CodeGenIP);
2076 llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
2077 Suffix: "." + RegionName + ".after");
2078
2079 {
2080 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
2081 CGF.EmitStmt(S: RegionBodyStmt);
2082 }
2083
2084 if (Builder.saveIP().isSet())
2085 Builder.CreateBr(Dest: FiniBB);
2086}
2087
2088void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
2089 CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
2090 InsertPointTy CodeGenIP, Twine RegionName) {
2091 CGBuilderTy &Builder = CGF.Builder;
2092 Builder.restoreIP(IP: CodeGenIP);
2093 llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
2094 Suffix: "." + RegionName + ".after");
2095
2096 {
2097 OMPBuilderCBHelpers::OutlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
2098 CGF.EmitStmt(S: RegionBodyStmt);
2099 }
2100
2101 if (Builder.saveIP().isSet())
2102 Builder.CreateBr(Dest: FiniBB);
2103}
2104
2105void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
2106 if (CGM.getLangOpts().OpenMPIRBuilder) {
2107 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
2108 // Check if we have any if clause associated with the directive.
2109 llvm::Value *IfCond = nullptr;
2110 if (const auto *C = S.getSingleClause<OMPIfClause>())
2111 IfCond = EmitScalarExpr(E: C->getCondition(),
2112 /*IgnoreResultAssign=*/true);
2113
2114 llvm::Value *NumThreads = nullptr;
2115 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
2116 NumThreads = EmitScalarExpr(E: NumThreadsClause->getNumThreads(),
2117 /*IgnoreResultAssign=*/true);
2118
2119 ProcBindKind ProcBind = OMP_PROC_BIND_default;
2120 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
2121 ProcBind = ProcBindClause->getProcBindKind();
2122
2123 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
2124
2125 // The cleanup callback that finalizes all variables at the given location,
2126 // thus calls destructors etc.
2127 auto FiniCB = [this](InsertPointTy IP) {
2128 OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
2129 return llvm::Error::success();
2130 };
2131
2132 // Privatization callback that performs appropriate action for
2133 // shared/private/firstprivate/lastprivate/copyin/... variables.
2134 //
2135 // TODO: This defaults to shared right now.
2136 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
2137 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
2138 // The next line is appropriate only for variables (Val) with the
2139 // data-sharing attribute "shared".
2140 ReplVal = &Val;
2141
2142 return CodeGenIP;
2143 };
2144
2145 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_parallel);
2146 const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();
2147
2148 auto BodyGenCB = [&, this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
2149 ArrayRef<llvm::BasicBlock *> DeallocBlocks) {
2150 OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
2151 CGF&: *this, RegionBodyStmt: ParallelRegionBodyStmt, AllocaIP: AllocIP, CodeGenIP, RegionName: "parallel");
2152 return llvm::Error::success();
2153 };
2154
2155 CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
2156 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
2157 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
2158 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
2159 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
2160 cantFail(ValOrErr: OMPBuilder.createParallel(
2161 Loc: Builder, AllocaIP, /*DeallocBlocks=*/{}, BodyGenCB, PrivCB, FiniCB,
2162 IfCondition: IfCond, NumThreads, ProcBind, IsCancellable: S.hasCancel()));
2163 Builder.restoreIP(IP: AfterIP);
2164 return;
2165 }
2166
2167 // Emit parallel region as a standalone region.
2168 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2169 Action.Enter(CGF);
2170 OMPPrivateScope PrivateScope(CGF);
2171 emitOMPCopyinClause(CGF, S);
2172 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
2173 CGF.EmitOMPPrivateClause(D: S, PrivateScope);
2174 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
2175 (void)PrivateScope.Privatize();
2176 CGF.EmitStmt(S: S.getCapturedStmt(RegionKind: OMPD_parallel)->getCapturedStmt());
2177 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
2178 };
2179 {
2180 auto LPCRegion =
2181 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
2182 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_parallel, CodeGen,
2183 CodeGenBoundParameters: emitEmptyBoundParameters);
2184 emitPostUpdateForReductionClause(CGF&: *this, D: S,
2185 CondGen: [](CodeGenFunction &) { return nullptr; });
2186 }
2187 // Check for outer lastprivate conditional update.
2188 checkForLastprivateConditionalUpdate(CGF&: *this, S);
2189}
2190
2191void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) {
2192 EmitStmt(S: S.getIfStmt());
2193}
2194
2195namespace {
2196/// RAII to handle scopes for loop transformation directives.
2197class OMPTransformDirectiveScopeRAII {
2198 OMPLoopScope *Scope = nullptr;
2199 CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr;
2200 CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;
2201
2202 OMPTransformDirectiveScopeRAII(const OMPTransformDirectiveScopeRAII &) =
2203 delete;
2204 OMPTransformDirectiveScopeRAII &
2205 operator=(const OMPTransformDirectiveScopeRAII &) = delete;
2206
2207public:
2208 OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
2209 if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(Val: S)) {
2210 Scope = new OMPLoopScope(CGF, *Dir);
2211 CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
2212 CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
2213 } else if (const auto *Dir =
2214 dyn_cast<OMPCanonicalLoopSequenceTransformationDirective>(
2215 Val: S)) {
2216 // For simplicity we reuse the loop scope similarly to what we do with
2217 // OMPCanonicalLoopNestTransformationDirective do by being a subclass
2218 // of OMPLoopBasedDirective.
2219 Scope = new OMPLoopScope(CGF, *Dir);
2220 CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
2221 CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
2222 }
2223 }
2224 ~OMPTransformDirectiveScopeRAII() {
2225 if (!Scope)
2226 return;
2227 delete CapInfoRAII;
2228 delete CGSI;
2229 delete Scope;
2230 }
2231};
2232} // namespace
2233
2234static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
2235 int MaxLevel, int Level = 0) {
2236 assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
2237 const Stmt *SimplifiedS = S->IgnoreContainers();
2238 if (const auto *CS = dyn_cast<CompoundStmt>(Val: SimplifiedS)) {
2239 PrettyStackTraceLoc CrashInfo(
2240 CGF.getContext().getSourceManager(), CS->getLBracLoc(),
2241 "LLVM IR generation of compound statement ('{}')");
2242
2243 // Keep track of the current cleanup stack depth, including debug scopes.
2244 CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
2245 for (const Stmt *CurStmt : CS->body())
2246 emitBody(CGF, S: CurStmt, NextLoop, MaxLevel, Level);
2247 return;
2248 }
2249 if (SimplifiedS == NextLoop) {
2250 if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(Val: SimplifiedS))
2251 SimplifiedS = Dir->getTransformedStmt();
2252 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(Val: SimplifiedS))
2253 SimplifiedS = CanonLoop->getLoopStmt();
2254 if (const auto *For = dyn_cast<ForStmt>(Val: SimplifiedS)) {
2255 S = For->getBody();
2256 } else {
2257 assert(isa<CXXForRangeStmt>(SimplifiedS) &&
2258 "Expected canonical for loop or range-based for loop.");
2259 const auto *CXXFor = cast<CXXForRangeStmt>(Val: SimplifiedS);
2260 CGF.EmitStmt(S: CXXFor->getLoopVarStmt());
2261 S = CXXFor->getBody();
2262 }
2263 if (Level + 1 < MaxLevel) {
2264 NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
2265 CurStmt: S, /*TryImperfectlyNestedLoops=*/true);
2266 emitBody(CGF, S, NextLoop, MaxLevel, Level: Level + 1);
2267 return;
2268 }
2269 }
2270 CGF.EmitStmt(S);
2271}
2272
2273void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
2274 JumpDest LoopExit) {
2275 RunCleanupsScope BodyScope(*this);
2276 // Update counters values on current iteration.
2277 for (const Expr *UE : D.updates())
2278 EmitIgnoredExpr(E: UE);
2279 // Update the linear variables.
2280 // In distribute directives only loop counters may be marked as linear, no
2281 // need to generate the code for them.
2282 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
2283 if (!isOpenMPDistributeDirective(DKind: EKind)) {
2284 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2285 for (const Expr *UE : C->updates())
2286 EmitIgnoredExpr(E: UE);
2287 }
2288 }
2289
2290 // On a continue in the body, jump to the end.
2291 JumpDest Continue = getJumpDestInCurrentScope(Name: "omp.body.continue");
2292 BreakContinueStack.push_back(Elt: BreakContinue(D, LoopExit, Continue));
2293 for (const Expr *E : D.finals_conditions()) {
2294 if (!E)
2295 continue;
2296 // Check that loop counter in non-rectangular nest fits into the iteration
2297 // space.
2298 llvm::BasicBlock *NextBB = createBasicBlock(name: "omp.body.next");
2299 EmitBranchOnBoolExpr(Cond: E, TrueBlock: NextBB, FalseBlock: Continue.getBlock(),
2300 TrueCount: getProfileCount(S: D.getBody()));
2301 EmitBlock(BB: NextBB);
2302 }
2303
2304 OMPPrivateScope InscanScope(*this);
2305 EmitOMPReductionClauseInit(D, PrivateScope&: InscanScope, /*ForInscan=*/true);
2306 bool IsInscanRegion = InscanScope.Privatize();
2307 if (IsInscanRegion) {
2308 // Need to remember the block before and after scan directive
2309 // to dispatch them correctly depending on the clause used in
2310 // this directive, inclusive or exclusive. For inclusive scan the natural
2311 // order of the blocks is used, for exclusive clause the blocks must be
2312 // executed in reverse order.
2313 OMPBeforeScanBlock = createBasicBlock(name: "omp.before.scan.bb");
2314 OMPAfterScanBlock = createBasicBlock(name: "omp.after.scan.bb");
2315 // No need to allocate inscan exit block, in simd mode it is selected in the
2316 // codegen for the scan directive.
2317 if (EKind != OMPD_simd && !getLangOpts().OpenMPSimd)
2318 OMPScanExitBlock = createBasicBlock(name: "omp.exit.inscan.bb");
2319 OMPScanDispatch = createBasicBlock(name: "omp.inscan.dispatch");
2320 EmitBranch(Block: OMPScanDispatch);
2321 EmitBlock(BB: OMPBeforeScanBlock);
2322 }
2323
2324 // Emit loop variables for C++ range loops.
2325 const Stmt *Body =
2326 D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
2327 // Emit loop body.
2328 emitBody(CGF&: *this, S: Body,
2329 NextLoop: OMPLoopBasedDirective::tryToFindNextInnerLoop(
2330 CurStmt: Body, /*TryImperfectlyNestedLoops=*/true),
2331 MaxLevel: D.getLoopsNumber());
2332
2333 // Jump to the dispatcher at the end of the loop body.
2334 if (IsInscanRegion)
2335 EmitBranch(Block: OMPScanExitBlock);
2336
2337 // The end (updates/cleanups).
2338 EmitBlock(BB: Continue.getBlock());
2339 BreakContinueStack.pop_back();
2340}
2341
2342using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;
2343
2344/// Emit a captured statement and return the function as well as its captured
2345/// closure context.
2346static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF,
2347 const CapturedStmt *S) {
2348 LValue CapStruct = ParentCGF.InitCapturedStruct(S: *S);
2349 CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true);
2350 std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI =
2351 std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(args: *S);
2352 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get());
2353 llvm::Function *F = CGF.GenerateCapturedStmtFunction(S: *S);
2354
2355 return {F, CapStruct.getPointer(CGF&: ParentCGF)};
2356}
2357
2358/// Emit a call to a previously captured closure.
2359static llvm::CallInst *
2360emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap,
2361 llvm::ArrayRef<llvm::Value *> Args) {
2362 // Append the closure context to the argument.
2363 SmallVector<llvm::Value *> EffectiveArgs;
2364 EffectiveArgs.reserve(N: Args.size() + 1);
2365 llvm::append_range(C&: EffectiveArgs, R&: Args);
2366 EffectiveArgs.push_back(Elt: Cap.second);
2367
2368 return ParentCGF.Builder.CreateCall(Callee: Cap.first, Args: EffectiveArgs);
2369}
2370
2371llvm::CanonicalLoopInfo *
2372CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
2373 assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");
2374
2375 // The caller is processing the loop-associated directive processing the \p
2376 // Depth loops nested in \p S. Put the previous pending loop-associated
2377 // directive to the stack. If the current loop-associated directive is a loop
2378 // transformation directive, it will push its generated loops onto the stack
2379 // such that together with the loops left here they form the combined loop
2380 // nest for the parent loop-associated directive.
2381 int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth;
2382 ExpectedOMPLoopDepth = Depth;
2383
2384 EmitStmt(S);
2385 assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");
2386
2387 // The last added loop is the outermost one.
2388 llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back();
2389
2390 // Pop the \p Depth loops requested by the call from that stack and restore
2391 // the previous context.
2392 OMPLoopNestStack.pop_back_n(NumItems: Depth);
2393 ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth;
2394
2395 return Result;
2396}
2397
2398void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
2399 const Stmt *SyntacticalLoop = S->getLoopStmt();
2400 if (!getLangOpts().OpenMPIRBuilder) {
2401 // Ignore if OpenMPIRBuilder is not enabled.
2402 EmitStmt(S: SyntacticalLoop);
2403 return;
2404 }
2405
2406 LexicalScope ForScope(*this, S->getSourceRange());
2407
2408 // Emit init statements. The Distance/LoopVar funcs may reference variable
2409 // declarations they contain.
2410 const Stmt *BodyStmt;
2411 if (const auto *For = dyn_cast<ForStmt>(Val: SyntacticalLoop)) {
2412 if (const Stmt *InitStmt = For->getInit())
2413 EmitStmt(S: InitStmt);
2414 BodyStmt = For->getBody();
2415 } else if (const auto *RangeFor =
2416 dyn_cast<CXXForRangeStmt>(Val: SyntacticalLoop)) {
2417 if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt())
2418 EmitStmt(S: RangeStmt);
2419 if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt())
2420 EmitStmt(S: BeginStmt);
2421 if (const DeclStmt *EndStmt = RangeFor->getEndStmt())
2422 EmitStmt(S: EndStmt);
2423 if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt())
2424 EmitStmt(S: LoopVarStmt);
2425 BodyStmt = RangeFor->getBody();
2426 } else
2427 llvm_unreachable("Expected for-stmt or range-based for-stmt");
2428
2429 // Emit closure for later use. By-value captures will be captured here.
2430 const CapturedStmt *DistanceFunc = S->getDistanceFunc();
2431 EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(ParentCGF&: *this, S: DistanceFunc);
2432 const CapturedStmt *LoopVarFunc = S->getLoopVarFunc();
2433 EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(ParentCGF&: *this, S: LoopVarFunc);
2434
2435 // Call the distance function to get the number of iterations of the loop to
2436 // come.
2437 QualType LogicalTy = DistanceFunc->getCapturedDecl()
2438 ->getParam(i: 0)
2439 ->getType()
2440 .getNonReferenceType();
2441 RawAddress CountAddr = CreateMemTemp(T: LogicalTy, Name: ".count.addr");
2442 emitCapturedStmtCall(ParentCGF&: *this, Cap: DistanceClosure, Args: {CountAddr.getPointer()});
2443 llvm::Value *DistVal = Builder.CreateLoad(Addr: CountAddr, Name: ".count");
2444
2445 // Emit the loop structure.
2446 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
2447 auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
2448 llvm::Value *IndVar) {
2449 Builder.restoreIP(IP: CodeGenIP);
2450
2451 // Emit the loop body: Convert the logical iteration number to the loop
2452 // variable and emit the body.
2453 const DeclRefExpr *LoopVarRef = S->getLoopVarRef();
2454 LValue LCVal = EmitLValue(E: LoopVarRef);
2455 Address LoopVarAddress = LCVal.getAddress();
2456 emitCapturedStmtCall(ParentCGF&: *this, Cap: LoopVarClosure,
2457 Args: {LoopVarAddress.emitRawPointer(CGF&: *this), IndVar});
2458
2459 RunCleanupsScope BodyScope(*this);
2460 EmitStmt(S: BodyStmt);
2461 return llvm::Error::success();
2462 };
2463
2464 llvm::CanonicalLoopInfo *CL =
2465 cantFail(ValOrErr: OMPBuilder.createCanonicalLoop(Loc: Builder, BodyGenCB: BodyGen, TripCount: DistVal));
2466
2467 // Finish up the loop.
2468 Builder.restoreIP(IP: CL->getAfterIP());
2469 ForScope.ForceCleanup();
2470
2471 // Remember the CanonicalLoopInfo for parent AST nodes consuming it.
2472 OMPLoopNestStack.push_back(Elt: CL);
2473}
2474
2475void CodeGenFunction::EmitOMPInnerLoop(
2476 const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
2477 const Expr *IncExpr,
2478 const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
2479 const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
2480 auto LoopExit = getJumpDestInCurrentScope(Name: "omp.inner.for.end");
2481
2482 // Start the loop with a block that tests the condition.
2483 auto CondBlock = createBasicBlock(name: "omp.inner.for.cond");
2484 EmitBlock(BB: CondBlock);
2485 const SourceRange R = S.getSourceRange();
2486
2487 // If attributes are attached, push to the basic block with them.
2488 const auto &OMPED = cast<OMPExecutableDirective>(Val: S);
2489 const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
2490 const Stmt *SS = ICS->getCapturedStmt();
2491 const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(Val: SS);
2492 OMPLoopNestStack.clear();
2493 if (AS)
2494 LoopStack.push(Header: CondBlock, Ctx&: CGM.getContext(), CGOpts: CGM.getCodeGenOpts(),
2495 Attrs: AS->getAttrs(), StartLoc: SourceLocToDebugLoc(Location: R.getBegin()),
2496 EndLoc: SourceLocToDebugLoc(Location: R.getEnd()));
2497 else
2498 LoopStack.push(Header: CondBlock, StartLoc: SourceLocToDebugLoc(Location: R.getBegin()),
2499 EndLoc: SourceLocToDebugLoc(Location: R.getEnd()));
2500
2501 // If there are any cleanups between here and the loop-exit scope,
2502 // create a block to stage a loop exit along.
2503 llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2504 if (RequiresCleanup)
2505 ExitBlock = createBasicBlock(name: "omp.inner.for.cond.cleanup");
2506
2507 llvm::BasicBlock *LoopBody = createBasicBlock(name: "omp.inner.for.body");
2508
2509 // Emit condition.
2510 EmitBranchOnBoolExpr(Cond: LoopCond, TrueBlock: LoopBody, FalseBlock: ExitBlock, TrueCount: getProfileCount(S: &S));
2511 if (ExitBlock != LoopExit.getBlock()) {
2512 EmitBlock(BB: ExitBlock);
2513 EmitBranchThroughCleanup(Dest: LoopExit);
2514 }
2515
2516 EmitBlock(BB: LoopBody);
2517 incrementProfileCounter(S: &S);
2518
2519 // Create a block for the increment.
2520 JumpDest Continue = getJumpDestInCurrentScope(Name: "omp.inner.for.inc");
2521 BreakContinueStack.push_back(Elt: BreakContinue(S, LoopExit, Continue));
2522
2523 BodyGen(*this);
2524
2525 // Emit "IV = IV + 1" and a back-edge to the condition block.
2526 EmitBlock(BB: Continue.getBlock());
2527 EmitIgnoredExpr(E: IncExpr);
2528 PostIncGen(*this);
2529 BreakContinueStack.pop_back();
2530 EmitBranch(Block: CondBlock);
2531 LoopStack.pop();
2532 // Emit the fall-through block.
2533 EmitBlock(BB: LoopExit.getBlock());
2534}
2535
2536bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
2537 if (!HaveInsertPoint())
2538 return false;
2539 // Emit inits for the linear variables.
2540 bool HasLinears = false;
2541 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2542 for (const Expr *Init : C->inits()) {
2543 HasLinears = true;
2544 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: Init)->getDecl());
2545 if (const auto *Ref =
2546 dyn_cast<DeclRefExpr>(Val: VD->getInit()->IgnoreImpCasts())) {
2547 AutoVarEmission Emission = EmitAutoVarAlloca(var: *VD);
2548 const auto *OrigVD = cast<VarDecl>(Val: Ref->getDecl());
2549 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
2550 CapturedStmtInfo->lookup(VD: OrigVD) != nullptr,
2551 VD->getInit()->getType(), VK_LValue,
2552 VD->getInit()->getExprLoc());
2553 EmitExprAsInit(
2554 init: &DRE, D: VD,
2555 lvalue: MakeAddrLValue(Addr: Emission.getAllocatedAddress(), T: VD->getType()),
2556 /*capturedByInit=*/false);
2557 EmitAutoVarCleanups(emission: Emission);
2558 } else {
2559 EmitVarDecl(D: *VD);
2560 }
2561 }
2562 // Emit the linear steps for the linear clauses.
2563 // If a step is not constant, it is pre-calculated before the loop.
2564 if (const auto *CS = cast_or_null<BinaryOperator>(Val: C->getCalcStep()))
2565 if (const auto *SaveRef = cast<DeclRefExpr>(Val: CS->getLHS())) {
2566 EmitVarDecl(D: *cast<VarDecl>(Val: SaveRef->getDecl()));
2567 // Emit calculation of the linear step.
2568 EmitIgnoredExpr(E: CS);
2569 }
2570 }
2571 return HasLinears;
2572}
2573
2574void CodeGenFunction::EmitOMPLinearClauseFinal(
2575 const OMPLoopDirective &D,
2576 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2577 if (!HaveInsertPoint())
2578 return;
2579 llvm::BasicBlock *DoneBB = nullptr;
2580 // Emit the final values of the linear variables.
2581 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2582 auto IC = C->varlist_begin();
2583 for (const Expr *F : C->finals()) {
2584 if (!DoneBB) {
2585 if (llvm::Value *Cond = CondGen(*this)) {
2586 // If the first post-update expression is found, emit conditional
2587 // block if it was requested.
2588 llvm::BasicBlock *ThenBB = createBasicBlock(name: ".omp.linear.pu");
2589 DoneBB = createBasicBlock(name: ".omp.linear.pu.done");
2590 Builder.CreateCondBr(Cond, True: ThenBB, False: DoneBB);
2591 EmitBlock(BB: ThenBB);
2592 }
2593 }
2594 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IC)->getDecl());
2595 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
2596 CapturedStmtInfo->lookup(VD: OrigVD) != nullptr,
2597 (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
2598 Address OrigAddr = EmitLValue(E: &DRE).getAddress();
2599 CodeGenFunction::OMPPrivateScope VarScope(*this);
2600 VarScope.addPrivate(LocalVD: OrigVD, Addr: OrigAddr);
2601 (void)VarScope.Privatize();
2602 EmitIgnoredExpr(E: F);
2603 ++IC;
2604 }
2605 if (const Expr *PostUpdate = C->getPostUpdateExpr())
2606 EmitIgnoredExpr(E: PostUpdate);
2607 }
2608 if (DoneBB)
2609 EmitBlock(BB: DoneBB, /*IsFinished=*/true);
2610}
2611
2612static void emitAlignedClause(CodeGenFunction &CGF,
2613 const OMPExecutableDirective &D) {
2614 if (!CGF.HaveInsertPoint())
2615 return;
2616 for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
2617 llvm::APInt ClauseAlignment(64, 0);
2618 if (const Expr *AlignmentExpr = Clause->getAlignment()) {
2619 auto *AlignmentCI =
2620 cast<llvm::ConstantInt>(Val: CGF.EmitScalarExpr(E: AlignmentExpr));
2621 ClauseAlignment = AlignmentCI->getValue();
2622 }
2623 for (const Expr *E : Clause->varlist()) {
2624 llvm::APInt Alignment(ClauseAlignment);
2625 if (Alignment == 0) {
2626 // OpenMP [2.8.1, Description]
2627 // If no optional parameter is specified, implementation-defined default
2628 // alignments for SIMD instructions on the target platforms are assumed.
2629 Alignment =
2630 CGF.getContext()
2631 .toCharUnitsFromBits(BitSize: CGF.getContext().getOpenMPDefaultSimdAlign(
2632 T: E->getType()->getPointeeType()))
2633 .getQuantity();
2634 }
2635 assert((Alignment == 0 || Alignment.isPowerOf2()) &&
2636 "alignment is not power of 2");
2637 if (Alignment != 0) {
2638 llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
2639 CGF.emitAlignmentAssumption(
2640 PtrValue, E, /*No second loc needed*/ AssumptionLoc: SourceLocation(),
2641 Alignment: llvm::ConstantInt::get(Context&: CGF.getLLVMContext(), V: Alignment));
2642 }
2643 }
2644 }
2645}
2646
2647void CodeGenFunction::EmitOMPPrivateLoopCounters(
2648 const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
2649 if (!HaveInsertPoint())
2650 return;
2651 auto I = S.private_counters().begin();
2652 for (const Expr *E : S.counters()) {
2653 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
2654 const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *I)->getDecl());
2655 // Emit var without initialization.
2656 AutoVarEmission VarEmission = EmitAutoVarAlloca(var: *PrivateVD);
2657 EmitAutoVarCleanups(emission: VarEmission);
2658 LocalDeclMap.erase(Val: PrivateVD);
2659 (void)LoopScope.addPrivate(LocalVD: VD, Addr: VarEmission.getAllocatedAddress());
2660 if (LocalDeclMap.count(Val: VD) || CapturedStmtInfo->lookup(VD) ||
2661 VD->hasGlobalStorage()) {
2662 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
2663 LocalDeclMap.count(Val: VD) || CapturedStmtInfo->lookup(VD),
2664 E->getType(), VK_LValue, E->getExprLoc());
2665 (void)LoopScope.addPrivate(LocalVD: PrivateVD, Addr: EmitLValue(E: &DRE).getAddress());
2666 } else {
2667 (void)LoopScope.addPrivate(LocalVD: PrivateVD, Addr: VarEmission.getAllocatedAddress());
2668 }
2669 ++I;
2670 }
2671 // Privatize extra loop counters used in loops for ordered(n) clauses.
2672 for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
2673 if (!C->getNumForLoops())
2674 continue;
2675 for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size();
2676 I < E; ++I) {
2677 const auto *DRE = cast<DeclRefExpr>(Val: C->getLoopCounter(NumLoop: I));
2678 const auto *VD = cast<VarDecl>(Val: DRE->getDecl());
2679 // Override only those variables that can be captured to avoid re-emission
2680 // of the variables declared within the loops.
2681 if (DRE->refersToEnclosingVariableOrCapture()) {
2682 (void)LoopScope.addPrivate(
2683 LocalVD: VD, Addr: CreateMemTemp(T: DRE->getType(), Name: VD->getName()));
2684 }
2685 }
2686 }
2687}
2688
2689static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
2690 const Expr *Cond, llvm::BasicBlock *TrueBlock,
2691 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
2692 if (!CGF.HaveInsertPoint())
2693 return;
2694 {
2695 CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
2696 CGF.EmitOMPPrivateLoopCounters(S, LoopScope&: PreCondScope);
2697 (void)PreCondScope.Privatize();
2698 // Get initial values of real counters.
2699 for (const Expr *I : S.inits()) {
2700 CGF.EmitIgnoredExpr(E: I);
2701 }
2702 }
2703 // Create temp loop control variables with their init values to support
2704 // non-rectangular loops.
2705 CodeGenFunction::OMPMapVars PreCondVars;
2706 for (const Expr *E : S.dependent_counters()) {
2707 if (!E)
2708 continue;
2709 assert(!E->getType().getNonReferenceType()->isRecordType() &&
2710 "dependent counter must not be an iterator.");
2711 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
2712 Address CounterAddr =
2713 CGF.CreateMemTemp(T: VD->getType().getNonReferenceType());
2714 (void)PreCondVars.setVarAddr(CGF, LocalVD: VD, TempAddr: CounterAddr);
2715 }
2716 (void)PreCondVars.apply(CGF);
2717 for (const Expr *E : S.dependent_inits()) {
2718 if (!E)
2719 continue;
2720 CGF.EmitIgnoredExpr(E);
2721 }
2722 // Check that loop is executed at least one time.
2723 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
2724 PreCondVars.restore(CGF);
2725}
2726
2727void CodeGenFunction::EmitOMPLinearClause(
2728 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
2729 if (!HaveInsertPoint())
2730 return;
2731 llvm::DenseSet<const VarDecl *> SIMDLCVs;
2732 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
2733 if (isOpenMPSimdDirective(DKind: EKind)) {
2734 const auto *LoopDirective = cast<OMPLoopDirective>(Val: &D);
2735 for (const Expr *C : LoopDirective->counters()) {
2736 SIMDLCVs.insert(
2737 V: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: C)->getDecl())->getCanonicalDecl());
2738 }
2739 }
2740 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2741 auto CurPrivate = C->privates().begin();
2742 for (const Expr *E : C->varlist()) {
2743 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
2744 const auto *PrivateVD =
2745 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *CurPrivate)->getDecl());
2746 if (!SIMDLCVs.count(V: VD->getCanonicalDecl())) {
2747 // Emit private VarDecl with copy init.
2748 EmitVarDecl(D: *PrivateVD);
2749 bool IsRegistered =
2750 PrivateScope.addPrivate(LocalVD: VD, Addr: GetAddrOfLocalVar(VD: PrivateVD));
2751 assert(IsRegistered && "linear var already registered as private");
2752 // Silence the warning about unused variable.
2753 (void)IsRegistered;
2754 } else {
2755 EmitVarDecl(D: *PrivateVD);
2756 }
2757 ++CurPrivate;
2758 }
2759 }
2760}
2761
2762static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
2763 const OMPExecutableDirective &D) {
2764 if (!CGF.HaveInsertPoint())
2765 return;
2766 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
2767 RValue Len = CGF.EmitAnyExpr(E: C->getSimdlen(), aggSlot: AggValueSlot::ignored(),
2768 /*ignoreResult=*/true);
2769 auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal());
2770 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2771 // In presence of finite 'safelen', it may be unsafe to mark all
2772 // the memory instructions parallel, because loop-carried
2773 // dependences of 'safelen' iterations are possible.
2774 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
2775 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
2776 RValue Len = CGF.EmitAnyExpr(E: C->getSafelen(), aggSlot: AggValueSlot::ignored(),
2777 /*ignoreResult=*/true);
2778 auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal());
2779 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2780 // In presence of finite 'safelen', it may be unsafe to mark all
2781 // the memory instructions parallel, because loop-carried
2782 // dependences of 'safelen' iterations are possible.
2783 CGF.LoopStack.setParallel(/*Enable=*/false);
2784 }
2785}
2786
2787// Check for the presence of an `OMPOrderedDirective`,
2788// i.e., `ordered` in `#pragma omp ordered simd`.
2789//
2790// Consider the following source code:
2791// ```
2792// __attribute__((noinline)) void omp_simd_loop(float X[ARRAY_SIZE][ARRAY_SIZE])
2793// {
2794// for (int r = 1; r < ARRAY_SIZE; ++r) {
2795// for (int c = 1; c < ARRAY_SIZE; ++c) {
2796// #pragma omp simd
2797// for (int k = 2; k < ARRAY_SIZE; ++k) {
2798// #pragma omp ordered simd
2799// X[r][k] = X[r][k - 2] + sinf((float)(r / c));
2800// }
2801// }
2802// }
2803// }
2804// ```
2805//
2806// Suppose we are in `CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective
2807// &D)`. By examining `D.dump()` we have the following AST containing
2808// `OMPOrderedDirective`:
2809//
2810// ```
2811// OMPSimdDirective 0x1c32950
2812// `-CapturedStmt 0x1c32028
2813// |-CapturedDecl 0x1c310e8
2814// | |-ForStmt 0x1c31e30
2815// | | |-DeclStmt 0x1c31298
2816// | | | `-VarDecl 0x1c31208 used k 'int' cinit
2817// | | | `-IntegerLiteral 0x1c31278 'int' 2
2818// | | |-<<<NULL>>>
2819// | | |-BinaryOperator 0x1c31308 'int' '<'
2820// | | | |-ImplicitCastExpr 0x1c312f0 'int' <LValueToRValue>
2821// | | | | `-DeclRefExpr 0x1c312b0 'int' lvalue Var 0x1c31208 'k' 'int'
2822// | | | `-IntegerLiteral 0x1c312d0 'int' 256
2823// | | |-UnaryOperator 0x1c31348 'int' prefix '++'
2824// | | | `-DeclRefExpr 0x1c31328 'int' lvalue Var 0x1c31208 'k' 'int'
2825// | | `-CompoundStmt 0x1c31e18
2826// | | `-OMPOrderedDirective 0x1c31dd8
2827// | | |-OMPSimdClause 0x1c31380
2828// | | `-CapturedStmt 0x1c31cd0
2829// ```
2830//
2831// Note the presence of `OMPOrderedDirective` above:
2832// It's (transitively) nested in a `CapturedStmt` representing the pragma
2833// annotated compound statement. Thus, we need to consider this nesting and
2834// include checking the `getCapturedStmt` in this case.
2835static bool hasOrderedDirective(const Stmt *S) {
2836 if (isa<OMPOrderedDirective>(Val: S))
2837 return true;
2838
2839 if (const auto *CS = dyn_cast<CapturedStmt>(Val: S))
2840 return hasOrderedDirective(S: CS->getCapturedStmt());
2841
2842 for (const Stmt *Child : S->children()) {
2843 if (Child && hasOrderedDirective(S: Child))
2844 return true;
2845 }
2846
2847 return false;
2848}
2849
2850static void applyConservativeSimdOrderedDirective(const Stmt &AssociatedStmt,
2851 LoopInfoStack &LoopStack) {
2852 // Check for the presence of an `OMPOrderedDirective`
2853 // i.e., `ordered` in `#pragma omp ordered simd`
2854 bool HasOrderedDirective = hasOrderedDirective(S: &AssociatedStmt);
2855 // If present then conservatively disable loop vectorization
2856 // analogously to how `emitSimdlenSafelenClause` does.
2857 if (HasOrderedDirective)
2858 LoopStack.setParallel(/*Enable=*/false);
2859}
2860
2861void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
2862 // Walk clauses and process safelen/lastprivate.
2863 LoopStack.setParallel(/*Enable=*/true);
2864 LoopStack.setVectorizeEnable();
2865 const Stmt *AssociatedStmt = D.getAssociatedStmt();
2866 applyConservativeSimdOrderedDirective(AssociatedStmt: *AssociatedStmt, LoopStack);
2867 emitSimdlenSafelenClause(CGF&: *this, D);
2868 if (const auto *C = D.getSingleClause<OMPOrderClause>())
2869 if (C->getKind() == OMPC_ORDER_concurrent)
2870 LoopStack.setParallel(/*Enable=*/true);
2871 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S: D);
2872 if ((EKind == OMPD_simd ||
2873 (getLangOpts().OpenMPSimd && isOpenMPSimdDirective(DKind: EKind))) &&
2874 llvm::any_of(Range: D.getClausesOfKind<OMPReductionClause>(),
2875 P: [](const OMPReductionClause *C) {
2876 return C->getModifier() == OMPC_REDUCTION_inscan;
2877 }))
2878 // Disable parallel access in case of prefix sum.
2879 LoopStack.setParallel(/*Enable=*/false);
2880}
2881
2882void CodeGenFunction::EmitOMPSimdFinal(
2883 const OMPLoopDirective &D,
2884 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2885 if (!HaveInsertPoint())
2886 return;
2887 llvm::BasicBlock *DoneBB = nullptr;
2888 auto IC = D.counters().begin();
2889 auto IPC = D.private_counters().begin();
2890 for (const Expr *F : D.finals()) {
2891 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: (*IC))->getDecl());
2892 const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: (*IPC))->getDecl());
2893 const auto *CED = dyn_cast<OMPCapturedExprDecl>(Val: OrigVD);
2894 if (LocalDeclMap.count(Val: OrigVD) || CapturedStmtInfo->lookup(VD: OrigVD) ||
2895 OrigVD->hasGlobalStorage() || CED) {
2896 if (!DoneBB) {
2897 if (llvm::Value *Cond = CondGen(*this)) {
2898 // If the first post-update expression is found, emit conditional
2899 // block if it was requested.
2900 llvm::BasicBlock *ThenBB = createBasicBlock(name: ".omp.final.then");
2901 DoneBB = createBasicBlock(name: ".omp.final.done");
2902 Builder.CreateCondBr(Cond, True: ThenBB, False: DoneBB);
2903 EmitBlock(BB: ThenBB);
2904 }
2905 }
2906 Address OrigAddr = Address::invalid();
2907 if (CED) {
2908 OrigAddr = EmitLValue(E: CED->getInit()->IgnoreImpCasts()).getAddress();
2909 } else {
2910 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
2911 /*RefersToEnclosingVariableOrCapture=*/false,
2912 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
2913 OrigAddr = EmitLValue(E: &DRE).getAddress();
2914 }
2915 OMPPrivateScope VarScope(*this);
2916 VarScope.addPrivate(LocalVD: OrigVD, Addr: OrigAddr);
2917 (void)VarScope.Privatize();
2918 EmitIgnoredExpr(E: F);
2919 }
2920 ++IC;
2921 ++IPC;
2922 }
2923 if (DoneBB)
2924 EmitBlock(BB: DoneBB, /*IsFinished=*/true);
2925}
2926
2927static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
2928 const OMPLoopDirective &S,
2929 CodeGenFunction::JumpDest LoopExit) {
2930 CGF.EmitOMPLoopBody(D: S, LoopExit);
2931 CGF.EmitStopPoint(S: &S);
2932}
2933
2934/// Emit a helper variable and return corresponding lvalue.
2935static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
2936 const DeclRefExpr *Helper) {
2937 auto VDecl = cast<VarDecl>(Val: Helper->getDecl());
2938 CGF.EmitVarDecl(D: *VDecl);
2939 return CGF.EmitLValue(E: Helper);
2940}
2941
2942static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
2943 const RegionCodeGenTy &SimdInitGen,
2944 const RegionCodeGenTy &BodyCodeGen) {
2945 auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
2946 PrePostActionTy &) {
2947 CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
2948 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2949 SimdInitGen(CGF);
2950
2951 BodyCodeGen(CGF);
2952 };
2953 auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
2954 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2955 CGF.LoopStack.setVectorizeEnable(/*Enable=*/false);
2956
2957 BodyCodeGen(CGF);
2958 };
2959 const Expr *IfCond = nullptr;
2960 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
2961 if (isOpenMPSimdDirective(DKind: EKind)) {
2962 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2963 if (CGF.getLangOpts().OpenMP >= 50 &&
2964 (C->getNameModifier() == OMPD_unknown ||
2965 C->getNameModifier() == OMPD_simd)) {
2966 IfCond = C->getCondition();
2967 break;
2968 }
2969 }
2970 }
2971 if (IfCond) {
2972 CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, Cond: IfCond, ThenGen, ElseGen);
2973 } else {
2974 RegionCodeGenTy ThenRCG(ThenGen);
2975 ThenRCG(CGF);
2976 }
2977}
2978
2979static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
2980 PrePostActionTy &Action) {
2981 Action.Enter(CGF);
2982 OMPLoopScope PreInitScope(CGF, S);
2983 // if (PreCond) {
2984 // for (IV in 0..LastIteration) BODY;
2985 // <Final counter/linear vars updates>;
2986 // }
2987
2988 // The presence of lower/upper bound variable depends on the actual directive
2989 // kind in the AST node. The variables must be emitted because some of the
2990 // expressions associated with the loop will use them.
2991 OpenMPDirectiveKind DKind = S.getDirectiveKind();
2992 if (isOpenMPDistributeDirective(DKind) ||
2993 isOpenMPWorksharingDirective(DKind) || isOpenMPTaskLoopDirective(DKind) ||
2994 isOpenMPGenericLoopDirective(DKind)) {
2995 (void)EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: S.getLowerBoundVariable()));
2996 (void)EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: S.getUpperBoundVariable()));
2997 }
2998
2999 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
3000 // Emit: if (PreCond) - begin.
3001 // If the condition constant folds and can be elided, avoid emitting the
3002 // whole loop.
3003 bool CondConstant;
3004 llvm::BasicBlock *ContBlock = nullptr;
3005 if (CGF.ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) {
3006 if (!CondConstant)
3007 return;
3008 } else {
3009 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock(name: "simd.if.then");
3010 ContBlock = CGF.createBasicBlock(name: "simd.if.end");
3011 emitPreCond(CGF, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock,
3012 TrueCount: CGF.getProfileCount(S: &S));
3013 CGF.EmitBlock(BB: ThenBlock);
3014 CGF.incrementProfileCounter(S: &S);
3015 }
3016
3017 // Emit the loop iteration variable.
3018 const Expr *IVExpr = S.getIterationVariable();
3019 const auto *IVDecl = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IVExpr)->getDecl());
3020 CGF.EmitVarDecl(D: *IVDecl);
3021 CGF.EmitIgnoredExpr(E: S.getInit());
3022
3023 // Emit the iterations count variable.
3024 // If it is not a variable, Sema decided to calculate iterations count on
3025 // each iteration (e.g., it is foldable into a constant).
3026 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) {
3027 CGF.EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl()));
3028 // Emit calculation of the iterations count.
3029 CGF.EmitIgnoredExpr(E: S.getCalcLastIteration());
3030 }
3031
3032 emitAlignedClause(CGF, D: S);
3033 (void)CGF.EmitOMPLinearClauseInit(D: S);
3034 {
3035 CodeGenFunction::OMPPrivateScope LoopScope(CGF);
3036 CGF.EmitOMPPrivateClause(D: S, PrivateScope&: LoopScope);
3037 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
3038 CGF.EmitOMPLinearClause(D: S, PrivateScope&: LoopScope);
3039 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope&: LoopScope);
3040 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
3041 CGF, S, CGF.EmitLValue(E: S.getIterationVariable()));
3042 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope);
3043 (void)LoopScope.Privatize();
3044 if (isOpenMPTargetExecutionDirective(DKind: EKind))
3045 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S);
3046
3047 emitCommonSimdLoop(
3048 CGF, S,
3049 SimdInitGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3050 CGF.EmitOMPSimdInit(D: S);
3051 },
3052 BodyCodeGen: [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
3053 CGF.EmitOMPInnerLoop(
3054 S, RequiresCleanup: LoopScope.requiresCleanups(), LoopCond: S.getCond(), IncExpr: S.getInc(),
3055 BodyGen: [&S](CodeGenFunction &CGF) {
3056 emitOMPLoopBodyWithStopPoint(CGF, S,
3057 LoopExit: CodeGenFunction::JumpDest());
3058 },
3059 PostIncGen: [](CodeGenFunction &) {});
3060 });
3061 CGF.EmitOMPSimdFinal(D: S, CondGen: [](CodeGenFunction &) { return nullptr; });
3062 // Emit final copy of the lastprivate variables at the end of loops.
3063 if (HasLastprivateClause)
3064 CGF.EmitOMPLastprivateClauseFinal(D: S, /*NoFinals=*/true);
3065 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_simd);
3066 emitPostUpdateForReductionClause(CGF, D: S,
3067 CondGen: [](CodeGenFunction &) { return nullptr; });
3068 LoopScope.restoreMap();
3069 CGF.EmitOMPLinearClauseFinal(D: S, CondGen: [](CodeGenFunction &) { return nullptr; });
3070 }
3071 // Emit: if (PreCond) - end.
3072 if (ContBlock) {
3073 CGF.EmitBranch(Block: ContBlock);
3074 CGF.EmitBlock(BB: ContBlock, IsFinished: true);
3075 }
3076}
3077
3078// Pass OMPLoopDirective (instead of OMPSimdDirective) to make this function
3079// available for "loop bind(thread)", which maps to "simd".
3080static bool isSimdSupportedByOpenMPIRBuilder(const OMPLoopDirective &S) {
3081 // Check for unsupported clauses
3082 for (OMPClause *C : S.clauses()) {
3083 // Currently only order, simdlen and safelen clauses are supported
3084 if (!(isa<OMPSimdlenClause>(Val: C) || isa<OMPSafelenClause>(Val: C) ||
3085 isa<OMPOrderClause>(Val: C) || isa<OMPAlignedClause>(Val: C)))
3086 return false;
3087 }
3088
3089 // Check if we have a statement with the ordered directive.
3090 // Visit the statement hierarchy to find a compound statement
3091 // with a ordered directive in it.
3092 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(Val: S.getRawStmt())) {
3093 if (const Stmt *SyntacticalLoop = CanonLoop->getLoopStmt()) {
3094 for (const Stmt *SubStmt : SyntacticalLoop->children()) {
3095 if (!SubStmt)
3096 continue;
3097 if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(Val: SubStmt)) {
3098 for (const Stmt *CSSubStmt : CS->children()) {
3099 if (!CSSubStmt)
3100 continue;
3101 if (isa<OMPOrderedDirective>(Val: CSSubStmt)) {
3102 return false;
3103 }
3104 }
3105 }
3106 }
3107 }
3108 }
3109 return true;
3110}
3111
3112static llvm::MapVector<llvm::Value *, llvm::Value *>
3113GetAlignedMapping(const OMPLoopDirective &S, CodeGenFunction &CGF) {
3114 llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars;
3115 for (const auto *Clause : S.getClausesOfKind<OMPAlignedClause>()) {
3116 llvm::APInt ClauseAlignment(64, 0);
3117 if (const Expr *AlignmentExpr = Clause->getAlignment()) {
3118 auto *AlignmentCI =
3119 cast<llvm::ConstantInt>(Val: CGF.EmitScalarExpr(E: AlignmentExpr));
3120 ClauseAlignment = AlignmentCI->getValue();
3121 }
3122 for (const Expr *E : Clause->varlist()) {
3123 llvm::APInt Alignment(ClauseAlignment);
3124 if (Alignment == 0) {
3125 // OpenMP [2.8.1, Description]
3126 // If no optional parameter is specified, implementation-defined default
3127 // alignments for SIMD instructions on the target platforms are assumed.
3128 Alignment =
3129 CGF.getContext()
3130 .toCharUnitsFromBits(BitSize: CGF.getContext().getOpenMPDefaultSimdAlign(
3131 T: E->getType()->getPointeeType()))
3132 .getQuantity();
3133 }
3134 assert((Alignment == 0 || Alignment.isPowerOf2()) &&
3135 "alignment is not power of 2");
3136 llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
3137 AlignedVars[PtrValue] = CGF.Builder.getInt64(C: Alignment.getSExtValue());
3138 }
3139 }
3140 return AlignedVars;
3141}
3142
3143// Pass OMPLoopDirective (instead of OMPSimdDirective) to make this function
3144// available for "loop bind(thread)", which maps to "simd".
3145static void emitOMPSimdDirective(const OMPLoopDirective &S,
3146 CodeGenFunction &CGF, CodeGenModule &CGM) {
3147 bool UseOMPIRBuilder =
3148 CGM.getLangOpts().OpenMPIRBuilder && isSimdSupportedByOpenMPIRBuilder(S);
3149 if (UseOMPIRBuilder) {
3150 auto &&CodeGenIRBuilder = [&S, &CGM, UseOMPIRBuilder](CodeGenFunction &CGF,
3151 PrePostActionTy &) {
3152 // Use the OpenMPIRBuilder if enabled.
3153 if (UseOMPIRBuilder) {
3154 llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars =
3155 GetAlignedMapping(S, CGF);
3156 // Emit the associated statement and get its loop representation.
3157 const Stmt *Inner = S.getRawStmt();
3158 llvm::CanonicalLoopInfo *CLI =
3159 CGF.EmitOMPCollapsedCanonicalLoopNest(S: Inner, Depth: 1);
3160
3161 llvm::OpenMPIRBuilder &OMPBuilder =
3162 CGM.getOpenMPRuntime().getOMPBuilder();
3163 // Add SIMD specific metadata
3164 llvm::ConstantInt *Simdlen = nullptr;
3165 if (const auto *C = S.getSingleClause<OMPSimdlenClause>()) {
3166 RValue Len = CGF.EmitAnyExpr(E: C->getSimdlen(), aggSlot: AggValueSlot::ignored(),
3167 /*ignoreResult=*/true);
3168 auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal());
3169 Simdlen = Val;
3170 }
3171 llvm::ConstantInt *Safelen = nullptr;
3172 if (const auto *C = S.getSingleClause<OMPSafelenClause>()) {
3173 RValue Len = CGF.EmitAnyExpr(E: C->getSafelen(), aggSlot: AggValueSlot::ignored(),
3174 /*ignoreResult=*/true);
3175 auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal());
3176 Safelen = Val;
3177 }
3178 llvm::omp::OrderKind Order = llvm::omp::OrderKind::OMP_ORDER_unknown;
3179 if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
3180 if (C->getKind() == OpenMPOrderClauseKind::OMPC_ORDER_concurrent) {
3181 Order = llvm::omp::OrderKind::OMP_ORDER_concurrent;
3182 }
3183 }
3184 // Add simd metadata to the collapsed loop. Do not generate
3185 // another loop for if clause. Support for if clause is done earlier.
3186 OMPBuilder.applySimd(Loop: CLI, AlignedVars,
3187 /*IfCond*/ nullptr, Order, Simdlen, Safelen);
3188 return;
3189 }
3190 };
3191 {
3192 auto LPCRegion =
3193 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
3194 OMPLexicalScope Scope(CGF, S, OMPD_unknown);
3195 CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_simd,
3196 CodeGen: CodeGenIRBuilder);
3197 }
3198 return;
3199 }
3200
3201 CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
3202 CGF.OMPFirstScanLoop = true;
3203 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3204 emitOMPSimdRegion(CGF, S, Action);
3205 };
3206 {
3207 auto LPCRegion =
3208 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
3209 OMPLexicalScope Scope(CGF, S, OMPD_unknown);
3210 CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_simd, CodeGen);
3211 }
3212 // Check for outer lastprivate conditional update.
3213 checkForLastprivateConditionalUpdate(CGF, S);
3214}
3215
3216void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
3217 emitOMPSimdDirective(S, CGF&: *this, CGM);
3218}
3219
3220void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
3221 // Emit the de-sugared statement.
3222 OMPTransformDirectiveScopeRAII TileScope(*this, &S);
3223 EmitStmt(S: S.getTransformedStmt());
3224}
3225
3226void CodeGenFunction::EmitOMPStripeDirective(const OMPStripeDirective &S) {
3227 // Emit the de-sugared statement.
3228 OMPTransformDirectiveScopeRAII StripeScope(*this, &S);
3229 EmitStmt(S: S.getTransformedStmt());
3230}
3231
3232void CodeGenFunction::EmitOMPReverseDirective(const OMPReverseDirective &S) {
3233 // Emit the de-sugared statement.
3234 OMPTransformDirectiveScopeRAII ReverseScope(*this, &S);
3235 EmitStmt(S: S.getTransformedStmt());
3236}
3237
3238void CodeGenFunction::EmitOMPSplitDirective(const OMPSplitDirective &S) {
3239 // Emit the de-sugared statement (the split loops).
3240 OMPTransformDirectiveScopeRAII SplitScope(*this, &S);
3241 EmitStmt(S: S.getTransformedStmt());
3242}
3243
3244void CodeGenFunction::EmitOMPInterchangeDirective(
3245 const OMPInterchangeDirective &S) {
3246 // Emit the de-sugared statement.
3247 OMPTransformDirectiveScopeRAII InterchangeScope(*this, &S);
3248 EmitStmt(S: S.getTransformedStmt());
3249}
3250
3251void CodeGenFunction::EmitOMPFuseDirective(const OMPFuseDirective &S) {
3252 // Emit the de-sugared statement
3253 OMPTransformDirectiveScopeRAII FuseScope(*this, &S);
3254 EmitStmt(S: S.getTransformedStmt());
3255}
3256
3257void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
3258 bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder;
3259
3260 if (UseOMPIRBuilder) {
3261 auto DL = SourceLocToDebugLoc(Location: S.getBeginLoc());
3262 const Stmt *Inner = S.getRawStmt();
3263
3264 // Consume nested loop. Clear the entire remaining loop stack because a
3265 // fully unrolled loop is non-transformable. For partial unrolling the
3266 // generated outer loop is pushed back to the stack.
3267 llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(S: Inner, Depth: 1);
3268 OMPLoopNestStack.clear();
3269
3270 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
3271
3272 bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1;
3273 llvm::CanonicalLoopInfo *UnrolledCLI = nullptr;
3274
3275 if (S.hasClausesOfKind<OMPFullClause>()) {
3276 assert(ExpectedOMPLoopDepth == 0);
3277 OMPBuilder.unrollLoopFull(DL, Loop: CLI);
3278 } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
3279 uint64_t Factor = 0;
3280 if (Expr *FactorExpr = PartialClause->getFactor()) {
3281 Factor = FactorExpr->EvaluateKnownConstInt(Ctx: getContext()).getZExtValue();
3282 assert(Factor >= 1 && "Only positive factors are valid");
3283 }
3284 OMPBuilder.unrollLoopPartial(DL, Loop: CLI, Factor,
3285 UnrolledCLI: NeedsUnrolledCLI ? &UnrolledCLI : nullptr);
3286 } else {
3287 OMPBuilder.unrollLoopHeuristic(DL, Loop: CLI);
3288 }
3289
3290 assert((!NeedsUnrolledCLI || UnrolledCLI) &&
3291 "NeedsUnrolledCLI implies UnrolledCLI to be set");
3292 if (UnrolledCLI)
3293 OMPLoopNestStack.push_back(Elt: UnrolledCLI);
3294
3295 return;
3296 }
3297
3298 // This function is only called if the unrolled loop is not consumed by any
3299 // other loop-associated construct. Such a loop-associated construct will have
3300 // used the transformed AST.
3301
3302 // Set the unroll metadata for the next emitted loop.
3303 LoopStack.setUnrollState(LoopAttributes::Enable);
3304
3305 if (S.hasClausesOfKind<OMPFullClause>()) {
3306 LoopStack.setUnrollState(LoopAttributes::Full);
3307 } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
3308 if (Expr *FactorExpr = PartialClause->getFactor()) {
3309 uint64_t Factor =
3310 FactorExpr->EvaluateKnownConstInt(Ctx: getContext()).getZExtValue();
3311 assert(Factor >= 1 && "Only positive factors are valid");
3312 LoopStack.setUnrollCount(Factor);
3313 }
3314 }
3315
3316 EmitStmt(S: S.getAssociatedStmt());
3317}
3318
3319void CodeGenFunction::EmitOMPOuterLoop(
3320 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
3321 CodeGenFunction::OMPPrivateScope &LoopScope,
3322 const CodeGenFunction::OMPLoopArguments &LoopArgs,
3323 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
3324 const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
3325 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3326
3327 const Expr *IVExpr = S.getIterationVariable();
3328 const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType());
3329 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3330
3331 JumpDest LoopExit = getJumpDestInCurrentScope(Name: "omp.dispatch.end");
3332
3333 // Start the loop with a block that tests the condition.
3334 llvm::BasicBlock *CondBlock = createBasicBlock(name: "omp.dispatch.cond");
3335 EmitBlock(BB: CondBlock);
3336 const SourceRange R = S.getSourceRange();
3337 OMPLoopNestStack.clear();
3338 LoopStack.push(Header: CondBlock, StartLoc: SourceLocToDebugLoc(Location: R.getBegin()),
3339 EndLoc: SourceLocToDebugLoc(Location: R.getEnd()));
3340
3341 llvm::Value *BoolCondVal = nullptr;
3342 if (!DynamicOrOrdered) {
3343 // UB = min(UB, GlobalUB) or
3344 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
3345 // 'distribute parallel for')
3346 EmitIgnoredExpr(E: LoopArgs.EUB);
3347 // IV = LB
3348 EmitIgnoredExpr(E: LoopArgs.Init);
3349 // IV < UB
3350 BoolCondVal = EvaluateExprAsBool(E: LoopArgs.Cond);
3351 } else {
3352 BoolCondVal =
3353 RT.emitForNext(CGF&: *this, Loc: S.getBeginLoc(), IVSize, IVSigned, IL: LoopArgs.IL,
3354 LB: LoopArgs.LB, UB: LoopArgs.UB, ST: LoopArgs.ST);
3355 }
3356
3357 // If there are any cleanups between here and the loop-exit scope,
3358 // create a block to stage a loop exit along.
3359 llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
3360 if (LoopScope.requiresCleanups())
3361 ExitBlock = createBasicBlock(name: "omp.dispatch.cleanup");
3362
3363 llvm::BasicBlock *LoopBody = createBasicBlock(name: "omp.dispatch.body");
3364 Builder.CreateCondBr(Cond: BoolCondVal, True: LoopBody, False: ExitBlock);
3365 if (ExitBlock != LoopExit.getBlock()) {
3366 EmitBlock(BB: ExitBlock);
3367 EmitBranchThroughCleanup(Dest: LoopExit);
3368 }
3369 EmitBlock(BB: LoopBody);
3370
3371 // Emit "IV = LB" (in case of static schedule, we have already calculated new
3372 // LB for loop condition and emitted it above).
3373 if (DynamicOrOrdered)
3374 EmitIgnoredExpr(E: LoopArgs.Init);
3375
3376 // Create a block for the increment.
3377 JumpDest Continue = getJumpDestInCurrentScope(Name: "omp.dispatch.inc");
3378 BreakContinueStack.push_back(Elt: BreakContinue(S, LoopExit, Continue));
3379
3380 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
3381 emitCommonSimdLoop(
3382 CGF&: *this, S,
3383 SimdInitGen: [&S, IsMonotonic, EKind](CodeGenFunction &CGF, PrePostActionTy &) {
3384 // Generate !llvm.loop.parallel metadata for loads and stores for loops
3385 // with dynamic/guided scheduling and without ordered clause.
3386 if (!isOpenMPSimdDirective(DKind: EKind)) {
3387 CGF.LoopStack.setParallel(!IsMonotonic);
3388 if (const auto *C = S.getSingleClause<OMPOrderClause>())
3389 if (C->getKind() == OMPC_ORDER_concurrent)
3390 CGF.LoopStack.setParallel(/*Enable=*/true);
3391 } else {
3392 CGF.EmitOMPSimdInit(D: S);
3393 }
3394 },
3395 BodyCodeGen: [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
3396 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
3397 SourceLocation Loc = S.getBeginLoc();
3398 // when 'distribute' is not combined with a 'for':
3399 // while (idx <= UB) { BODY; ++idx; }
3400 // when 'distribute' is combined with a 'for'
3401 // (e.g. 'distribute parallel for')
3402 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
3403 CGF.EmitOMPInnerLoop(
3404 S, RequiresCleanup: LoopScope.requiresCleanups(), LoopCond: LoopArgs.Cond, IncExpr: LoopArgs.IncExpr,
3405 BodyGen: [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
3406 CodeGenLoop(CGF, S, LoopExit);
3407 },
3408 PostIncGen: [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
3409 CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
3410 });
3411 });
3412
3413 EmitBlock(BB: Continue.getBlock());
3414 BreakContinueStack.pop_back();
3415 if (!DynamicOrOrdered) {
3416 // Emit "LB = LB + Stride", "UB = UB + Stride".
3417 EmitIgnoredExpr(E: LoopArgs.NextLB);
3418 EmitIgnoredExpr(E: LoopArgs.NextUB);
3419 }
3420
3421 EmitBranch(Block: CondBlock);
3422 OMPLoopNestStack.clear();
3423 LoopStack.pop();
3424 // Emit the fall-through block.
3425 EmitBlock(BB: LoopExit.getBlock());
3426
3427 // Tell the runtime we are done.
3428 auto &&CodeGen = [DynamicOrOrdered, &S, &LoopArgs](CodeGenFunction &CGF) {
3429 if (!DynamicOrOrdered)
3430 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, Loc: S.getEndLoc(),
3431 DKind: LoopArgs.DKind);
3432 };
3433 OMPCancelStack.emitExit(CGF&: *this, Kind: EKind, CodeGen);
3434}
3435
3436void CodeGenFunction::EmitOMPForOuterLoop(
3437 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
3438 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
3439 const OMPLoopArguments &LoopArgs,
3440 const CodeGenDispatchBoundsTy &CGDispatchBounds) {
3441 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3442
3443 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
3444 const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind: ScheduleKind.Schedule);
3445
3446 assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule,
3447 LoopArgs.Chunk != nullptr)) &&
3448 "static non-chunked schedule does not need outer loop");
3449
3450 // Emit outer loop.
3451 //
3452 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
3453 // When schedule(dynamic,chunk_size) is specified, the iterations are
3454 // distributed to threads in the team in chunks as the threads request them.
3455 // Each thread executes a chunk of iterations, then requests another chunk,
3456 // until no chunks remain to be distributed. Each chunk contains chunk_size
3457 // iterations, except for the last chunk to be distributed, which may have
3458 // fewer iterations. When no chunk_size is specified, it defaults to 1.
3459 //
3460 // When schedule(guided,chunk_size) is specified, the iterations are assigned
3461 // to threads in the team in chunks as the executing threads request them.
3462 // Each thread executes a chunk of iterations, then requests another chunk,
3463 // until no chunks remain to be assigned. For a chunk_size of 1, the size of
3464 // each chunk is proportional to the number of unassigned iterations divided
3465 // by the number of threads in the team, decreasing to 1. For a chunk_size
3466 // with value k (greater than 1), the size of each chunk is determined in the
3467 // same way, with the restriction that the chunks do not contain fewer than k
3468 // iterations (except for the last chunk to be assigned, which may have fewer
3469 // than k iterations).
3470 //
3471 // When schedule(auto) is specified, the decision regarding scheduling is
3472 // delegated to the compiler and/or runtime system. The programmer gives the
3473 // implementation the freedom to choose any possible mapping of iterations to
3474 // threads in the team.
3475 //
3476 // When schedule(runtime) is specified, the decision regarding scheduling is
3477 // deferred until run time, and the schedule and chunk size are taken from the
3478 // run-sched-var ICV. If the ICV is set to auto, the schedule is
3479 // implementation defined
3480 //
3481 // __kmpc_dispatch_init();
3482 // while(__kmpc_dispatch_next(&LB, &UB)) {
3483 // idx = LB;
3484 // while (idx <= UB) { BODY; ++idx;
3485 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
3486 // } // inner loop
3487 // }
3488 // __kmpc_dispatch_deinit();
3489 //
3490 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
3491 // When schedule(static, chunk_size) is specified, iterations are divided into
3492 // chunks of size chunk_size, and the chunks are assigned to the threads in
3493 // the team in a round-robin fashion in the order of the thread number.
3494 //
3495 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
3496 // while (idx <= UB) { BODY; ++idx; } // inner loop
3497 // LB = LB + ST;
3498 // UB = UB + ST;
3499 // }
3500 //
3501
3502 const Expr *IVExpr = S.getIterationVariable();
3503 const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType());
3504 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3505
3506 if (DynamicOrOrdered) {
3507 const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
3508 CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
3509 llvm::Value *LBVal = DispatchBounds.first;
3510 llvm::Value *UBVal = DispatchBounds.second;
3511 CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
3512 LoopArgs.Chunk};
3513 RT.emitForDispatchInit(CGF&: *this, Loc: S.getBeginLoc(), ScheduleKind, IVSize,
3514 IVSigned, Ordered, DispatchValues: DipatchRTInputValues);
3515 } else {
3516 CGOpenMPRuntime::StaticRTInput StaticInit(
3517 IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
3518 LoopArgs.ST, LoopArgs.Chunk);
3519 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
3520 RT.emitForStaticInit(CGF&: *this, Loc: S.getBeginLoc(), DKind: EKind, ScheduleKind,
3521 Values: StaticInit);
3522 }
3523
3524 auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
3525 const unsigned IVSize,
3526 const bool IVSigned) {
3527 if (Ordered) {
3528 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
3529 IVSigned);
3530 }
3531 };
3532
3533 OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
3534 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
3535 OuterLoopArgs.IncExpr = S.getInc();
3536 OuterLoopArgs.Init = S.getInit();
3537 OuterLoopArgs.Cond = S.getCond();
3538 OuterLoopArgs.NextLB = S.getNextLowerBound();
3539 OuterLoopArgs.NextUB = S.getNextUpperBound();
3540 OuterLoopArgs.DKind = LoopArgs.DKind;
3541 EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, LoopArgs: OuterLoopArgs,
3542 CodeGenLoop: emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
3543 if (DynamicOrOrdered) {
3544 RT.emitForDispatchDeinit(CGF&: *this, Loc: S.getBeginLoc());
3545 }
3546}
3547
3548static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
3549 const unsigned IVSize, const bool IVSigned) {}
3550
3551void CodeGenFunction::EmitOMPDistributeOuterLoop(
3552 OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
3553 OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
3554 const CodeGenLoopTy &CodeGenLoopContent) {
3555
3556 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3557
3558 // Emit outer loop.
3559 // Same behavior as a OMPForOuterLoop, except that schedule cannot be
3560 // dynamic
3561 //
3562
3563 const Expr *IVExpr = S.getIterationVariable();
3564 const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType());
3565 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3566 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
3567
3568 CGOpenMPRuntime::StaticRTInput StaticInit(
3569 IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
3570 LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
3571 RT.emitDistributeStaticInit(CGF&: *this, Loc: S.getBeginLoc(), SchedKind: ScheduleKind, Values: StaticInit);
3572
3573 // for combined 'distribute' and 'for' the increment expression of distribute
3574 // is stored in DistInc. For 'distribute' alone, it is in Inc.
3575 Expr *IncExpr;
3576 if (isOpenMPLoopBoundSharingDirective(Kind: EKind))
3577 IncExpr = S.getDistInc();
3578 else
3579 IncExpr = S.getInc();
3580
3581 // this routine is shared by 'omp distribute parallel for' and
3582 // 'omp distribute': select the right EUB expression depending on the
3583 // directive
3584 OMPLoopArguments OuterLoopArgs;
3585 OuterLoopArgs.LB = LoopArgs.LB;
3586 OuterLoopArgs.UB = LoopArgs.UB;
3587 OuterLoopArgs.ST = LoopArgs.ST;
3588 OuterLoopArgs.IL = LoopArgs.IL;
3589 OuterLoopArgs.Chunk = LoopArgs.Chunk;
3590 OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(Kind: EKind)
3591 ? S.getCombinedEnsureUpperBound()
3592 : S.getEnsureUpperBound();
3593 OuterLoopArgs.IncExpr = IncExpr;
3594 OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(Kind: EKind)
3595 ? S.getCombinedInit()
3596 : S.getInit();
3597 OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(Kind: EKind)
3598 ? S.getCombinedCond()
3599 : S.getCond();
3600 OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(Kind: EKind)
3601 ? S.getCombinedNextLowerBound()
3602 : S.getNextLowerBound();
3603 OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(Kind: EKind)
3604 ? S.getCombinedNextUpperBound()
3605 : S.getNextUpperBound();
3606 OuterLoopArgs.DKind = OMPD_distribute;
3607
3608 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
3609 LoopScope, LoopArgs: OuterLoopArgs, CodeGenLoop: CodeGenLoopContent,
3610 CodeGenOrdered: emitEmptyOrdered);
3611}
3612
3613static std::pair<LValue, LValue>
3614emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
3615 const OMPExecutableDirective &S) {
3616 const OMPLoopDirective &LS = cast<OMPLoopDirective>(Val: S);
3617 LValue LB =
3618 EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getLowerBoundVariable()));
3619 LValue UB =
3620 EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getUpperBoundVariable()));
3621
3622 // When composing 'distribute' with 'for' (e.g. as in 'distribute
3623 // parallel for') we need to use the 'distribute'
3624 // chunk lower and upper bounds rather than the whole loop iteration
3625 // space. These are parameters to the outlined function for 'parallel'
3626 // and we copy the bounds of the previous schedule into the
3627 // the current ones.
3628 LValue PrevLB = CGF.EmitLValue(E: LS.getPrevLowerBoundVariable());
3629 LValue PrevUB = CGF.EmitLValue(E: LS.getPrevUpperBoundVariable());
3630 llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
3631 lvalue: PrevLB, Loc: LS.getPrevLowerBoundVariable()->getExprLoc());
3632 PrevLBVal = CGF.EmitScalarConversion(
3633 Src: PrevLBVal, SrcTy: LS.getPrevLowerBoundVariable()->getType(),
3634 DstTy: LS.getIterationVariable()->getType(),
3635 Loc: LS.getPrevLowerBoundVariable()->getExprLoc());
3636 llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
3637 lvalue: PrevUB, Loc: LS.getPrevUpperBoundVariable()->getExprLoc());
3638 PrevUBVal = CGF.EmitScalarConversion(
3639 Src: PrevUBVal, SrcTy: LS.getPrevUpperBoundVariable()->getType(),
3640 DstTy: LS.getIterationVariable()->getType(),
3641 Loc: LS.getPrevUpperBoundVariable()->getExprLoc());
3642
3643 CGF.EmitStoreOfScalar(value: PrevLBVal, lvalue: LB);
3644 CGF.EmitStoreOfScalar(value: PrevUBVal, lvalue: UB);
3645
3646 return {LB, UB};
3647}
3648
3649/// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
3650/// we need to use the LB and UB expressions generated by the worksharing
3651/// code generation support, whereas in non combined situations we would
3652/// just emit 0 and the LastIteration expression
3653/// This function is necessary due to the difference of the LB and UB
3654/// types for the RT emission routines for 'for_static_init' and
3655/// 'for_dispatch_init'
3656static std::pair<llvm::Value *, llvm::Value *>
3657emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
3658 const OMPExecutableDirective &S,
3659 Address LB, Address UB) {
3660 const OMPLoopDirective &LS = cast<OMPLoopDirective>(Val: S);
3661 const Expr *IVExpr = LS.getIterationVariable();
3662 // when implementing a dynamic schedule for a 'for' combined with a
3663 // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
3664 // is not normalized as each team only executes its own assigned
3665 // distribute chunk
3666 QualType IteratorTy = IVExpr->getType();
3667 llvm::Value *LBVal =
3668 CGF.EmitLoadOfScalar(Addr: LB, /*Volatile=*/false, Ty: IteratorTy, Loc: S.getBeginLoc());
3669 llvm::Value *UBVal =
3670 CGF.EmitLoadOfScalar(Addr: UB, /*Volatile=*/false, Ty: IteratorTy, Loc: S.getBeginLoc());
3671 return {LBVal, UBVal};
3672}
3673
3674static void emitDistributeParallelForDistributeInnerBoundParams(
3675 CodeGenFunction &CGF, const OMPExecutableDirective &S,
3676 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
3677 const auto &Dir = cast<OMPLoopDirective>(Val: S);
3678 LValue LB =
3679 CGF.EmitLValue(E: cast<DeclRefExpr>(Val: Dir.getCombinedLowerBoundVariable()));
3680 llvm::Value *LBCast = CGF.Builder.CreateIntCast(
3681 V: CGF.Builder.CreateLoad(Addr: LB.getAddress()), DestTy: CGF.SizeTy, /*isSigned=*/false);
3682 CapturedVars.push_back(Elt: LBCast);
3683 LValue UB =
3684 CGF.EmitLValue(E: cast<DeclRefExpr>(Val: Dir.getCombinedUpperBoundVariable()));
3685
3686 llvm::Value *UBCast = CGF.Builder.CreateIntCast(
3687 V: CGF.Builder.CreateLoad(Addr: UB.getAddress()), DestTy: CGF.SizeTy, /*isSigned=*/false);
3688 CapturedVars.push_back(Elt: UBCast);
3689}
3690
3691static void
3692emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
3693 const OMPLoopDirective &S,
3694 CodeGenFunction::JumpDest LoopExit) {
3695 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
3696 auto &&CGInlinedWorksharingLoop = [&S, EKind](CodeGenFunction &CGF,
3697 PrePostActionTy &Action) {
3698 Action.Enter(CGF);
3699 bool HasCancel = false;
3700 if (!isOpenMPSimdDirective(DKind: EKind)) {
3701 if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(Val: &S))
3702 HasCancel = D->hasCancel();
3703 else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(Val: &S))
3704 HasCancel = D->hasCancel();
3705 else if (const auto *D =
3706 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(Val: &S))
3707 HasCancel = D->hasCancel();
3708 }
3709 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
3710 CGF.EmitOMPWorksharingLoop(S, EUB: S.getPrevEnsureUpperBound(),
3711 CodeGenLoopBounds: emitDistributeParallelForInnerBounds,
3712 CGDispatchBounds: emitDistributeParallelForDispatchBounds);
3713 };
3714
3715 emitCommonOMPParallelDirective(
3716 CGF, S, InnermostKind: isOpenMPSimdDirective(DKind: EKind) ? OMPD_for_simd : OMPD_for,
3717 CodeGen: CGInlinedWorksharingLoop,
3718 CodeGenBoundParameters: emitDistributeParallelForDistributeInnerBoundParams);
3719}
3720
3721void CodeGenFunction::EmitOMPDistributeParallelForDirective(
3722 const OMPDistributeParallelForDirective &S) {
3723 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3724 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
3725 IncExpr: S.getDistInc());
3726 };
3727 OMPLexicalScope Scope(*this, S, OMPD_parallel);
3728 CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_distribute, CodeGen);
3729}
3730
3731void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
3732 const OMPDistributeParallelForSimdDirective &S) {
3733 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3734 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
3735 IncExpr: S.getDistInc());
3736 };
3737 OMPLexicalScope Scope(*this, S, OMPD_parallel);
3738 CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_distribute, CodeGen);
3739}
3740
3741void CodeGenFunction::EmitOMPDistributeSimdDirective(
3742 const OMPDistributeSimdDirective &S) {
3743 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3744 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
3745 };
3746 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3747 CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_simd, CodeGen);
3748}
3749
3750void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
3751 CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
3752 // Emit SPMD target parallel for region as a standalone region.
3753 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3754 emitOMPSimdRegion(CGF, S, Action);
3755 };
3756 llvm::Function *Fn;
3757 llvm::Constant *Addr;
3758 // Emit target region as a standalone region.
3759 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3760 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
3761 assert(Fn && Addr && "Target device function emission failed.");
3762}
3763
3764void CodeGenFunction::EmitOMPTargetSimdDirective(
3765 const OMPTargetSimdDirective &S) {
3766 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3767 emitOMPSimdRegion(CGF, S, Action);
3768 };
3769 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
3770}
3771
3772namespace {
3773struct ScheduleKindModifiersTy {
3774 OpenMPScheduleClauseKind Kind;
3775 OpenMPScheduleClauseModifier M1;
3776 OpenMPScheduleClauseModifier M2;
3777 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
3778 OpenMPScheduleClauseModifier M1,
3779 OpenMPScheduleClauseModifier M2)
3780 : Kind(Kind), M1(M1), M2(M2) {}
3781};
3782} // namespace
3783
/// Emits the worksharing loop for the loop directive \p S.
///
/// The function emits the iteration variable and (when it is a variable) the
/// iteration count, checks the loop precondition, initializes the clause
/// related private copies, selects the schedule and then takes one of two
/// paths:
///  - static non-chunked, or static chunk-one on a loop-bound-sharing
///    directive, both without `ordered`: one emitForStaticInit call, a single
///    inner loop and emitForStaticFinish;
///  - everything else: EmitOMPForOuterLoop.
/// Afterwards the simd/reduction/lastprivate/linear finalization is emitted.
///
/// \param S The loop directive being emitted.
/// \param EUB Expression stored in the OMPLoopArguments handed to
/// EmitOMPForOuterLoop; not used on the static fast path.
/// \param CodeGenLoopBounds Callback that yields the lower/upper bound lvalues.
/// \param CGDispatchBounds Callback forwarded to EmitOMPForOuterLoop.
/// \returns The result of EmitOMPLastprivateClauseInit for \p S, or false when
/// the precondition constant-folds to false and no loop is emitted.
3784bool CodeGenFunction::EmitOMPWorksharingLoop(
3785 const OMPLoopDirective &S, Expr *EUB,
3786 const CodeGenLoopBoundsTy &CodeGenLoopBounds,
3787 const CodeGenDispatchBoundsTy &CGDispatchBounds) {
3788 // Emit the loop iteration variable.
3789 const auto *IVExpr = cast<DeclRefExpr>(Val: S.getIterationVariable());
3790 const auto *IVDecl = cast<VarDecl>(Val: IVExpr->getDecl());
3791 EmitVarDecl(D: *IVDecl);
3792
3793 // Emit the iterations count variable.
3794 // If it is not a variable, Sema decided to calculate iterations count on each
3795 // iteration (e.g., it is foldable into a constant).
3796 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) {
3797 EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl()));
3798 // Emit calculation of the iterations count.
3799 EmitIgnoredExpr(E: S.getCalcLastIteration());
3800 }
3801
3802 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3803
 // Assigned by EmitOMPLastprivateClauseInit below; every path that reaches the
 // final return has gone through that assignment.
3804 bool HasLastprivateClause;
3805 // Check pre-condition.
3806 {
3807 OMPLoopScope PreInitScope(*this, S);
3808 // Skip the entire loop if we don't meet the precondition.
3809 // If the condition constant folds and can be elided, avoid emitting the
3810 // whole loop.
3811 bool CondConstant;
 // Stays null when the precondition folds to a constant, in which case no
 // continuation block is needed.
3812 llvm::BasicBlock *ContBlock = nullptr;
3813 if (ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) {
3814 if (!CondConstant)
3815 return false;
3816 } else {
3817 llvm::BasicBlock *ThenBlock = createBasicBlock(name: "omp.precond.then");
3818 ContBlock = createBasicBlock(name: "omp.precond.end");
3819 emitPreCond(CGF&: *this, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock,
3820 TrueCount: getProfileCount(S: &S));
3821 EmitBlock(BB: ThenBlock);
3822 incrementProfileCounter(S: &S);
3823 }
3824
 // Cleanup scope that is explicitly closed with ForceCleanup() after the loop
 // has been emitted (presumably for cleanups pushed by emitDoacrossInit).
3825 RunCleanupsScope DoacrossCleanupScope(*this);
 // An `ordered` clause with a loop count triggers doacross initialization;
 // an `ordered` clause without one marks the loop as ordered.
3826 bool Ordered = false;
3827 if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
3828 if (OrderedClause->getNumForLoops())
3829 RT.emitDoacrossInit(CGF&: *this, D: S, NumIterations: OrderedClause->getLoopNumIterations());
3830 else
3831 Ordered = true;
3832 }
3833
3834 emitAlignedClause(CGF&: *this, D: S);
3835 bool HasLinears = EmitOMPLinearClauseInit(D: S);
3836 // Emit helper vars inits.
3837
3838 std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
3839 LValue LB = Bounds.first;
3840 LValue UB = Bounds.second;
3841 LValue ST =
3842 EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getStrideVariable()));
3843 LValue IL =
3844 EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getIsLastIterVariable()));
3845
3846 // Emit 'then' code.
3847 {
3848 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
3849 OMPPrivateScope LoopScope(*this);
3850 if (EmitOMPFirstprivateClause(D: S, PrivateScope&: LoopScope) || HasLinears) {
3851 // Emit implicit barrier to synchronize threads and avoid data races on
3852 // initialization of firstprivate variables and post-update of
3853 // lastprivate variables.
3854 CGM.getOpenMPRuntime().emitBarrierCall(
3855 CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false,
3856 /*ForceSimpleCall=*/true);
3857 }
3858 EmitOMPPrivateClause(D: S, PrivateScope&: LoopScope);
3859 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
3860 *this, S, EmitLValue(E: S.getIterationVariable()));
3861 HasLastprivateClause = EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope);
3862 EmitOMPReductionClauseInit(D: S, PrivateScope&: LoopScope);
3863 EmitOMPPrivateLoopCounters(S, LoopScope);
3864 EmitOMPLinearClause(D: S, PrivateScope&: LoopScope);
3865 (void)LoopScope.Privatize();
3866 if (isOpenMPTargetExecutionDirective(DKind: EKind))
3867 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF&: *this, D: S);
3868
3869 // Detect the loop schedule kind and chunk.
3870 const Expr *ChunkExpr = nullptr;
3871 OpenMPScheduleTy ScheduleKind;
3872 if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
3873 ScheduleKind.Schedule = C->getScheduleKind();
3874 ScheduleKind.M1 = C->getFirstScheduleModifier();
3875 ScheduleKind.M2 = C->getSecondScheduleModifier();
3876 ChunkExpr = C->getChunkSize();
3877 } else {
3878 // Default behaviour for schedule clause.
3879 CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
3880 CGF&: *this, S, ScheduleKind&: ScheduleKind.Schedule, ChunkExpr);
3881 }
 // HasChunkSizeOne is set only when the chunk expression can be evaluated as
 // an integer constant whose value is 1; a chunk that is 1 only at run time
 // leaves it false.
3882 bool HasChunkSizeOne = false;
3883 llvm::Value *Chunk = nullptr;
3884 if (ChunkExpr) {
3885 Chunk = EmitScalarExpr(E: ChunkExpr);
 // The chunk value is converted to the type of the iteration variable.
3886 Chunk = EmitScalarConversion(Src: Chunk, SrcTy: ChunkExpr->getType(),
3887 DstTy: S.getIterationVariable()->getType(),
3888 Loc: S.getBeginLoc());
3889 Expr::EvalResult Result;
3890 if (ChunkExpr->EvaluateAsInt(Result, Ctx: getContext())) {
3891 llvm::APSInt EvaluatedChunk = Result.Val.getInt();
3892 HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
3893 }
3894 }
3895 const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType());
3896 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3897 // OpenMP 4.5, 2.7.1 Loop Construct, Description.
3898 // If the static schedule kind is specified or if the ordered clause is
3899 // specified, and if no monotonic modifier is specified, the effect will
3900 // be as if the monotonic modifier was specified.
 // (This rule is what the IsMonotonic computation further below encodes.)
 //
 // StaticChunkedOne: static chunked schedule whose chunk is the constant 1,
 // on a directive that shares loop bounds with an enclosing construct.
3901 bool StaticChunkedOne =
3902 RT.isStaticChunked(ScheduleKind: ScheduleKind.Schedule,
3903 /* Chunked */ Chunk != nullptr) &&
3904 HasChunkSizeOne && isOpenMPLoopBoundSharingDirective(Kind: EKind);
3905 // GPU combined `distribute parallel for`: emit a single
3906 // for_static_init with the fused distr_static_chunk + static_chunkone
3907 // schedule (enum 93). The surrounding EmitOMPDistributeLoop must skip
3908 // its distribute_static_init under the same conditions. Both sites are
3909 // guarded by canEmitGPUFusedDistSchedule() alone so they cannot
3910 // disagree; the assert guards the invariant that makes this safe today,
3911 // namely that the implicit GPU default schedule is always static chunk-one.
3912 ScheduleKind.UseFusedDistChunkSchedule =
3913 canEmitGPUFusedDistSchedule(CGM, S, DKind: EKind);
3914 assert((!ScheduleKind.UseFusedDistChunkSchedule || StaticChunkedOne) &&
3915 "fused distribute schedule requires a static chunk-one schedule");
 // Monotonic if ordered, if static without a nonmonotonic modifier, or if a
 // monotonic modifier is given explicitly.
3916 bool IsMonotonic =
3917 Ordered ||
3918 (ScheduleKind.Schedule == OMPC_SCHEDULE_static &&
3919 !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
3920 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
3921 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
3922 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
 // Static fast path: no outer dispatch loop is needed, a single static-init
 // call followed by one inner loop is emitted.
3923 if ((RT.isStaticNonchunked(ScheduleKind: ScheduleKind.Schedule,
3924 /* Chunked */ Chunk != nullptr) ||
3925 StaticChunkedOne) &&
3926 !Ordered) {
3927 JumpDest LoopExit =
3928 getJumpDestInCurrentScope(Target: createBasicBlock(name: "omp.loop.exit"));
3929 emitCommonSimdLoop(
3930 CGF&: *this, S,
3931 SimdInitGen: [&S, EKind](CodeGenFunction &CGF, PrePostActionTy &) {
 // Simd directives get the simd loop setup; otherwise an
 // `order(concurrent)` clause marks the loop as parallel.
3932 if (isOpenMPSimdDirective(DKind: EKind)) {
3933 CGF.EmitOMPSimdInit(D: S);
3934 } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
3935 if (C->getKind() == OMPC_ORDER_concurrent)
3936 CGF.LoopStack.setParallel(/*Enable=*/true);
3937 }
3938 },
3939 BodyCodeGen: [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
3940 &S, ScheduleKind, LoopExit, EKind,
3941 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
3942 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
3943 // When no chunk_size is specified, the iteration space is divided
3944 // into chunks that are approximately equal in size, and at most
3945 // one chunk is distributed to each thread. Note that the size of
3946 // the chunks is unspecified in this case.
 // The chunk value is only handed to the runtime in the chunk-one
 // case; the non-chunked case passes a null chunk.
3947 CGOpenMPRuntime::StaticRTInput StaticInit(
3948 IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
3949 UB.getAddress(), ST.getAddress(),
3950 StaticChunkedOne ? Chunk : nullptr);
3951 CGF.CGM.getOpenMPRuntime().emitForStaticInit(
3952 CGF, Loc: S.getBeginLoc(), DKind: EKind, ScheduleKind, Values: StaticInit);
3953 // UB = min(UB, GlobalUB);
3954 if (!StaticChunkedOne)
3955 CGF.EmitIgnoredExpr(E: S.getEnsureUpperBound());
3956 // IV = LB;
3957 CGF.EmitIgnoredExpr(E: S.getInit());
3958 // For unchunked static schedule generate:
3959 //
3960 // while (idx <= UB) {
3961 // BODY;
3962 // ++idx;
3963 // }
3964 //
3965 // For static schedule with chunk one:
3966 //
3967 // while (IV <= PrevUB) {
3968 // BODY;
3969 // IV += ST;
3970 // }
3971 CGF.EmitOMPInnerLoop(
3972 S, RequiresCleanup: LoopScope.requiresCleanups(),
3973 LoopCond: StaticChunkedOne ? S.getCombinedParForInDistCond()
3974 : S.getCond(),
3975 IncExpr: StaticChunkedOne ? S.getDistInc() : S.getInc(),
3976 BodyGen: [&S, LoopExit](CodeGenFunction &CGF) {
3977 emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
3978 },
3979 PostIncGen: [](CodeGenFunction &) {});
3980 });
3981 EmitBlock(BB: LoopExit.getBlock());
3982 // Tell the runtime we are done.
3983 auto &&CodeGen = [&S](CodeGenFunction &CGF) {
3984 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, Loc: S.getEndLoc(),
3985 DKind: OMPD_for);
3986 };
3987 OMPCancelStack.emitExit(CGF&: *this, Kind: EKind, CodeGen);
3988 } else {
3989 // Emit the outer loop, which requests its work chunk [LB..UB] from
3990 // runtime and runs the inner loop to process it.
3991 OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
3992 ST.getAddress(), IL.getAddress(), Chunk,
3993 EUB);
3994 LoopArguments.DKind = OMPD_for;
3995 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
3996 LoopArgs: LoopArguments, CGDispatchBounds);
3997 }
 // The finalization steps below are all conditioned on the "is last
 // iteration" flag (IL) being non-zero.
3998 if (isOpenMPSimdDirective(DKind: EKind)) {
3999 EmitOMPSimdFinal(D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
4000 return CGF.Builder.CreateIsNotNull(
4001 Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
4002 });
4003 }
4004 EmitOMPReductionClauseFinal(
4005 D: S, /*ReductionKind=*/isOpenMPSimdDirective(DKind: EKind)
4006 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
4007 : /*Parallel only*/ OMPD_parallel);
4008 // Emit post-update of the reduction variables if IsLastIter != 0.
4009 emitPostUpdateForReductionClause(
4010 CGF&: *this, D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
4011 return CGF.Builder.CreateIsNotNull(
4012 Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
4013 });
4014 // Emit final copy of the lastprivate variables if IsLastIter != 0.
4015 if (HasLastprivateClause)
4016 EmitOMPLastprivateClauseFinal(
4017 D: S, NoFinals: isOpenMPSimdDirective(DKind: EKind),
4018 IsLastIterCond: Builder.CreateIsNotNull(Arg: EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc())));
 // The private mapping is restored before the linear clause finalization
 // is emitted.
4019 LoopScope.restoreMap();
4020 EmitOMPLinearClauseFinal(D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
4021 return CGF.Builder.CreateIsNotNull(
4022 Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
4023 });
4024 }
4025 DoacrossCleanupScope.ForceCleanup();
4026 // We're now done with the loop, so jump to the continuation block.
4027 if (ContBlock) {
4028 EmitBranch(Block: ContBlock);
4029 EmitBlock(BB: ContBlock, /*IsFinished=*/true);
4030 }
4031 }
4032 return HasLastprivateClause;
4033}
4034
4035/// The following two functions generate expressions for the loop lower
4036/// and upper bounds in case of static and dynamic (dispatch) schedule
4037/// of the associated 'for' or 'distribute' loop.
4038static std::pair<LValue, LValue>
4039emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
4040 const auto &LS = cast<OMPLoopDirective>(Val: S);
4041 LValue LB =
4042 EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getLowerBoundVariable()));
4043 LValue UB =
4044 EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getUpperBoundVariable()));
4045 return {LB, UB};
4046}
4047
4048/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
4049/// consider the lower and upper bound expressions generated by the
4050/// worksharing loop support, but we use 0 and the iteration space size as
4051/// constants
4052static std::pair<llvm::Value *, llvm::Value *>
4053emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
4054 Address LB, Address UB) {
4055 const auto &LS = cast<OMPLoopDirective>(Val: S);
4056 const Expr *IVExpr = LS.getIterationVariable();
4057 const unsigned IVSize = CGF.getContext().getTypeSize(T: IVExpr->getType());
4058 llvm::Value *LBVal = CGF.Builder.getIntN(N: IVSize, C: 0);
4059 llvm::Value *UBVal = CGF.EmitScalarExpr(E: LS.getLastIteration());
4060 return {LBVal, UBVal};
4061}
4062
4063/// Emits internal temp array declarations for the directive with inscan
4064/// reductions.
4065/// The code is the following:
4066/// \code
4067/// size num_iters = <num_iters>;
4068/// <type> buffer[num_iters];
4069/// \endcode
4070static void emitScanBasedDirectiveDecls(
4071 CodeGenFunction &CGF, const OMPLoopDirective &S,
4072 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
4073 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
4074 V: NumIteratorsGen(CGF), DestTy: CGF.SizeTy, /*isSigned=*/false);
4075 SmallVector<const Expr *, 4> Shareds;
4076 SmallVector<const Expr *, 4> Privates;
4077 SmallVector<const Expr *, 4> ReductionOps;
4078 SmallVector<const Expr *, 4> CopyArrayTemps;
4079 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
4080 assert(C->getModifier() == OMPC_REDUCTION_inscan &&
4081 "Only inscan reductions are expected.");
4082 Shareds.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
4083 Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
4084 ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
4085 CopyArrayTemps.append(in_start: C->copy_array_temps().begin(),
4086 in_end: C->copy_array_temps().end());
4087 }
4088 {
4089 // Emit buffers for each reduction variables.
4090 // ReductionCodeGen is required to emit correctly the code for array
4091 // reductions.
4092 ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
4093 unsigned Count = 0;
4094 auto *ITA = CopyArrayTemps.begin();
4095 for (const Expr *IRef : Privates) {
4096 const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IRef)->getDecl());
4097 // Emit variably modified arrays, used for arrays/array sections
4098 // reductions.
4099 if (PrivateVD->getType()->isVariablyModifiedType()) {
4100 RedCG.emitSharedOrigLValue(CGF, N: Count);
4101 RedCG.emitAggregateType(CGF, N: Count);
4102 }
4103 CodeGenFunction::OpaqueValueMapping DimMapping(
4104 CGF,
4105 cast<OpaqueValueExpr>(
4106 Val: cast<VariableArrayType>(Val: (*ITA)->getType()->getAsArrayTypeUnsafe())
4107 ->getSizeExpr()),
4108 RValue::get(V: OMPScanNumIterations));
4109 // Emit temp buffer.
4110 CGF.EmitVarDecl(D: *cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ITA)->getDecl()));
4111 ++ITA;
4112 ++Count;
4113 }
4114 }
4115}
4116
4117/// Copies final inscan reductions values to the original variables.
4118/// The code is the following:
4119/// \code
4120/// <orig_var> = buffer[num_iters-1];
4121/// \endcode
4122static void emitScanBasedDirectiveFinals(
4123 CodeGenFunction &CGF, const OMPLoopDirective &S,
4124 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
4125 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
4126 V: NumIteratorsGen(CGF), DestTy: CGF.SizeTy, /*isSigned=*/false);
4127 SmallVector<const Expr *, 4> Shareds;
4128 SmallVector<const Expr *, 4> LHSs;
4129 SmallVector<const Expr *, 4> RHSs;
4130 SmallVector<const Expr *, 4> Privates;
4131 SmallVector<const Expr *, 4> CopyOps;
4132 SmallVector<const Expr *, 4> CopyArrayElems;
4133 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
4134 assert(C->getModifier() == OMPC_REDUCTION_inscan &&
4135 "Only inscan reductions are expected.");
4136 Shareds.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
4137 LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
4138 RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
4139 Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
4140 CopyOps.append(in_start: C->copy_ops().begin(), in_end: C->copy_ops().end());
4141 CopyArrayElems.append(in_start: C->copy_array_elems().begin(),
4142 in_end: C->copy_array_elems().end());
4143 }
4144 // Create temp var and copy LHS value to this temp value.
4145 // LHS = TMP[LastIter];
4146 llvm::Value *OMPLast = CGF.Builder.CreateNSWSub(
4147 LHS: OMPScanNumIterations,
4148 RHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1, /*isSigned=*/IsSigned: false));
4149 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
4150 const Expr *PrivateExpr = Privates[I];
4151 const Expr *OrigExpr = Shareds[I];
4152 const Expr *CopyArrayElem = CopyArrayElems[I];
4153 CodeGenFunction::OpaqueValueMapping IdxMapping(
4154 CGF,
4155 cast<OpaqueValueExpr>(
4156 Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
4157 RValue::get(V: OMPLast));
4158 LValue DestLVal = CGF.EmitLValue(E: OrigExpr);
4159 LValue SrcLVal = CGF.EmitLValue(E: CopyArrayElem);
4160 CGF.EmitOMPCopy(
4161 OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), SrcAddr: SrcLVal.getAddress(),
4162 DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
4163 SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), Copy: CopyOps[I]);
4164 }
4165}
4166
4167/// Emits the code for the directive with inscan reductions.
4168/// The code is the following:
4169/// \code
4170/// #pragma omp ...
4171/// for (i: 0..<num_iters>) {
4172/// <input phase>;
4173/// buffer[i] = red;
4174/// }
4175/// #pragma omp master // in parallel region
4176/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
4177/// for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
4178/// buffer[cnt] op= buffer[cnt-pow(2,k)];
4179/// #pragma omp barrier // in parallel region
4180/// #pragma omp ...
4181/// for (0..<num_iters>) {
4182/// red = InclusiveScan ? buffer[i] : buffer[i-1];
4183/// <scan phase>;
4184/// }
4185/// \endcode
///
/// \param NumIteratorsGen Callback that emits the iteration count; its result
/// is cast (unsigned) to the size type.
/// \param FirstGen Emits the first loop (input phase); called with
/// OMPFirstScanLoop set to true.
/// \param SecondGen Emits the second loop (scan phase); called with
/// OMPFirstScanLoop set to false.
4186static void emitScanBasedDirective(
4187 CodeGenFunction &CGF, const OMPLoopDirective &S,
4188 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
4189 llvm::function_ref<void(CodeGenFunction &)> FirstGen,
4190 llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
4191 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
4192 V: NumIteratorsGen(CGF), DestTy: CGF.SizeTy, /*isSigned=*/false);
 // Flatten the per-clause expression lists of all reduction clauses.
4193 SmallVector<const Expr *, 4> Privates;
4194 SmallVector<const Expr *, 4> ReductionOps;
4195 SmallVector<const Expr *, 4> LHSs;
4196 SmallVector<const Expr *, 4> RHSs;
4197 SmallVector<const Expr *, 4> CopyArrayElems;
4198 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
4199 assert(C->getModifier() == OMPC_REDUCTION_inscan &&
4200 "Only inscan reductions are expected.");
4201 Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
4202 ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
4203 LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
4204 RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
4205 CopyArrayElems.append(in_start: C->copy_array_elems().begin(),
4206 in_end: C->copy_array_elems().end());
4207 }
4208 CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
4209 {
4210 // Emit loop with input phase:
4211 // #pragma omp ...
4212 // for (i: 0..<num_iters>) {
4213 // <input phase>;
4214 // buffer[i] = red;
4215 // }
4216 CGF.OMPFirstScanLoop = true;
4217 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4218 FirstGen(CGF);
4219 }
4220 // #pragma omp barrier // in parallel region
 // CodeGen emits the prefix reduction over the buffer; how it is invoked
 // (master region + barrier, or inline) is decided after its definition.
4221 auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems,
4222 &ReductionOps,
4223 &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) {
4224 Action.Enter(CGF);
4225 // Emit prefix reduction:
4226 // #pragma omp master // in parallel region
4227 // for (int k = 0; k != ceil(log2(n)); ++k)
4228 llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
4229 llvm::BasicBlock *LoopBB = CGF.createBasicBlock(name: "omp.outer.log.scan.body");
4230 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: "omp.outer.log.scan.exit");
 // LogVal = (unsigned)ceil(log2((double)n)): the number of outer passes.
4231 llvm::Function *F =
4232 CGF.CGM.getIntrinsic(IID: llvm::Intrinsic::log2, Tys: CGF.DoubleTy);
4233 llvm::Value *Arg =
4234 CGF.Builder.CreateUIToFP(V: OMPScanNumIterations, DestTy: CGF.DoubleTy);
4235 llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(callee: F, args: Arg);
4236 F = CGF.CGM.getIntrinsic(IID: llvm::Intrinsic::ceil, Tys: CGF.DoubleTy);
4237 LogVal = CGF.EmitNounwindRuntimeCall(callee: F, args: LogVal);
4238 LogVal = CGF.Builder.CreateFPToUI(V: LogVal, DestTy: CGF.IntTy);
 // NMin1 = n - 1, the index of the last buffer element.
4239 llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
4240 LHS: OMPScanNumIterations, RHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1));
4241 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: S.getBeginLoc());
 // The outer loop is bottom-tested: its body is entered unconditionally from
 // InputBB and the exit condition is checked after each pass.
 // NOTE(review): the exit test is `Next != LogVal`; when LogVal is 0 (a
 // single iteration) it is not satisfied after the first pass. Confirm that
 // this case cannot reach here or is handled elsewhere.
4242 CGF.EmitBlock(BB: LoopBB);
 // Counter is the pass index k, starting at 0.
4243 auto *Counter = CGF.Builder.CreatePHI(Ty: CGF.IntTy, NumReservedValues: 2);
4244 // size pow2k = 1;
4245 auto *Pow2K = CGF.Builder.CreatePHI(Ty: CGF.SizeTy, NumReservedValues: 2);
4246 Counter->addIncoming(V: llvm::ConstantInt::get(Ty: CGF.IntTy, V: 0), BB: InputBB);
4247 Pow2K->addIncoming(V: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1), BB: InputBB);
4248 // for (size i = n - 1; i >= 2 ^ k; --i)
4249 // tmp[i] op= tmp[i-pow2k];
4250 llvm::BasicBlock *InnerLoopBB =
4251 CGF.createBasicBlock(name: "omp.inner.log.scan.body");
4252 llvm::BasicBlock *InnerExitBB =
4253 CGF.createBasicBlock(name: "omp.inner.log.scan.exit");
 // Skip the inner loop entirely when n - 1 < pow2k.
4254 llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(LHS: NMin1, RHS: Pow2K);
4255 CGF.Builder.CreateCondBr(Cond: CmpI, True: InnerLoopBB, False: InnerExitBB);
4256 CGF.EmitBlock(BB: InnerLoopBB);
 // IVal is the inner index i, counting down from n - 1.
4257 auto *IVal = CGF.Builder.CreatePHI(Ty: CGF.SizeTy, NumReservedValues: 2);
4258 IVal->addIncoming(V: NMin1, BB: LoopBB);
4259 {
 // Map each reduction's LHS variable to tmp[i] and its RHS variable to
 // tmp[i - pow2k], then emit the reduction on those addresses.
4260 CodeGenFunction::OMPPrivateScope PrivScope(CGF);
4261 auto *ILHS = LHSs.begin();
4262 auto *IRHS = RHSs.begin();
4263 for (const Expr *CopyArrayElem : CopyArrayElems) {
4264 const auto *LHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl());
4265 const auto *RHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl());
4266 Address LHSAddr = Address::invalid();
4267 {
4268 CodeGenFunction::OpaqueValueMapping IdxMapping(
4269 CGF,
4270 cast<OpaqueValueExpr>(
4271 Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
4272 RValue::get(V: IVal));
4273 LHSAddr = CGF.EmitLValue(E: CopyArrayElem).getAddress();
4274 }
4275 PrivScope.addPrivate(LocalVD: LHSVD, Addr: LHSAddr);
4276 Address RHSAddr = Address::invalid();
4277 {
4278 llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(LHS: IVal, RHS: Pow2K);
4279 CodeGenFunction::OpaqueValueMapping IdxMapping(
4280 CGF,
4281 cast<OpaqueValueExpr>(
4282 Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
4283 RValue::get(V: OffsetIVal));
4284 RHSAddr = CGF.EmitLValue(E: CopyArrayElem).getAddress();
4285 }
4286 PrivScope.addPrivate(LocalVD: RHSVD, Addr: RHSAddr);
4287 ++ILHS;
4288 ++IRHS;
4289 }
4290 PrivScope.Privatize();
4291 CGF.CGM.getOpenMPRuntime().emitReduction(
4292 CGF, Loc: S.getEndLoc(), Privates, LHSExprs: LHSs, RHSExprs: RHSs, ReductionOps,
4293 Options: {/*WithNowait=*/true, /*SimpleReduction=*/true,
4294 /*IsPrivateVarReduction*/ {}, .ReductionKind: OMPD_unknown});
4295 }
 // --i; continue the inner loop while i >= pow2k.
4296 llvm::Value *NextIVal =
4297 CGF.Builder.CreateNUWSub(LHS: IVal, RHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1));
4298 IVal->addIncoming(V: NextIVal, BB: CGF.Builder.GetInsertBlock());
4299 CmpI = CGF.Builder.CreateICmpUGE(LHS: NextIVal, RHS: Pow2K);
4300 CGF.Builder.CreateCondBr(Cond: CmpI, True: InnerLoopBB, False: InnerExitBB);
4301 CGF.EmitBlock(BB: InnerExitBB);
 // ++k;
4302 llvm::Value *Next =
4303 CGF.Builder.CreateNUWAdd(LHS: Counter, RHS: llvm::ConstantInt::get(Ty: CGF.IntTy, V: 1));
4304 Counter->addIncoming(V: Next, BB: CGF.Builder.GetInsertBlock());
4305 // pow2k <<= 1;
4306 llvm::Value *NextPow2K =
4307 CGF.Builder.CreateShl(LHS: Pow2K, RHS: 1, Name: "", /*HasNUW=*/true);
4308 Pow2K->addIncoming(V: NextPow2K, BB: CGF.Builder.GetInsertBlock());
 // Repeat the outer loop until k reaches LogVal.
4309 llvm::Value *Cmp = CGF.Builder.CreateICmpNE(LHS: Next, RHS: LogVal);
4310 CGF.Builder.CreateCondBr(Cond: Cmp, True: LoopBB, False: ExitBB);
4311 auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: S.getEndLoc());
4312 CGF.EmitBlock(BB: ExitBB);
4313 };
4314 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
 // For a parallel directive the prefix reduction is emitted inside a master
 // region followed by a barrier; otherwise it is emitted inline.
4315 if (isOpenMPParallelDirective(DKind: EKind)) {
4316 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: CodeGen, Loc: S.getBeginLoc());
4317 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
4318 CGF, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false,
4319 /*ForceSimpleCall=*/true);
4320 } else {
4321 RegionCodeGenTy RCG(CodeGen);
4322 RCG(CGF);
4323 }
4324
 // Emit the second loop (scan phase).
4325 CGF.OMPFirstScanLoop = false;
4326 SecondGen(CGF);
4327}
4328
4329static bool emitWorksharingDirective(CodeGenFunction &CGF,
4330 const OMPLoopDirective &S,
4331 bool HasCancel) {
4332 bool HasLastprivates;
4333 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
4334 if (llvm::any_of(Range: S.getClausesOfKind<OMPReductionClause>(),
4335 P: [](const OMPReductionClause *C) {
4336 return C->getModifier() == OMPC_REDUCTION_inscan;
4337 })) {
4338 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4339 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4340 OMPLoopScope LoopScope(CGF, S);
4341 return CGF.EmitScalarExpr(E: S.getNumIterations());
4342 };
4343 const auto &&FirstGen = [&S, HasCancel, EKind](CodeGenFunction &CGF) {
4344 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
4345 (void)CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(),
4346 CodeGenLoopBounds: emitForLoopBounds,
4347 CGDispatchBounds: emitDispatchForLoopBounds);
4348 // Emit an implicit barrier at the end.
4349 CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc: S.getBeginLoc(),
4350 Kind: OMPD_for);
4351 };
4352 const auto &&SecondGen = [&S, HasCancel, EKind,
4353 &HasLastprivates](CodeGenFunction &CGF) {
4354 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
4355 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(),
4356 CodeGenLoopBounds: emitForLoopBounds,
4357 CGDispatchBounds: emitDispatchForLoopBounds);
4358 };
4359 if (!isOpenMPParallelDirective(DKind: EKind))
4360 emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
4361 emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
4362 if (!isOpenMPParallelDirective(DKind: EKind))
4363 emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen);
4364 } else {
4365 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
4366 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(),
4367 CodeGenLoopBounds: emitForLoopBounds,
4368 CGDispatchBounds: emitDispatchForLoopBounds);
4369 }
4370 return HasLastprivates;
4371}
4372
4373// Pass OMPLoopDirective (instead of OMPForDirective) to make this check
4374// available for "loop bind(parallel)", which maps to "for".
4375static bool isForSupportedByOpenMPIRBuilder(const OMPLoopDirective &S,
4376 bool HasCancel) {
4377 if (HasCancel)
4378 return false;
4379 for (OMPClause *C : S.clauses()) {
4380 if (isa<OMPNowaitClause, OMPBindClause>(Val: C))
4381 continue;
4382
4383 if (auto *SC = dyn_cast<OMPScheduleClause>(Val: C)) {
4384 if (SC->getFirstScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
4385 return false;
4386 if (SC->getSecondScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
4387 return false;
4388 switch (SC->getScheduleKind()) {
4389 case OMPC_SCHEDULE_auto:
4390 case OMPC_SCHEDULE_dynamic:
4391 case OMPC_SCHEDULE_runtime:
4392 case OMPC_SCHEDULE_guided:
4393 case OMPC_SCHEDULE_static:
4394 continue;
4395 case OMPC_SCHEDULE_unknown:
4396 return false;
4397 }
4398 }
4399
4400 return false;
4401 }
4402
4403 return true;
4404}
4405
4406static llvm::omp::ScheduleKind
4407convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind) {
4408 switch (ScheduleClauseKind) {
4409 case OMPC_SCHEDULE_unknown:
4410 return llvm::omp::OMP_SCHEDULE_Default;
4411 case OMPC_SCHEDULE_auto:
4412 return llvm::omp::OMP_SCHEDULE_Auto;
4413 case OMPC_SCHEDULE_dynamic:
4414 return llvm::omp::OMP_SCHEDULE_Dynamic;
4415 case OMPC_SCHEDULE_guided:
4416 return llvm::omp::OMP_SCHEDULE_Guided;
4417 case OMPC_SCHEDULE_runtime:
4418 return llvm::omp::OMP_SCHEDULE_Runtime;
4419 case OMPC_SCHEDULE_static:
4420 return llvm::omp::OMP_SCHEDULE_Static;
4421 }
4422 llvm_unreachable("Unhandled schedule kind");
4423}
4424
4425// Pass OMPLoopDirective (instead of OMPForDirective) to make this function
4426// available for "loop bind(parallel)", which maps to "for".
4427static void emitOMPForDirective(const OMPLoopDirective &S, CodeGenFunction &CGF,
4428 CodeGenModule &CGM, bool HasCancel) {
4429 bool HasLastprivates = false;
4430 bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder &&
4431 isForSupportedByOpenMPIRBuilder(S, HasCancel);
4432 auto &&CodeGen = [&S, &CGM, HasCancel, &HasLastprivates,
4433 UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) {
4434 // Use the OpenMPIRBuilder if enabled.
4435 if (UseOMPIRBuilder) {
4436 bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>();
4437
4438 llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default;
4439 llvm::Value *ChunkSize = nullptr;
4440 if (auto *SchedClause = S.getSingleClause<OMPScheduleClause>()) {
4441 SchedKind =
4442 convertClauseKindToSchedKind(ScheduleClauseKind: SchedClause->getScheduleKind());
4443 if (const Expr *ChunkSizeExpr = SchedClause->getChunkSize())
4444 ChunkSize = CGF.EmitScalarExpr(E: ChunkSizeExpr);
4445 }
4446
4447 // Emit the associated statement and get its loop representation.
4448 const Stmt *Inner = S.getRawStmt();
4449 llvm::CanonicalLoopInfo *CLI =
4450 CGF.EmitOMPCollapsedCanonicalLoopNest(S: Inner, Depth: 1);
4451
4452 llvm::OpenMPIRBuilder &OMPBuilder =
4453 CGM.getOpenMPRuntime().getOMPBuilder();
4454 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
4455 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
4456 cantFail(ValOrErr: OMPBuilder.applyWorkshareLoop(
4457 DL: CGF.Builder.getCurrentDebugLocation(), CLI, AllocaIP, NeedsBarrier,
4458 SchedKind, ChunkSize, /*HasSimdModifier=*/false,
4459 /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false,
4460 /*HasOrderedClause=*/false));
4461 return;
4462 }
4463
4464 HasLastprivates = emitWorksharingDirective(CGF, S, HasCancel);
4465 };
4466 {
4467 auto LPCRegion =
4468 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
4469 OMPLexicalScope Scope(CGF, S, OMPD_unknown);
4470 CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_for, CodeGen,
4471 HasCancel);
4472 }
4473
4474 if (!UseOMPIRBuilder) {
4475 // Emit an implicit barrier at the end.
4476 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
4477 CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc: S.getBeginLoc(), Kind: OMPD_for);
4478 }
4479 // Check for outer lastprivate conditional update.
4480 checkForLastprivateConditionalUpdate(CGF, S);
4481}
4482
4483void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
4484 return emitOMPForDirective(S, CGF&: *this, CGM, HasCancel: S.hasCancel());
4485}
4486
4487void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
4488 bool HasLastprivates = false;
4489 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
4490 PrePostActionTy &) {
4491 HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
4492 };
4493 {
4494 auto LPCRegion =
4495 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
4496 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4497 CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_simd, CodeGen);
4498 }
4499
4500 // Emit an implicit barrier at the end.
4501 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
4502 CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_for);
4503 // Check for outer lastprivate conditional update.
4504 checkForLastprivateConditionalUpdate(CGF&: *this, S);
4505}
4506
4507static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
4508 const Twine &Name,
4509 llvm::Value *Init = nullptr) {
4510 LValue LVal = CGF.MakeAddrLValue(Addr: CGF.CreateMemTemp(T: Ty, Name), T: Ty);
4511 if (Init)
4512 CGF.EmitStoreThroughLValue(Src: RValue::get(V: Init), Dst: LVal, /*isInit*/ true);
4513 return LVal;
4514}
4515
4516void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
4517 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
4518 const auto *CS = dyn_cast<CompoundStmt>(Val: CapturedStmt);
4519 bool HasLastprivates = false;
4520 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
4521 auto &&CodeGen = [&S, CapturedStmt, CS, EKind,
4522 &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
4523 const ASTContext &C = CGF.getContext();
4524 QualType KmpInt32Ty =
4525 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4526 // Emit helper vars inits.
4527 LValue LB = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.lb.",
4528 Init: CGF.Builder.getInt32(C: 0));
4529 llvm::ConstantInt *GlobalUBVal = CS != nullptr
4530 ? CGF.Builder.getInt32(C: CS->size() - 1)
4531 : CGF.Builder.getInt32(C: 0);
4532 LValue UB =
4533 createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.ub.", Init: GlobalUBVal);
4534 LValue ST = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.st.",
4535 Init: CGF.Builder.getInt32(C: 1));
4536 LValue IL = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.il.",
4537 Init: CGF.Builder.getInt32(C: 0));
4538 // Loop counter.
4539 LValue IV = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.iv.");
4540 OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
4541 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
4542 OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
4543 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
4544 // Generate condition for loop.
4545 BinaryOperator *Cond = BinaryOperator::Create(
4546 C, lhs: &IVRefExpr, rhs: &UBRefExpr, opc: BO_LE, ResTy: C.BoolTy, VK: VK_PRValue, OK: OK_Ordinary,
4547 opLoc: S.getBeginLoc(), FPFeatures: FPOptionsOverride());
4548 // Increment for loop counter.
4549 UnaryOperator *Inc = UnaryOperator::Create(
4550 C, input: &IVRefExpr, opc: UO_PreInc, type: KmpInt32Ty, VK: VK_PRValue, OK: OK_Ordinary,
4551 l: S.getBeginLoc(), CanOverflow: true, FPFeatures: FPOptionsOverride());
4552 auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
4553 // Iterate through all sections and emit a switch construct:
4554 // switch (IV) {
4555 // case 0:
4556 // <SectionStmt[0]>;
4557 // break;
4558 // ...
4559 // case <NumSection> - 1:
4560 // <SectionStmt[<NumSection> - 1]>;
4561 // break;
4562 // }
4563 // .omp.sections.exit:
4564 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: ".omp.sections.exit");
4565 llvm::SwitchInst *SwitchStmt =
4566 CGF.Builder.CreateSwitch(V: CGF.EmitLoadOfScalar(lvalue: IV, Loc: S.getBeginLoc()),
4567 Dest: ExitBB, NumCases: CS == nullptr ? 1 : CS->size());
4568 if (CS) {
4569 unsigned CaseNumber = 0;
4570 for (const Stmt *SubStmt : CS->children()) {
4571 auto CaseBB = CGF.createBasicBlock(name: ".omp.sections.case");
4572 CGF.EmitBlock(BB: CaseBB);
4573 SwitchStmt->addCase(OnVal: CGF.Builder.getInt32(C: CaseNumber), Dest: CaseBB);
4574 CGF.EmitStmt(S: SubStmt);
4575 CGF.EmitBranch(Block: ExitBB);
4576 ++CaseNumber;
4577 }
4578 } else {
4579 llvm::BasicBlock *CaseBB = CGF.createBasicBlock(name: ".omp.sections.case");
4580 CGF.EmitBlock(BB: CaseBB);
4581 SwitchStmt->addCase(OnVal: CGF.Builder.getInt32(C: 0), Dest: CaseBB);
4582 CGF.EmitStmt(S: CapturedStmt);
4583 CGF.EmitBranch(Block: ExitBB);
4584 }
4585 CGF.EmitBlock(BB: ExitBB, /*IsFinished=*/true);
4586 };
4587
4588 CodeGenFunction::OMPPrivateScope LoopScope(CGF);
4589 if (CGF.EmitOMPFirstprivateClause(D: S, PrivateScope&: LoopScope)) {
4590 // Emit implicit barrier to synchronize threads and avoid data races on
4591 // initialization of firstprivate variables and post-update of lastprivate
4592 // variables.
4593 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
4594 CGF, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false,
4595 /*ForceSimpleCall=*/true);
4596 }
4597 CGF.EmitOMPPrivateClause(D: S, PrivateScope&: LoopScope);
4598 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
4599 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope);
4600 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope&: LoopScope);
4601 (void)LoopScope.Privatize();
4602 if (isOpenMPTargetExecutionDirective(DKind: EKind))
4603 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S);
4604
4605 // Emit static non-chunked loop.
4606 OpenMPScheduleTy ScheduleKind;
4607 ScheduleKind.Schedule = OMPC_SCHEDULE_static;
4608 CGOpenMPRuntime::StaticRTInput StaticInit(
4609 /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
4610 LB.getAddress(), UB.getAddress(), ST.getAddress());
4611 CGF.CGM.getOpenMPRuntime().emitForStaticInit(CGF, Loc: S.getBeginLoc(), DKind: EKind,
4612 ScheduleKind, Values: StaticInit);
4613 // UB = min(UB, GlobalUB);
4614 llvm::Value *UBVal = CGF.EmitLoadOfScalar(lvalue: UB, Loc: S.getBeginLoc());
4615 llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
4616 C: CGF.Builder.CreateICmpSLT(LHS: UBVal, RHS: GlobalUBVal), True: UBVal, False: GlobalUBVal);
4617 CGF.EmitStoreOfScalar(value: MinUBGlobalUB, lvalue: UB);
4618 // IV = LB;
4619 CGF.EmitStoreOfScalar(value: CGF.EmitLoadOfScalar(lvalue: LB, Loc: S.getBeginLoc()), lvalue: IV);
4620 // while (idx <= UB) { BODY; ++idx; }
4621 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, LoopCond: Cond, IncExpr: Inc, BodyGen,
4622 PostIncGen: [](CodeGenFunction &) {});
4623 // Tell the runtime we are done.
4624 auto &&CodeGen = [&S](CodeGenFunction &CGF) {
4625 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, Loc: S.getEndLoc(),
4626 DKind: OMPD_sections);
4627 };
4628 CGF.OMPCancelStack.emitExit(CGF, Kind: EKind, CodeGen);
4629 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
4630 // Emit post-update of the reduction variables if IsLastIter != 0.
4631 emitPostUpdateForReductionClause(CGF, D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
4632 return CGF.Builder.CreateIsNotNull(
4633 Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
4634 });
4635
4636 // Emit final copy of the lastprivate variables if IsLastIter != 0.
4637 if (HasLastprivates)
4638 CGF.EmitOMPLastprivateClauseFinal(
4639 D: S, /*NoFinals=*/false,
4640 IsLastIterCond: CGF.Builder.CreateIsNotNull(
4641 Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc())));
4642 };
4643
4644 bool HasCancel = false;
4645 if (auto *OSD = dyn_cast<OMPSectionsDirective>(Val: &S))
4646 HasCancel = OSD->hasCancel();
4647 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(Val: &S))
4648 HasCancel = OPSD->hasCancel();
4649 OMPCancelStackRAII CancelRegion(*this, EKind, HasCancel);
4650 CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_sections, CodeGen,
4651 HasCancel);
4652 // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
4653 // clause. Otherwise the barrier will be generated by the codegen for the
4654 // directive.
4655 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
4656 // Emit implicit barrier to synchronize threads and avoid data races on
4657 // initialization of firstprivate variables.
4658 CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(),
4659 Kind: OMPD_unknown);
4660 }
4661}
4662
4663void CodeGenFunction::EmitOMPScopeDirective(const OMPScopeDirective &S) {
4664 {
4665 // Emit code for 'scope' region
4666 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4667 Action.Enter(CGF);
4668 OMPPrivateScope PrivateScope(CGF);
4669 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
4670 CGF.EmitOMPPrivateClause(D: S, PrivateScope);
4671 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
4672 (void)PrivateScope.Privatize();
4673 CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
4674 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
4675 };
4676 auto LPCRegion =
4677 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
4678 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4679 CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_scope, CodeGen);
4680 }
4681 // Emit an implicit barrier at the end.
4682 if (!S.getSingleClause<OMPNowaitClause>()) {
4683 CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_scope);
4684 }
4685 // Check for outer lastprivate conditional update.
4686 checkForLastprivateConditionalUpdate(CGF&: *this, S);
4687}
4688
4689void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
4690 if (CGM.getLangOpts().OpenMPIRBuilder) {
4691 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4692 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4693 using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
4694
4695 auto FiniCB = [](InsertPointTy IP) {
4696 // Don't FinalizeOMPRegion because this is done inside of OMPIRBuilder for
4697 // sections.
4698 return llvm::Error::success();
4699 };
4700
4701 const CapturedStmt *ICS = S.getInnermostCapturedStmt();
4702 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
4703 const auto *CS = dyn_cast<CompoundStmt>(Val: CapturedStmt);
4704 llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
4705 if (CS) {
4706 for (const Stmt *SubStmt : CS->children()) {
4707 auto SectionCB = [this, SubStmt](
4708 InsertPointTy AllocIP, InsertPointTy CodeGenIP,
4709 ArrayRef<llvm::BasicBlock *> DeallocBlocks) {
4710 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(CGF&: *this, RegionBodyStmt: SubStmt, AllocaIP: AllocIP,
4711 CodeGenIP, RegionName: "section");
4712 return llvm::Error::success();
4713 };
4714 SectionCBVector.push_back(Elt: SectionCB);
4715 }
4716 } else {
4717 auto SectionCB =
4718 [this, CapturedStmt](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
4719 ArrayRef<llvm::BasicBlock *> DeallocBlocks) {
4720 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4721 CGF&: *this, RegionBodyStmt: CapturedStmt, AllocaIP: AllocIP, CodeGenIP, RegionName: "section");
4722 return llvm::Error::success();
4723 };
4724 SectionCBVector.push_back(Elt: SectionCB);
4725 }
4726
4727 // Privatization callback that performs appropriate action for
4728 // shared/private/firstprivate/lastprivate/copyin/... variables.
4729 //
4730 // TODO: This defaults to shared right now.
4731 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
4732 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
4733 // The next line is appropriate only for variables (Val) with the
4734 // data-sharing attribute "shared".
4735 ReplVal = &Val;
4736
4737 return CodeGenIP;
4738 };
4739
4740 CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP);
4741 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
4742 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
4743 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
4744 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
4745 cantFail(ValOrErr: OMPBuilder.createSections(
4746 Loc: Builder, AllocaIP, SectionCBs: SectionCBVector, PrivCB, FiniCB, IsCancellable: S.hasCancel(),
4747 IsNowait: S.getSingleClause<OMPNowaitClause>()));
4748 Builder.restoreIP(IP: AfterIP);
4749 return;
4750 }
4751 {
4752 auto LPCRegion =
4753 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
4754 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4755 EmitSections(S);
4756 }
4757 // Emit an implicit barrier at the end.
4758 if (!S.getSingleClause<OMPNowaitClause>()) {
4759 CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(),
4760 Kind: OMPD_sections);
4761 }
4762 // Check for outer lastprivate conditional update.
4763 checkForLastprivateConditionalUpdate(CGF&: *this, S);
4764}
4765
4766void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
4767 if (CGM.getLangOpts().OpenMPIRBuilder) {
4768 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4769 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4770
4771 const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
4772 auto FiniCB = [this](InsertPointTy IP) {
4773 OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
4774 return llvm::Error::success();
4775 };
4776
4777 auto BodyGenCB = [SectionRegionBodyStmt,
4778 this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
4779 ArrayRef<llvm::BasicBlock *> DeallocBlocks) {
4780 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4781 CGF&: *this, RegionBodyStmt: SectionRegionBodyStmt, AllocaIP: AllocIP, CodeGenIP, RegionName: "section");
4782 return llvm::Error::success();
4783 };
4784
4785 LexicalScope Scope(*this, S.getSourceRange());
4786 EmitStopPoint(S: &S);
4787 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
4788 cantFail(ValOrErr: OMPBuilder.createSection(Loc: Builder, BodyGenCB, FiniCB));
4789 Builder.restoreIP(IP: AfterIP);
4790
4791 return;
4792 }
4793 LexicalScope Scope(*this, S.getSourceRange());
4794 EmitStopPoint(S: &S);
4795 EmitStmt(S: S.getAssociatedStmt());
4796}
4797
4798void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
4799 llvm::SmallVector<const Expr *, 8> CopyprivateVars;
4800 llvm::SmallVector<const Expr *, 8> DestExprs;
4801 llvm::SmallVector<const Expr *, 8> SrcExprs;
4802 llvm::SmallVector<const Expr *, 8> AssignmentOps;
4803 // Check if there are any 'copyprivate' clauses associated with this
4804 // 'single' construct.
4805 // Build a list of copyprivate variables along with helper expressions
4806 // (<source>, <destination>, <destination>=<source> expressions)
4807 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
4808 CopyprivateVars.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
4809 DestExprs.append(in_start: C->destination_exprs().begin(),
4810 in_end: C->destination_exprs().end());
4811 SrcExprs.append(in_start: C->source_exprs().begin(), in_end: C->source_exprs().end());
4812 AssignmentOps.append(in_start: C->assignment_ops().begin(),
4813 in_end: C->assignment_ops().end());
4814 }
4815 // Emit code for 'single' region along with 'copyprivate' clauses
4816 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4817 Action.Enter(CGF);
4818 OMPPrivateScope SingleScope(CGF);
4819 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope&: SingleScope);
4820 CGF.EmitOMPPrivateClause(D: S, PrivateScope&: SingleScope);
4821 (void)SingleScope.Privatize();
4822 CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
4823 };
4824 {
4825 auto LPCRegion =
4826 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
4827 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4828 CGM.getOpenMPRuntime().emitSingleRegion(CGF&: *this, SingleOpGen: CodeGen, Loc: S.getBeginLoc(),
4829 CopyprivateVars, DestExprs,
4830 SrcExprs, AssignmentOps);
4831 }
4832 // Emit an implicit barrier at the end (to avoid data race on firstprivate
4833 // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
4834 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
4835 CGM.getOpenMPRuntime().emitBarrierCall(
4836 CGF&: *this, Loc: S.getBeginLoc(),
4837 Kind: S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
4838 }
4839 // Check for outer lastprivate conditional update.
4840 checkForLastprivateConditionalUpdate(CGF&: *this, S);
4841}
4842
4843static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
4844 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4845 Action.Enter(CGF);
4846 CGF.EmitStmt(S: S.getRawStmt());
4847 };
4848 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: CodeGen, Loc: S.getBeginLoc());
4849}
4850
4851void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
4852 if (CGM.getLangOpts().OpenMPIRBuilder) {
4853 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4854 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4855
4856 const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt();
4857
4858 auto FiniCB = [this](InsertPointTy IP) {
4859 OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
4860 return llvm::Error::success();
4861 };
4862
4863 auto BodyGenCB = [MasterRegionBodyStmt,
4864 this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
4865 ArrayRef<llvm::BasicBlock *> DeallocBlocks) {
4866 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4867 CGF&: *this, RegionBodyStmt: MasterRegionBodyStmt, AllocaIP: AllocIP, CodeGenIP, RegionName: "master");
4868 return llvm::Error::success();
4869 };
4870
4871 LexicalScope Scope(*this, S.getSourceRange());
4872 EmitStopPoint(S: &S);
4873 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
4874 cantFail(ValOrErr: OMPBuilder.createMaster(Loc: Builder, BodyGenCB, FiniCB));
4875 Builder.restoreIP(IP: AfterIP);
4876
4877 return;
4878 }
4879 LexicalScope Scope(*this, S.getSourceRange());
4880 EmitStopPoint(S: &S);
4881 emitMaster(CGF&: *this, S);
4882}
4883
4884static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
4885 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4886 Action.Enter(CGF);
4887 CGF.EmitStmt(S: S.getRawStmt());
4888 };
4889 Expr *Filter = nullptr;
4890 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
4891 Filter = FilterClause->getThreadID();
4892 CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, MaskedOpGen: CodeGen, Loc: S.getBeginLoc(),
4893 Filter);
4894}
4895
4896void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) {
4897 if (CGM.getLangOpts().OpenMPIRBuilder) {
4898 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4899 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4900
4901 const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt();
4902 const Expr *Filter = nullptr;
4903 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
4904 Filter = FilterClause->getThreadID();
4905 llvm::Value *FilterVal = Filter
4906 ? EmitScalarExpr(E: Filter, IgnoreResultAssign: CGM.Int32Ty)
4907 : llvm::ConstantInt::get(Ty: CGM.Int32Ty, /*V=*/0);
4908
4909 auto FiniCB = [this](InsertPointTy IP) {
4910 OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
4911 return llvm::Error::success();
4912 };
4913
4914 auto BodyGenCB = [MaskedRegionBodyStmt,
4915 this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
4916 ArrayRef<llvm::BasicBlock *> DeallocBlocks) {
4917 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4918 CGF&: *this, RegionBodyStmt: MaskedRegionBodyStmt, AllocaIP: AllocIP, CodeGenIP, RegionName: "masked");
4919 return llvm::Error::success();
4920 };
4921
4922 LexicalScope Scope(*this, S.getSourceRange());
4923 EmitStopPoint(S: &S);
4924 llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(
4925 ValOrErr: OMPBuilder.createMasked(Loc: Builder, BodyGenCB, FiniCB, Filter: FilterVal));
4926 Builder.restoreIP(IP: AfterIP);
4927
4928 return;
4929 }
4930 LexicalScope Scope(*this, S.getSourceRange());
4931 EmitStopPoint(S: &S);
4932 emitMasked(CGF&: *this, S);
4933}
4934
4935void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
4936 if (CGM.getLangOpts().OpenMPIRBuilder) {
4937 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
4938 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
4939
4940 const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt();
4941 const Expr *Hint = nullptr;
4942 if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
4943 Hint = HintClause->getHint();
4944
4945 // TODO: This is slightly different from what's currently being done in
4946 // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
4947 // about typing is final.
4948 llvm::Value *HintInst = nullptr;
4949 if (Hint)
4950 HintInst =
4951 Builder.CreateIntCast(V: EmitScalarExpr(E: Hint), DestTy: CGM.Int32Ty, isSigned: false);
4952
4953 auto FiniCB = [this](InsertPointTy IP) {
4954 OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
4955 return llvm::Error::success();
4956 };
4957
4958 auto BodyGenCB = [CriticalRegionBodyStmt,
4959 this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
4960 ArrayRef<llvm::BasicBlock *> DeallocBlocks) {
4961 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
4962 CGF&: *this, RegionBodyStmt: CriticalRegionBodyStmt, AllocaIP: AllocIP, CodeGenIP, RegionName: "critical");
4963 return llvm::Error::success();
4964 };
4965
4966 LexicalScope Scope(*this, S.getSourceRange());
4967 EmitStopPoint(S: &S);
4968 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
4969 cantFail(ValOrErr: OMPBuilder.createCritical(Loc: Builder, BodyGenCB, FiniCB,
4970 CriticalName: S.getDirectiveName().getAsString(),
4971 HintInst));
4972 Builder.restoreIP(IP: AfterIP);
4973
4974 return;
4975 }
4976
4977 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4978 Action.Enter(CGF);
4979 CGF.EmitStmt(S: S.getAssociatedStmt());
4980 };
4981 const Expr *Hint = nullptr;
4982 if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
4983 Hint = HintClause->getHint();
4984 LexicalScope Scope(*this, S.getSourceRange());
4985 EmitStopPoint(S: &S);
4986 CGM.getOpenMPRuntime().emitCriticalRegion(CGF&: *this,
4987 CriticalName: S.getDirectiveName().getAsString(),
4988 CriticalOpGen: CodeGen, Loc: S.getBeginLoc(), Hint);
4989}
4990
4991void CodeGenFunction::EmitOMPParallelForDirective(
4992 const OMPParallelForDirective &S) {
4993 // Emit directive as a combined directive that consists of two implicit
4994 // directives: 'parallel' with 'for' directive.
4995 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4996 Action.Enter(CGF);
4997 emitOMPCopyinClause(CGF, S);
4998 (void)emitWorksharingDirective(CGF, S, HasCancel: S.hasCancel());
4999 };
5000 {
5001 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
5002 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
5003 CGCapturedStmtInfo CGSI(CR_OpenMP);
5004 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
5005 OMPLoopScope LoopScope(CGF, S);
5006 return CGF.EmitScalarExpr(E: S.getNumIterations());
5007 };
5008 bool IsInscan = llvm::any_of(Range: S.getClausesOfKind<OMPReductionClause>(),
5009 P: [](const OMPReductionClause *C) {
5010 return C->getModifier() == OMPC_REDUCTION_inscan;
5011 });
5012 if (IsInscan)
5013 emitScanBasedDirectiveDecls(CGF&: *this, S, NumIteratorsGen);
5014 auto LPCRegion =
5015 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
5016 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_for, CodeGen,
5017 CodeGenBoundParameters: emitEmptyBoundParameters);
5018 if (IsInscan)
5019 emitScanBasedDirectiveFinals(CGF&: *this, S, NumIteratorsGen);
5020 }
5021 // Check for outer lastprivate conditional update.
5022 checkForLastprivateConditionalUpdate(CGF&: *this, S);
5023}
5024
5025void CodeGenFunction::EmitOMPParallelForSimdDirective(
5026 const OMPParallelForSimdDirective &S) {
5027 // Emit directive as a combined directive that consists of two implicit
5028 // directives: 'parallel' with 'for' directive.
5029 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5030 Action.Enter(CGF);
5031 emitOMPCopyinClause(CGF, S);
5032 (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
5033 };
5034 {
5035 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
5036 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
5037 CGCapturedStmtInfo CGSI(CR_OpenMP);
5038 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
5039 OMPLoopScope LoopScope(CGF, S);
5040 return CGF.EmitScalarExpr(E: S.getNumIterations());
5041 };
5042 bool IsInscan = llvm::any_of(Range: S.getClausesOfKind<OMPReductionClause>(),
5043 P: [](const OMPReductionClause *C) {
5044 return C->getModifier() == OMPC_REDUCTION_inscan;
5045 });
5046 if (IsInscan)
5047 emitScanBasedDirectiveDecls(CGF&: *this, S, NumIteratorsGen);
5048 auto LPCRegion =
5049 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
5050 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_for_simd, CodeGen,
5051 CodeGenBoundParameters: emitEmptyBoundParameters);
5052 if (IsInscan)
5053 emitScanBasedDirectiveFinals(CGF&: *this, S, NumIteratorsGen);
5054 }
5055 // Check for outer lastprivate conditional update.
5056 checkForLastprivateConditionalUpdate(CGF&: *this, S);
5057}
5058
5059void CodeGenFunction::EmitOMPParallelMasterDirective(
5060 const OMPParallelMasterDirective &S) {
5061 // Emit directive as a combined directive that consists of two implicit
5062 // directives: 'parallel' with 'master' directive.
5063 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5064 Action.Enter(CGF);
5065 OMPPrivateScope PrivateScope(CGF);
5066 emitOMPCopyinClause(CGF, S);
5067 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
5068 CGF.EmitOMPPrivateClause(D: S, PrivateScope);
5069 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
5070 (void)PrivateScope.Privatize();
5071 emitMaster(CGF, S);
5072 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
5073 };
5074 {
5075 auto LPCRegion =
5076 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
5077 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_master, CodeGen,
5078 CodeGenBoundParameters: emitEmptyBoundParameters);
5079 emitPostUpdateForReductionClause(CGF&: *this, D: S,
5080 CondGen: [](CodeGenFunction &) { return nullptr; });
5081 }
5082 // Check for outer lastprivate conditional update.
5083 checkForLastprivateConditionalUpdate(CGF&: *this, S);
5084}
5085
5086void CodeGenFunction::EmitOMPParallelMaskedDirective(
5087 const OMPParallelMaskedDirective &S) {
5088 // Emit directive as a combined directive that consists of two implicit
5089 // directives: 'parallel' with 'masked' directive.
5090 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5091 Action.Enter(CGF);
5092 OMPPrivateScope PrivateScope(CGF);
5093 emitOMPCopyinClause(CGF, S);
5094 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
5095 CGF.EmitOMPPrivateClause(D: S, PrivateScope);
5096 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
5097 (void)PrivateScope.Privatize();
5098 emitMasked(CGF, S);
5099 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
5100 };
5101 {
5102 auto LPCRegion =
5103 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
5104 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_masked, CodeGen,
5105 CodeGenBoundParameters: emitEmptyBoundParameters);
5106 emitPostUpdateForReductionClause(CGF&: *this, D: S,
5107 CondGen: [](CodeGenFunction &) { return nullptr; });
5108 }
5109 // Check for outer lastprivate conditional update.
5110 checkForLastprivateConditionalUpdate(CGF&: *this, S);
5111}
5112
5113void CodeGenFunction::EmitOMPParallelSectionsDirective(
5114 const OMPParallelSectionsDirective &S) {
5115 // Emit directive as a combined directive that consists of two implicit
5116 // directives: 'parallel' with 'sections' directive.
5117 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5118 Action.Enter(CGF);
5119 emitOMPCopyinClause(CGF, S);
5120 CGF.EmitSections(S);
5121 };
5122 {
5123 auto LPCRegion =
5124 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
5125 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_sections, CodeGen,
5126 CodeGenBoundParameters: emitEmptyBoundParameters);
5127 }
5128 // Check for outer lastprivate conditional update.
5129 checkForLastprivateConditionalUpdate(CGF&: *this, S);
5130}
5131
5132namespace {
5133/// Get the list of variables declared in the context of the untied tasks.
5134class CheckVarsEscapingUntiedTaskDeclContext final
5135 : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> {
5136 llvm::SmallVector<const VarDecl *, 4> PrivateDecls;
5137
5138public:
5139 explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
5140 ~CheckVarsEscapingUntiedTaskDeclContext() = default;
5141 void VisitDeclStmt(const DeclStmt *S) {
5142 if (!S)
5143 return;
5144 // Need to privatize only local vars, static locals can be processed as is.
5145 for (const Decl *D : S->decls()) {
5146 if (const auto *VD = dyn_cast_or_null<VarDecl>(Val: D))
5147 if (VD->hasLocalStorage())
5148 PrivateDecls.push_back(Elt: VD);
5149 }
5150 }
5151 void VisitOMPExecutableDirective(const OMPExecutableDirective *) {}
5152 void VisitCapturedStmt(const CapturedStmt *) {}
5153 void VisitLambdaExpr(const LambdaExpr *) {}
5154 void VisitBlockExpr(const BlockExpr *) {}
5155 void VisitStmt(const Stmt *S) {
5156 if (!S)
5157 return;
5158 for (const Stmt *Child : S->children())
5159 if (Child)
5160 Visit(S: Child);
5161 }
5162
5163 /// Swaps list of vars with the provided one.
5164 ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
5165};
5166} // anonymous namespace
5167
5168static void buildDependences(const OMPExecutableDirective &S,
5169 OMPTaskDataTy &Data) {
5170
5171 // First look for 'omp_all_memory' and add this first.
5172 bool OmpAllMemory = false;
5173 if (llvm::any_of(
5174 Range: S.getClausesOfKind<OMPDependClause>(), P: [](const OMPDependClause *C) {
5175 return C->getDependencyKind() == OMPC_DEPEND_outallmemory ||
5176 C->getDependencyKind() == OMPC_DEPEND_inoutallmemory;
5177 })) {
5178 OmpAllMemory = true;
5179 // Since both OMPC_DEPEND_outallmemory and OMPC_DEPEND_inoutallmemory are
5180 // equivalent to the runtime, always use OMPC_DEPEND_outallmemory to
5181 // simplify.
5182 OMPTaskDataTy::DependData &DD =
5183 Data.Dependences.emplace_back(Args: OMPC_DEPEND_outallmemory,
5184 /*IteratorExpr=*/Args: nullptr);
5185 // Add a nullptr Expr to simplify the codegen in emitDependData.
5186 DD.DepExprs.push_back(Elt: nullptr);
5187 }
5188 // Add remaining dependences skipping any 'out' or 'inout' if they are
5189 // overridden by 'omp_all_memory'.
5190 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
5191 OpenMPDependClauseKind Kind = C->getDependencyKind();
5192 if (Kind == OMPC_DEPEND_outallmemory || Kind == OMPC_DEPEND_inoutallmemory)
5193 continue;
5194 if (OmpAllMemory && (Kind == OMPC_DEPEND_out || Kind == OMPC_DEPEND_inout))
5195 continue;
5196 OMPTaskDataTy::DependData &DD =
5197 Data.Dependences.emplace_back(Args: C->getDependencyKind(), Args: C->getModifier());
5198 DD.DepExprs.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
5199 }
5200}
5201
5202void CodeGenFunction::EmitOMPTaskBasedDirective(
5203 const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
5204 const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
5205 OMPTaskDataTy &Data) {
5206 // Emit outlined function for task construct.
5207 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: CapturedRegion);
5208 auto I = CS->getCapturedDecl()->param_begin();
5209 auto PartId = std::next(x: I);
5210 auto TaskT = std::next(x: I, n: 4);
5211 // Check if the task is final
5212 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
5213 // If the condition constant folds and can be elided, try to avoid emitting
5214 // the condition and the dead arm of the if/else.
5215 const Expr *Cond = Clause->getCondition();
5216 bool CondConstant;
5217 if (ConstantFoldsToSimpleInteger(Cond, Result&: CondConstant))
5218 Data.Final.setInt(CondConstant);
5219 else
5220 Data.Final.setPointer(EvaluateExprAsBool(E: Cond));
5221 } else {
5222 // By default the task is not final.
5223 Data.Final.setInt(/*IntVal=*/false);
5224 }
5225 // Check if the task has 'priority' clause.
5226 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
5227 const Expr *Prio = Clause->getPriority();
5228 Data.Priority.setInt(/*IntVal=*/true);
5229 Data.Priority.setPointer(EmitScalarConversion(
5230 Src: EmitScalarExpr(E: Prio), SrcTy: Prio->getType(),
5231 DstTy: getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
5232 Loc: Prio->getExprLoc()));
5233 }
5234 // The first function argument for tasks is a thread id, the second one is a
5235 // part id (0 for tied tasks, >=0 for untied task).
5236 llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
5237 // Get list of private variables.
5238 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
5239 auto IRef = C->varlist_begin();
5240 for (const Expr *IInit : C->private_copies()) {
5241 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
5242 if (EmittedAsPrivate.insert(V: OrigVD->getCanonicalDecl()).second) {
5243 Data.PrivateVars.push_back(Elt: *IRef);
5244 Data.PrivateCopies.push_back(Elt: IInit);
5245 }
5246 ++IRef;
5247 }
5248 }
5249 EmittedAsPrivate.clear();
5250 // Get list of firstprivate variables.
5251 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
5252 auto IRef = C->varlist_begin();
5253 auto IElemInitRef = C->inits().begin();
5254 for (const Expr *IInit : C->private_copies()) {
5255 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
5256 if (EmittedAsPrivate.insert(V: OrigVD->getCanonicalDecl()).second) {
5257 Data.FirstprivateVars.push_back(Elt: *IRef);
5258 Data.FirstprivateCopies.push_back(Elt: IInit);
5259 Data.FirstprivateInits.push_back(Elt: *IElemInitRef);
5260 }
5261 ++IRef;
5262 ++IElemInitRef;
5263 }
5264 }
5265 // Get list of lastprivate variables (for taskloops).
5266 llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
5267 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
5268 auto IRef = C->varlist_begin();
5269 auto ID = C->destination_exprs().begin();
5270 for (const Expr *IInit : C->private_copies()) {
5271 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRef)->getDecl());
5272 if (EmittedAsPrivate.insert(V: OrigVD->getCanonicalDecl()).second) {
5273 Data.LastprivateVars.push_back(Elt: *IRef);
5274 Data.LastprivateCopies.push_back(Elt: IInit);
5275 }
5276 LastprivateDstsOrigs.insert(
5277 KV: std::make_pair(x: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ID)->getDecl()),
5278 y: cast<DeclRefExpr>(Val: *IRef)));
5279 ++IRef;
5280 ++ID;
5281 }
5282 }
5283 SmallVector<const Expr *, 4> LHSs;
5284 SmallVector<const Expr *, 4> RHSs;
5285 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
5286 Data.ReductionVars.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
5287 Data.ReductionOrigs.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
5288 Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end());
5289 Data.ReductionOps.append(in_start: C->reduction_ops().begin(),
5290 in_end: C->reduction_ops().end());
5291 LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
5292 RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
5293 }
5294 Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
5295 CGF&: *this, Loc: S.getBeginLoc(), LHSExprs: LHSs, RHSExprs: RHSs, Data);
5296 // Build list of dependences.
5297 buildDependences(S, Data);
5298 // Get list of local vars for untied tasks.
5299 if (!Data.Tied) {
5300 CheckVarsEscapingUntiedTaskDeclContext Checker;
5301 Checker.Visit(S: S.getInnermostCapturedStmt()->getCapturedStmt());
5302 Data.PrivateLocals.append(in_start: Checker.getPrivateDecls().begin(),
5303 in_end: Checker.getPrivateDecls().end());
5304 }
5305 auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
5306 CapturedRegion](CodeGenFunction &CGF,
5307 PrePostActionTy &Action) {
5308 llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
5309 std::pair<Address, Address>>
5310 UntiedLocalVars;
5311 // Set proper addresses for generated private copies.
5312 OMPPrivateScope Scope(CGF);
5313 // Generate debug info for variables present in shared clause.
5314 if (auto *DI = CGF.getDebugInfo()) {
5315 llvm::SmallDenseMap<const VarDecl *, FieldDecl *> CaptureFields =
5316 CGF.CapturedStmtInfo->getCaptureFields();
5317 llvm::Value *ContextValue = CGF.CapturedStmtInfo->getContextValue();
5318 if (CaptureFields.size() && ContextValue) {
5319 unsigned CharWidth = CGF.getContext().getCharWidth();
5320 // The shared variables are packed together as members of structure.
5321 // So the address of each shared variable can be computed by adding
5322 // offset of it (within record) to the base address of record. For each
5323 // shared variable, debug intrinsic llvm.dbg.declare is generated with
5324 // appropriate expressions (DIExpression).
5325 // Ex:
5326 // %12 = load %struct.anon*, %struct.anon** %__context.addr.i
5327 // call void @llvm.dbg.declare(metadata %struct.anon* %12,
5328 // metadata !svar1,
5329 // metadata !DIExpression(DW_OP_deref))
5330 // call void @llvm.dbg.declare(metadata %struct.anon* %12,
5331 // metadata !svar2,
5332 // metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref))
5333 for (auto It = CaptureFields.begin(); It != CaptureFields.end(); ++It) {
5334 const VarDecl *SharedVar = It->first;
5335 RecordDecl *CaptureRecord = It->second->getParent();
5336 const ASTRecordLayout &Layout =
5337 CGF.getContext().getASTRecordLayout(D: CaptureRecord);
5338 unsigned Offset =
5339 Layout.getFieldOffset(FieldNo: It->second->getFieldIndex()) / CharWidth;
5340 if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
5341 (void)DI->EmitDeclareOfAutoVariable(Decl: SharedVar, AI: ContextValue,
5342 Builder&: CGF.Builder, UsePointerValue: false);
5343 // Get the call dbg.declare instruction we just created and update
5344 // its DIExpression to add offset to base address.
5345 auto UpdateExpr = [](llvm::LLVMContext &Ctx, auto *Declare,
5346 unsigned Offset) {
5347 SmallVector<uint64_t, 8> Ops;
5348 // Add offset to the base address if non zero.
5349 if (Offset) {
5350 Ops.push_back(Elt: llvm::dwarf::DW_OP_plus_uconst);
5351 Ops.push_back(Elt: Offset);
5352 }
5353 Ops.push_back(Elt: llvm::dwarf::DW_OP_deref);
5354 Declare->setExpression(llvm::DIExpression::get(Context&: Ctx, Elements: Ops));
5355 };
5356 llvm::Instruction &Last = CGF.Builder.GetInsertBlock()->back();
5357 if (auto DDI = dyn_cast<llvm::DbgVariableIntrinsic>(Val: &Last))
5358 UpdateExpr(DDI->getContext(), DDI, Offset);
5359 // If we're emitting using the new debug info format into a block
5360 // without a terminator, the record will be "trailing".
5361 assert(!Last.isTerminator() && "unexpected terminator");
5362 if (auto *Marker =
5363 CGF.Builder.GetInsertBlock()->getTrailingDbgRecords()) {
5364 for (llvm::DbgVariableRecord &DVR : llvm::reverse(
5365 C: llvm::filterDbgVars(R: Marker->getDbgRecordRange()))) {
5366 UpdateExpr(Last.getContext(), &DVR, Offset);
5367 break;
5368 }
5369 }
5370 }
5371 }
5372 }
5373 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
5374 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
5375 !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) {
5376 enum { PrivatesParam = 2, CopyFnParam = 3 };
5377 llvm::Value *CopyFn = CGF.Builder.CreateLoad(
5378 Addr: CGF.GetAddrOfLocalVar(VD: CS->getCapturedDecl()->getParam(i: CopyFnParam)));
5379 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(
5380 VD: CS->getCapturedDecl()->getParam(i: PrivatesParam)));
5381 // Map privates.
5382 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
5383 llvm::SmallVector<llvm::Value *, 16> CallArgs;
5384 llvm::SmallVector<llvm::Type *, 4> ParamTypes;
5385 CallArgs.push_back(Elt: PrivatesPtr);
5386 ParamTypes.push_back(Elt: PrivatesPtr->getType());
5387 for (const Expr *E : Data.PrivateVars) {
5388 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
5389 RawAddress PrivatePtr = CGF.CreateMemTempWithoutCast(
5390 T: CGF.getContext().getPointerType(T: E->getType()), Name: ".priv.ptr.addr");
5391 PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr);
5392 CallArgs.push_back(Elt: PrivatePtr.getPointer());
5393 ParamTypes.push_back(Elt: PrivatePtr.getType());
5394 }
5395 for (const Expr *E : Data.FirstprivateVars) {
5396 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
5397 RawAddress PrivatePtr = CGF.CreateMemTempWithoutCast(
5398 T: CGF.getContext().getPointerType(T: E->getType()),
5399 Name: ".firstpriv.ptr.addr");
5400 PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr);
5401 FirstprivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr);
5402 CallArgs.push_back(Elt: PrivatePtr.getPointer());
5403 ParamTypes.push_back(Elt: PrivatePtr.getType());
5404 }
5405 for (const Expr *E : Data.LastprivateVars) {
5406 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
5407 RawAddress PrivatePtr = CGF.CreateMemTempWithoutCast(
5408 T: CGF.getContext().getPointerType(T: E->getType()),
5409 Name: ".lastpriv.ptr.addr");
5410 PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr);
5411 CallArgs.push_back(Elt: PrivatePtr.getPointer());
5412 ParamTypes.push_back(Elt: PrivatePtr.getType());
5413 }
5414 for (const VarDecl *VD : Data.PrivateLocals) {
5415 QualType Ty = VD->getType().getNonReferenceType();
5416 if (VD->getType()->isLValueReferenceType())
5417 Ty = CGF.getContext().getPointerType(T: Ty);
5418 if (isAllocatableDecl(VD))
5419 Ty = CGF.getContext().getPointerType(T: Ty);
5420 RawAddress PrivatePtr = CGF.CreateMemTempWithoutCast(
5421 T: CGF.getContext().getPointerType(T: Ty), Name: ".local.ptr.addr");
5422 auto Result = UntiedLocalVars.insert(
5423 KV: std::make_pair(x&: VD, y: std::make_pair(x&: PrivatePtr, y: Address::invalid())));
5424 // If key exists update in place.
5425 if (Result.second == false)
5426 *Result.first = std::make_pair(
5427 x&: VD, y: std::make_pair(x&: PrivatePtr, y: Address::invalid()));
5428 CallArgs.push_back(Elt: PrivatePtr.getPointer());
5429 ParamTypes.push_back(Elt: PrivatePtr.getType());
5430 }
5431 auto *CopyFnTy = llvm::FunctionType::get(Result: CGF.Builder.getVoidTy(),
5432 Params: ParamTypes, /*isVarArg=*/false);
5433 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
5434 CGF, Loc: S.getBeginLoc(), OutlinedFn: {CopyFnTy, CopyFn}, Args: CallArgs);
5435 for (const auto &Pair : LastprivateDstsOrigs) {
5436 const auto *OrigVD = cast<VarDecl>(Val: Pair.second->getDecl());
5437 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
5438 /*RefersToEnclosingVariableOrCapture=*/
5439 CGF.CapturedStmtInfo->lookup(VD: OrigVD) != nullptr,
5440 Pair.second->getType(), VK_LValue,
5441 Pair.second->getExprLoc());
5442 Scope.addPrivate(LocalVD: Pair.first, Addr: CGF.EmitLValue(E: &DRE).getAddress());
5443 }
5444 for (const auto &Pair : PrivatePtrs) {
5445 Address Replacement = Address(
5446 CGF.Builder.CreateLoad(Addr: Pair.second),
5447 CGF.ConvertTypeForMem(T: Pair.first->getType().getNonReferenceType()),
5448 CGF.getContext().getDeclAlign(D: Pair.first));
5449 Scope.addPrivate(LocalVD: Pair.first, Addr: Replacement);
5450 if (auto *DI = CGF.getDebugInfo())
5451 if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
5452 (void)DI->EmitDeclareOfAutoVariable(
5453 Decl: Pair.first, AI: Pair.second.getBasePointer(), Builder&: CGF.Builder,
5454 /*UsePointerValue*/ true);
5455 }
5456 // Adjust mapping for internal locals by mapping actual memory instead of
5457 // a pointer to this memory.
5458 for (auto &Pair : UntiedLocalVars) {
5459 QualType VDType = Pair.first->getType().getNonReferenceType();
5460 if (Pair.first->getType()->isLValueReferenceType())
5461 VDType = CGF.getContext().getPointerType(T: VDType);
5462 if (isAllocatableDecl(VD: Pair.first)) {
5463 llvm::Value *Ptr = CGF.Builder.CreateLoad(Addr: Pair.second.first);
5464 Address Replacement(
5465 Ptr,
5466 CGF.ConvertTypeForMem(T: CGF.getContext().getPointerType(T: VDType)),
5467 CGF.getPointerAlign());
5468 Pair.second.first = Replacement;
5469 Ptr = CGF.Builder.CreateLoad(Addr: Replacement);
5470 Replacement = Address(Ptr, CGF.ConvertTypeForMem(T: VDType),
5471 CGF.getContext().getDeclAlign(D: Pair.first));
5472 Pair.second.second = Replacement;
5473 } else {
5474 llvm::Value *Ptr = CGF.Builder.CreateLoad(Addr: Pair.second.first);
5475 Address Replacement(Ptr, CGF.ConvertTypeForMem(T: VDType),
5476 CGF.getContext().getDeclAlign(D: Pair.first));
5477 Pair.second.first = Replacement;
5478 }
5479 }
5480 }
5481 if (Data.Reductions) {
5482 OMPPrivateScope FirstprivateScope(CGF);
5483 for (const auto &Pair : FirstprivatePtrs) {
5484 Address Replacement(
5485 CGF.Builder.CreateLoad(Addr: Pair.second),
5486 CGF.ConvertTypeForMem(T: Pair.first->getType().getNonReferenceType()),
5487 CGF.getContext().getDeclAlign(D: Pair.first));
5488 FirstprivateScope.addPrivate(LocalVD: Pair.first, Addr: Replacement);
5489 }
5490 (void)FirstprivateScope.Privatize();
5491 OMPLexicalScope LexScope(CGF, S, CapturedRegion);
5492 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
5493 Data.ReductionCopies, Data.ReductionOps);
5494 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
5495 Addr: CGF.GetAddrOfLocalVar(VD: CS->getCapturedDecl()->getParam(i: 9)));
5496 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
5497 RedCG.emitSharedOrigLValue(CGF, N: Cnt);
5498 RedCG.emitAggregateType(CGF, N: Cnt);
5499 // FIXME: This must removed once the runtime library is fixed.
5500 // Emit required threadprivate variables for
5501 // initializer/combiner/finalizer.
5502 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(),
5503 RCG&: RedCG, N: Cnt);
5504 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5505 CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt));
5506 Replacement = Address(
5507 CGF.EmitScalarConversion(Src: Replacement.emitRawPointer(CGF),
5508 SrcTy: CGF.getContext().VoidPtrTy,
5509 DstTy: CGF.getContext().getPointerType(
5510 T: Data.ReductionCopies[Cnt]->getType()),
5511 Loc: Data.ReductionCopies[Cnt]->getExprLoc()),
5512 CGF.ConvertTypeForMem(T: Data.ReductionCopies[Cnt]->getType()),
5513 Replacement.getAlignment());
5514 Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement);
5515 Scope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement);
5516 }
5517 }
5518 // Privatize all private variables except for in_reduction items.
5519 (void)Scope.Privatize();
5520 SmallVector<const Expr *, 4> InRedVars;
5521 SmallVector<const Expr *, 4> InRedPrivs;
5522 SmallVector<const Expr *, 4> InRedOps;
5523 SmallVector<const Expr *, 4> TaskgroupDescriptors;
5524 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
5525 auto IPriv = C->privates().begin();
5526 auto IRed = C->reduction_ops().begin();
5527 auto ITD = C->taskgroup_descriptors().begin();
5528 for (const Expr *Ref : C->varlist()) {
5529 InRedVars.emplace_back(Args&: Ref);
5530 InRedPrivs.emplace_back(Args: *IPriv);
5531 InRedOps.emplace_back(Args: *IRed);
5532 TaskgroupDescriptors.emplace_back(Args: *ITD);
5533 std::advance(i&: IPriv, n: 1);
5534 std::advance(i&: IRed, n: 1);
5535 std::advance(i&: ITD, n: 1);
5536 }
5537 }
5538 // Privatize in_reduction items here, because taskgroup descriptors must be
5539 // privatized earlier.
5540 OMPPrivateScope InRedScope(CGF);
5541 if (!InRedVars.empty()) {
5542 ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
5543 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
5544 RedCG.emitSharedOrigLValue(CGF, N: Cnt);
5545 RedCG.emitAggregateType(CGF, N: Cnt);
5546 // The taskgroup descriptor variable is always implicit firstprivate and
5547 // privatized already during processing of the firstprivates.
5548 // FIXME: This must removed once the runtime library is fixed.
5549 // Emit required threadprivate variables for
5550 // initializer/combiner/finalizer.
5551 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(),
5552 RCG&: RedCG, N: Cnt);
5553 llvm::Value *ReductionsPtr;
5554 if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
5555 ReductionsPtr = CGF.EmitLoadOfScalar(lvalue: CGF.EmitLValue(E: TRExpr),
5556 Loc: TRExpr->getExprLoc());
5557 } else {
5558 ReductionsPtr = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
5559 }
5560 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5561 CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt));
5562 Replacement = Address(
5563 CGF.EmitScalarConversion(
5564 Src: Replacement.emitRawPointer(CGF), SrcTy: CGF.getContext().VoidPtrTy,
5565 DstTy: CGF.getContext().getPointerType(T: InRedPrivs[Cnt]->getType()),
5566 Loc: InRedPrivs[Cnt]->getExprLoc()),
5567 CGF.ConvertTypeForMem(T: InRedPrivs[Cnt]->getType()),
5568 Replacement.getAlignment());
5569 Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement);
5570 InRedScope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement);
5571 }
5572 }
5573 (void)InRedScope.Privatize();
5574
5575 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF,
5576 UntiedLocalVars);
5577 Action.Enter(CGF);
5578 BodyGen(CGF);
5579 };
5580 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
5581 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
5582 D: S, ThreadIDVar: *I, PartIDVar: *PartId, TaskTVar: *TaskT, InnermostKind: EKind, CodeGen, Tied: Data.Tied, NumberOfParts&: Data.NumberOfParts);
5583 OMPLexicalScope Scope(*this, S, std::nullopt,
5584 !isOpenMPParallelDirective(DKind: EKind) &&
5585 !isOpenMPSimdDirective(DKind: EKind));
5586 TaskGen(*this, OutlinedFn, Data);
5587}
5588
5589static ImplicitParamDecl *
5590createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
5591 QualType Ty, CapturedDecl *CD,
5592 SourceLocation Loc) {
5593 auto *OrigVD = ImplicitParamDecl::Create(C, DC: CD, IdLoc: Loc, /*Id=*/nullptr, T: Ty,
5594 ParamKind: ImplicitParamKind::Other);
5595 auto *OrigRef = DeclRefExpr::Create(
5596 Context: C, QualifierLoc: NestedNameSpecifierLoc(), TemplateKWLoc: SourceLocation(), D: OrigVD,
5597 /*RefersToEnclosingVariableOrCapture=*/false, NameLoc: Loc, T: Ty, VK: VK_LValue);
5598 auto *PrivateVD = ImplicitParamDecl::Create(C, DC: CD, IdLoc: Loc, /*Id=*/nullptr, T: Ty,
5599 ParamKind: ImplicitParamKind::Other);
5600 auto *PrivateRef = DeclRefExpr::Create(
5601 Context: C, QualifierLoc: NestedNameSpecifierLoc(), TemplateKWLoc: SourceLocation(), D: PrivateVD,
5602 /*RefersToEnclosingVariableOrCapture=*/false, NameLoc: Loc, T: Ty, VK: VK_LValue);
5603 QualType ElemType = C.getBaseElementType(QT: Ty);
5604 auto *InitVD = ImplicitParamDecl::Create(C, DC: CD, IdLoc: Loc, /*Id=*/nullptr, T: ElemType,
5605 ParamKind: ImplicitParamKind::Other);
5606 auto *InitRef = DeclRefExpr::Create(
5607 Context: C, QualifierLoc: NestedNameSpecifierLoc(), TemplateKWLoc: SourceLocation(), D: InitVD,
5608 /*RefersToEnclosingVariableOrCapture=*/false, NameLoc: Loc, T: ElemType, VK: VK_LValue);
5609 PrivateVD->setInitStyle(VarDecl::CInit);
5610 PrivateVD->setInit(ImplicitCastExpr::Create(Context: C, T: ElemType, Kind: CK_LValueToRValue,
5611 Operand: InitRef, /*BasePath=*/nullptr,
5612 Cat: VK_PRValue, FPO: FPOptionsOverride()));
5613 Data.FirstprivateVars.emplace_back(Args&: OrigRef);
5614 Data.FirstprivateCopies.emplace_back(Args&: PrivateRef);
5615 Data.FirstprivateInits.emplace_back(Args&: InitRef);
5616 return OrigVD;
5617}
5618
5619void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
5620 const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
5621 OMPTargetDataInfo &InputInfo) {
5622 // Emit outlined function for task construct.
5623 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_task);
5624 Address CapturedStruct = GenerateCapturedStmtArgument(S: *CS);
5625 CanQualType SharedsTy =
5626 getContext().getCanonicalTagType(TD: CS->getCapturedRecordDecl());
5627 auto I = CS->getCapturedDecl()->param_begin();
5628 auto PartId = std::next(x: I);
5629 auto TaskT = std::next(x: I, n: 4);
5630 OMPTaskDataTy Data;
5631 // The task is not final.
5632 Data.Final.setInt(/*IntVal=*/false);
5633 // Get list of firstprivate variables.
5634 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
5635 auto IRef = C->varlist_begin();
5636 auto IElemInitRef = C->inits().begin();
5637 for (auto *IInit : C->private_copies()) {
5638 Data.FirstprivateVars.push_back(Elt: *IRef);
5639 Data.FirstprivateCopies.push_back(Elt: IInit);
5640 Data.FirstprivateInits.push_back(Elt: *IElemInitRef);
5641 ++IRef;
5642 ++IElemInitRef;
5643 }
5644 }
5645 SmallVector<const Expr *, 4> LHSs;
5646 SmallVector<const Expr *, 4> RHSs;
5647 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
5648 Data.ReductionVars.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
5649 Data.ReductionOrigs.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
5650 Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end());
5651 Data.ReductionOps.append(in_start: C->reduction_ops().begin(),
5652 in_end: C->reduction_ops().end());
5653 LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
5654 RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
5655 }
5656 OMPPrivateScope TargetScope(*this);
5657 VarDecl *BPVD = nullptr;
5658 VarDecl *PVD = nullptr;
5659 VarDecl *SVD = nullptr;
5660 VarDecl *MVD = nullptr;
5661 if (InputInfo.NumberOfTargetItems > 0) {
5662 auto *CD = CapturedDecl::Create(
5663 C&: getContext(), DC: getContext().getTranslationUnitDecl(), /*NumParams=*/0);
5664 llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
5665 QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
5666 EltTy: getContext().VoidPtrTy, ArySize: ArrSize, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal,
5667 /*IndexTypeQuals=*/0);
5668 BPVD = createImplicitFirstprivateForType(
5669 C&: getContext(), Data, Ty: BaseAndPointerAndMapperType, CD, Loc: S.getBeginLoc());
5670 PVD = createImplicitFirstprivateForType(
5671 C&: getContext(), Data, Ty: BaseAndPointerAndMapperType, CD, Loc: S.getBeginLoc());
5672 QualType SizesType = getContext().getConstantArrayType(
5673 EltTy: getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
5674 ArySize: ArrSize, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal,
5675 /*IndexTypeQuals=*/0);
5676 SVD = createImplicitFirstprivateForType(C&: getContext(), Data, Ty: SizesType, CD,
5677 Loc: S.getBeginLoc());
5678 TargetScope.addPrivate(LocalVD: BPVD, Addr: InputInfo.BasePointersArray);
5679 TargetScope.addPrivate(LocalVD: PVD, Addr: InputInfo.PointersArray);
5680 TargetScope.addPrivate(LocalVD: SVD, Addr: InputInfo.SizesArray);
5681 // If there is no user-defined mapper, the mapper array will be nullptr. In
5682 // this case, we don't need to privatize it.
5683 if (!isa_and_nonnull<llvm::ConstantPointerNull>(
5684 Val: InputInfo.MappersArray.emitRawPointer(CGF&: *this))) {
5685 MVD = createImplicitFirstprivateForType(
5686 C&: getContext(), Data, Ty: BaseAndPointerAndMapperType, CD, Loc: S.getBeginLoc());
5687 TargetScope.addPrivate(LocalVD: MVD, Addr: InputInfo.MappersArray);
5688 }
5689 }
5690 (void)TargetScope.Privatize();
5691 buildDependences(S, Data);
5692 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
5693 auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD, EKind,
5694 &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
5695 // Set proper addresses for generated private copies.
5696 OMPPrivateScope Scope(CGF);
5697 if (!Data.FirstprivateVars.empty()) {
5698 enum { PrivatesParam = 2, CopyFnParam = 3 };
5699 llvm::Value *CopyFn = CGF.Builder.CreateLoad(
5700 Addr: CGF.GetAddrOfLocalVar(VD: CS->getCapturedDecl()->getParam(i: CopyFnParam)));
5701 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(
5702 VD: CS->getCapturedDecl()->getParam(i: PrivatesParam)));
5703 // Map privates.
5704 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
5705 llvm::SmallVector<llvm::Value *, 16> CallArgs;
5706 llvm::SmallVector<llvm::Type *, 4> ParamTypes;
5707 CallArgs.push_back(Elt: PrivatesPtr);
5708 ParamTypes.push_back(Elt: PrivatesPtr->getType());
5709 for (const Expr *E : Data.FirstprivateVars) {
5710 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
5711 RawAddress PrivatePtr = CGF.CreateMemTempWithoutCast(
5712 T: CGF.getContext().getPointerType(T: E->getType()),
5713 Name: ".firstpriv.ptr.addr");
5714 PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr);
5715 CallArgs.push_back(Elt: PrivatePtr.getPointer());
5716 ParamTypes.push_back(Elt: PrivatePtr.getType());
5717 }
5718 auto *CopyFnTy = llvm::FunctionType::get(Result: CGF.Builder.getVoidTy(),
5719 Params: ParamTypes, /*isVarArg=*/false);
5720 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
5721 CGF, Loc: S.getBeginLoc(), OutlinedFn: {CopyFnTy, CopyFn}, Args: CallArgs);
5722 for (const auto &Pair : PrivatePtrs) {
5723 Address Replacement(
5724 CGF.Builder.CreateLoad(Addr: Pair.second),
5725 CGF.ConvertTypeForMem(T: Pair.first->getType().getNonReferenceType()),
5726 CGF.getContext().getDeclAlign(D: Pair.first));
5727 Scope.addPrivate(LocalVD: Pair.first, Addr: Replacement);
5728 }
5729 }
5730 CGF.processInReduction(S, Data, CGF, CS, Scope);
5731 if (InputInfo.NumberOfTargetItems > 0) {
5732 InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
5733 Addr: CGF.GetAddrOfLocalVar(VD: BPVD), /*Index=*/0);
5734 InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
5735 Addr: CGF.GetAddrOfLocalVar(VD: PVD), /*Index=*/0);
5736 InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
5737 Addr: CGF.GetAddrOfLocalVar(VD: SVD), /*Index=*/0);
5738 // If MVD is nullptr, the mapper array is not privatized
5739 if (MVD)
5740 InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP(
5741 Addr: CGF.GetAddrOfLocalVar(VD: MVD), /*Index=*/0);
5742 }
5743
5744 Action.Enter(CGF);
5745 OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
5746 auto *TL = S.getSingleClause<OMPThreadLimitClause>();
5747 if (CGF.CGM.getLangOpts().OpenMP >= 51 &&
5748 needsTaskBasedThreadLimit(DKind: EKind) && TL) {
5749 // Emit __kmpc_set_thread_limit() to set the thread_limit for the task
5750 // enclosing this target region. This will indirectly set the thread_limit
5751 // for every applicable construct within target region.
5752 CGF.CGM.getOpenMPRuntime().emitThreadLimitClause(
5753 CGF, ThreadLimit: TL->getThreadLimit().front(), Loc: S.getBeginLoc());
5754 }
5755 BodyGen(CGF);
5756 };
5757 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
5758 D: S, ThreadIDVar: *I, PartIDVar: *PartId, TaskTVar: *TaskT, InnermostKind: EKind, CodeGen, /*Tied=*/true,
5759 NumberOfParts&: Data.NumberOfParts);
5760 llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
5761 IntegerLiteral IfCond(getContext(), TrueOrFalse,
5762 getContext().getIntTypeForBitwidth(DestWidth: 32, /*Signed=*/0),
5763 SourceLocation());
5764 CGM.getOpenMPRuntime().emitTaskCall(CGF&: *this, Loc: S.getBeginLoc(), D: S, TaskFunction: OutlinedFn,
5765 SharedsTy, Shareds: CapturedStruct, IfCond: &IfCond, Data);
5766}
5767
5768void CodeGenFunction::processInReduction(const OMPExecutableDirective &S,
5769 OMPTaskDataTy &Data,
5770 CodeGenFunction &CGF,
5771 const CapturedStmt *CS,
5772 OMPPrivateScope &Scope) {
5773 OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
5774 if (Data.Reductions) {
5775 OpenMPDirectiveKind CapturedRegion = EKind;
5776 OMPLexicalScope LexScope(CGF, S, CapturedRegion);
5777 ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
5778 Data.ReductionCopies, Data.ReductionOps);
5779 llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
5780 Addr: CGF.GetAddrOfLocalVar(VD: CS->getCapturedDecl()->getParam(i: 4)));
5781 for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
5782 RedCG.emitSharedOrigLValue(CGF, N: Cnt);
5783 RedCG.emitAggregateType(CGF, N: Cnt);
5784 // FIXME: This must removed once the runtime library is fixed.
5785 // Emit required threadprivate variables for
5786 // initializer/combiner/finalizer.
5787 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(),
5788 RCG&: RedCG, N: Cnt);
5789 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5790 CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt));
5791 Replacement = Address(
5792 CGF.EmitScalarConversion(Src: Replacement.emitRawPointer(CGF),
5793 SrcTy: CGF.getContext().VoidPtrTy,
5794 DstTy: CGF.getContext().getPointerType(
5795 T: Data.ReductionCopies[Cnt]->getType()),
5796 Loc: Data.ReductionCopies[Cnt]->getExprLoc()),
5797 CGF.ConvertTypeForMem(T: Data.ReductionCopies[Cnt]->getType()),
5798 Replacement.getAlignment());
5799 Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement);
5800 Scope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement);
5801 }
5802 }
5803 (void)Scope.Privatize();
5804 SmallVector<const Expr *, 4> InRedVars;
5805 SmallVector<const Expr *, 4> InRedPrivs;
5806 SmallVector<const Expr *, 4> InRedOps;
5807 SmallVector<const Expr *, 4> TaskgroupDescriptors;
5808 for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
5809 auto IPriv = C->privates().begin();
5810 auto IRed = C->reduction_ops().begin();
5811 auto ITD = C->taskgroup_descriptors().begin();
5812 for (const Expr *Ref : C->varlist()) {
5813 InRedVars.emplace_back(Args&: Ref);
5814 InRedPrivs.emplace_back(Args: *IPriv);
5815 InRedOps.emplace_back(Args: *IRed);
5816 TaskgroupDescriptors.emplace_back(Args: *ITD);
5817 std::advance(i&: IPriv, n: 1);
5818 std::advance(i&: IRed, n: 1);
5819 std::advance(i&: ITD, n: 1);
5820 }
5821 }
5822 OMPPrivateScope InRedScope(CGF);
5823 if (!InRedVars.empty()) {
5824 ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
5825 for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
5826 RedCG.emitSharedOrigLValue(CGF, N: Cnt);
5827 RedCG.emitAggregateType(CGF, N: Cnt);
5828 // FIXME: This must removed once the runtime library is fixed.
5829 // Emit required threadprivate variables for
5830 // initializer/combiner/finalizer.
5831 CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(),
5832 RCG&: RedCG, N: Cnt);
5833 llvm::Value *ReductionsPtr;
5834 if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
5835 ReductionsPtr =
5836 CGF.EmitLoadOfScalar(lvalue: CGF.EmitLValue(E: TRExpr), Loc: TRExpr->getExprLoc());
5837 } else {
5838 ReductionsPtr = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
5839 }
5840 Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
5841 CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt));
5842 Replacement = Address(
5843 CGF.EmitScalarConversion(
5844 Src: Replacement.emitRawPointer(CGF), SrcTy: CGF.getContext().VoidPtrTy,
5845 DstTy: CGF.getContext().getPointerType(T: InRedPrivs[Cnt]->getType()),
5846 Loc: InRedPrivs[Cnt]->getExprLoc()),
5847 CGF.ConvertTypeForMem(T: InRedPrivs[Cnt]->getType()),
5848 Replacement.getAlignment());
5849 Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement);
5850 InRedScope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement);
5851 }
5852 }
5853 (void)InRedScope.Privatize();
5854}
5855
5856void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
5857 // Emit outlined function for task construct.
5858 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_task);
5859 Address CapturedStruct = GenerateCapturedStmtArgument(S: *CS);
5860 CanQualType SharedsTy =
5861 getContext().getCanonicalTagType(TD: CS->getCapturedRecordDecl());
5862 const Expr *IfCond = nullptr;
5863 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
5864 if (C->getNameModifier() == OMPD_unknown ||
5865 C->getNameModifier() == OMPD_task) {
5866 IfCond = C->getCondition();
5867 break;
5868 }
5869 }
5870
5871 OMPTaskDataTy Data;
5872 // Check if we should emit tied or untied task.
5873 Data.Tied = !S.getSingleClause<OMPUntiedClause>();
5874 auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
5875 CGF.EmitStmt(S: CS->getCapturedStmt());
5876 };
5877 auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
5878 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
5879 const OMPTaskDataTy &Data) {
5880 CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, Loc: S.getBeginLoc(), D: S, TaskFunction: OutlinedFn,
5881 SharedsTy, Shareds: CapturedStruct, IfCond,
5882 Data);
5883 };
5884 auto LPCRegion =
5885 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
5886 EmitOMPTaskBasedDirective(S, CapturedRegion: OMPD_task, BodyGen, TaskGen, Data);
5887}
5888
5889void CodeGenFunction::EmitOMPTaskyieldDirective(
5890 const OMPTaskyieldDirective &S) {
5891 CGM.getOpenMPRuntime().emitTaskyieldCall(CGF&: *this, Loc: S.getBeginLoc());
5892}
5893
5894void CodeGenFunction::EmitOMPErrorDirective(const OMPErrorDirective &S) {
5895 const OMPMessageClause *MC = S.getSingleClause<OMPMessageClause>();
5896 Expr *ME = MC ? MC->getMessageString() : nullptr;
5897 const OMPSeverityClause *SC = S.getSingleClause<OMPSeverityClause>();
5898 bool IsFatal = false;
5899 if (!SC || SC->getSeverityKind() == OMPC_SEVERITY_fatal)
5900 IsFatal = true;
5901 CGM.getOpenMPRuntime().emitErrorCall(CGF&: *this, Loc: S.getBeginLoc(), ME, IsFatal);
5902}
5903
5904void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
5905 CGM.getOpenMPRuntime().emitBarrierCall(CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_barrier);
5906}
5907
5908void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
5909 OMPTaskDataTy Data;
5910 // Build list of dependences
5911 buildDependences(S, Data);
5912 Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();
5913 CGM.getOpenMPRuntime().emitTaskwaitCall(CGF&: *this, Loc: S.getBeginLoc(), Data);
5914}
5915
5916static bool isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective &T) {
5917 return T.clauses().empty();
5918}
5919
5920void CodeGenFunction::EmitOMPTaskgroupDirective(
5921 const OMPTaskgroupDirective &S) {
5922 OMPLexicalScope Scope(*this, S, OMPD_unknown);
5923 if (CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(T: S)) {
5924 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
5925 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
5926 InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
5927 AllocaInsertPt->getIterator());
5928
5929 auto BodyGenCB = [&, this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
5930 ArrayRef<llvm::BasicBlock *> DeallocBlocks) {
5931 Builder.restoreIP(IP: CodeGenIP);
5932 EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
5933 return llvm::Error::success();
5934 };
5935 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
5936 if (!CapturedStmtInfo)
5937 CapturedStmtInfo = &CapStmtInfo;
5938 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
5939 cantFail(ValOrErr: OMPBuilder.createTaskgroup(Loc: Builder, AllocaIP,
5940 /*DeallocBlocks=*/{}, BodyGenCB));
5941 Builder.restoreIP(IP: AfterIP);
5942 return;
5943 }
5944 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5945 Action.Enter(CGF);
5946 if (const Expr *E = S.getReductionRef()) {
5947 SmallVector<const Expr *, 4> LHSs;
5948 SmallVector<const Expr *, 4> RHSs;
5949 OMPTaskDataTy Data;
5950 for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
5951 Data.ReductionVars.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
5952 Data.ReductionOrigs.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
5953 Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end());
5954 Data.ReductionOps.append(in_start: C->reduction_ops().begin(),
5955 in_end: C->reduction_ops().end());
5956 LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
5957 RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
5958 }
5959 llvm::Value *ReductionDesc =
5960 CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, Loc: S.getBeginLoc(),
5961 LHSExprs: LHSs, RHSExprs: RHSs, Data);
5962 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
5963 CGF.EmitVarDecl(D: *VD);
5964 CGF.EmitStoreOfScalar(Value: ReductionDesc, Addr: CGF.GetAddrOfLocalVar(VD),
5965 /*Volatile=*/false, Ty: E->getType());
5966 }
5967 CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
5968 };
5969 CGM.getOpenMPRuntime().emitTaskgroupRegion(CGF&: *this, TaskgroupOpGen: CodeGen, Loc: S.getBeginLoc());
5970}
5971
5972void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
5973 llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
5974 ? llvm::AtomicOrdering::NotAtomic
5975 : llvm::AtomicOrdering::AcquireRelease;
5976 CGM.getOpenMPRuntime().emitFlush(
5977 CGF&: *this,
5978 Vars: [&S]() -> ArrayRef<const Expr *> {
5979 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
5980 return llvm::ArrayRef(FlushClause->varlist_begin(),
5981 FlushClause->varlist_end());
5982 return {};
5983 }(),
5984 Loc: S.getBeginLoc(), AO);
5985}
5986
5987void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
5988 const auto *DO = S.getSingleClause<OMPDepobjClause>();
5989 LValue DOLVal = EmitLValue(E: DO->getDepobj());
5990 if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
5991 // Build list and emit dependences
5992 OMPTaskDataTy Data;
5993 buildDependences(S, Data);
5994 for (auto &Dep : Data.Dependences) {
5995 Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
5996 CGF&: *this, Dependencies: Dep, Loc: DC->getBeginLoc());
5997 EmitStoreOfScalar(value: DepAddr.emitRawPointer(CGF&: *this), lvalue: DOLVal);
5998 }
5999 return;
6000 }
6001 if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
6002 CGM.getOpenMPRuntime().emitDestroyClause(CGF&: *this, DepobjLVal: DOLVal, Loc: DC->getBeginLoc());
6003 return;
6004 }
6005 if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
6006 CGM.getOpenMPRuntime().emitUpdateClause(
6007 CGF&: *this, DepobjLVal: DOLVal, NewDepKind: UC->getDependencyKind(), Loc: UC->getBeginLoc());
6008 return;
6009 }
6010}
6011
6012void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
6013 if (!OMPParentLoopDirectiveForScan)
6014 return;
6015 const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
6016 bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
6017 SmallVector<const Expr *, 4> Shareds;
6018 SmallVector<const Expr *, 4> Privates;
6019 SmallVector<const Expr *, 4> LHSs;
6020 SmallVector<const Expr *, 4> RHSs;
6021 SmallVector<const Expr *, 4> ReductionOps;
6022 SmallVector<const Expr *, 4> CopyOps;
6023 SmallVector<const Expr *, 4> CopyArrayTemps;
6024 SmallVector<const Expr *, 4> CopyArrayElems;
6025 for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
6026 if (C->getModifier() != OMPC_REDUCTION_inscan)
6027 continue;
6028 Shareds.append(in_start: C->varlist_begin(), in_end: C->varlist_end());
6029 Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
6030 LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
6031 RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
6032 ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
6033 CopyOps.append(in_start: C->copy_ops().begin(), in_end: C->copy_ops().end());
6034 CopyArrayTemps.append(in_start: C->copy_array_temps().begin(),
6035 in_end: C->copy_array_temps().end());
6036 CopyArrayElems.append(in_start: C->copy_array_elems().begin(),
6037 in_end: C->copy_array_elems().end());
6038 }
6039 if (ParentDir.getDirectiveKind() == OMPD_simd ||
6040 (getLangOpts().OpenMPSimd &&
6041 isOpenMPSimdDirective(DKind: ParentDir.getDirectiveKind()))) {
6042 // For simd directive and simd-based directives in simd only mode, use the
6043 // following codegen:
6044 // int x = 0;
6045 // #pragma omp simd reduction(inscan, +: x)
6046 // for (..) {
6047 // <first part>
6048 // #pragma omp scan inclusive(x)
6049 // <second part>
6050 // }
6051 // is transformed to:
6052 // int x = 0;
6053 // for (..) {
6054 // int x_priv = 0;
6055 // <first part>
6056 // x = x_priv + x;
6057 // x_priv = x;
6058 // <second part>
6059 // }
6060 // and
6061 // int x = 0;
6062 // #pragma omp simd reduction(inscan, +: x)
6063 // for (..) {
6064 // <first part>
6065 // #pragma omp scan exclusive(x)
6066 // <second part>
6067 // }
6068 // to
6069 // int x = 0;
6070 // for (..) {
6071 // int x_priv = 0;
6072 // <second part>
6073 // int temp = x;
6074 // x = x_priv + x;
6075 // x_priv = temp;
6076 // <first part>
6077 // }
6078 llvm::BasicBlock *OMPScanReduce = createBasicBlock(name: "omp.inscan.reduce");
6079 EmitBranch(Block: IsInclusive
6080 ? OMPScanReduce
6081 : BreakContinueStack.back().ContinueBlock.getBlock());
6082 EmitBlock(BB: OMPScanDispatch);
6083 {
6084 // New scope for correct construction/destruction of temp variables for
6085 // exclusive scan.
6086 LexicalScope Scope(*this, S.getSourceRange());
6087 EmitBranch(Block: IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
6088 EmitBlock(BB: OMPScanReduce);
6089 if (!IsInclusive) {
6090 // Create temp var and copy LHS value to this temp value.
6091 // TMP = LHS;
6092 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
6093 const Expr *PrivateExpr = Privates[I];
6094 const Expr *TempExpr = CopyArrayTemps[I];
6095 EmitAutoVarDecl(
6096 D: *cast<VarDecl>(Val: cast<DeclRefExpr>(Val: TempExpr)->getDecl()));
6097 LValue DestLVal = EmitLValue(E: TempExpr);
6098 LValue SrcLVal = EmitLValue(E: LHSs[I]);
6099 EmitOMPCopy(OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(),
6100 SrcAddr: SrcLVal.getAddress(),
6101 DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
6102 SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()),
6103 Copy: CopyOps[I]);
6104 }
6105 }
6106 CGM.getOpenMPRuntime().emitReduction(
6107 CGF&: *this, Loc: ParentDir.getEndLoc(), Privates, LHSExprs: LHSs, RHSExprs: RHSs, ReductionOps,
6108 Options: {/*WithNowait=*/true, /*SimpleReduction=*/true,
6109 /*IsPrivateVarReduction*/ {}, .ReductionKind: OMPD_simd});
6110 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
6111 const Expr *PrivateExpr = Privates[I];
6112 LValue DestLVal;
6113 LValue SrcLVal;
6114 if (IsInclusive) {
6115 DestLVal = EmitLValue(E: RHSs[I]);
6116 SrcLVal = EmitLValue(E: LHSs[I]);
6117 } else {
6118 const Expr *TempExpr = CopyArrayTemps[I];
6119 DestLVal = EmitLValue(E: RHSs[I]);
6120 SrcLVal = EmitLValue(E: TempExpr);
6121 }
6122 EmitOMPCopy(
6123 OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), SrcAddr: SrcLVal.getAddress(),
6124 DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
6125 SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), Copy: CopyOps[I]);
6126 }
6127 }
6128 EmitBranch(Block: IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
6129 OMPScanExitBlock = IsInclusive
6130 ? BreakContinueStack.back().ContinueBlock.getBlock()
6131 : OMPScanReduce;
6132 EmitBlock(BB: OMPAfterScanBlock);
6133 return;
6134 }
6135 if (!IsInclusive) {
6136 EmitBranch(Block: BreakContinueStack.back().ContinueBlock.getBlock());
6137 EmitBlock(BB: OMPScanExitBlock);
6138 }
6139 if (OMPFirstScanLoop) {
6140 // Emit buffer[i] = red; at the end of the input phase.
6141 const auto *IVExpr = cast<OMPLoopDirective>(Val: ParentDir)
6142 .getIterationVariable()
6143 ->IgnoreParenImpCasts();
6144 LValue IdxLVal = EmitLValue(E: IVExpr);
6145 llvm::Value *IdxVal = EmitLoadOfScalar(lvalue: IdxLVal, Loc: IVExpr->getExprLoc());
6146 IdxVal = Builder.CreateIntCast(V: IdxVal, DestTy: SizeTy, /*isSigned=*/false);
6147 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
6148 const Expr *PrivateExpr = Privates[I];
6149 const Expr *OrigExpr = Shareds[I];
6150 const Expr *CopyArrayElem = CopyArrayElems[I];
6151 OpaqueValueMapping IdxMapping(
6152 *this,
6153 cast<OpaqueValueExpr>(
6154 Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
6155 RValue::get(V: IdxVal));
6156 LValue DestLVal = EmitLValue(E: CopyArrayElem);
6157 LValue SrcLVal = EmitLValue(E: OrigExpr);
6158 EmitOMPCopy(
6159 OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), SrcAddr: SrcLVal.getAddress(),
6160 DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
6161 SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), Copy: CopyOps[I]);
6162 }
6163 }
6164 EmitBranch(Block: BreakContinueStack.back().ContinueBlock.getBlock());
6165 if (IsInclusive) {
6166 EmitBlock(BB: OMPScanExitBlock);
6167 EmitBranch(Block: BreakContinueStack.back().ContinueBlock.getBlock());
6168 }
6169 EmitBlock(BB: OMPScanDispatch);
6170 if (!OMPFirstScanLoop) {
6171 // Emit red = buffer[i]; at the entrance to the scan phase.
6172 const auto *IVExpr = cast<OMPLoopDirective>(Val: ParentDir)
6173 .getIterationVariable()
6174 ->IgnoreParenImpCasts();
6175 LValue IdxLVal = EmitLValue(E: IVExpr);
6176 llvm::Value *IdxVal = EmitLoadOfScalar(lvalue: IdxLVal, Loc: IVExpr->getExprLoc());
6177 IdxVal = Builder.CreateIntCast(V: IdxVal, DestTy: SizeTy, /*isSigned=*/false);
6178 llvm::BasicBlock *ExclusiveExitBB = nullptr;
6179 if (!IsInclusive) {
6180 llvm::BasicBlock *ContBB = createBasicBlock(name: "omp.exclusive.dec");
6181 ExclusiveExitBB = createBasicBlock(name: "omp.exclusive.copy.exit");
6182 llvm::Value *Cmp = Builder.CreateIsNull(Arg: IdxVal);
6183 Builder.CreateCondBr(Cond: Cmp, True: ExclusiveExitBB, False: ContBB);
6184 EmitBlock(BB: ContBB);
6185 // Use idx - 1 iteration for exclusive scan.
6186 IdxVal = Builder.CreateNUWSub(LHS: IdxVal, RHS: llvm::ConstantInt::get(Ty: SizeTy, V: 1));
6187 }
6188 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
6189 const Expr *PrivateExpr = Privates[I];
6190 const Expr *OrigExpr = Shareds[I];
6191 const Expr *CopyArrayElem = CopyArrayElems[I];
6192 OpaqueValueMapping IdxMapping(
6193 *this,
6194 cast<OpaqueValueExpr>(
6195 Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
6196 RValue::get(V: IdxVal));
6197 LValue SrcLVal = EmitLValue(E: CopyArrayElem);
6198 LValue DestLVal = EmitLValue(E: OrigExpr);
6199 EmitOMPCopy(
6200 OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(), SrcAddr: SrcLVal.getAddress(),
6201 DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
6202 SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), Copy: CopyOps[I]);
6203 }
6204 if (!IsInclusive) {
6205 EmitBlock(BB: ExclusiveExitBB);
6206 }
6207 }
6208 EmitBranch(Block: (OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
6209 : OMPAfterScanBlock);
6210 EmitBlock(BB: OMPAfterScanBlock);
6211}
6212
6213void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
6214 const CodeGenLoopTy &CodeGenLoop,
6215 Expr *IncExpr) {
6216 // Emit the loop iteration variable.
6217 const auto *IVExpr = cast<DeclRefExpr>(Val: S.getIterationVariable());
6218 const auto *IVDecl = cast<VarDecl>(Val: IVExpr->getDecl());
6219 EmitVarDecl(D: *IVDecl);
6220
6221 // Emit the iterations count variable.
6222 // If it is not a variable, Sema decided to calculate iterations count on each
6223 // iteration (e.g., it is foldable into a constant).
6224 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) {
6225 EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl()));
6226 // Emit calculation of the iterations count.
6227 EmitIgnoredExpr(E: S.getCalcLastIteration());
6228 }
6229
6230 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
6231
6232 bool HasLastprivateClause = false;
6233 // Check pre-condition.
6234 {
6235 OMPLoopScope PreInitScope(*this, S);
6236 // Skip the entire loop if we don't meet the precondition.
6237 // If the condition constant folds and can be elided, avoid emitting the
6238 // whole loop.
6239 bool CondConstant;
6240 llvm::BasicBlock *ContBlock = nullptr;
6241 if (ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) {
6242 if (!CondConstant)
6243 return;
6244 } else {
6245 llvm::BasicBlock *ThenBlock = createBasicBlock(name: "omp.precond.then");
6246 ContBlock = createBasicBlock(name: "omp.precond.end");
6247 emitPreCond(CGF&: *this, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock,
6248 TrueCount: getProfileCount(S: &S));
6249 EmitBlock(BB: ThenBlock);
6250 incrementProfileCounter(S: &S);
6251 }
6252
6253 emitAlignedClause(CGF&: *this, D: S);
6254 // Emit 'then' code.
6255 {
6256 // Emit helper vars inits.
6257
6258 LValue LB = EmitOMPHelperVar(
6259 CGF&: *this, Helper: cast<DeclRefExpr>(
6260 Val: (isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
6261 ? S.getCombinedLowerBoundVariable()
6262 : S.getLowerBoundVariable())));
6263 LValue UB = EmitOMPHelperVar(
6264 CGF&: *this, Helper: cast<DeclRefExpr>(
6265 Val: (isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
6266 ? S.getCombinedUpperBoundVariable()
6267 : S.getUpperBoundVariable())));
6268 LValue ST =
6269 EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getStrideVariable()));
6270 LValue IL =
6271 EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getIsLastIterVariable()));
6272
6273 OMPPrivateScope LoopScope(*this);
6274 if (EmitOMPFirstprivateClause(D: S, PrivateScope&: LoopScope)) {
6275 // Emit implicit barrier to synchronize threads and avoid data races
6276 // on initialization of firstprivate variables and post-update of
6277 // lastprivate variables.
6278 CGM.getOpenMPRuntime().emitBarrierCall(
6279 CGF&: *this, Loc: S.getBeginLoc(), Kind: OMPD_unknown, /*EmitChecks=*/false,
6280 /*ForceSimpleCall=*/true);
6281 }
6282 EmitOMPPrivateClause(D: S, PrivateScope&: LoopScope);
6283 if (isOpenMPSimdDirective(DKind: S.getDirectiveKind()) &&
6284 !isOpenMPParallelDirective(DKind: S.getDirectiveKind()) &&
6285 !isOpenMPTeamsDirective(DKind: S.getDirectiveKind()))
6286 EmitOMPReductionClauseInit(D: S, PrivateScope&: LoopScope);
6287 HasLastprivateClause = EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope);
6288 EmitOMPPrivateLoopCounters(S, LoopScope);
6289 (void)LoopScope.Privatize();
6290 if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()))
6291 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF&: *this, D: S);
6292
6293 // Detect the distribute schedule kind and chunk.
6294 llvm::Value *Chunk = nullptr;
6295 OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
6296 if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
6297 ScheduleKind = C->getDistScheduleKind();
6298 if (const Expr *Ch = C->getChunkSize()) {
6299 Chunk = EmitScalarExpr(E: Ch);
6300 Chunk = EmitScalarConversion(Src: Chunk, SrcTy: Ch->getType(),
6301 DstTy: S.getIterationVariable()->getType(),
6302 Loc: S.getBeginLoc());
6303 }
6304 } else {
6305 // Default behaviour for dist_schedule clause.
6306 CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
6307 CGF&: *this, S, ScheduleKind, Chunk);
6308 }
6309 const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType());
6310 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
6311
6312 // GPU fused schedule: omit the outer distribute loop and let the inner
6313 // worksharing loop schedule the flattened team/thread iteration space.
6314 if (canEmitGPUFusedDistSchedule(CGM, S, DKind: S.getDirectiveKind())) {
6315 JumpDest LoopExit =
6316 getJumpDestInCurrentScope(Target: createBasicBlock(name: "omp.loop.exit"));
6317 CodeGenLoop(*this, S, LoopExit);
6318 EmitBlock(BB: LoopExit.getBlock());
6319 } else {
6320 // OpenMP [2.10.8, distribute Construct, Description]
6321 // If dist_schedule is specified, kind must be static. If specified,
6322 // iterations are divided into chunks of size chunk_size, chunks are
6323 // assigned to the teams of the league in a round-robin fashion in the
6324 // order of the team number. When no chunk_size is specified, the
6325 // iteration space is divided into chunks that are approximately equal
6326 // in size, and at most one chunk is distributed to each team of the
6327 // league. The size of the chunks is unspecified in this case.
6328 bool StaticChunked =
6329 RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
6330 isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind());
6331 if (RT.isStaticNonchunked(ScheduleKind,
6332 /* Chunked */ Chunk != nullptr) ||
6333 StaticChunked) {
6334 CGOpenMPRuntime::StaticRTInput StaticInit(
6335 IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
6336 LB.getAddress(), UB.getAddress(), ST.getAddress(),
6337 StaticChunked ? Chunk : nullptr);
6338 RT.emitDistributeStaticInit(CGF&: *this, Loc: S.getBeginLoc(), SchedKind: ScheduleKind,
6339 Values: StaticInit);
6340 JumpDest LoopExit =
6341 getJumpDestInCurrentScope(Target: createBasicBlock(name: "omp.loop.exit"));
6342 // UB = min(UB, GlobalUB);
6343 EmitIgnoredExpr(
6344 E: isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
6345 ? S.getCombinedEnsureUpperBound()
6346 : S.getEnsureUpperBound());
6347 // IV = LB;
6348 EmitIgnoredExpr(
6349 E: isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
6350 ? S.getCombinedInit()
6351 : S.getInit());
6352
6353 const Expr *Cond =
6354 isOpenMPLoopBoundSharingDirective(Kind: S.getDirectiveKind())
6355 ? S.getCombinedCond()
6356 : S.getCond();
6357
6358 if (StaticChunked)
6359 Cond = S.getCombinedDistCond();
6360
6361 // For static unchunked schedules generate:
6362 //
6363 // 1. For distribute alone, codegen
6364 // while (idx <= UB) {
6365 // BODY;
6366 // ++idx;
6367 // }
6368 //
6369 // 2. When combined with 'for' (e.g. as in 'distribute parallel for')
6370 // while (idx <= UB) {
6371 // <CodeGen rest of pragma>(LB, UB);
6372 // idx += ST;
6373 // }
6374 //
6375 // For static chunk one schedule generate:
6376 //
6377 // while (IV <= GlobalUB) {
6378 // <CodeGen rest of pragma>(LB, UB);
6379 // LB += ST;
6380 // UB += ST;
6381 // UB = min(UB, GlobalUB);
6382 // IV = LB;
6383 // }
6384 //
6385 emitCommonSimdLoop(
6386 CGF&: *this, S,
6387 SimdInitGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6388 if (isOpenMPSimdDirective(DKind: S.getDirectiveKind()))
6389 CGF.EmitOMPSimdInit(D: S);
6390 },
6391 BodyCodeGen: [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
6392 StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
6393 CGF.EmitOMPInnerLoop(
6394 S, RequiresCleanup: LoopScope.requiresCleanups(), LoopCond: Cond, IncExpr,
6395 BodyGen: [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
6396 CodeGenLoop(CGF, S, LoopExit);
6397 },
6398 PostIncGen: [&S, StaticChunked](CodeGenFunction &CGF) {
6399 if (StaticChunked) {
6400 CGF.EmitIgnoredExpr(E: S.getCombinedNextLowerBound());
6401 CGF.EmitIgnoredExpr(E: S.getCombinedNextUpperBound());
6402 CGF.EmitIgnoredExpr(E: S.getCombinedEnsureUpperBound());
6403 CGF.EmitIgnoredExpr(E: S.getCombinedInit());
6404 }
6405 });
6406 });
6407 EmitBlock(BB: LoopExit.getBlock());
6408 // Tell the runtime we are done.
6409 RT.emitForStaticFinish(CGF&: *this, Loc: S.getEndLoc(), DKind: OMPD_distribute);
6410 } else {
6411 // Emit the outer loop, which requests its work chunk [LB..UB] from
6412 // runtime and runs the inner loop to process it.
6413 const OMPLoopArguments LoopArguments = {
6414 LB.getAddress(), UB.getAddress(), ST.getAddress(),
6415 IL.getAddress(), Chunk};
6416 EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArgs: LoopArguments,
6417 CodeGenLoopContent: CodeGenLoop);
6418 }
6419 }
6420 if (isOpenMPSimdDirective(DKind: S.getDirectiveKind())) {
6421 EmitOMPSimdFinal(D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
6422 return CGF.Builder.CreateIsNotNull(
6423 Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
6424 });
6425 }
6426 if (isOpenMPSimdDirective(DKind: S.getDirectiveKind()) &&
6427 !isOpenMPParallelDirective(DKind: S.getDirectiveKind()) &&
6428 !isOpenMPTeamsDirective(DKind: S.getDirectiveKind())) {
6429 EmitOMPReductionClauseFinal(D: S, ReductionKind: OMPD_simd);
6430 // Emit post-update of the reduction variables if IsLastIter != 0.
6431 emitPostUpdateForReductionClause(
6432 CGF&: *this, D: S, CondGen: [IL, &S](CodeGenFunction &CGF) {
6433 return CGF.Builder.CreateIsNotNull(
6434 Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()));
6435 });
6436 }
6437 // Emit final copy of the lastprivate variables if IsLastIter != 0.
6438 if (HasLastprivateClause) {
6439 EmitOMPLastprivateClauseFinal(
6440 D: S, /*NoFinals=*/false,
6441 IsLastIterCond: Builder.CreateIsNotNull(Arg: EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc())));
6442 }
6443 }
6444
6445 // We're now done with the loop, so jump to the continuation block.
6446 if (ContBlock) {
6447 EmitBranch(Block: ContBlock);
6448 EmitBlock(BB: ContBlock, IsFinished: true);
6449 }
6450 }
6451}
6452
6453// Pass OMPLoopDirective (instead of OMPDistributeDirective) to make this
6454// function available for "loop bind(teams)", which maps to "distribute".
6455static void emitOMPDistributeDirective(const OMPLoopDirective &S,
6456 CodeGenFunction &CGF,
6457 CodeGenModule &CGM) {
6458 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6459 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
6460 };
6461 OMPLexicalScope Scope(CGF, S, OMPD_unknown);
6462 CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute, CodeGen);
6463}
6464
6465void CodeGenFunction::EmitOMPDistributeDirective(
6466 const OMPDistributeDirective &S) {
6467 emitOMPDistributeDirective(S, CGF&: *this, CGM);
6468}
6469
6470static llvm::Function *
6471emitOutlinedOrderedFunction(CodeGenModule &CGM, const CapturedStmt *S,
6472 const OMPExecutableDirective &D) {
6473 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
6474 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
6475 CGF.CapturedStmtInfo = &CapStmtInfo;
6476 llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(S: *S, D);
6477 Fn->setDoesNotRecurse();
6478 return Fn;
6479}
6480
6481template <typename T>
6482static void emitRestoreIP(CodeGenFunction &CGF, const T *C,
6483 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
6484 llvm::OpenMPIRBuilder &OMPBuilder) {
6485
6486 unsigned NumLoops = C->getNumLoops();
6487 QualType Int64Ty = CGF.CGM.getContext().getIntTypeForBitwidth(
6488 /*DestWidth=*/64, /*Signed=*/1);
6489 llvm::SmallVector<llvm::Value *> StoreValues;
6490 for (unsigned I = 0; I < NumLoops; I++) {
6491 const Expr *CounterVal = C->getLoopData(I);
6492 assert(CounterVal);
6493 llvm::Value *StoreValue = CGF.EmitScalarConversion(
6494 Src: CGF.EmitScalarExpr(E: CounterVal), SrcTy: CounterVal->getType(), DstTy: Int64Ty,
6495 Loc: CounterVal->getExprLoc());
6496 StoreValues.emplace_back(Args&: StoreValue);
6497 }
6498 OMPDoacrossKind<T> ODK;
6499 bool IsDependSource = ODK.isSource(C);
6500 CGF.Builder.restoreIP(
6501 IP: OMPBuilder.createOrderedDepend(Loc: CGF.Builder, AllocaIP, NumLoops,
6502 StoreValues, Name: ".cnt.addr", IsDependSource));
6503}
6504
/// Emits code for an OpenMP 'ordered' directive.
///
/// Two code paths exist. When the OpenMPIRBuilder language option is set the
/// construct is emitted through llvm::OpenMPIRBuilder; otherwise the work is
/// delegated to CGOpenMPRuntime. In both paths the form carrying a 'depend' or
/// 'doacross' clause (which must have no associated statement) is handled
/// first, and the block form (with a 'threads' or 'simd' clause, or with no
/// clause at all) second.
void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
if (CGM.getLangOpts().OpenMPIRBuilder) {
llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

if (S.hasClausesOfKind<OMPDependClause>() ||
S.hasClausesOfKind<OMPDoacrossClause>()) {
// The ordered directive with a depend or doacross clause.
assert(!S.hasAssociatedStmt() && "No associated statement must be in "
"ordered depend|doacross construct.");
// Insertion point built from AllocaInsertPt; it is handed to emitRestoreIP
// for every clause below.
InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
AllocaInsertPt->getIterator());
// Each clause is emitted separately, all 'depend' clauses before all
// 'doacross' clauses.
for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
emitRestoreIP(CGF&: *this, C: DC, AllocaIP, OMPBuilder);
for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
emitRestoreIP(CGF&: *this, C: DC, AllocaIP, OMPBuilder);
} else {
// The ordered directive with threads or simd clause, or without clause.
// Without clause, it behaves as if the threads clause is specified.
// C is non-null only when a 'simd' clause is present.
const auto *C = S.getSingleClause<OMPSIMDClause>();

// Finalization callback passed to the builder; it only forwards to
// FinalizeOMPRegion and always reports success.
auto FiniCB = [this](InsertPointTy IP) {
OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
return llvm::Error::success();
};

// Body callback passed to the builder. With a 'simd' clause the captured
// statement is outlined into a function and called; otherwise the region
// body is emitted inline.
auto BodyGenCB = [&S, C,
this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
ArrayRef<llvm::BasicBlock *> DeallocBlocks) {
Builder.restoreIP(IP: CodeGenIP);

const CapturedStmt *CS = S.getInnermostCapturedStmt();
if (C) {
// Split the current block without creating a branch; the new block is
// passed to EmitCaptureStmt as the finalization block.
llvm::BasicBlock *FiniBB = splitBBWithSuffix(
Builder, /*CreateBranch=*/false, Suffix: ".ordered.after");
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
GenerateOpenMPCapturedVars(S: *CS, CapturedVars);
llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, S: CS, D: S);
assert(S.getBeginLoc().isValid() &&
"Outlined function call location must be valid.");
// NOTE(review): the object returned by CreateDefaultArtificial is not
// bound to a name, so it lives only until the end of this statement. If
// it is meant to act as a scoped guard covering the call emitted below,
// that does not happen here - confirm the intent.
ApplyDebugLocation::CreateDefaultArtificial(CGF&: *this, TemporaryLocation: S.getBeginLoc());
OMPBuilderCBHelpers::EmitCaptureStmt(CGF&: *this, CodeGenIP, FiniBB&: *FiniBB,
Fn: OutlinedFn, Args: CapturedVars);
} else {
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
CGF&: *this, RegionBodyStmt: CS->getCapturedStmt(), AllocaIP: AllocIP, CodeGenIP, RegionName: "ordered");
}
return llvm::Error::success();
};

OMPLexicalScope Scope(*this, S, OMPD_unknown);
// IsThreads is true exactly when no 'simd' clause was given. cantFail is
// used because both callbacks above always return success.
llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(
ValOrErr: OMPBuilder.createOrderedThreadsSimd(Loc: Builder, BodyGenCB, FiniCB, IsThreads: !C));
Builder.restoreIP(IP: AfterIP);
}
return;
}

// Runtime-library path: 'depend' clauses.
if (S.hasClausesOfKind<OMPDependClause>()) {
assert(!S.hasAssociatedStmt() &&
"No associated statement must be in ordered depend construct.");
for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
CGM.getOpenMPRuntime().emitDoacrossOrdered(CGF&: *this, C: DC);
return;
}
// Runtime-library path: 'doacross' clauses. Unlike the OpenMPIRBuilder path
// above, a directive that reached the 'depend' branch never gets here.
if (S.hasClausesOfKind<OMPDoacrossClause>()) {
assert(!S.hasAssociatedStmt() &&
"No associated statement must be in ordered doacross construct.");
for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
CGM.getOpenMPRuntime().emitDoacrossOrdered(CGF&: *this, C: DC);
return;
}
// Runtime-library path: block form. C is non-null only with a 'simd' clause.
const auto *C = S.getSingleClause<OMPSIMDClause>();
auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
PrePostActionTy &Action) {
const CapturedStmt *CS = S.getInnermostCapturedStmt();
if (C) {
// 'simd': outline the captured statement and call the outlined function.
// Action.Enter is not invoked on this branch.
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
CGF.GenerateOpenMPCapturedVars(S: *CS, CapturedVars);
llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, S: CS, D: S);
CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc: S.getBeginLoc(),
OutlinedFn, Args: CapturedVars);
} else {
// No 'simd': run the pre-action and emit the body inline.
Action.Enter(CGF);
CGF.EmitStmt(S: CS->getCapturedStmt());
}
};
OMPLexicalScope Scope(*this, S, OMPD_unknown);
CGM.getOpenMPRuntime().emitOrderedRegion(CGF&: *this, OrderedOpGen: CodeGen, Loc: S.getBeginLoc(), IsThreads: !C);
}
6595
6596static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
6597 QualType SrcType, QualType DestType,
6598 SourceLocation Loc) {
6599 assert(CGF.hasScalarEvaluationKind(DestType) &&
6600 "DestType must have scalar evaluation kind.");
6601 assert(!Val.isAggregate() && "Must be a scalar or complex.");
6602 return Val.isScalar() ? CGF.EmitScalarConversion(Src: Val.getScalarVal(), SrcTy: SrcType,
6603 DstTy: DestType, Loc)
6604 : CGF.EmitComplexToScalarConversion(
6605 Src: Val.getComplexVal(), SrcTy: SrcType, DstTy: DestType, Loc);
6606}
6607
6608static CodeGenFunction::ComplexPairTy
6609convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
6610 QualType DestType, SourceLocation Loc) {
6611 assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
6612 "DestType must have complex evaluation kind.");
6613 CodeGenFunction::ComplexPairTy ComplexVal;
6614 if (Val.isScalar()) {
6615 // Convert the input element to the element type of the complex.
6616 QualType DestElementType =
6617 DestType->castAs<ComplexType>()->getElementType();
6618 llvm::Value *ScalarVal = CGF.EmitScalarConversion(
6619 Src: Val.getScalarVal(), SrcTy: SrcType, DstTy: DestElementType, Loc);
6620 ComplexVal = CodeGenFunction::ComplexPairTy(
6621 ScalarVal, llvm::Constant::getNullValue(Ty: ScalarVal->getType()));
6622 } else {
6623 assert(Val.isComplex() && "Must be a scalar or complex.");
6624 QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
6625 QualType DestElementType =
6626 DestType->castAs<ComplexType>()->getElementType();
6627 ComplexVal.first = CGF.EmitScalarConversion(
6628 Src: Val.getComplexVal().first, SrcTy: SrcElementType, DstTy: DestElementType, Loc);
6629 ComplexVal.second = CGF.EmitScalarConversion(
6630 Src: Val.getComplexVal().second, SrcTy: SrcElementType, DstTy: DestElementType, Loc);
6631 }
6632 return ComplexVal;
6633}
6634
6635static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
6636 LValue LVal, RValue RVal) {
6637 if (LVal.isGlobalReg())
6638 CGF.EmitStoreThroughGlobalRegLValue(Src: RVal, Dst: LVal);
6639 else
6640 CGF.EmitAtomicStore(rvalue: RVal, lvalue: LVal, AO, IsVolatile: LVal.isVolatile(), /*isInit=*/false);
6641}
6642
6643static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
6644 llvm::AtomicOrdering AO, LValue LVal,
6645 SourceLocation Loc) {
6646 if (LVal.isGlobalReg())
6647 return CGF.EmitLoadOfLValue(V: LVal, Loc);
6648 return CGF.EmitAtomicLoad(
6649 lvalue: LVal, loc: Loc, AO: llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering: AO),
6650 IsVolatile: LVal.isVolatile());
6651}
6652
6653void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
6654 QualType RValTy, SourceLocation Loc) {
6655 switch (getEvaluationKind(T: LVal.getType())) {
6656 case TEK_Scalar:
6657 EmitStoreThroughLValue(Src: RValue::get(V: convertToScalarValue(
6658 CGF&: *this, Val: RVal, SrcType: RValTy, DestType: LVal.getType(), Loc)),
6659 Dst: LVal);
6660 break;
6661 case TEK_Complex:
6662 EmitStoreOfComplex(
6663 V: convertToComplexValue(CGF&: *this, Val: RVal, SrcType: RValTy, DestType: LVal.getType(), Loc), dest: LVal,
6664 /*isInit=*/false);
6665 break;
6666 case TEK_Aggregate:
6667 llvm_unreachable("Must be a scalar or complex.");
6668 }
6669}
6670
6671static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
6672 const Expr *X, const Expr *V,
6673 SourceLocation Loc) {
6674 // v = x;
6675 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
6676 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
6677 LValue XLValue = CGF.EmitLValue(E: X);
6678 LValue VLValue = CGF.EmitLValue(E: V);
6679 RValue Res = emitSimpleAtomicLoad(CGF, AO, LVal: XLValue, Loc);
6680 // OpenMP, 2.17.7, atomic Construct
6681 // If the read or capture clause is specified and the acquire, acq_rel, or
6682 // seq_cst clause is specified then the strong flush on exit from the atomic
6683 // operation is also an acquire flush.
6684 switch (AO) {
6685 case llvm::AtomicOrdering::Acquire:
6686 case llvm::AtomicOrdering::AcquireRelease:
6687 case llvm::AtomicOrdering::SequentiallyConsistent:
6688 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: {}, Loc,
6689 AO: llvm::AtomicOrdering::Acquire);
6690 break;
6691 case llvm::AtomicOrdering::Monotonic:
6692 case llvm::AtomicOrdering::Release:
6693 break;
6694 case llvm::AtomicOrdering::NotAtomic:
6695 case llvm::AtomicOrdering::Unordered:
6696 llvm_unreachable("Unexpected ordering.");
6697 }
6698 CGF.emitOMPSimpleStore(LVal: VLValue, RVal: Res, RValTy: X->getType().getNonReferenceType(), Loc);
6699 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: V);
6700}
6701
6702static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
6703 llvm::AtomicOrdering AO, const Expr *X,
6704 const Expr *E, SourceLocation Loc) {
6705 // x = expr;
6706 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
6707 emitSimpleAtomicStore(CGF, AO, LVal: CGF.EmitLValue(E: X), RVal: CGF.EmitAnyExpr(E));
6708 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X);
6709 // OpenMP, 2.17.7, atomic Construct
6710 // If the write, update, or capture clause is specified and the release,
6711 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6712 // the atomic operation is also a release flush.
6713 switch (AO) {
6714 case llvm::AtomicOrdering::Release:
6715 case llvm::AtomicOrdering::AcquireRelease:
6716 case llvm::AtomicOrdering::SequentiallyConsistent:
6717 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: {}, Loc,
6718 AO: llvm::AtomicOrdering::Release);
6719 break;
6720 case llvm::AtomicOrdering::Acquire:
6721 case llvm::AtomicOrdering::Monotonic:
6722 break;
6723 case llvm::AtomicOrdering::NotAtomic:
6724 case llvm::AtomicOrdering::Unordered:
6725 llvm_unreachable("Unexpected ordering.");
6726 }
6727}
6728
6729static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
6730 RValue Update,
6731 BinaryOperatorKind BO,
6732 llvm::AtomicOrdering AO,
6733 bool IsXLHSInRHSPart) {
6734 ASTContext &Context = CGF.getContext();
6735 // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
6736 // expression is simple and atomic is allowed for the given type for the
6737 // target platform.
6738 if (BO == BO_Comma || !Update.isScalar() || !X.isSimple() ||
6739 (!isa<llvm::ConstantInt>(Val: Update.getScalarVal()) &&
6740 (Update.getScalarVal()->getType() != X.getAddress().getElementType())) ||
6741 !Context.getTargetInfo().hasBuiltinAtomic(
6742 AtomicSizeInBits: Context.getTypeSize(T: X.getType()), AlignmentInBits: Context.toBits(CharSize: X.getAlignment())))
6743 return std::make_pair(x: false, y: RValue::get(V: nullptr));
6744
6745 auto &&CheckAtomicSupport = [&CGF](llvm::Type *T, BinaryOperatorKind BO) {
6746 if (T->isIntegerTy())
6747 return true;
6748
6749 if (T->isFloatingPointTy() && (BO == BO_Add || BO == BO_Sub))
6750 return llvm::isPowerOf2_64(Value: CGF.CGM.getDataLayout().getTypeStoreSize(Ty: T));
6751
6752 return false;
6753 };
6754
6755 if (!CheckAtomicSupport(Update.getScalarVal()->getType(), BO) ||
6756 !CheckAtomicSupport(X.getAddress().getElementType(), BO))
6757 return std::make_pair(x: false, y: RValue::get(V: nullptr));
6758
6759 bool IsInteger = X.getAddress().getElementType()->isIntegerTy();
6760 llvm::AtomicRMWInst::BinOp RMWOp;
6761 switch (BO) {
6762 case BO_Add:
6763 RMWOp = IsInteger ? llvm::AtomicRMWInst::Add : llvm::AtomicRMWInst::FAdd;
6764 break;
6765 case BO_Sub:
6766 if (!IsXLHSInRHSPart)
6767 return std::make_pair(x: false, y: RValue::get(V: nullptr));
6768 RMWOp = IsInteger ? llvm::AtomicRMWInst::Sub : llvm::AtomicRMWInst::FSub;
6769 break;
6770 case BO_And:
6771 RMWOp = llvm::AtomicRMWInst::And;
6772 break;
6773 case BO_Or:
6774 RMWOp = llvm::AtomicRMWInst::Or;
6775 break;
6776 case BO_Xor:
6777 RMWOp = llvm::AtomicRMWInst::Xor;
6778 break;
6779 case BO_LT:
6780 if (IsInteger)
6781 RMWOp = X.getType()->hasSignedIntegerRepresentation()
6782 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
6783 : llvm::AtomicRMWInst::Max)
6784 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
6785 : llvm::AtomicRMWInst::UMax);
6786 else
6787 RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMin
6788 : llvm::AtomicRMWInst::FMax;
6789 break;
6790 case BO_GT:
6791 if (IsInteger)
6792 RMWOp = X.getType()->hasSignedIntegerRepresentation()
6793 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
6794 : llvm::AtomicRMWInst::Min)
6795 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
6796 : llvm::AtomicRMWInst::UMin);
6797 else
6798 RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMax
6799 : llvm::AtomicRMWInst::FMin;
6800 break;
6801 case BO_Assign:
6802 RMWOp = llvm::AtomicRMWInst::Xchg;
6803 break;
6804 case BO_Mul:
6805 case BO_Div:
6806 case BO_Rem:
6807 case BO_Shl:
6808 case BO_Shr:
6809 case BO_LAnd:
6810 case BO_LOr:
6811 return std::make_pair(x: false, y: RValue::get(V: nullptr));
6812 case BO_PtrMemD:
6813 case BO_PtrMemI:
6814 case BO_LE:
6815 case BO_GE:
6816 case BO_EQ:
6817 case BO_NE:
6818 case BO_Cmp:
6819 case BO_AddAssign:
6820 case BO_SubAssign:
6821 case BO_AndAssign:
6822 case BO_OrAssign:
6823 case BO_XorAssign:
6824 case BO_MulAssign:
6825 case BO_DivAssign:
6826 case BO_RemAssign:
6827 case BO_ShlAssign:
6828 case BO_ShrAssign:
6829 case BO_Comma:
6830 llvm_unreachable("Unsupported atomic update operation");
6831 }
6832 llvm::Value *UpdateVal = Update.getScalarVal();
6833 if (auto *IC = dyn_cast<llvm::ConstantInt>(Val: UpdateVal)) {
6834 if (IsInteger)
6835 UpdateVal = CGF.Builder.CreateIntCast(
6836 V: IC, DestTy: X.getAddress().getElementType(),
6837 isSigned: X.getType()->hasSignedIntegerRepresentation());
6838 else
6839 UpdateVal = CGF.Builder.CreateCast(Op: llvm::Instruction::CastOps::UIToFP, V: IC,
6840 DestTy: X.getAddress().getElementType());
6841 }
6842 llvm::AtomicRMWInst *Res =
6843 CGF.emitAtomicRMWInst(Op: RMWOp, Addr: X.getAddress(), Val: UpdateVal, Order: AO);
6844 return std::make_pair(x: true, y: RValue::get(V: Res));
6845}
6846
6847std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
6848 LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
6849 llvm::AtomicOrdering AO, SourceLocation Loc,
6850 const llvm::function_ref<RValue(RValue)> CommonGen) {
6851 // Update expressions are allowed to have the following forms:
6852 // x binop= expr; -> xrval + expr;
6853 // x++, ++x -> xrval + 1;
6854 // x--, --x -> xrval - 1;
6855 // x = x binop expr; -> xrval binop expr
6856 // x = expr Op x; - > expr binop xrval;
6857 auto Res = emitOMPAtomicRMW(CGF&: *this, X, Update: E, BO, AO, IsXLHSInRHSPart);
6858 if (!Res.first) {
6859 if (X.isGlobalReg()) {
6860 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
6861 // 'xrval'.
6862 EmitStoreThroughLValue(Src: CommonGen(EmitLoadOfLValue(V: X, Loc)), Dst: X);
6863 } else {
6864 // Perform compare-and-swap procedure.
6865 EmitAtomicUpdate(LVal: X, AO, UpdateOp: CommonGen, IsVolatile: X.getType().isVolatileQualified());
6866 }
6867 }
6868 return Res;
6869}
6870
6871static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
6872 llvm::AtomicOrdering AO, const Expr *X,
6873 const Expr *E, const Expr *UE,
6874 bool IsXLHSInRHSPart, SourceLocation Loc) {
6875 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
6876 "Update expr in 'atomic update' must be a binary operator.");
6877 const auto *BOUE = cast<BinaryOperator>(Val: UE->IgnoreImpCasts());
6878 // Update expressions are allowed to have the following forms:
6879 // x binop= expr; -> xrval + expr;
6880 // x++, ++x -> xrval + 1;
6881 // x--, --x -> xrval - 1;
6882 // x = x binop expr; -> xrval binop expr
6883 // x = expr Op x; - > expr binop xrval;
6884 assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
6885 LValue XLValue = CGF.EmitLValue(E: X);
6886 RValue ExprRValue = CGF.EmitAnyExpr(E);
6887 const auto *LHS = cast<OpaqueValueExpr>(Val: BOUE->getLHS()->IgnoreImpCasts());
6888 const auto *RHS = cast<OpaqueValueExpr>(Val: BOUE->getRHS()->IgnoreImpCasts());
6889 const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
6890 const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
6891 auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
6892 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6893 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
6894 return CGF.EmitAnyExpr(E: UE);
6895 };
6896 (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
6897 X: XLValue, E: ExprRValue, BO: BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, CommonGen: Gen);
6898 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X);
6899 // OpenMP, 2.17.7, atomic Construct
6900 // If the write, update, or capture clause is specified and the release,
6901 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6902 // the atomic operation is also a release flush.
6903 switch (AO) {
6904 case llvm::AtomicOrdering::Release:
6905 case llvm::AtomicOrdering::AcquireRelease:
6906 case llvm::AtomicOrdering::SequentiallyConsistent:
6907 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: {}, Loc,
6908 AO: llvm::AtomicOrdering::Release);
6909 break;
6910 case llvm::AtomicOrdering::Acquire:
6911 case llvm::AtomicOrdering::Monotonic:
6912 break;
6913 case llvm::AtomicOrdering::NotAtomic:
6914 case llvm::AtomicOrdering::Unordered:
6915 llvm_unreachable("Unexpected ordering.");
6916 }
6917}
6918
6919static RValue convertToType(CodeGenFunction &CGF, RValue Value,
6920 QualType SourceType, QualType ResType,
6921 SourceLocation Loc) {
6922 switch (CGF.getEvaluationKind(T: ResType)) {
6923 case TEK_Scalar:
6924 return RValue::get(
6925 V: convertToScalarValue(CGF, Val: Value, SrcType: SourceType, DestType: ResType, Loc));
6926 case TEK_Complex: {
6927 auto Res = convertToComplexValue(CGF, Val: Value, SrcType: SourceType, DestType: ResType, Loc);
6928 return RValue::getComplex(V1: Res.first, V2: Res.second);
6929 }
6930 case TEK_Aggregate:
6931 break;
6932 }
6933 llvm_unreachable("Must be a scalar or complex.");
6934}
6935
6936static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
6937 llvm::AtomicOrdering AO,
6938 bool IsPostfixUpdate, const Expr *V,
6939 const Expr *X, const Expr *E,
6940 const Expr *UE, bool IsXLHSInRHSPart,
6941 SourceLocation Loc) {
6942 assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
6943 assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
6944 RValue NewVVal;
6945 LValue VLValue = CGF.EmitLValue(E: V);
6946 LValue XLValue = CGF.EmitLValue(E: X);
6947 RValue ExprRValue = CGF.EmitAnyExpr(E);
6948 QualType NewVValType;
6949 if (UE) {
6950 // 'x' is updated with some additional value.
6951 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
6952 "Update expr in 'atomic capture' must be a binary operator.");
6953 const auto *BOUE = cast<BinaryOperator>(Val: UE->IgnoreImpCasts());
6954 // Update expressions are allowed to have the following forms:
6955 // x binop= expr; -> xrval + expr;
6956 // x++, ++x -> xrval + 1;
6957 // x--, --x -> xrval - 1;
6958 // x = x binop expr; -> xrval binop expr
6959 // x = expr Op x; - > expr binop xrval;
6960 const auto *LHS = cast<OpaqueValueExpr>(Val: BOUE->getLHS()->IgnoreImpCasts());
6961 const auto *RHS = cast<OpaqueValueExpr>(Val: BOUE->getRHS()->IgnoreImpCasts());
6962 const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
6963 NewVValType = XRValExpr->getType();
6964 const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
6965 auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
6966 IsPostfixUpdate](RValue XRValue) {
6967 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6968 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
6969 RValue Res = CGF.EmitAnyExpr(E: UE);
6970 NewVVal = IsPostfixUpdate ? XRValue : Res;
6971 return Res;
6972 };
6973 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
6974 X: XLValue, E: ExprRValue, BO: BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, CommonGen: Gen);
6975 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X);
6976 if (Res.first) {
6977 // 'atomicrmw' instruction was generated.
6978 if (IsPostfixUpdate) {
6979 // Use old value from 'atomicrmw'.
6980 NewVVal = Res.second;
6981 } else {
6982 // 'atomicrmw' does not provide new value, so evaluate it using old
6983 // value of 'x'.
6984 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6985 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
6986 NewVVal = CGF.EmitAnyExpr(E: UE);
6987 }
6988 }
6989 } else {
6990 // 'x' is simply rewritten with some 'expr'.
6991 NewVValType = X->getType().getNonReferenceType();
6992 ExprRValue = convertToType(CGF, Value: ExprRValue, SourceType: E->getType(),
6993 ResType: X->getType().getNonReferenceType(), Loc);
6994 auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
6995 NewVVal = XRValue;
6996 return ExprRValue;
6997 };
6998 // Try to perform atomicrmw xchg, otherwise simple exchange.
6999 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
7000 X: XLValue, E: ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
7001 Loc, CommonGen: Gen);
7002 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X);
7003 if (Res.first) {
7004 // 'atomicrmw' instruction was generated.
7005 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
7006 }
7007 }
7008 // Emit post-update store to 'v' of old/new 'x' value.
7009 CGF.emitOMPSimpleStore(LVal: VLValue, RVal: NewVVal, RValTy: NewVValType, Loc);
7010 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: V);
7011 // OpenMP 5.1 removes the required flush for capture clause.
7012 if (CGF.CGM.getLangOpts().OpenMP < 51) {
7013 // OpenMP, 2.17.7, atomic Construct
7014 // If the write, update, or capture clause is specified and the release,
7015 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
7016 // the atomic operation is also a release flush.
7017 // If the read or capture clause is specified and the acquire, acq_rel, or
7018 // seq_cst clause is specified then the strong flush on exit from the atomic
7019 // operation is also an acquire flush.
7020 switch (AO) {
7021 case llvm::AtomicOrdering::Release:
7022 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: {}, Loc,
7023 AO: llvm::AtomicOrdering::Release);
7024 break;
7025 case llvm::AtomicOrdering::Acquire:
7026 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: {}, Loc,
7027 AO: llvm::AtomicOrdering::Acquire);
7028 break;
7029 case llvm::AtomicOrdering::AcquireRelease:
7030 case llvm::AtomicOrdering::SequentiallyConsistent:
7031 CGF.CGM.getOpenMPRuntime().emitFlush(
7032 CGF, Vars: {}, Loc, AO: llvm::AtomicOrdering::AcquireRelease);
7033 break;
7034 case llvm::AtomicOrdering::Monotonic:
7035 break;
7036 case llvm::AtomicOrdering::NotAtomic:
7037 case llvm::AtomicOrdering::Unordered:
7038 llvm_unreachable("Unexpected ordering.");
7039 }
7040 }
7041}
7042
7043static void emitOMPAtomicCompareExpr(
7044 CodeGenFunction &CGF, llvm::AtomicOrdering AO, llvm::AtomicOrdering FailAO,
7045 const Expr *X, const Expr *V, const Expr *R, const Expr *E, const Expr *D,
7046 const Expr *CE, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly,
7047 SourceLocation Loc) {
7048 llvm::OpenMPIRBuilder &OMPBuilder =
7049 CGF.CGM.getOpenMPRuntime().getOMPBuilder();
7050
7051 OMPAtomicCompareOp Op;
7052 assert(isa<BinaryOperator>(CE) && "CE is not a BinaryOperator");
7053 switch (cast<BinaryOperator>(Val: CE)->getOpcode()) {
7054 case BO_EQ:
7055 Op = OMPAtomicCompareOp::EQ;
7056 break;
7057 case BO_LT:
7058 Op = OMPAtomicCompareOp::MIN;
7059 break;
7060 case BO_GT:
7061 Op = OMPAtomicCompareOp::MAX;
7062 break;
7063 default:
7064 llvm_unreachable("unsupported atomic compare binary operator");
7065 }
7066
7067 LValue XLVal = CGF.EmitLValue(E: X);
7068 Address XAddr = XLVal.getAddress();
7069
7070 auto EmitRValueWithCastIfNeeded = [&CGF, Loc](const Expr *X, const Expr *E) {
7071 if (X->getType() == E->getType())
7072 return CGF.EmitScalarExpr(E);
7073 const Expr *NewE = E->IgnoreImplicitAsWritten();
7074 llvm::Value *V = CGF.EmitScalarExpr(E: NewE);
7075 if (NewE->getType() == X->getType())
7076 return V;
7077 return CGF.EmitScalarConversion(Src: V, SrcTy: NewE->getType(), DstTy: X->getType(), Loc);
7078 };
7079
7080 llvm::Value *EVal = EmitRValueWithCastIfNeeded(X, E);
7081 llvm::Value *DVal = D ? EmitRValueWithCastIfNeeded(X, D) : nullptr;
7082 if (auto *CI = dyn_cast<llvm::ConstantInt>(Val: EVal))
7083 EVal = CGF.Builder.CreateIntCast(
7084 V: CI, DestTy: XLVal.getAddress().getElementType(),
7085 isSigned: E->getType()->hasSignedIntegerRepresentation());
7086 if (DVal)
7087 if (auto *CI = dyn_cast<llvm::ConstantInt>(Val: DVal))
7088 DVal = CGF.Builder.CreateIntCast(
7089 V: CI, DestTy: XLVal.getAddress().getElementType(),
7090 isSigned: D->getType()->hasSignedIntegerRepresentation());
7091
7092 llvm::OpenMPIRBuilder::AtomicOpValue XOpVal{
7093 .Var: XAddr.emitRawPointer(CGF), .ElemTy: XAddr.getElementType(),
7094 .IsSigned: X->getType()->hasSignedIntegerRepresentation(),
7095 .IsVolatile: X->getType().isVolatileQualified()};
7096 llvm::OpenMPIRBuilder::AtomicOpValue VOpVal, ROpVal;
7097 if (V) {
7098 LValue LV = CGF.EmitLValue(E: V);
7099 Address Addr = LV.getAddress();
7100 VOpVal = {.Var: Addr.emitRawPointer(CGF), .ElemTy: Addr.getElementType(),
7101 .IsSigned: V->getType()->hasSignedIntegerRepresentation(),
7102 .IsVolatile: V->getType().isVolatileQualified()};
7103 }
7104 if (R) {
7105 LValue LV = CGF.EmitLValue(E: R);
7106 Address Addr = LV.getAddress();
7107 ROpVal = {.Var: Addr.emitRawPointer(CGF), .ElemTy: Addr.getElementType(),
7108 .IsSigned: R->getType()->hasSignedIntegerRepresentation(),
7109 .IsVolatile: R->getType().isVolatileQualified()};
7110 }
7111
7112 if (FailAO == llvm::AtomicOrdering::NotAtomic) {
7113 // fail clause was not mentioned on the
7114 // "#pragma omp atomic compare" construct.
7115 CGF.Builder.restoreIP(IP: OMPBuilder.createAtomicCompare(
7116 Loc: CGF.Builder, X&: XOpVal, V&: VOpVal, R&: ROpVal, E: EVal, D: DVal, AO, Op, IsXBinopExpr,
7117 IsPostfixUpdate, IsFailOnly));
7118 } else
7119 CGF.Builder.restoreIP(IP: OMPBuilder.createAtomicCompare(
7120 Loc: CGF.Builder, X&: XOpVal, V&: VOpVal, R&: ROpVal, E: EVal, D: DVal, AO, Op, IsXBinopExpr,
7121 IsPostfixUpdate, IsFailOnly, Failure: FailAO));
7122}
7123
7124static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
7125 llvm::AtomicOrdering AO,
7126 llvm::AtomicOrdering FailAO, bool IsPostfixUpdate,
7127 const Expr *X, const Expr *V, const Expr *R,
7128 const Expr *E, const Expr *UE, const Expr *D,
7129 const Expr *CE, bool IsXLHSInRHSPart,
7130 bool IsFailOnly, SourceLocation Loc) {
7131 switch (Kind) {
7132 case OMPC_read:
7133 emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
7134 break;
7135 case OMPC_write:
7136 emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
7137 break;
7138 case OMPC_unknown:
7139 case OMPC_update:
7140 emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
7141 break;
7142 case OMPC_capture:
7143 emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
7144 IsXLHSInRHSPart, Loc);
7145 break;
7146 case OMPC_compare: {
7147 emitOMPAtomicCompareExpr(CGF, AO, FailAO, X, V, R, E, D, CE,
7148 IsXBinopExpr: IsXLHSInRHSPart, IsPostfixUpdate, IsFailOnly, Loc);
7149 break;
7150 }
7151 default:
7152 llvm_unreachable("Clause is not allowed in 'omp atomic'.");
7153 }
7154}
7155
7156void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
7157 llvm::AtomicOrdering AO = CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
7158 // Fail Memory Clause Ordering.
7159 llvm::AtomicOrdering FailAO = llvm::AtomicOrdering::NotAtomic;
7160 bool MemOrderingSpecified = false;
7161 if (S.getSingleClause<OMPSeqCstClause>()) {
7162 AO = llvm::AtomicOrdering::SequentiallyConsistent;
7163 MemOrderingSpecified = true;
7164 } else if (S.getSingleClause<OMPAcqRelClause>()) {
7165 AO = llvm::AtomicOrdering::AcquireRelease;
7166 MemOrderingSpecified = true;
7167 } else if (S.getSingleClause<OMPAcquireClause>()) {
7168 AO = llvm::AtomicOrdering::Acquire;
7169 MemOrderingSpecified = true;
7170 } else if (S.getSingleClause<OMPReleaseClause>()) {
7171 AO = llvm::AtomicOrdering::Release;
7172 MemOrderingSpecified = true;
7173 } else if (S.getSingleClause<OMPRelaxedClause>()) {
7174 AO = llvm::AtomicOrdering::Monotonic;
7175 MemOrderingSpecified = true;
7176 }
7177 llvm::SmallSet<OpenMPClauseKind, 2> KindsEncountered;
7178 OpenMPClauseKind Kind = OMPC_unknown;
7179 for (const OMPClause *C : S.clauses()) {
7180 // Find first clause (skip seq_cst|acq_rel|aqcuire|release|relaxed clause,
7181 // if it is first).
7182 OpenMPClauseKind K = C->getClauseKind();
7183 // TBD
7184 if (K == OMPC_weak)
7185 return;
7186 if (K == OMPC_seq_cst || K == OMPC_acq_rel || K == OMPC_acquire ||
7187 K == OMPC_release || K == OMPC_relaxed || K == OMPC_hint)
7188 continue;
7189 Kind = K;
7190 KindsEncountered.insert(V: K);
7191 }
7192 // We just need to correct Kind here. No need to set a bool saying it is
7193 // actually compare capture because we can tell from whether V and R are
7194 // nullptr.
7195 if (KindsEncountered.contains(V: OMPC_compare) &&
7196 KindsEncountered.contains(V: OMPC_capture))
7197 Kind = OMPC_compare;
7198 if (!MemOrderingSpecified) {
7199 llvm::AtomicOrdering DefaultOrder =
7200 CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
7201 if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
7202 DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
7203 (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
7204 Kind == OMPC_capture)) {
7205 AO = DefaultOrder;
7206 } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
7207 if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
7208 AO = llvm::AtomicOrdering::Release;
7209 } else if (Kind == OMPC_read) {
7210 assert(Kind == OMPC_read && "Unexpected atomic kind.");
7211 AO = llvm::AtomicOrdering::Acquire;
7212 }
7213 }
7214 }
7215
7216 if (KindsEncountered.contains(V: OMPC_compare) &&
7217 KindsEncountered.contains(V: OMPC_fail)) {
7218 Kind = OMPC_compare;
7219 const auto *FailClause = S.getSingleClause<OMPFailClause>();
7220 if (FailClause) {
7221 OpenMPClauseKind FailParameter = FailClause->getFailParameter();
7222 if (FailParameter == llvm::omp::OMPC_relaxed)
7223 FailAO = llvm::AtomicOrdering::Monotonic;
7224 else if (FailParameter == llvm::omp::OMPC_acquire)
7225 FailAO = llvm::AtomicOrdering::Acquire;
7226 else if (FailParameter == llvm::omp::OMPC_seq_cst)
7227 FailAO = llvm::AtomicOrdering::SequentiallyConsistent;
7228 }
7229 }
7230
7231 LexicalScope Scope(*this, S.getSourceRange());
7232 EmitStopPoint(S: S.getAssociatedStmt());
7233 emitOMPAtomicExpr(CGF&: *this, Kind, AO, FailAO, IsPostfixUpdate: S.isPostfixUpdate(), X: S.getX(),
7234 V: S.getV(), R: S.getR(), E: S.getExpr(), UE: S.getUpdateExpr(),
7235 D: S.getD(), CE: S.getCondExpr(), IsXLHSInRHSPart: S.isXLHSInRHSPart(),
7236 IsFailOnly: S.isFailOnly(), Loc: S.getBeginLoc());
7237}
7238
/// Common emission for OpenMP target execution directives.
///
/// When compiling for the target device the captured statement is emitted as
/// an inlined 'target' directive. Otherwise the region is outlined through the
/// OpenMP runtime with \p CodeGen and a target call is emitted, taking an
/// applicable 'if' clause and a 'device' clause into account.
///
/// \param S The directive; asserted to be a target execution directive.
/// \param CodeGen Region code generator passed on to
///        emitTargetOutlinedFunction.
static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
const OMPExecutableDirective &S,
const RegionCodeGenTy &CodeGen) {
assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
CodeGenModule &CGM = CGF.CGM;

// On device emit this construct as inlined code.
if (CGM.getLangOpts().OpenMPIsTargetDevice) {
OMPLexicalScope Scope(CGF, S, OMPD_target);
CGM.getOpenMPRuntime().emitInlinedDirective(
CGF, InnermostKind: OMPD_target, CodeGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) {
CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
});
return;
}

// Stays alive until the end of the function.
auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
// Outputs of emitTargetOutlinedFunction below.
llvm::Function *Fn = nullptr;
llvm::Constant *FnID = nullptr;

const Expr *IfCond = nullptr;
// Check for the at most one if clause associated with the target region.
// Only an 'if' clause with no name modifier or with the 'target' modifier
// applies; the first such clause is used.
for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
if (C->getNameModifier() == OMPD_unknown ||
C->getNameModifier() == OMPD_target) {
IfCond = C->getCondition();
break;
}
}

// Check if we have any device clause associated with the directive.
llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
nullptr, OMPC_DEVICE_unknown);
if (auto *C = S.getSingleClause<OMPDeviceClause>())
Device.setPointerAndInt(PtrVal: C->getDevice(), IntVal: C->getModifier());

// Check if we have an if clause whose conditional always evaluates to false
// or if we do not have any targets specified. If so the target region is not
// an offload entry point.
bool IsOffloadEntry = true;
if (IfCond) {
bool Val;
if (CGF.ConstantFoldsToSimpleInteger(Cond: IfCond, Result&: Val) && !Val)
IsOffloadEntry = false;
}
if (CGM.getLangOpts().OMPTargetTriples.empty())
IsOffloadEntry = false;

// With mandatory offloading, a region that is not an offload entry point is
// reported as an error; emission still continues below.
if (CGM.getLangOpts().OpenMPOffloadMandatory && !IsOffloadEntry) {
CGM.getDiags().Report(DiagID: diag::err_missing_mandatory_offloading);
}

assert(CGF.CurFuncDecl && "No parent declaration for target region!");
StringRef ParentName;
// In case we have Ctors/Dtors we use the complete type variant to produce
// the mangling of the device outlined kernel.
if (const auto *D = dyn_cast<CXXConstructorDecl>(Val: CGF.CurFuncDecl))
ParentName = CGM.getMangledName(GD: GlobalDecl(D, Ctor_Complete));
else if (const auto *D = dyn_cast<CXXDestructorDecl>(Val: CGF.CurFuncDecl))
ParentName = CGM.getMangledName(GD: GlobalDecl(D, Dtor_Complete));
else
ParentName =
CGM.getMangledName(GD: GlobalDecl(cast<FunctionDecl>(Val: CGF.CurFuncDecl)));

// Emit target region as a standalone region.
CGM.getOpenMPRuntime().emitTargetOutlinedFunction(D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: FnID,
IsOffloadEntry, CodeGen);
OMPLexicalScope Scope(CGF, S, OMPD_task);
// Callback passed to emitTargetCall. It returns the iteration count of a loop
// directive as an unsigned 64-bit value, or nullptr when the region is not an
// offload entry point.
auto &&SizeEmitter =
[IsOffloadEntry](CodeGenFunction &CGF,
const OMPLoopDirective &D) -> llvm::Value * {
if (IsOffloadEntry) {
// NOTE(review): this constructs an unnamed OMPLoopScope temporary, which is
// destroyed at the end of this statement rather than at the end of the
// block - confirm that only its constructor's effects are intended to
// apply to the code emitted below.
OMPLoopScope(CGF, D);
// Emit calculation of the iterations count.
llvm::Value *NumIterations = CGF.EmitScalarExpr(E: D.getNumIterations());
NumIterations = CGF.Builder.CreateIntCast(V: NumIterations, DestTy: CGF.Int64Ty,
/*isSigned=*/false);
return NumIterations;
}
return nullptr;
};
CGM.getOpenMPRuntime().emitTargetCall(CGF, D: S, OutlinedFn: Fn, OutlinedFnID: FnID, IfCond, Device,
SizeEmitter);
}
7323
7324static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
7325 PrePostActionTy &Action) {
7326 Action.Enter(CGF);
7327 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7328 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
7329 CGF.EmitOMPPrivateClause(D: S, PrivateScope);
7330 (void)PrivateScope.Privatize();
7331 if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()))
7332 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S);
7333
7334 CGF.EmitStmt(S: S.getCapturedStmt(RegionKind: OMPD_target)->getCapturedStmt());
7335 CGF.EnsureInsertPoint();
7336}
7337
7338void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
7339 StringRef ParentName,
7340 const OMPTargetDirective &S) {
7341 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7342 emitTargetRegion(CGF, S, Action);
7343 };
7344 llvm::Function *Fn;
7345 llvm::Constant *Addr;
7346 // Emit target region as a standalone region.
7347 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7348 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7349 assert(Fn && Addr && "Target device function emission failed.");
7350}
7351
7352void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
7353 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7354 emitTargetRegion(CGF, S, Action);
7355 };
7356 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7357}
7358
7359static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
7360 const OMPExecutableDirective &S,
7361 OpenMPDirectiveKind InnermostKind,
7362 const RegionCodeGenTy &CodeGen) {
7363 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_teams);
7364 llvm::Function *OutlinedFn =
7365 CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
7366 CGF, D: S, ThreadIDVar: *CS->getCapturedDecl()->param_begin(), InnermostKind,
7367 CodeGen);
7368
7369 const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
7370 const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
7371 if (NT || TL) {
7372 const Expr *NumTeams = NT ? NT->getNumTeams().front() : nullptr;
7373 const Expr *ThreadLimit = TL ? TL->getThreadLimit().front() : nullptr;
7374
7375 CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
7376 Loc: S.getBeginLoc());
7377 }
7378
7379 OMPTeamsScope Scope(CGF, S);
7380 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
7381 CGF.GenerateOpenMPCapturedVars(S: *CS, CapturedVars);
7382 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, D: S, Loc: S.getBeginLoc(), OutlinedFn,
7383 CapturedVars);
7384}
7385
7386void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
7387 // Emit teams region as a standalone region.
7388 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7389 Action.Enter(CGF);
7390 OMPPrivateScope PrivateScope(CGF);
7391 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
7392 CGF.EmitOMPPrivateClause(D: S, PrivateScope);
7393 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7394 (void)PrivateScope.Privatize();
7395 CGF.EmitStmt(S: S.getCapturedStmt(RegionKind: OMPD_teams)->getCapturedStmt());
7396 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7397 };
7398 emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute, CodeGen);
7399 emitPostUpdateForReductionClause(CGF&: *this, D: S,
7400 CondGen: [](CodeGenFunction &) { return nullptr; });
7401}
7402
7403static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
7404 const OMPTargetTeamsDirective &S) {
7405 auto *CS = S.getCapturedStmt(RegionKind: OMPD_teams);
7406 Action.Enter(CGF);
7407 // Emit teams region as a standalone region.
7408 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
7409 Action.Enter(CGF);
7410 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7411 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
7412 CGF.EmitOMPPrivateClause(D: S, PrivateScope);
7413 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7414 (void)PrivateScope.Privatize();
7415 if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()))
7416 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S);
7417 CGF.EmitStmt(S: CS->getCapturedStmt());
7418 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7419 };
7420 emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_teams, CodeGen);
7421 emitPostUpdateForReductionClause(CGF, D: S,
7422 CondGen: [](CodeGenFunction &) { return nullptr; });
7423}
7424
7425void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
7426 CodeGenModule &CGM, StringRef ParentName,
7427 const OMPTargetTeamsDirective &S) {
7428 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7429 emitTargetTeamsRegion(CGF, Action, S);
7430 };
7431 llvm::Function *Fn;
7432 llvm::Constant *Addr;
7433 // Emit target region as a standalone region.
7434 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7435 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7436 assert(Fn && Addr && "Target device function emission failed.");
7437}
7438
7439void CodeGenFunction::EmitOMPTargetTeamsDirective(
7440 const OMPTargetTeamsDirective &S) {
7441 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7442 emitTargetTeamsRegion(CGF, Action, S);
7443 };
7444 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7445}
7446
7447static void
7448emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
7449 const OMPTargetTeamsDistributeDirective &S) {
7450 Action.Enter(CGF);
7451 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7452 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
7453 };
7454
7455 // Emit teams region as a standalone region.
7456 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7457 PrePostActionTy &Action) {
7458 Action.Enter(CGF);
7459 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7460 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7461 (void)PrivateScope.Privatize();
7462 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute,
7463 CodeGen: CodeGenDistribute);
7464 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7465 };
7466 emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute, CodeGen);
7467 emitPostUpdateForReductionClause(CGF, D: S,
7468 CondGen: [](CodeGenFunction &) { return nullptr; });
7469}
7470
7471void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
7472 CodeGenModule &CGM, StringRef ParentName,
7473 const OMPTargetTeamsDistributeDirective &S) {
7474 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7475 emitTargetTeamsDistributeRegion(CGF, Action, S);
7476 };
7477 llvm::Function *Fn;
7478 llvm::Constant *Addr;
7479 // Emit target region as a standalone region.
7480 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7481 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7482 assert(Fn && Addr && "Target device function emission failed.");
7483}
7484
7485void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
7486 const OMPTargetTeamsDistributeDirective &S) {
7487 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7488 emitTargetTeamsDistributeRegion(CGF, Action, S);
7489 };
7490 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7491}
7492
7493static void emitTargetTeamsDistributeSimdRegion(
7494 CodeGenFunction &CGF, PrePostActionTy &Action,
7495 const OMPTargetTeamsDistributeSimdDirective &S) {
7496 Action.Enter(CGF);
7497 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7498 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
7499 };
7500
7501 // Emit teams region as a standalone region.
7502 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7503 PrePostActionTy &Action) {
7504 Action.Enter(CGF);
7505 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7506 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7507 (void)PrivateScope.Privatize();
7508 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute,
7509 CodeGen: CodeGenDistribute);
7510 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7511 };
7512 emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute_simd, CodeGen);
7513 emitPostUpdateForReductionClause(CGF, D: S,
7514 CondGen: [](CodeGenFunction &) { return nullptr; });
7515}
7516
7517void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
7518 CodeGenModule &CGM, StringRef ParentName,
7519 const OMPTargetTeamsDistributeSimdDirective &S) {
7520 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7521 emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
7522 };
7523 llvm::Function *Fn;
7524 llvm::Constant *Addr;
7525 // Emit target region as a standalone region.
7526 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7527 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7528 assert(Fn && Addr && "Target device function emission failed.");
7529}
7530
7531void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
7532 const OMPTargetTeamsDistributeSimdDirective &S) {
7533 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7534 emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
7535 };
7536 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7537}
7538
7539void CodeGenFunction::EmitOMPTeamsDistributeDirective(
7540 const OMPTeamsDistributeDirective &S) {
7541
7542 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7543 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
7544 };
7545
7546 // Emit teams region as a standalone region.
7547 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7548 PrePostActionTy &Action) {
7549 Action.Enter(CGF);
7550 OMPPrivateScope PrivateScope(CGF);
7551 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7552 (void)PrivateScope.Privatize();
7553 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute,
7554 CodeGen: CodeGenDistribute);
7555 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7556 };
7557 emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute, CodeGen);
7558 emitPostUpdateForReductionClause(CGF&: *this, D: S,
7559 CondGen: [](CodeGenFunction &) { return nullptr; });
7560}
7561
7562void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
7563 const OMPTeamsDistributeSimdDirective &S) {
7564 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7565 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
7566 };
7567
7568 // Emit teams region as a standalone region.
7569 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7570 PrePostActionTy &Action) {
7571 Action.Enter(CGF);
7572 OMPPrivateScope PrivateScope(CGF);
7573 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7574 (void)PrivateScope.Privatize();
7575 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_simd,
7576 CodeGen: CodeGenDistribute);
7577 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7578 };
7579 emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute_simd, CodeGen);
7580 emitPostUpdateForReductionClause(CGF&: *this, D: S,
7581 CondGen: [](CodeGenFunction &) { return nullptr; });
7582}
7583
7584void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
7585 const OMPTeamsDistributeParallelForDirective &S) {
7586 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7587 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
7588 IncExpr: S.getDistInc());
7589 };
7590
7591 // Emit teams region as a standalone region.
7592 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7593 PrePostActionTy &Action) {
7594 Action.Enter(CGF);
7595 OMPPrivateScope PrivateScope(CGF);
7596 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7597 (void)PrivateScope.Privatize();
7598 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute,
7599 CodeGen: CodeGenDistribute);
7600 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7601 };
7602 emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute_parallel_for, CodeGen);
7603 emitPostUpdateForReductionClause(CGF&: *this, D: S,
7604 CondGen: [](CodeGenFunction &) { return nullptr; });
7605}
7606
7607void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
7608 const OMPTeamsDistributeParallelForSimdDirective &S) {
7609 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7610 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
7611 IncExpr: S.getDistInc());
7612 };
7613
7614 // Emit teams region as a standalone region.
7615 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7616 PrePostActionTy &Action) {
7617 Action.Enter(CGF);
7618 OMPPrivateScope PrivateScope(CGF);
7619 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7620 (void)PrivateScope.Privatize();
7621 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
7622 CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false);
7623 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7624 };
7625 emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute_parallel_for_simd,
7626 CodeGen);
7627 emitPostUpdateForReductionClause(CGF&: *this, D: S,
7628 CondGen: [](CodeGenFunction &) { return nullptr; });
7629}
7630
7631void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective &S) {
7632 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
7633 llvm::Value *Device = nullptr;
7634 llvm::Value *NumDependences = nullptr;
7635 llvm::Value *DependenceList = nullptr;
7636
7637 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7638 Device = EmitScalarExpr(E: C->getDevice());
7639
7640 // Build list and emit dependences
7641 OMPTaskDataTy Data;
7642 buildDependences(S, Data);
7643 if (!Data.Dependences.empty()) {
7644 Address DependenciesArray = Address::invalid();
7645 std::tie(args&: NumDependences, args&: DependenciesArray) =
7646 CGM.getOpenMPRuntime().emitDependClause(CGF&: *this, Dependencies: Data.Dependences,
7647 Loc: S.getBeginLoc());
7648 DependenceList = DependenciesArray.emitRawPointer(CGF&: *this);
7649 }
7650 Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();
7651
7652 assert(!(Data.HasNowaitClause && !(S.getSingleClause<OMPInitClause>() ||
7653 S.getSingleClause<OMPDestroyClause>() ||
7654 S.getSingleClause<OMPUseClause>())) &&
7655 "OMPNowaitClause clause is used separately in OMPInteropDirective.");
7656
7657 auto ItOMPInitClause = S.getClausesOfKind<OMPInitClause>();
7658 if (!ItOMPInitClause.empty()) {
7659 // Look at the multiple init clauses
7660 for (const OMPInitClause *C : ItOMPInitClause) {
7661 llvm::Value *InteropvarPtr =
7662 EmitLValue(E: C->getInteropVar()).getPointer(CGF&: *this);
7663 llvm::omp::OMPInteropType InteropType =
7664 llvm::omp::OMPInteropType::Unknown;
7665 if (C->getIsTarget()) {
7666 InteropType = llvm::omp::OMPInteropType::Target;
7667 } else {
7668 assert(C->getIsTargetSync() &&
7669 "Expected interop-type target/targetsync");
7670 InteropType = llvm::omp::OMPInteropType::TargetSync;
7671 }
7672 OMPBuilder.createOMPInteropInit(Loc: Builder, InteropVar: InteropvarPtr, InteropType,
7673 Device, NumDependences, DependenceAddress: DependenceList,
7674 HaveNowaitClause: Data.HasNowaitClause);
7675 }
7676 }
7677 auto ItOMPDestroyClause = S.getClausesOfKind<OMPDestroyClause>();
7678 if (!ItOMPDestroyClause.empty()) {
7679 // Look at the multiple destroy clauses
7680 for (const OMPDestroyClause *C : ItOMPDestroyClause) {
7681 llvm::Value *InteropvarPtr =
7682 EmitLValue(E: C->getInteropVar()).getPointer(CGF&: *this);
7683 OMPBuilder.createOMPInteropDestroy(Loc: Builder, InteropVar: InteropvarPtr, Device,
7684 NumDependences, DependenceAddress: DependenceList,
7685 HaveNowaitClause: Data.HasNowaitClause);
7686 }
7687 }
7688 auto ItOMPUseClause = S.getClausesOfKind<OMPUseClause>();
7689 if (!ItOMPUseClause.empty()) {
7690 // Look at the multiple use clauses
7691 for (const OMPUseClause *C : ItOMPUseClause) {
7692 llvm::Value *InteropvarPtr =
7693 EmitLValue(E: C->getInteropVar()).getPointer(CGF&: *this);
7694 OMPBuilder.createOMPInteropUse(Loc: Builder, InteropVar: InteropvarPtr, Device,
7695 NumDependences, DependenceAddress: DependenceList,
7696 HaveNowaitClause: Data.HasNowaitClause);
7697 }
7698 }
7699}
7700
7701static void emitTargetTeamsDistributeParallelForRegion(
7702 CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
7703 PrePostActionTy &Action) {
7704 Action.Enter(CGF);
7705 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7706 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
7707 IncExpr: S.getDistInc());
7708 };
7709
7710 // Emit teams region as a standalone region.
7711 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7712 PrePostActionTy &Action) {
7713 Action.Enter(CGF);
7714 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7715 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7716 (void)PrivateScope.Privatize();
7717 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
7718 CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false);
7719 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7720 };
7721
7722 emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute_parallel_for,
7723 CodeGen: CodeGenTeams);
7724 emitPostUpdateForReductionClause(CGF, D: S,
7725 CondGen: [](CodeGenFunction &) { return nullptr; });
7726}
7727
7728void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
7729 CodeGenModule &CGM, StringRef ParentName,
7730 const OMPTargetTeamsDistributeParallelForDirective &S) {
7731 // Emit SPMD target teams distribute parallel for region as a standalone
7732 // region.
7733 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7734 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
7735 };
7736 llvm::Function *Fn;
7737 llvm::Constant *Addr;
7738 // Emit target region as a standalone region.
7739 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7740 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7741 assert(Fn && Addr && "Target device function emission failed.");
7742}
7743
7744void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
7745 const OMPTargetTeamsDistributeParallelForDirective &S) {
7746 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7747 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
7748 };
7749 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7750}
7751
7752static void emitTargetTeamsDistributeParallelForSimdRegion(
7753 CodeGenFunction &CGF,
7754 const OMPTargetTeamsDistributeParallelForSimdDirective &S,
7755 PrePostActionTy &Action) {
7756 Action.Enter(CGF);
7757 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7758 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
7759 IncExpr: S.getDistInc());
7760 };
7761
7762 // Emit teams region as a standalone region.
7763 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7764 PrePostActionTy &Action) {
7765 Action.Enter(CGF);
7766 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7767 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
7768 (void)PrivateScope.Privatize();
7769 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
7770 CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false);
7771 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
7772 };
7773
7774 emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute_parallel_for_simd,
7775 CodeGen: CodeGenTeams);
7776 emitPostUpdateForReductionClause(CGF, D: S,
7777 CondGen: [](CodeGenFunction &) { return nullptr; });
7778}
7779
7780void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
7781 CodeGenModule &CGM, StringRef ParentName,
7782 const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
7783 // Emit SPMD target teams distribute parallel for simd region as a standalone
7784 // region.
7785 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7786 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
7787 };
7788 llvm::Function *Fn;
7789 llvm::Constant *Addr;
7790 // Emit target region as a standalone region.
7791 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7792 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
7793 assert(Fn && Addr && "Target device function emission failed.");
7794}
7795
7796void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
7797 const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
7798 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7799 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
7800 };
7801 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
7802}
7803
7804void CodeGenFunction::EmitOMPCancellationPointDirective(
7805 const OMPCancellationPointDirective &S) {
7806 CGM.getOpenMPRuntime().emitCancellationPointCall(CGF&: *this, Loc: S.getBeginLoc(),
7807 CancelRegion: S.getCancelRegion());
7808}
7809
7810void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
7811 const Expr *IfCond = nullptr;
7812 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
7813 if (C->getNameModifier() == OMPD_unknown ||
7814 C->getNameModifier() == OMPD_cancel) {
7815 IfCond = C->getCondition();
7816 break;
7817 }
7818 }
7819 if (CGM.getLangOpts().OpenMPIRBuilder) {
7820 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
7821 // TODO: This check is necessary as we only generate `omp parallel` through
7822 // the OpenMPIRBuilder for now.
7823 if (S.getCancelRegion() == OMPD_parallel ||
7824 S.getCancelRegion() == OMPD_sections ||
7825 S.getCancelRegion() == OMPD_section) {
7826 llvm::Value *IfCondition = nullptr;
7827 if (IfCond)
7828 IfCondition = EmitScalarExpr(E: IfCond,
7829 /*IgnoreResultAssign=*/true);
7830 llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(
7831 ValOrErr: OMPBuilder.createCancel(Loc: Builder, IfCondition, CanceledDirective: S.getCancelRegion()));
7832 return Builder.restoreIP(IP: AfterIP);
7833 }
7834 }
7835
7836 CGM.getOpenMPRuntime().emitCancelCall(CGF&: *this, Loc: S.getBeginLoc(), IfCond,
7837 CancelRegion: S.getCancelRegion());
7838}
7839
7840CodeGenFunction::JumpDest
7841CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
7842 if (Kind == OMPD_parallel || Kind == OMPD_task ||
7843 Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
7844 Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
7845 return ReturnBlock;
7846 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
7847 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
7848 Kind == OMPD_distribute_parallel_for ||
7849 Kind == OMPD_target_parallel_for ||
7850 Kind == OMPD_teams_distribute_parallel_for ||
7851 Kind == OMPD_target_teams_distribute_parallel_for);
7852 return OMPCancelStack.getExitBlock();
7853}
7854
7855void CodeGenFunction::EmitOMPUseDevicePtrClause(
7856 const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
7857 const llvm::DenseMap<const ValueDecl *, llvm::Value *>
7858 CaptureDeviceAddrMap) {
7859 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
7860 for (const Expr *OrigVarIt : C.varlist()) {
7861 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: OrigVarIt)->getDecl());
7862 if (!Processed.insert(V: OrigVD).second)
7863 continue;
7864
7865 // In order to identify the right initializer we need to match the
7866 // declaration used by the mapping logic. In some cases we may get
7867 // OMPCapturedExprDecl that refers to the original declaration.
7868 const ValueDecl *MatchingVD = OrigVD;
7869 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: MatchingVD)) {
7870 // OMPCapturedExprDecl are used to privative fields of the current
7871 // structure.
7872 const auto *ME = cast<MemberExpr>(Val: OED->getInit());
7873 assert(isa<CXXThisExpr>(ME->getBase()->IgnoreImpCasts()) &&
7874 "Base should be the current struct!");
7875 MatchingVD = ME->getMemberDecl();
7876 }
7877
7878 // If we don't have information about the current list item, move on to
7879 // the next one.
7880 auto InitAddrIt = CaptureDeviceAddrMap.find(Val: MatchingVD);
7881 if (InitAddrIt == CaptureDeviceAddrMap.end())
7882 continue;
7883
7884 llvm::Type *Ty = ConvertTypeForMem(T: OrigVD->getType().getNonReferenceType());
7885
7886 // Return the address of the private variable.
7887 bool IsRegistered = PrivateScope.addPrivate(
7888 LocalVD: OrigVD,
7889 Addr: Address(InitAddrIt->second, Ty,
7890 getContext().getTypeAlignInChars(T: getContext().VoidPtrTy)));
7891 assert(IsRegistered && "firstprivate var already registered as private");
7892 // Silence the warning about unused variable.
7893 (void)IsRegistered;
7894 }
7895}
7896
7897static const VarDecl *getBaseDecl(const Expr *Ref) {
7898 const Expr *Base = Ref->IgnoreParenImpCasts();
7899 while (const auto *OASE = dyn_cast<ArraySectionExpr>(Val: Base))
7900 Base = OASE->getBase()->IgnoreParenImpCasts();
7901 while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Val: Base))
7902 Base = ASE->getBase()->IgnoreParenImpCasts();
7903 return cast<VarDecl>(Val: cast<DeclRefExpr>(Val: Base)->getDecl());
7904}
7905
7906void CodeGenFunction::EmitOMPUseDeviceAddrClause(
7907 const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
7908 const llvm::DenseMap<const ValueDecl *, llvm::Value *>
7909 CaptureDeviceAddrMap) {
7910 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
7911 for (const Expr *Ref : C.varlist()) {
7912 const VarDecl *OrigVD = getBaseDecl(Ref);
7913 if (!Processed.insert(V: OrigVD).second)
7914 continue;
7915 // In order to identify the right initializer we need to match the
7916 // declaration used by the mapping logic. In some cases we may get
7917 // OMPCapturedExprDecl that refers to the original declaration.
7918 const ValueDecl *MatchingVD = OrigVD;
7919 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: MatchingVD)) {
7920 // OMPCapturedExprDecl are used to privative fields of the current
7921 // structure.
7922 const auto *ME = cast<MemberExpr>(Val: OED->getInit());
7923 assert(isa<CXXThisExpr>(ME->getBase()) &&
7924 "Base should be the current struct!");
7925 MatchingVD = ME->getMemberDecl();
7926 }
7927
7928 // If we don't have information about the current list item, move on to
7929 // the next one.
7930 auto InitAddrIt = CaptureDeviceAddrMap.find(Val: MatchingVD);
7931 if (InitAddrIt == CaptureDeviceAddrMap.end())
7932 continue;
7933
7934 llvm::Type *Ty = ConvertTypeForMem(T: OrigVD->getType().getNonReferenceType());
7935
7936 Address PrivAddr =
7937 Address(InitAddrIt->second, Ty,
7938 getContext().getTypeAlignInChars(T: getContext().VoidPtrTy));
7939 // For declrefs and variable length array need to load the pointer for
7940 // correct mapping, since the pointer to the data was passed to the runtime.
7941 if (isa<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts()) ||
7942 MatchingVD->getType()->isArrayType()) {
7943 QualType PtrTy = getContext().getPointerType(
7944 T: OrigVD->getType().getNonReferenceType());
7945 PrivAddr =
7946 EmitLoadOfPointer(Ptr: PrivAddr.withElementType(ElemTy: ConvertTypeForMem(T: PtrTy)),
7947 PtrTy: PtrTy->castAs<PointerType>());
7948 }
7949
7950 (void)PrivateScope.addPrivate(LocalVD: OrigVD, Addr: PrivAddr);
7951 }
7952}
7953
7954// Generate the instructions for '#pragma omp target data' directive.
/// Emit code for a 'target data' directive. The region body is emitted as an
/// inlined OMPD_target_data directive; when offload target triples are
/// configured it is handed to the runtime's emitTargetDataCalls together with
/// the optional 'if' and 'device' clause expressions, otherwise the region
/// codegen is simply run in place.
7955void CodeGenFunction::EmitOMPTargetDataDirective(
7956 const OMPTargetDataDirective &S) {
7957 // Emit vtable only from host for target data directive.
7958 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
7959 CGM.getOpenMPRuntime().registerVTable(D: S);
7960
7961 CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
7962 /*SeparateBeginEndCalls=*/true);
7963
7964 // Create a pre/post action to signal the privatization of the device pointer.
7965 // This action can be replaced by the OpenMP runtime code generation to
7966 // deactivate privatization.
7967 bool PrivatizeDevicePointers = false;
 // Pre/post action whose only effect is to set the referenced flag to true
 // when Enter() is invoked.
7968 class DevicePointerPrivActionTy : public PrePostActionTy {
7969 bool &PrivatizeDevicePointers;
7970
7971 public:
7972 explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
7973 : PrivatizeDevicePointers(PrivatizeDevicePointers) {}
7974 void Enter(CodeGenFunction &CGF) override {
7975 PrivatizeDevicePointers = true;
7976 }
7977 };
7978 DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
7979
 // Outer region codegen: opens a lexical scope for the directive and emits
 // PrivCodeGen (with the incoming Action attached) as an inlined
 // OMPD_target_data directive.
7980 auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
 // Emits the statement held by the innermost captured statement of S.
7981 auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
7982 CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
7983 };
7984
7985 // Codegen that selects whether to generate the privatization code or not.
7986 auto &&PrivCodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
7987 RegionCodeGenTy RCG(InnermostCodeGen);
 // Reset the flag so it is true afterwards only if Action.Enter() below
 // sets it.
7988 PrivatizeDevicePointers = false;
7989
7990 // Call the pre-action to change the status of PrivatizeDevicePointers if
7991 // needed.
7992 Action.Enter(CGF);
7993
7994 if (PrivatizeDevicePointers) {
7995 OMPPrivateScope PrivateScope(CGF);
7996 // Emit all instances of the use_device_ptr clause.
7997 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
7998 CGF.EmitOMPUseDevicePtrClause(C: *C, PrivateScope,
7999 CaptureDeviceAddrMap: Info.CaptureDeviceAddrMap);
 // Likewise for all instances of the use_device_addr clause.
8000 for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
8001 CGF.EmitOMPUseDeviceAddrClause(C: *C, PrivateScope,
8002 CaptureDeviceAddrMap: Info.CaptureDeviceAddrMap);
8003 (void)PrivateScope.Privatize();
 // Emit the region body while the private scope is active.
8004 RCG(CGF);
8005 } else {
8006 // If we don't have target devices, don't bother emitting the data
8007 // mapping code.
8008 std::optional<OpenMPDirectiveKind> CaptureRegion;
8009 if (CGM.getLangOpts().OMPTargetTriples.empty()) {
8010 // Emit helper decls of the use_device_ptr/use_device_addr clauses.
8011 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
8012 for (const Expr *E : C->varlist()) {
8013 const Decl *D = cast<DeclRefExpr>(Val: E)->getDecl();
8014 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: D))
8015 CGF.EmitVarDecl(D: *OED);
8016 }
8017 for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
8018 for (const Expr *E : C->varlist()) {
8019 const Decl *D = getBaseDecl(Ref: E);
8020 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(Val: D))
8021 CGF.EmitVarDecl(D: *OED);
8022 }
8023 } else {
 // Offload targets are configured: hand OMPD_unknown to the lexical
 // scope below instead of leaving the capture region unset.
8024 CaptureRegion = OMPD_unknown;
8025 }
8026
8027 OMPLexicalScope Scope(CGF, S, CaptureRegion);
8028 RCG(CGF);
8029 }
8030 };
8031
8032 // Forward the provided action to the privatization codegen.
8033 RegionCodeGenTy PrivRCG(PrivCodeGen);
8034 PrivRCG.setAction(Action);
8035
8036 // Notwithstanding the body of the region is emitted as inlined directive,
8037 // we don't use an inline scope as changes in the references inside the
8038 // region are expected to be visible outside, so we do not privative them.
8039 OMPLexicalScope Scope(CGF, S);
8040 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_target_data,
8041 CodeGen: PrivRCG);
8042 };
8043
8044 RegionCodeGenTy RCG(CodeGen);
8045
8046 // If we don't have target devices, don't bother emitting the data mapping
8047 // code.
8048 if (CGM.getLangOpts().OMPTargetTriples.empty()) {
 // PrivAction is only attached to RCG further below, so it is not attached
 // on this path.
8049 RCG(*this);
8050 return;
8051 }
8052
8053 // Check if we have any if clause associated with the directive.
8054 const Expr *IfCond = nullptr;
8055 if (const auto *C = S.getSingleClause<OMPIfClause>())
8056 IfCond = C->getCondition();
8057
8058 // Check if we have any device clause associated with the directive.
8059 const Expr *Device = nullptr;
8060 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
8061 Device = C->getDevice();
8062
8063 // Set the action to signal privatization of device pointers.
8064 RCG.setAction(PrivAction);
8065
8066 // Emit region code.
8067 CGM.getOpenMPRuntime().emitTargetDataCalls(CGF&: *this, D: S, IfCond, Device, CodeGen: RCG,
8068 Info);
8069}
8070
8071void CodeGenFunction::EmitOMPTargetEnterDataDirective(
8072 const OMPTargetEnterDataDirective &S) {
8073 // If we don't have target devices, don't bother emitting the data mapping
8074 // code.
8075 if (CGM.getLangOpts().OMPTargetTriples.empty())
8076 return;
8077
8078 // Check if we have any if clause associated with the directive.
8079 const Expr *IfCond = nullptr;
8080 if (const auto *C = S.getSingleClause<OMPIfClause>())
8081 IfCond = C->getCondition();
8082
8083 // Check if we have any device clause associated with the directive.
8084 const Expr *Device = nullptr;
8085 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
8086 Device = C->getDevice();
8087
8088 OMPLexicalScope Scope(*this, S, OMPD_task);
8089 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF&: *this, D: S, IfCond, Device);
8090}
8091
8092void CodeGenFunction::EmitOMPTargetExitDataDirective(
8093 const OMPTargetExitDataDirective &S) {
8094 // If we don't have target devices, don't bother emitting the data mapping
8095 // code.
8096 if (CGM.getLangOpts().OMPTargetTriples.empty())
8097 return;
8098
8099 // Check if we have any if clause associated with the directive.
8100 const Expr *IfCond = nullptr;
8101 if (const auto *C = S.getSingleClause<OMPIfClause>())
8102 IfCond = C->getCondition();
8103
8104 // Check if we have any device clause associated with the directive.
8105 const Expr *Device = nullptr;
8106 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
8107 Device = C->getDevice();
8108
8109 OMPLexicalScope Scope(*this, S, OMPD_task);
8110 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF&: *this, D: S, IfCond, Device);
8111}
8112
8113static void emitTargetParallelRegion(CodeGenFunction &CGF,
8114 const OMPTargetParallelDirective &S,
8115 PrePostActionTy &Action) {
8116 // Get the captured statement associated with the 'parallel' region.
8117 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_parallel);
8118 Action.Enter(CGF);
8119 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
8120 Action.Enter(CGF);
8121 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
8122 (void)CGF.EmitOMPFirstprivateClause(D: S, PrivateScope);
8123 CGF.EmitOMPPrivateClause(D: S, PrivateScope);
8124 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
8125 (void)PrivateScope.Privatize();
8126 if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()))
8127 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S);
8128 // TODO: Add support for clauses.
8129 CGF.EmitStmt(S: CS->getCapturedStmt());
8130 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_parallel);
8131 };
8132 emitCommonOMPParallelDirective(CGF, S, InnermostKind: OMPD_parallel, CodeGen,
8133 CodeGenBoundParameters: emitEmptyBoundParameters);
8134 emitPostUpdateForReductionClause(CGF, D: S,
8135 CondGen: [](CodeGenFunction &) { return nullptr; });
8136}
8137
8138void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
8139 CodeGenModule &CGM, StringRef ParentName,
8140 const OMPTargetParallelDirective &S) {
8141 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8142 emitTargetParallelRegion(CGF, S, Action);
8143 };
8144 llvm::Function *Fn;
8145 llvm::Constant *Addr;
8146 // Emit target region as a standalone region.
8147 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
8148 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
8149 assert(Fn && Addr && "Target device function emission failed.");
8150}
8151
8152void CodeGenFunction::EmitOMPTargetParallelDirective(
8153 const OMPTargetParallelDirective &S) {
8154 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8155 emitTargetParallelRegion(CGF, S, Action);
8156 };
8157 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
8158}
8159
8160static void emitTargetParallelForRegion(CodeGenFunction &CGF,
8161 const OMPTargetParallelForDirective &S,
8162 PrePostActionTy &Action) {
8163 Action.Enter(CGF);
8164 // Emit directive as a combined directive that consists of two implicit
8165 // directives: 'parallel' with 'for' directive.
8166 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8167 Action.Enter(CGF);
8168 CodeGenFunction::OMPCancelStackRAII CancelRegion(
8169 CGF, OMPD_target_parallel_for, S.hasCancel());
8170 CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), CodeGenLoopBounds: emitForLoopBounds,
8171 CGDispatchBounds: emitDispatchForLoopBounds);
8172 };
8173 emitCommonOMPParallelDirective(CGF, S, InnermostKind: OMPD_for, CodeGen,
8174 CodeGenBoundParameters: emitEmptyBoundParameters);
8175}
8176
8177void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
8178 CodeGenModule &CGM, StringRef ParentName,
8179 const OMPTargetParallelForDirective &S) {
8180 // Emit SPMD target parallel for region as a standalone region.
8181 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8182 emitTargetParallelForRegion(CGF, S, Action);
8183 };
8184 llvm::Function *Fn;
8185 llvm::Constant *Addr;
8186 // Emit target region as a standalone region.
8187 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
8188 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
8189 assert(Fn && Addr && "Target device function emission failed.");
8190}
8191
8192void CodeGenFunction::EmitOMPTargetParallelForDirective(
8193 const OMPTargetParallelForDirective &S) {
8194 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8195 emitTargetParallelForRegion(CGF, S, Action);
8196 };
8197 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
8198}
8199
8200static void
8201emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
8202 const OMPTargetParallelForSimdDirective &S,
8203 PrePostActionTy &Action) {
8204 Action.Enter(CGF);
8205 // Emit directive as a combined directive that consists of two implicit
8206 // directives: 'parallel' with 'for' directive.
8207 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8208 Action.Enter(CGF);
8209 CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), CodeGenLoopBounds: emitForLoopBounds,
8210 CGDispatchBounds: emitDispatchForLoopBounds);
8211 };
8212 emitCommonOMPParallelDirective(CGF, S, InnermostKind: OMPD_simd, CodeGen,
8213 CodeGenBoundParameters: emitEmptyBoundParameters);
8214}
8215
8216void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
8217 CodeGenModule &CGM, StringRef ParentName,
8218 const OMPTargetParallelForSimdDirective &S) {
8219 // Emit SPMD target parallel for region as a standalone region.
8220 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8221 emitTargetParallelForSimdRegion(CGF, S, Action);
8222 };
8223 llvm::Function *Fn;
8224 llvm::Constant *Addr;
8225 // Emit target region as a standalone region.
8226 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
8227 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
8228 assert(Fn && Addr && "Target device function emission failed.");
8229}
8230
8231void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
8232 const OMPTargetParallelForSimdDirective &S) {
8233 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8234 emitTargetParallelForSimdRegion(CGF, S, Action);
8235 };
8236 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
8237}
8238
8239/// Emit a helper variable and return corresponding lvalue.
8240static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
8241 const ImplicitParamDecl *PVD,
8242 CodeGenFunction::OMPPrivateScope &Privates) {
8243 const auto *VDecl = cast<VarDecl>(Val: Helper->getDecl());
8244 Privates.addPrivate(LocalVD: VDecl, Addr: CGF.GetAddrOfLocalVar(VD: PVD));
8245}
8246
/// Shared emission path for taskloop-based directives (asserted via
/// isOpenMPTaskLoopDirective). Builds the captured-statement argument, gathers
/// the 'if', 'nogroup' and 'grainsize'/'num_tasks' clause data, and emits the
/// task through EmitOMPTaskBasedDirective, wrapped in a taskgroup region
/// unless Data.Nogroup is set.
8247void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
8248 assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
8249 // Emit outlined function for task construct.
8250 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: OMPD_taskloop);
8251 Address CapturedStruct = Address::invalid();
8252 {
 // The lexical scope is only needed while the captured argument is built.
8253 OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
8254 CapturedStruct = GenerateCapturedStmtArgument(S: *CS);
8255 }
8256 CanQualType SharedsTy =
8257 getContext().getCanonicalTagType(TD: CS->getCapturedRecordDecl());
 // Use the first 'if' clause that has no name modifier or names 'taskloop'.
8258 const Expr *IfCond = nullptr;
8259 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
8260 if (C->getNameModifier() == OMPD_unknown ||
8261 C->getNameModifier() == OMPD_taskloop) {
8262 IfCond = C->getCondition();
8263 break;
8264 }
8265 }
8266
8267 OMPTaskDataTy Data;
8268 // Check if taskloop must be emitted without taskgroup.
8269 Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
8270 // TODO: Check if we should emit tied or untied task.
8271 Data.Tied = true;
8272 // Set scheduling for taskloop
 // Schedule's int flag is false for 'grainsize' and true for 'num_tasks';
 // its pointer holds the emitted clause value. When neither clause is
 // present, Schedule and HasModifier are left as constructed.
8273 if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
8274 // grainsize clause
8275 Data.Schedule.setInt(/*IntVal=*/false);
8276 Data.Schedule.setPointer(EmitScalarExpr(E: Clause->getGrainsize()));
 // NOTE(review): the '? true : false' is redundant; the comparison already
 // yields a bool.
8277 Data.HasModifier =
8278 (Clause->getModifier() == OMPC_GRAINSIZE_strict) ? true : false;
8279 } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
8280 // num_tasks clause
8281 Data.Schedule.setInt(/*IntVal=*/true);
8282 Data.Schedule.setPointer(EmitScalarExpr(E: Clause->getNumTasks()));
8283 Data.HasModifier =
8284 (Clause->getModifier() == OMPC_NUMTASKS_strict) ? true : false;
8285 }
8286
 // Loop-body generator handed to EmitOMPTaskBasedDirective below.
8287 auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
8288 // if (PreCond) {
8289 // for (IV in 0..LastIteration) BODY;
8290 // <Final counter/linear vars updates>;
8291 // }
8292 //
8293
8294 // Emit: if (PreCond) - begin.
8295 // If the condition constant folds and can be elided, avoid emitting the
8296 // whole loop.
8297 bool CondConstant;
8298 llvm::BasicBlock *ContBlock = nullptr;
8299 OMPLoopScope PreInitScope(CGF, S);
8300 if (CGF.ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) {
8301 if (!CondConstant)
8302 return;
8303 } else {
8304 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock(name: "taskloop.if.then");
8305 ContBlock = CGF.createBasicBlock(name: "taskloop.if.end");
8306 emitPreCond(CGF, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock,
8307 TrueCount: CGF.getProfileCount(S: &S));
8308 CGF.EmitBlock(BB: ThenBlock);
8309 CGF.incrementProfileCounter(S: &S);
8310 }
8311
8312 (void)CGF.EmitOMPLinearClauseInit(D: S);
8313
8314 OMPPrivateScope LoopScope(CGF);
8315 // Emit helper vars inits.
 // Offsets into the captured declaration's parameter list; the enumeration
 // starts at 5, so the four parameters sit at indices 5 through 8.
8316 enum { LowerBound = 5, UpperBound, Stride, LastIter };
8317 auto *I = CS->getCapturedDecl()->param_begin();
8318 auto *LBP = std::next(x: I, n: LowerBound);
8319 auto *UBP = std::next(x: I, n: UpperBound);
8320 auto *STP = std::next(x: I, n: Stride);
8321 auto *LIP = std::next(x: I, n: LastIter);
 // Map each loop helper variable onto the matching parameter's address.
8322 mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getLowerBoundVariable()), PVD: *LBP,
8323 Privates&: LoopScope);
8324 mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getUpperBoundVariable()), PVD: *UBP,
8325 Privates&: LoopScope);
8326 mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getStrideVariable()), PVD: *STP, Privates&: LoopScope);
8327 mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getIsLastIterVariable()), PVD: *LIP,
8328 Privates&: LoopScope);
8329 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
8330 CGF.EmitOMPLinearClause(D: S, PrivateScope&: LoopScope);
8331 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope);
8332 (void)LoopScope.Privatize();
8333 // Emit the loop iteration variable.
8334 const Expr *IVExpr = S.getIterationVariable();
8335 const auto *IVDecl = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IVExpr)->getDecl());
8336 CGF.EmitVarDecl(D: *IVDecl);
8337 CGF.EmitIgnoredExpr(E: S.getInit());
8338
8339 // Emit the iterations count variable.
8340 // If it is not a variable, Sema decided to calculate iterations count on
8341 // each iteration (e.g., it is foldable into a constant).
8342 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) {
8343 CGF.EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl()));
8344 // Emit calculation of the iterations count.
8345 CGF.EmitIgnoredExpr(E: S.getCalcLastIteration());
8346 }
8347
8348 {
8349 OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
 // The first callback performs simd initialization only for simd
 // directives; the second emits the inner loop over the body.
8350 emitCommonSimdLoop(
8351 CGF, S,
8352 SimdInitGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) {
8353 if (isOpenMPSimdDirective(DKind: S.getDirectiveKind()))
8354 CGF.EmitOMPSimdInit(D: S);
8355 },
8356 BodyCodeGen: [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
8357 CGF.EmitOMPInnerLoop(
8358 S, RequiresCleanup: LoopScope.requiresCleanups(), LoopCond: S.getCond(), IncExpr: S.getInc(),
8359 BodyGen: [&S](CodeGenFunction &CGF) {
8360 emitOMPLoopBodyWithStopPoint(CGF, S,
8361 LoopExit: CodeGenFunction::JumpDest());
8362 },
8363 PostIncGen: [](CodeGenFunction &) {});
8364 });
8365 }
8366 // Emit: if (PreCond) - end.
8367 if (ContBlock) {
8368 CGF.EmitBranch(Block: ContBlock);
8369 CGF.EmitBlock(BB: ContBlock, IsFinished: true);
8370 }
8371 // Emit final copy of the lastprivate variables if IsLastIter != 0.
8372 if (HasLastprivateClause) {
8373 CGF.EmitOMPLastprivateClauseFinal(
8374 D: S, NoFinals: isOpenMPSimdDirective(DKind: S.getDirectiveKind()),
8375 IsLastIterCond: CGF.Builder.CreateIsNotNull(Arg: CGF.EmitLoadOfScalar(
8376 Addr: CGF.GetAddrOfLocalVar(VD: *LIP), /*Volatile=*/false,
8377 Ty: (*LIP)->getType(), Loc: S.getBeginLoc())));
8378 }
 // Restore the variable mapping before the final updates of linear
 // variables are emitted; those use the same last-iteration test as above.
8379 LoopScope.restoreMap();
8380 CGF.EmitOMPLinearClauseFinal(D: S, CondGen: [LIP, &S](CodeGenFunction &CGF) {
8381 return CGF.Builder.CreateIsNotNull(
8382 Arg: CGF.EmitLoadOfScalar(Addr: CGF.GetAddrOfLocalVar(VD: *LIP), /*Volatile=*/false,
8383 Ty: (*LIP)->getType(), Loc: S.getBeginLoc()));
8384 });
8385 };
 // Task generator: issues the runtime's emitTaskLoopCall for the outlined
 // function inside an inlined OMPD_taskloop directive.
8386 auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
8387 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
8388 const OMPTaskDataTy &Data) {
8389 auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
8390 &Data](CodeGenFunction &CGF, PrePostActionTy &) {
8391 OMPLoopScope PreInitScope(CGF, S);
8392 CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, Loc: S.getBeginLoc(), D: S,
8393 TaskFunction: OutlinedFn, SharedsTy,
8394 Shareds: CapturedStruct, IfCond, Data);
8395 };
8396 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_taskloop,
8397 CodeGen);
8398 };
 // With 'nogroup' the task is emitted directly; otherwise the same emission
 // is wrapped in a taskgroup region.
8399 if (Data.Nogroup) {
8400 EmitOMPTaskBasedDirective(S, CapturedRegion: OMPD_taskloop, BodyGen, TaskGen, Data);
8401 } else {
8402 CGM.getOpenMPRuntime().emitTaskgroupRegion(
8403 CGF&: *this,
8404 TaskgroupOpGen: [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
8405 PrePostActionTy &Action) {
8406 Action.Enter(CGF);
8407 CGF.EmitOMPTaskBasedDirective(S, CapturedRegion: OMPD_taskloop, BodyGen, TaskGen,
8408 Data);
8409 },
8410 Loc: S.getBeginLoc());
8411 }
8412}
8413
8414void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
8415 auto LPCRegion =
8416 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8417 EmitOMPTaskLoopBasedDirective(S);
8418}
8419
8420void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
8421 const OMPTaskLoopSimdDirective &S) {
8422 auto LPCRegion =
8423 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8424 OMPLexicalScope Scope(*this, S);
8425 EmitOMPTaskLoopBasedDirective(S);
8426}
8427
8428void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
8429 const OMPMasterTaskLoopDirective &S) {
8430 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8431 Action.Enter(CGF);
8432 EmitOMPTaskLoopBasedDirective(S);
8433 };
8434 auto LPCRegion =
8435 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8436 OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false);
8437 CGM.getOpenMPRuntime().emitMasterRegion(CGF&: *this, MasterOpGen: CodeGen, Loc: S.getBeginLoc());
8438}
8439
8440void CodeGenFunction::EmitOMPMaskedTaskLoopDirective(
8441 const OMPMaskedTaskLoopDirective &S) {
8442 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8443 Action.Enter(CGF);
8444 EmitOMPTaskLoopBasedDirective(S);
8445 };
8446 auto LPCRegion =
8447 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8448 OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false);
8449 CGM.getOpenMPRuntime().emitMaskedRegion(CGF&: *this, MaskedOpGen: CodeGen, Loc: S.getBeginLoc());
8450}
8451
8452void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
8453 const OMPMasterTaskLoopSimdDirective &S) {
8454 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8455 Action.Enter(CGF);
8456 EmitOMPTaskLoopBasedDirective(S);
8457 };
8458 auto LPCRegion =
8459 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8460 OMPLexicalScope Scope(*this, S);
8461 CGM.getOpenMPRuntime().emitMasterRegion(CGF&: *this, MasterOpGen: CodeGen, Loc: S.getBeginLoc());
8462}
8463
8464void CodeGenFunction::EmitOMPMaskedTaskLoopSimdDirective(
8465 const OMPMaskedTaskLoopSimdDirective &S) {
8466 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8467 Action.Enter(CGF);
8468 EmitOMPTaskLoopBasedDirective(S);
8469 };
8470 auto LPCRegion =
8471 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8472 OMPLexicalScope Scope(*this, S);
8473 CGM.getOpenMPRuntime().emitMaskedRegion(CGF&: *this, MaskedOpGen: CodeGen, Loc: S.getBeginLoc());
8474}
8475
8476void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
8477 const OMPParallelMasterTaskLoopDirective &S) {
8478 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8479 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
8480 PrePostActionTy &Action) {
8481 Action.Enter(CGF);
8482 CGF.EmitOMPTaskLoopBasedDirective(S);
8483 };
8484 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
8485 CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: TaskLoopCodeGen,
8486 Loc: S.getBeginLoc());
8487 };
8488 auto LPCRegion =
8489 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8490 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_master_taskloop, CodeGen,
8491 CodeGenBoundParameters: emitEmptyBoundParameters);
8492}
8493
8494void CodeGenFunction::EmitOMPParallelMaskedTaskLoopDirective(
8495 const OMPParallelMaskedTaskLoopDirective &S) {
8496 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8497 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
8498 PrePostActionTy &Action) {
8499 Action.Enter(CGF);
8500 CGF.EmitOMPTaskLoopBasedDirective(S);
8501 };
8502 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
8503 CGM.getOpenMPRuntime().emitMaskedRegion(CGF, MaskedOpGen: TaskLoopCodeGen,
8504 Loc: S.getBeginLoc());
8505 };
8506 auto LPCRegion =
8507 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8508 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_masked_taskloop, CodeGen,
8509 CodeGenBoundParameters: emitEmptyBoundParameters);
8510}
8511
8512void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
8513 const OMPParallelMasterTaskLoopSimdDirective &S) {
8514 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8515 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
8516 PrePostActionTy &Action) {
8517 Action.Enter(CGF);
8518 CGF.EmitOMPTaskLoopBasedDirective(S);
8519 };
8520 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
8521 CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: TaskLoopCodeGen,
8522 Loc: S.getBeginLoc());
8523 };
8524 auto LPCRegion =
8525 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8526 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_master_taskloop_simd, CodeGen,
8527 CodeGenBoundParameters: emitEmptyBoundParameters);
8528}
8529
8530void CodeGenFunction::EmitOMPParallelMaskedTaskLoopSimdDirective(
8531 const OMPParallelMaskedTaskLoopSimdDirective &S) {
8532 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8533 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
8534 PrePostActionTy &Action) {
8535 Action.Enter(CGF);
8536 CGF.EmitOMPTaskLoopBasedDirective(S);
8537 };
8538 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
8539 CGM.getOpenMPRuntime().emitMaskedRegion(CGF, MaskedOpGen: TaskLoopCodeGen,
8540 Loc: S.getBeginLoc());
8541 };
8542 auto LPCRegion =
8543 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8544 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_masked_taskloop_simd, CodeGen,
8545 CodeGenBoundParameters: emitEmptyBoundParameters);
8546}
8547
8548// Generate the instructions for '#pragma omp target update' directive.
8549void CodeGenFunction::EmitOMPTargetUpdateDirective(
8550 const OMPTargetUpdateDirective &S) {
8551 // If we don't have target devices, don't bother emitting the data mapping
8552 // code.
8553 if (CGM.getLangOpts().OMPTargetTriples.empty())
8554 return;
8555
8556 // Check if we have any if clause associated with the directive.
8557 const Expr *IfCond = nullptr;
8558 if (const auto *C = S.getSingleClause<OMPIfClause>())
8559 IfCond = C->getCondition();
8560
8561 // Check if we have any device clause associated with the directive.
8562 const Expr *Device = nullptr;
8563 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
8564 Device = C->getDevice();
8565
8566 OMPLexicalScope Scope(*this, S, OMPD_task);
8567 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF&: *this, D: S, IfCond, Device);
8568}
8569
8570void CodeGenFunction::EmitOMPGenericLoopDirective(
8571 const OMPGenericLoopDirective &S) {
8572 // Always expect a bind clause on the loop directive. It it wasn't
8573 // in the source, it should have been added in sema.
8574
8575 OpenMPBindClauseKind BindKind = OMPC_BIND_unknown;
8576 if (const auto *C = S.getSingleClause<OMPBindClause>())
8577 BindKind = C->getBindKind();
8578
8579 switch (BindKind) {
8580 case OMPC_BIND_parallel: // for
8581 return emitOMPForDirective(S, CGF&: *this, CGM, /*HasCancel=*/false);
8582 case OMPC_BIND_teams: // distribute
8583 return emitOMPDistributeDirective(S, CGF&: *this, CGM);
8584 case OMPC_BIND_thread: // simd
8585 return emitOMPSimdDirective(S, CGF&: *this, CGM);
8586 case OMPC_BIND_unknown:
8587 break;
8588 }
8589
8590 // Unimplemented, just inline the underlying statement for now.
8591 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8592 // Emit the loop iteration variable.
8593 const Stmt *CS =
8594 cast<CapturedStmt>(Val: S.getAssociatedStmt())->getCapturedStmt();
8595 const auto *ForS = dyn_cast<ForStmt>(Val: CS);
8596 if (ForS && !isa<DeclStmt>(Val: ForS->getInit())) {
8597 OMPPrivateScope LoopScope(CGF);
8598 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
8599 (void)LoopScope.Privatize();
8600 CGF.EmitStmt(S: CS);
8601 LoopScope.restoreMap();
8602 } else {
8603 CGF.EmitStmt(S: CS);
8604 }
8605 };
8606 OMPLexicalScope Scope(*this, S, OMPD_unknown);
8607 CGM.getOpenMPRuntime().emitInlinedDirective(CGF&: *this, InnermostKind: OMPD_loop, CodeGen);
8608}
8609
8610void CodeGenFunction::EmitOMPParallelGenericLoopDirective(
8611 const OMPLoopDirective &S) {
8612 // Emit combined directive as if its constituent constructs are 'parallel'
8613 // and 'for'.
8614 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8615 Action.Enter(CGF);
8616 emitOMPCopyinClause(CGF, S);
8617 (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
8618 };
8619 {
8620 auto LPCRegion =
8621 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S);
8622 emitCommonOMPParallelDirective(CGF&: *this, S, InnermostKind: OMPD_for, CodeGen,
8623 CodeGenBoundParameters: emitEmptyBoundParameters);
8624 }
8625 // Check for outer lastprivate conditional update.
8626 checkForLastprivateConditionalUpdate(CGF&: *this, S);
8627}
8628
8629void CodeGenFunction::EmitOMPTeamsGenericLoopDirective(
8630 const OMPTeamsGenericLoopDirective &S) {
8631 // To be consistent with current behavior of 'target teams loop', emit
8632 // 'teams loop' as if its constituent constructs are 'teams' and 'distribute'.
8633 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
8634 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
8635 };
8636
8637 // Emit teams region as a standalone region.
8638 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
8639 PrePostActionTy &Action) {
8640 Action.Enter(CGF);
8641 OMPPrivateScope PrivateScope(CGF);
8642 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
8643 (void)PrivateScope.Privatize();
8644 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, InnermostKind: OMPD_distribute,
8645 CodeGen: CodeGenDistribute);
8646 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
8647 };
8648 emitCommonOMPTeamsDirective(CGF&: *this, S, InnermostKind: OMPD_distribute, CodeGen);
8649 emitPostUpdateForReductionClause(CGF&: *this, D: S,
8650 CondGen: [](CodeGenFunction &) { return nullptr; });
8651}
8652
8653#ifndef NDEBUG
8654static void emitTargetTeamsLoopCodegenStatus(CodeGenFunction &CGF,
8655 std::string StatusMsg,
8656 const OMPExecutableDirective &D) {
8657 bool IsDevice = CGF.CGM.getLangOpts().OpenMPIsTargetDevice;
8658 if (IsDevice)
8659 StatusMsg += ": DEVICE";
8660 else
8661 StatusMsg += ": HOST";
8662 SourceLocation L = D.getBeginLoc();
8663 auto &SM = CGF.getContext().getSourceManager();
8664 PresumedLoc PLoc = SM.getPresumedLoc(L);
8665 const char *FileName = PLoc.isValid() ? PLoc.getFilename() : nullptr;
8666 unsigned LineNo =
8667 PLoc.isValid() ? PLoc.getLine() : SM.getExpansionLineNumber(L);
8668 llvm::dbgs() << StatusMsg << ": " << FileName << ": " << LineNo << "\n";
8669}
8670#endif
8671
8672static void emitTargetTeamsGenericLoopRegionAsParallel(
8673 CodeGenFunction &CGF, PrePostActionTy &Action,
8674 const OMPTargetTeamsGenericLoopDirective &S) {
8675 Action.Enter(CGF);
8676 // Emit 'teams loop' as if its constituent constructs are 'distribute,
8677 // 'parallel, and 'for'.
8678 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
8679 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
8680 IncExpr: S.getDistInc());
8681 };
8682
8683 // Emit teams region as a standalone region.
8684 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
8685 PrePostActionTy &Action) {
8686 Action.Enter(CGF);
8687 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
8688 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
8689 (void)PrivateScope.Privatize();
8690 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
8691 CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false);
8692 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
8693 };
8694 DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
8695 emitTargetTeamsLoopCodegenStatus(
8696 CGF, TTL_CODEGEN_TYPE " as parallel for", S));
8697 emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute_parallel_for,
8698 CodeGen: CodeGenTeams);
8699 emitPostUpdateForReductionClause(CGF, D: S,
8700 CondGen: [](CodeGenFunction &) { return nullptr; });
8701}
8702
8703static void emitTargetTeamsGenericLoopRegionAsDistribute(
8704 CodeGenFunction &CGF, PrePostActionTy &Action,
8705 const OMPTargetTeamsGenericLoopDirective &S) {
8706 Action.Enter(CGF);
8707 // Emit 'teams loop' as if its constituent construct is 'distribute'.
8708 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
8709 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
8710 };
8711
8712 // Emit teams region as a standalone region.
8713 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
8714 PrePostActionTy &Action) {
8715 Action.Enter(CGF);
8716 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
8717 CGF.EmitOMPReductionClauseInit(D: S, PrivateScope);
8718 (void)PrivateScope.Privatize();
8719 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
8720 CGF, InnermostKind: OMPD_distribute, CodeGen: CodeGenDistribute, /*HasCancel=*/false);
8721 CGF.EmitOMPReductionClauseFinal(D: S, /*ReductionKind=*/OMPD_teams);
8722 };
8723 DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
8724 emitTargetTeamsLoopCodegenStatus(
8725 CGF, TTL_CODEGEN_TYPE " as distribute", S));
8726 emitCommonOMPTeamsDirective(CGF, S, InnermostKind: OMPD_distribute, CodeGen);
8727 emitPostUpdateForReductionClause(CGF, D: S,
8728 CondGen: [](CodeGenFunction &) { return nullptr; });
8729}
8730
8731void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective(
8732 const OMPTargetTeamsGenericLoopDirective &S) {
8733 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8734 if (S.canBeParallelFor())
8735 emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
8736 else
8737 emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
8738 };
8739 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
8740}
8741
8742void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
8743 CodeGenModule &CGM, StringRef ParentName,
8744 const OMPTargetTeamsGenericLoopDirective &S) {
8745 // Emit SPMD target parallel loop region as a standalone region.
8746 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8747 if (S.canBeParallelFor())
8748 emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
8749 else
8750 emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
8751 };
8752 llvm::Function *Fn;
8753 llvm::Constant *Addr;
8754 // Emit target region as a standalone region.
8755 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
8756 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
8757 assert(Fn && Addr &&
8758 "Target device function emission failed for 'target teams loop'.");
8759}
8760
8761static void emitTargetParallelGenericLoopRegion(
8762 CodeGenFunction &CGF, const OMPTargetParallelGenericLoopDirective &S,
8763 PrePostActionTy &Action) {
8764 Action.Enter(CGF);
8765 // Emit as 'parallel for'.
8766 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8767 Action.Enter(CGF);
8768 CodeGenFunction::OMPCancelStackRAII CancelRegion(
8769 CGF, OMPD_target_parallel_loop, /*hasCancel=*/false);
8770 CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), CodeGenLoopBounds: emitForLoopBounds,
8771 CGDispatchBounds: emitDispatchForLoopBounds);
8772 };
8773 emitCommonOMPParallelDirective(CGF, S, InnermostKind: OMPD_for, CodeGen,
8774 CodeGenBoundParameters: emitEmptyBoundParameters);
8775}
8776
8777void CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
8778 CodeGenModule &CGM, StringRef ParentName,
8779 const OMPTargetParallelGenericLoopDirective &S) {
8780 // Emit target parallel loop region as a standalone region.
8781 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8782 emitTargetParallelGenericLoopRegion(CGF, S, Action);
8783 };
8784 llvm::Function *Fn;
8785 llvm::Constant *Addr;
8786 // Emit target region as a standalone region.
8787 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
8788 D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: Addr, /*IsOffloadEntry=*/true, CodeGen);
8789 assert(Fn && Addr && "Target device function emission failed.");
8790}
8791
8792/// Emit combined directive 'target parallel loop' as if its constituent
8793/// constructs are 'target', 'parallel', and 'for'.
8794void CodeGenFunction::EmitOMPTargetParallelGenericLoopDirective(
8795 const OMPTargetParallelGenericLoopDirective &S) {
8796 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
8797 emitTargetParallelGenericLoopRegion(CGF, S, Action);
8798 };
8799 emitCommonOMPTargetDirective(CGF&: *this, S, CodeGen);
8800}
8801
8802void CodeGenFunction::EmitSimpleOMPExecutableDirective(
8803 const OMPExecutableDirective &D) {
8804 if (const auto *SD = dyn_cast<OMPScanDirective>(Val: &D)) {
8805 EmitOMPScanDirective(S: *SD);
8806 return;
8807 }
8808 if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
8809 return;
8810 auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
8811 OMPPrivateScope GlobalsScope(CGF);
8812 if (isOpenMPTaskingDirective(Kind: D.getDirectiveKind())) {
8813 // Capture global firstprivates to avoid crash.
8814 for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
8815 for (const Expr *Ref : C->varlist()) {
8816 const auto *DRE = cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
8817 if (!DRE)
8818 continue;
8819 const auto *VD = dyn_cast<VarDecl>(Val: DRE->getDecl());
8820 if (!VD || VD->hasLocalStorage())
8821 continue;
8822 if (!CGF.LocalDeclMap.count(Val: VD)) {
8823 LValue GlobLVal = CGF.EmitLValue(E: Ref);
8824 GlobalsScope.addPrivate(LocalVD: VD, Addr: GlobLVal.getAddress());
8825 }
8826 }
8827 }
8828 }
8829 if (isOpenMPSimdDirective(DKind: D.getDirectiveKind())) {
8830 (void)GlobalsScope.Privatize();
8831 ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
8832 emitOMPSimdRegion(CGF, S: cast<OMPLoopDirective>(Val: D), Action);
8833 } else {
8834 if (const auto *LD = dyn_cast<OMPLoopDirective>(Val: &D)) {
8835 for (const Expr *E : LD->counters()) {
8836 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
8837 if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(Val: VD)) {
8838 LValue GlobLVal = CGF.EmitLValue(E);
8839 GlobalsScope.addPrivate(LocalVD: VD, Addr: GlobLVal.getAddress());
8840 }
8841 if (isa<OMPCapturedExprDecl>(Val: VD)) {
8842 // Emit only those that were not explicitly referenced in clauses.
8843 if (!CGF.LocalDeclMap.count(Val: VD))
8844 CGF.EmitVarDecl(D: *VD);
8845 }
8846 }
8847 for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
8848 if (!C->getNumForLoops())
8849 continue;
8850 for (unsigned I = LD->getLoopsNumber(),
8851 E = C->getLoopNumIterations().size();
8852 I < E; ++I) {
8853 if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
8854 Val: cast<DeclRefExpr>(Val: C->getLoopCounter(NumLoop: I))->getDecl())) {
8855 // Emit only those that were not explicitly referenced in clauses.
8856 if (!CGF.LocalDeclMap.count(Val: VD))
8857 CGF.EmitVarDecl(D: *VD);
8858 }
8859 }
8860 }
8861 }
8862 (void)GlobalsScope.Privatize();
8863 CGF.EmitStmt(S: D.getInnermostCapturedStmt()->getCapturedStmt());
8864 }
8865 };
8866 if (D.getDirectiveKind() == OMPD_atomic ||
8867 D.getDirectiveKind() == OMPD_critical ||
8868 D.getDirectiveKind() == OMPD_section ||
8869 D.getDirectiveKind() == OMPD_master ||
8870 D.getDirectiveKind() == OMPD_masked ||
8871 D.getDirectiveKind() == OMPD_unroll ||
8872 D.getDirectiveKind() == OMPD_assume) {
8873 EmitStmt(S: D.getAssociatedStmt());
8874 } else {
8875 auto LPCRegion =
8876 CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S: D);
8877 OMPSimdLexicalScope Scope(*this, D);
8878 CGM.getOpenMPRuntime().emitInlinedDirective(
8879 CGF&: *this,
8880 InnermostKind: isOpenMPSimdDirective(DKind: D.getDirectiveKind()) ? OMPD_simd
8881 : D.getDirectiveKind(),
8882 CodeGen);
8883 }
8884 // Check for outer lastprivate conditional update.
8885 checkForLastprivateConditionalUpdate(CGF&: *this, S: D);
8886}
8887
8888void CodeGenFunction::EmitOMPAssumeDirective(const OMPAssumeDirective &S) {
8889 EmitStmt(S: S.getAssociatedStmt());
8890}
8891